feed-the-machine 1.6.1 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (269) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +170 -170
  3. package/bin/brain.py +1340 -0
  4. package/bin/convert_claude_skills_to_codex.py +490 -0
  5. package/bin/generate-manifest.mjs +463 -463
  6. package/bin/harden_codex_skills.py +141 -0
  7. package/bin/install.mjs +491 -491
  8. package/bin/migrate-eng-buddy-data.py +875 -0
  9. package/bin/playbook_engine/__init__.py +1 -0
  10. package/bin/playbook_engine/conftest.py +8 -0
  11. package/bin/playbook_engine/extractor.py +33 -0
  12. package/bin/playbook_engine/manager.py +102 -0
  13. package/bin/playbook_engine/models.py +84 -0
  14. package/bin/playbook_engine/registry.py +35 -0
  15. package/bin/playbook_engine/test_extractor.py +72 -0
  16. package/bin/playbook_engine/test_integration.py +129 -0
  17. package/bin/playbook_engine/test_manager.py +85 -0
  18. package/bin/playbook_engine/test_models.py +166 -0
  19. package/bin/playbook_engine/test_registry.py +67 -0
  20. package/bin/playbook_engine/test_tracer.py +86 -0
  21. package/bin/playbook_engine/tracer.py +93 -0
  22. package/bin/tasks_db.py +456 -0
  23. package/docs/HOOKS.md +243 -243
  24. package/docs/INBOX.md +233 -233
  25. package/ftm/SKILL.md +125 -122
  26. package/ftm-audit/SKILL.md +623 -623
  27. package/ftm-audit/references/protocols/PROJECT-PATTERNS.md +91 -91
  28. package/ftm-audit/references/protocols/RUNTIME-WIRING.md +66 -66
  29. package/ftm-audit/references/protocols/WIRING-CONTRACTS.md +135 -135
  30. package/ftm-audit/references/strategies/AUTO-FIX-STRATEGIES.md +69 -69
  31. package/ftm-audit/references/templates/REPORT-FORMAT.md +96 -96
  32. package/ftm-audit/scripts/run-knip.sh +23 -23
  33. package/ftm-audit.yml +2 -2
  34. package/ftm-brainstorm/SKILL.md +1003 -498
  35. package/ftm-brainstorm/evals/evals.json +180 -100
  36. package/ftm-brainstorm/evals/promptfoo.yaml +109 -109
  37. package/ftm-brainstorm/references/agent-prompts.md +552 -224
  38. package/ftm-brainstorm/references/plan-template.md +209 -121
  39. package/ftm-brainstorm.yml +2 -2
  40. package/ftm-browse/SKILL.md +454 -454
  41. package/ftm-browse/daemon/browser-manager.ts +206 -206
  42. package/ftm-browse/daemon/bun.lock +30 -30
  43. package/ftm-browse/daemon/cli.ts +347 -347
  44. package/ftm-browse/daemon/commands.ts +410 -410
  45. package/ftm-browse/daemon/main.ts +357 -357
  46. package/ftm-browse/daemon/package.json +17 -17
  47. package/ftm-browse/daemon/server.ts +189 -189
  48. package/ftm-browse/daemon/snapshot.ts +519 -519
  49. package/ftm-browse/daemon/tsconfig.json +22 -22
  50. package/ftm-browse.yml +4 -4
  51. package/ftm-capture/SKILL.md +370 -370
  52. package/ftm-capture.yml +4 -4
  53. package/ftm-codex-gate/SKILL.md +361 -361
  54. package/ftm-codex-gate.yml +2 -2
  55. package/ftm-config/SKILL.md +422 -345
  56. package/ftm-config.default.yml +125 -82
  57. package/ftm-config.yml +44 -2
  58. package/ftm-council/SKILL.md +416 -416
  59. package/ftm-council/references/prompts/CLAUDE-INVESTIGATION.md +60 -60
  60. package/ftm-council/references/prompts/CODEX-INVESTIGATION.md +58 -58
  61. package/ftm-council/references/prompts/GEMINI-INVESTIGATION.md +58 -58
  62. package/ftm-council/references/prompts/REBUTTAL-TEMPLATE.md +57 -57
  63. package/ftm-council/references/protocols/PREREQUISITES.md +47 -47
  64. package/ftm-council/references/protocols/STEP-0-FRAMING.md +46 -46
  65. package/ftm-council.yml +2 -2
  66. package/ftm-dashboard/SKILL.md +163 -163
  67. package/ftm-dashboard.yml +4 -4
  68. package/ftm-debug/SKILL.md +1037 -1037
  69. package/ftm-debug/references/phases/PHASE-0-INTAKE.md +58 -58
  70. package/ftm-debug/references/phases/PHASE-1-TRIAGE.md +46 -46
  71. package/ftm-debug/references/phases/PHASE-2-WAR-ROOM-AGENTS.md +279 -279
  72. package/ftm-debug/references/phases/PHASE-3-TO-6-EXECUTION.md +436 -436
  73. package/ftm-debug/references/protocols/BLACKBOARD.md +86 -86
  74. package/ftm-debug/references/protocols/EDGE-CASES.md +103 -103
  75. package/ftm-debug.yml +2 -2
  76. package/ftm-diagram/SKILL.md +277 -277
  77. package/ftm-diagram.yml +2 -2
  78. package/ftm-executor/SKILL.md +777 -777
  79. package/ftm-executor/references/STYLE-TEMPLATE.md +73 -73
  80. package/ftm-executor/references/phases/PHASE-0-VERIFICATION.md +62 -62
  81. package/ftm-executor/references/phases/PHASE-2-AGENT-ASSEMBLY.md +34 -34
  82. package/ftm-executor/references/phases/PHASE-3-WORKTREES.md +38 -38
  83. package/ftm-executor/references/phases/PHASE-4-5-AUDIT.md +72 -72
  84. package/ftm-executor/references/phases/PHASE-4-DISPATCH.md +66 -66
  85. package/ftm-executor/references/phases/PHASE-5-5-CODEX-GATE.md +73 -73
  86. package/ftm-executor/references/protocols/DOCUMENTATION-BOOTSTRAP.md +36 -36
  87. package/ftm-executor/references/protocols/MODEL-PROFILE.md +59 -59
  88. package/ftm-executor/references/protocols/PROGRESS-TRACKING.md +66 -66
  89. package/ftm-executor/runtime/ftm-runtime.mjs +252 -252
  90. package/ftm-executor/runtime/package.json +8 -8
  91. package/ftm-executor.yml +2 -2
  92. package/ftm-git/SKILL.md +441 -441
  93. package/ftm-git/evals/evals.json +26 -26
  94. package/ftm-git/evals/promptfoo.yaml +75 -75
  95. package/ftm-git/hooks/post-commit-experience.sh +92 -92
  96. package/ftm-git/references/patterns/SECRET-PATTERNS.md +104 -104
  97. package/ftm-git/references/protocols/REMEDIATION.md +139 -139
  98. package/ftm-git/scripts/pre-commit-secrets.sh +110 -110
  99. package/ftm-git.yml +2 -2
  100. package/ftm-inbox/backend/__pycache__/main.cpython-314.pyc +0 -0
  101. package/ftm-inbox/backend/adapters/_retry.py +64 -64
  102. package/ftm-inbox/backend/adapters/base.py +230 -230
  103. package/ftm-inbox/backend/adapters/freshservice.py +104 -104
  104. package/ftm-inbox/backend/adapters/gmail.py +125 -125
  105. package/ftm-inbox/backend/adapters/jira.py +136 -136
  106. package/ftm-inbox/backend/adapters/registry.py +192 -192
  107. package/ftm-inbox/backend/adapters/slack.py +110 -110
  108. package/ftm-inbox/backend/db/connection.py +54 -54
  109. package/ftm-inbox/backend/db/schema.py +78 -78
  110. package/ftm-inbox/backend/executor/__init__.py +7 -7
  111. package/ftm-inbox/backend/executor/engine.py +149 -149
  112. package/ftm-inbox/backend/executor/step_runner.py +98 -98
  113. package/ftm-inbox/backend/main.py +103 -103
  114. package/ftm-inbox/backend/models/__init__.py +1 -1
  115. package/ftm-inbox/backend/models/unified_task.py +36 -36
  116. package/ftm-inbox/backend/planner/__init__.py +6 -6
  117. package/ftm-inbox/backend/planner/__pycache__/__init__.cpython-314.pyc +0 -0
  118. package/ftm-inbox/backend/planner/__pycache__/generator.cpython-314.pyc +0 -0
  119. package/ftm-inbox/backend/planner/__pycache__/schema.cpython-314.pyc +0 -0
  120. package/ftm-inbox/backend/planner/generator.py +127 -127
  121. package/ftm-inbox/backend/planner/schema.py +34 -34
  122. package/ftm-inbox/backend/requirements.txt +5 -5
  123. package/ftm-inbox/backend/routes/__pycache__/plan.cpython-314.pyc +0 -0
  124. package/ftm-inbox/backend/routes/execute.py +186 -186
  125. package/ftm-inbox/backend/routes/health.py +52 -52
  126. package/ftm-inbox/backend/routes/inbox.py +68 -68
  127. package/ftm-inbox/backend/routes/plan.py +271 -271
  128. package/ftm-inbox/bin/launchagent.mjs +91 -91
  129. package/ftm-inbox/bin/setup.mjs +188 -188
  130. package/ftm-inbox/bin/start.sh +10 -10
  131. package/ftm-inbox/bin/status.sh +17 -17
  132. package/ftm-inbox/bin/stop.sh +8 -8
  133. package/ftm-inbox/config.example.yml +55 -55
  134. package/ftm-inbox/package-lock.json +2898 -2898
  135. package/ftm-inbox/package.json +26 -26
  136. package/ftm-inbox/postcss.config.js +6 -6
  137. package/ftm-inbox/src/app.css +199 -199
  138. package/ftm-inbox/src/app.html +18 -18
  139. package/ftm-inbox/src/lib/api.ts +166 -166
  140. package/ftm-inbox/src/lib/components/ExecutionLog.svelte +81 -81
  141. package/ftm-inbox/src/lib/components/InboxFeed.svelte +143 -143
  142. package/ftm-inbox/src/lib/components/PlanStep.svelte +271 -271
  143. package/ftm-inbox/src/lib/components/PlanView.svelte +206 -206
  144. package/ftm-inbox/src/lib/components/StreamPanel.svelte +99 -99
  145. package/ftm-inbox/src/lib/components/TaskCard.svelte +190 -190
  146. package/ftm-inbox/src/lib/components/ui/EmptyState.svelte +63 -63
  147. package/ftm-inbox/src/lib/components/ui/KawaiiCard.svelte +86 -86
  148. package/ftm-inbox/src/lib/components/ui/PillButton.svelte +106 -106
  149. package/ftm-inbox/src/lib/components/ui/StatusBadge.svelte +67 -67
  150. package/ftm-inbox/src/lib/components/ui/StreamDrawer.svelte +149 -149
  151. package/ftm-inbox/src/lib/components/ui/ThemeToggle.svelte +80 -80
  152. package/ftm-inbox/src/lib/theme.ts +47 -47
  153. package/ftm-inbox/src/routes/+layout.svelte +76 -76
  154. package/ftm-inbox/src/routes/+page.svelte +401 -401
  155. package/ftm-inbox/svelte.config.js +12 -12
  156. package/ftm-inbox/tailwind.config.ts +63 -63
  157. package/ftm-inbox/tsconfig.json +13 -13
  158. package/ftm-inbox/vite.config.ts +6 -6
  159. package/ftm-intent/SKILL.md +241 -241
  160. package/ftm-intent.yml +2 -2
  161. package/ftm-manifest.json +3794 -3794
  162. package/ftm-map/SKILL.md +291 -291
  163. package/ftm-map/scripts/db.py +712 -712
  164. package/ftm-map/scripts/index.py +415 -415
  165. package/ftm-map/scripts/parser.py +224 -224
  166. package/ftm-map/scripts/queries/go-tags.scm +20 -20
  167. package/ftm-map/scripts/queries/javascript-tags.scm +35 -35
  168. package/ftm-map/scripts/queries/python-tags.scm +31 -31
  169. package/ftm-map/scripts/queries/ruby-tags.scm +19 -19
  170. package/ftm-map/scripts/queries/rust-tags.scm +37 -37
  171. package/ftm-map/scripts/queries/typescript-tags.scm +41 -41
  172. package/ftm-map/scripts/query.py +301 -301
  173. package/ftm-map/scripts/ranker.py +377 -377
  174. package/ftm-map/scripts/requirements.txt +5 -5
  175. package/ftm-map/scripts/setup-hooks.sh +27 -27
  176. package/ftm-map/scripts/setup.sh +56 -56
  177. package/ftm-map/scripts/test_db.py +364 -364
  178. package/ftm-map/scripts/test_parser.py +174 -174
  179. package/ftm-map/scripts/test_query.py +183 -183
  180. package/ftm-map/scripts/test_ranker.py +199 -199
  181. package/ftm-map/scripts/views.py +591 -591
  182. package/ftm-map.yml +2 -2
  183. package/ftm-mind/SKILL.md +201 -1943
  184. package/ftm-mind/evals/promptfoo.yaml +142 -142
  185. package/ftm-mind/references/blackboard-protocol.md +110 -0
  186. package/ftm-mind/references/blackboard-schema.md +328 -328
  187. package/ftm-mind/references/complexity-guide.md +110 -110
  188. package/ftm-mind/references/complexity-sizing.md +138 -0
  189. package/ftm-mind/references/decide-act-protocol.md +172 -0
  190. package/ftm-mind/references/direct-execution.md +51 -0
  191. package/ftm-mind/references/environment-discovery.md +77 -0
  192. package/ftm-mind/references/event-registry.md +319 -319
  193. package/ftm-mind/references/mcp-inventory.md +300 -296
  194. package/ftm-mind/references/ops-routing.md +47 -0
  195. package/ftm-mind/references/orient-protocol.md +234 -0
  196. package/ftm-mind/references/personality.md +40 -0
  197. package/ftm-mind/references/protocols/COMPLEXITY-SIZING.md +72 -72
  198. package/ftm-mind/references/protocols/MCP-HEURISTICS.md +32 -32
  199. package/ftm-mind/references/protocols/PLAN-APPROVAL.md +80 -80
  200. package/ftm-mind/references/reflexion-protocol.md +249 -249
  201. package/ftm-mind/references/routing/SCENARIOS.md +22 -22
  202. package/ftm-mind/references/routing-scenarios.md +35 -35
  203. package/ftm-mind.yml +2 -2
  204. package/ftm-ops.yml +4 -0
  205. package/ftm-pause/SKILL.md +395 -395
  206. package/ftm-pause/references/protocols/SKILL-RESTORE-PROTOCOLS.md +186 -186
  207. package/ftm-pause/references/protocols/VALIDATION.md +80 -80
  208. package/ftm-pause.yml +2 -2
  209. package/ftm-researcher/SKILL.md +275 -275
  210. package/ftm-researcher/evals/agent-diversity.yaml +17 -17
  211. package/ftm-researcher/evals/synthesis-quality.yaml +12 -12
  212. package/ftm-researcher/evals/trigger-accuracy.yaml +39 -39
  213. package/ftm-researcher/references/adaptive-search.md +116 -116
  214. package/ftm-researcher/references/agent-prompts.md +193 -193
  215. package/ftm-researcher/references/council-integration.md +193 -193
  216. package/ftm-researcher/references/output-format.md +203 -203
  217. package/ftm-researcher/references/synthesis-pipeline.md +165 -165
  218. package/ftm-researcher/scripts/score_credibility.py +234 -234
  219. package/ftm-researcher/scripts/validate_research.py +92 -92
  220. package/ftm-researcher.yml +2 -2
  221. package/ftm-resume/SKILL.md +518 -518
  222. package/ftm-resume/references/protocols/VALIDATION.md +172 -172
  223. package/ftm-resume.yml +2 -2
  224. package/ftm-retro/SKILL.md +380 -380
  225. package/ftm-retro/references/protocols/SCORING-RUBRICS.md +89 -89
  226. package/ftm-retro/references/templates/REPORT-FORMAT.md +109 -109
  227. package/ftm-retro.yml +2 -2
  228. package/ftm-routine/SKILL.md +170 -170
  229. package/ftm-routine.yml +4 -4
  230. package/ftm-state/blackboard/capabilities.json +5 -5
  231. package/ftm-state/blackboard/capabilities.schema.json +27 -27
  232. package/ftm-state/blackboard/context.json +37 -23
  233. package/ftm-state/blackboard/experiences/doom-statusline-fix.json +26 -0
  234. package/ftm-state/blackboard/experiences/hackathon-pages-site.json +26 -0
  235. package/ftm-state/blackboard/experiences/hindsight-sso-kickoff.json +42 -0
  236. package/ftm-state/blackboard/experiences/index.json +58 -9
  237. package/ftm-state/blackboard/experiences/learning-ragnarok-api-access.json +23 -0
  238. package/ftm-state/blackboard/experiences/nordlayer-members-auto-assign.json +26 -0
  239. package/ftm-state/blackboard/experiences/saml2aws-stale-session-fix.json +41 -0
  240. package/ftm-state/blackboard/patterns.json +6 -6
  241. package/ftm-state/schemas/context.schema.json +130 -130
  242. package/ftm-state/schemas/experience-index.schema.json +77 -77
  243. package/ftm-state/schemas/experience.schema.json +78 -78
  244. package/ftm-state/schemas/patterns.schema.json +44 -44
  245. package/ftm-upgrade/SKILL.md +194 -194
  246. package/ftm-upgrade/scripts/check-version.sh +76 -76
  247. package/ftm-upgrade/scripts/upgrade.sh +143 -143
  248. package/ftm-upgrade.yml +2 -2
  249. package/ftm-verify.yml +2 -2
  250. package/ftm.yml +2 -2
  251. package/hooks/ftm-auto-log.sh +137 -0
  252. package/hooks/ftm-blackboard-enforcer.sh +93 -93
  253. package/hooks/ftm-discovery-reminder.sh +90 -90
  254. package/hooks/ftm-drafts-gate.sh +61 -61
  255. package/hooks/ftm-event-logger.mjs +107 -107
  256. package/hooks/ftm-install-hooks.sh +240 -0
  257. package/hooks/ftm-learning-capture.sh +117 -0
  258. package/hooks/ftm-map-autodetect.sh +79 -79
  259. package/hooks/ftm-pending-sync-check.sh +22 -22
  260. package/hooks/ftm-plan-gate.sh +92 -92
  261. package/hooks/ftm-post-commit-trigger.sh +57 -57
  262. package/hooks/ftm-post-compaction.sh +138 -0
  263. package/hooks/ftm-pre-compaction.sh +147 -0
  264. package/hooks/ftm-session-end.sh +52 -0
  265. package/hooks/ftm-session-snapshot.sh +213 -0
  266. package/hooks/settings-template.json +81 -81
  267. package/install.sh +363 -363
  268. package/package.json +84 -84
  269. package/uninstall.sh +25 -25
@@ -1,712 +1,712 @@
1
- """
2
- db.py — SQLite database module for ftm-map.
3
-
4
- Manages a 5-table schema (files, symbols, refs, file_edges, symbol_edges)
5
- plus FTS5 for full-text search over symbols. Provides CRUD operations,
6
- materialized edge rebuilding, and graph traversal queries.
7
-
8
- Schema overview:
9
- files — tracked source files with metadata
10
- symbols — indexed code symbols (functions, classes, methods, etc.)
11
- refs — unresolved references (calls, imports) keyed by symbol name
12
- file_edges — materialized file-level dependency graph
13
- symbol_edges — materialized symbol-level dependency graph
14
- symbols_fts — FTS5 virtual table for BM25-ranked search
15
- """
16
-
17
- import hashlib
18
- import os
19
- import sqlite3
20
- from pathlib import Path
21
- from typing import Optional
22
-
23
- # ---------------------------------------------------------------------------
24
- # Constants
25
- # ---------------------------------------------------------------------------
26
-
27
# Directory (relative to a project root) that holds the map database.
DB_DIR = ".ftm-map"
# Path of the SQLite file, also relative to the project root.
DB_PATH = str(Path(DB_DIR) / "map.db")

# ---------------------------------------------------------------------------
# Schema DDL
# ---------------------------------------------------------------------------

# Every statement uses IF NOT EXISTS, so the script can be re-run against an
# already-initialised database.
_SCHEMA = """
CREATE TABLE IF NOT EXISTS files (
    id INTEGER PRIMARY KEY,
    path TEXT NOT NULL UNIQUE,
    lang TEXT,
    mtime REAL NOT NULL,
    hash TEXT,
    line_count INTEGER
);

CREATE TABLE IF NOT EXISTS symbols (
    id INTEGER PRIMARY KEY,
    file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
    name TEXT NOT NULL,
    qualified_name TEXT,
    kind TEXT NOT NULL,
    line_start INTEGER NOT NULL,
    line_end INTEGER,
    signature TEXT,
    parent_id INTEGER REFERENCES symbols(id) ON DELETE SET NULL
);

CREATE TABLE IF NOT EXISTS refs (
    id INTEGER PRIMARY KEY,
    file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
    symbol_name TEXT NOT NULL,
    line INTEGER NOT NULL,
    kind TEXT DEFAULT 'call'
);

CREATE TABLE IF NOT EXISTS file_edges (
    source_file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
    target_file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
    weight REAL DEFAULT 1.0,
    PRIMARY KEY (source_file_id, target_file_id)
);

CREATE TABLE IF NOT EXISTS symbol_edges (
    source_symbol_id INTEGER NOT NULL REFERENCES symbols(id) ON DELETE CASCADE,
    target_symbol_id INTEGER NOT NULL REFERENCES symbols(id) ON DELETE CASCADE,
    kind TEXT NOT NULL,
    file_id INTEGER REFERENCES files(id),
    line INTEGER,
    PRIMARY KEY (source_symbol_id, target_symbol_id, kind)
);

CREATE INDEX IF NOT EXISTS idx_symbols_file ON symbols(file_id);
CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name);
CREATE INDEX IF NOT EXISTS idx_symbols_parent ON symbols(parent_id);
CREATE INDEX IF NOT EXISTS idx_refs_file ON refs(file_id);
CREATE INDEX IF NOT EXISTS idx_refs_symbol_name ON refs(symbol_name);
CREATE INDEX IF NOT EXISTS idx_file_edges_target ON file_edges(target_file_id);
CREATE INDEX IF NOT EXISTS idx_symbol_edges_target ON symbol_edges(target_symbol_id);

CREATE VIRTUAL TABLE IF NOT EXISTS symbols_fts USING fts5(
    name, qualified_name, signature,
    content=symbols, content_rowid=id,
    tokenize='porter'
);
"""
94
-
95
- # ---------------------------------------------------------------------------
96
- # Connection management
97
- # ---------------------------------------------------------------------------
98
-
99
-
100
def get_connection(project_root: str) -> sqlite3.Connection:
    """Open (creating if necessary) the map database under *project_root*.

    The database directory is created when missing, WAL journaling and
    foreign-key enforcement are switched on, rows are returned as
    ``sqlite3.Row`` objects, and the schema is initialised.

    Args:
        project_root: Directory that contains (or will contain) ``DB_DIR``.

    Returns:
        A ready-to-use connection. The caller is responsible for closing it.

    Raises:
        sqlite3.Error: If configuring the connection or creating the schema
            fails. The half-configured connection is closed before the
            exception propagates, so no handle is leaked.
    """
    db_path = os.path.join(project_root, DB_PATH)
    os.makedirs(os.path.dirname(db_path), exist_ok=True)

    conn = sqlite3.connect(db_path)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("PRAGMA foreign_keys=ON")
        conn.row_factory = sqlite3.Row
        _init_schema(conn)
    except Exception:
        # Do not hand the caller's process an orphaned open database handle.
        conn.close()
        raise
    return conn
116
-
117
-
118
def _init_schema(conn: sqlite3.Connection) -> None:
    """Run the module's schema script on *conn* and commit.

    The script only contains ``IF NOT EXISTS`` statements, so calling this on
    an already-initialised database leaves existing objects in place.
    """
    conn.executescript(_SCHEMA)
    conn.commit()
122
-
123
-
124
- # ---------------------------------------------------------------------------
125
- # File CRUD
126
- # ---------------------------------------------------------------------------
127
-
128
-
129
def add_file(
    conn: sqlite3.Connection,
    path: str,
    lang: str,
    mtime: float,
    hash: Optional[str] = None,
    line_count: Optional[int] = None,
) -> int:
    """Record a new source file in the ``files`` table.

    The statement is not committed here; committing is left to the caller.

    Returns:
        The id assigned to the inserted row.
    """
    values = (path, lang, mtime, hash, line_count)
    inserted = conn.execute(
        """
        INSERT INTO files (path, lang, mtime, hash, line_count)
        VALUES (?, ?, ?, ?, ?)
        """,
        values,
    )
    return inserted.lastrowid
146
-
147
-
148
def get_file_by_path(conn: sqlite3.Connection, path: str) -> Optional[dict]:
    """Look up one ``files`` row by its path.

    The row is converted with ``dict()``, which expects *conn* to use the
    ``sqlite3.Row`` row factory.

    Returns:
        The matching row as a dict, or ``None`` when no file has that path.
    """
    match = conn.execute("SELECT * FROM files WHERE path=?", (path,)).fetchone()
    if not match:
        return None
    return dict(match)
152
-
153
-
154
def remove_file(conn: sqlite3.Connection, path: str) -> None:
    """Remove the file stored under *path* together with its dependent rows.

    Does nothing when the path is unknown. The full-text entries of the
    file's symbols are deleted first, while the symbol rows still exist;
    deleting the ``files`` row afterwards relies on the schema's
    ``ON DELETE CASCADE`` clauses (and on foreign-key enforcement being
    enabled on *conn*) to clear symbols, refs, and edges.
    """
    file_row = get_file_by_path(conn, path)
    if file_row is None:
        return

    file_id = file_row["id"]

    # Materialise the ids before issuing deletes against the FTS table.
    symbol_rows = conn.execute(
        "SELECT id FROM symbols WHERE file_id=?", (file_id,)
    ).fetchall()
    conn.executemany(
        "DELETE FROM symbols_fts WHERE rowid=?",
        [(symbol_row["id"],) for symbol_row in symbol_rows],
    )

    conn.execute("DELETE FROM files WHERE id=?", (file_id,))
176
-
177
-
178
- # ---------------------------------------------------------------------------
179
- # Symbol CRUD
180
- # ---------------------------------------------------------------------------
181
-
182
-
183
def add_symbol(
    conn: sqlite3.Connection,
    file_id: int,
    name: str,
    kind: str,
    line_start: int,
    line_end: Optional[int] = None,
    qualified_name: Optional[str] = None,
    signature: Optional[str] = None,
    parent_id: Optional[int] = None,
) -> int:
    """Store a symbol and mirror its searchable text into ``symbols_fts``.

    The full-text table is written by hand, using the new symbol id as its
    rowid. A missing qualified name or signature is indexed as an empty
    string, while the ``symbols`` row keeps the original value.

    Returns:
        The id assigned to the inserted symbol row.
    """
    symbol_values = (
        file_id,
        name,
        qualified_name,
        kind,
        line_start,
        line_end,
        signature,
        parent_id,
    )
    symbol_id = conn.execute(
        """
        INSERT INTO symbols
            (file_id, name, qualified_name, kind, line_start, line_end, signature, parent_id)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """,
        symbol_values,
    ).lastrowid

    fts_values = (symbol_id, name, qualified_name or "", signature or "")
    conn.execute(
        "INSERT INTO symbols_fts(rowid, name, qualified_name, signature) VALUES (?, ?, ?, ?)",
        fts_values,
    )

    return symbol_id
215
-
216
-
217
def get_symbol_by_id(conn: sqlite3.Connection, symbol_id: int) -> Optional[dict]:
    """Fetch the symbol whose primary key is *symbol_id*.

    Returns:
        The row as a dict, or ``None`` when no such symbol exists.
    """
    match = conn.execute("SELECT * FROM symbols WHERE id=?", (symbol_id,)).fetchone()
    if not match:
        return None
    return dict(match)
221
-
222
-
223
def get_symbol_by_name(conn: sqlite3.Connection, name: str) -> list:
    """Fetch every symbol whose ``name`` equals *name*.

    Several symbols may share a name, so the result is always a list of
    dicts (empty when nothing matches).
    """
    cursor = conn.execute("SELECT * FROM symbols WHERE name=?", (name,))
    return list(map(dict, cursor.fetchall()))
227
-
228
-
229
def get_symbols_by_file(conn: sqlite3.Connection, file_id: int) -> list:
    """List the symbols defined in file *file_id*, ordered by ``line_start``.

    Returns:
        A list of row dicts; empty when the file has no symbols.
    """
    cursor = conn.execute(
        "SELECT * FROM symbols WHERE file_id=? ORDER BY line_start",
        (file_id,),
    )
    return list(map(dict, cursor.fetchall()))
236
-
237
-
238
def remove_symbols_by_file(conn: sqlite3.Connection, file_path: str) -> None:
    """Drop every symbol that belongs to the file stored under *file_path*.

    Does nothing when the path is unknown. The ``files`` row itself is kept.
    Full-text entries are deleted before the symbol rows; edges that point at
    the removed symbols are left to the schema's ``ON DELETE CASCADE``
    clauses (which need foreign-key enforcement enabled on *conn*).
    """
    file_row = get_file_by_path(conn, file_path)
    if file_row is None:
        return

    file_id = file_row["id"]

    # Materialise the ids before issuing deletes against the FTS table.
    symbol_rows = conn.execute(
        "SELECT id FROM symbols WHERE file_id=?", (file_id,)
    ).fetchall()
    conn.executemany(
        "DELETE FROM symbols_fts WHERE rowid=?",
        [(symbol_row["id"],) for symbol_row in symbol_rows],
    )

    conn.execute("DELETE FROM symbols WHERE file_id=?", (file_id,))
259
-
260
-
261
- # ---------------------------------------------------------------------------
262
- # Reference CRUD
263
- # ---------------------------------------------------------------------------
264
-
265
-
266
def add_reference(
    conn: sqlite3.Connection,
    file_id: int,
    symbol_name: str,
    line: int,
    kind: str = "call",
) -> int:
    """Record that file *file_id* mentions *symbol_name* on *line*.

    The reference is stored by name only; it is not resolved to a symbol row
    here.

    Returns:
        The id assigned to the inserted ``refs`` row.
    """
    values = (file_id, symbol_name, line, kind)
    inserted = conn.execute(
        "INSERT INTO refs (file_id, symbol_name, line, kind) VALUES (?, ?, ?, ?)",
        values,
    )
    return inserted.lastrowid
279
-
280
-
281
def get_references_by_file(conn: sqlite3.Connection, file_id: int) -> list:
    """List the references recorded for file *file_id*, ordered by line.

    Returns:
        A list of row dicts; empty when the file has no references.
    """
    cursor = conn.execute(
        "SELECT * FROM refs WHERE file_id=? ORDER BY line",
        (file_id,),
    )
    return list(map(dict, cursor.fetchall()))
288
-
289
-
290
- # ---------------------------------------------------------------------------
291
- # Edge CRUD
292
- # ---------------------------------------------------------------------------
293
-
294
-
295
def add_edge(
    conn: sqlite3.Connection,
    source_id: int,
    target_id: int,
    kind: str,
) -> None:
    """Add a directed ``symbol_edges`` row from *source_id* to *target_id*.

    Uses ``INSERT OR IGNORE``, so re-adding an existing
    (source, target, kind) edge is a no-op. The edge's ``file_id`` and
    ``line`` columns are not set by this function.
    """
    edge = (source_id, target_id, kind)
    conn.execute(
        "INSERT OR IGNORE INTO symbol_edges (source_symbol_id, target_symbol_id, kind) VALUES (?, ?, ?)",
        edge,
    )
306
-
307
-
308
- # ---------------------------------------------------------------------------
309
- # Materialized edge rebuilding
310
- # ---------------------------------------------------------------------------
311
-
312
-
313
def rebuild_file_edges(conn: sqlite3.Connection) -> None:
    """Recompute ``file_edges`` from scratch using ``refs`` and ``symbols``.

    All existing file edges are discarded. A new edge is then written from
    each referencing file to every *other* file that defines a symbol with
    the referenced name. Matching is by bare symbol name only, repeated
    references collapse into one edge, and every edge gets weight 1.0.
    """
    conn.execute("DELETE FROM file_edges")

    insert_edges_sql = """
        INSERT OR IGNORE INTO file_edges (source_file_id, target_file_id, weight)
        SELECT DISTINCT r.file_id, s.file_id, 1.0
        FROM refs r
        JOIN symbols s ON s.name = r.symbol_name
        WHERE r.file_id != s.file_id
        """
    conn.execute(insert_edges_sql)
331
-
332
-
333
def rebuild_symbol_edges(conn: sqlite3.Connection) -> None:
    """Recompute ``symbol_edges`` from scratch using ``refs`` and ``symbols``.

    All existing symbol edges are discarded. For every reference, the target
    is each symbol whose name equals the referenced name, and the source is
    the enclosing symbol in the referencing file: one that starts at or
    before the reference line and either ends at or after it or has no
    recorded end. Among several enclosing candidates the one with the
    greatest ``line_start`` (the innermost) is used. A symbol is never
    linked to itself.
    """
    conn.execute("DELETE FROM symbol_edges")

    # Grouping per (reference, target) and keeping MAX(src.line_start) picks
    # the innermost enclosing symbol; the selected src.id comes from that same
    # row, which relies on SQLite's handling of bare columns next to MAX().
    insert_edges_sql = """
        INSERT OR IGNORE INTO symbol_edges (source_symbol_id, target_symbol_id, kind, file_id, line)
        SELECT src.id, tgt.id, r.kind, r.file_id, r.line
        FROM refs r
        JOIN symbols tgt ON tgt.name = r.symbol_name
        JOIN symbols src ON src.file_id = r.file_id
            AND src.line_start <= r.line
            AND (src.line_end >= r.line OR src.line_end IS NULL)
        WHERE src.id != tgt.id
        GROUP BY r.id, tgt.id
        HAVING src.line_start = MAX(src.line_start)
        """
    conn.execute(insert_edges_sql)
361
-
362
-
363
- # ---------------------------------------------------------------------------
364
- # Graph traversal — recursive CTEs
365
- # ---------------------------------------------------------------------------
366
-
367
-
368
def get_transitive_deps(
    conn: sqlite3.Connection, symbol_id: int, max_depth: int = 10
) -> list:
    """Return the symbols that *symbol_id* depends on, directly or transitively.

    Follows ``symbol_edges`` from source to target with a recursive CTE.
    Direct dependencies have depth 0; recursion only continues from rows
    whose depth is below *max_depth*.

    Cycle prevention: each CTE row carries the ids visited on its own path as
    a comma-delimited string (``,10,1,``), and an edge is not followed when
    its target already appears on that path. Ids are matched together with
    their surrounding commas so that, for example, id ``1`` is not mistaken
    for a visited node just because ``10`` is on the path.

    Args:
        conn: Connection whose rows can be converted with ``dict()``
            (i.e. using the ``sqlite3.Row`` row factory).
        symbol_id: Id of the seed symbol. The seed is not placed on the
            path, so it is itself reported if a cycle leads back to it.
        max_depth: Greatest depth value a returned row may have.

    Returns:
        One dict per reachable symbol with keys ``id``, ``name``, ``kind``,
        ``file_id`` and ``depth`` (the shallowest depth at which the symbol
        was reached), ordered by depth and then id.
    """
    query = """
        WITH RECURSIVE dep_chain(id, name, kind, file_id, depth, path) AS (
            -- Base: direct dependencies of the seed symbol
            SELECT s.id,
                   s.name,
                   s.kind,
                   s.file_id,
                   0,
                   ',' || CAST(s.id AS TEXT) || ','
            FROM symbol_edges e
            JOIN symbols s ON s.id = e.target_symbol_id
            WHERE e.source_symbol_id = ?

            UNION ALL

            -- Recursive: dependencies of already-visited nodes
            SELECT s.id,
                   s.name,
                   s.kind,
                   s.file_id,
                   dc.depth + 1,
                   dc.path || CAST(s.id AS TEXT) || ','
            FROM dep_chain dc
            JOIN symbol_edges e ON e.source_symbol_id = dc.id
            JOIN symbols s ON s.id = e.target_symbol_id
            WHERE dc.depth < ?
              -- cycle guard: whole-id match thanks to the comma delimiters
              AND INSTR(dc.path, ',' || CAST(s.id AS TEXT) || ',') = 0
        )
        -- A symbol reachable along several paths is reported once, at its
        -- shallowest depth.
        SELECT id, name, kind, file_id, MIN(depth) AS depth
        FROM dep_chain
        GROUP BY id
        ORDER BY depth, id
    """
    rows = conn.execute(query, (symbol_id, max_depth)).fetchall()
    return [dict(r) for r in rows]
413
-
414
-
415
def get_reverse_deps(
    conn: sqlite3.Connection, symbol_id: int, max_depth: int = 10
) -> list:
    """Return the symbols that depend on *symbol_id*, directly or transitively.

    Follows ``symbol_edges`` backwards (from target to source) with a
    recursive CTE. Direct dependents have depth 0; recursion only continues
    from rows whose depth is below *max_depth*.

    Cycle prevention: each CTE row carries the ids visited on its own path as
    a comma-delimited string (``,10,1,``), and an edge is not followed when
    its source already appears on that path. Ids are matched together with
    their surrounding commas so that, for example, id ``1`` is not mistaken
    for a visited node just because ``10`` is on the path.

    Args:
        conn: Connection whose rows can be converted with ``dict()``
            (i.e. using the ``sqlite3.Row`` row factory).
        symbol_id: Id of the seed symbol. The seed is not placed on the
            path, so it is itself reported if a cycle leads back to it.
        max_depth: Greatest depth value a returned row may have.

    Returns:
        One dict per dependent symbol with keys ``id``, ``name``, ``kind``,
        ``file_id`` and ``depth`` (the shallowest depth at which the symbol
        was reached), ordered by depth and then id.
    """
    query = """
        WITH RECURSIVE rev_chain(id, name, kind, file_id, depth, path) AS (
            -- Base: direct dependents of the seed symbol
            SELECT s.id,
                   s.name,
                   s.kind,
                   s.file_id,
                   0,
                   ',' || CAST(s.id AS TEXT) || ','
            FROM symbol_edges e
            JOIN symbols s ON s.id = e.source_symbol_id
            WHERE e.target_symbol_id = ?

            UNION ALL

            -- Recursive: dependents of already-visited nodes
            SELECT s.id,
                   s.name,
                   s.kind,
                   s.file_id,
                   rc.depth + 1,
                   rc.path || CAST(s.id AS TEXT) || ','
            FROM rev_chain rc
            JOIN symbol_edges e ON e.target_symbol_id = rc.id
            JOIN symbols s ON s.id = e.source_symbol_id
            WHERE rc.depth < ?
              -- cycle guard: whole-id match thanks to the comma delimiters
              AND INSTR(rc.path, ',' || CAST(s.id AS TEXT) || ',') = 0
        )
        -- A symbol reachable along several paths is reported once, at its
        -- shallowest depth.
        SELECT id, name, kind, file_id, MIN(depth) AS depth
        FROM rev_chain
        GROUP BY id
        ORDER BY depth, id
    """
    rows = conn.execute(query, (symbol_id, max_depth)).fetchall()
    return [dict(r) for r in rows]
457
-
458
-
459
- # ---------------------------------------------------------------------------
460
- # Full-text search
461
- # ---------------------------------------------------------------------------
462
-
463
-
464
def fts_search(conn: sqlite3.Connection, query_text: str, limit: int = 10) -> list:
    """BM25-ranked full-text search over symbol names, qualified names, and signatures.

    Args:
        conn: Open connection whose rows can be passed to dict()
            (e.g. row_factory = sqlite3.Row).
        query_text: FTS5 MATCH expression. It is handed to FTS5 unchanged, so
            FTS5 query syntax applies.
        limit: Maximum number of rows to return.

    Returns:
        Up to *limit* symbol dicts with an additional 'rank' field, best
        match first. Lower rank values indicate better matches (BM25 scores
        are negative in SQLite's fts5 implementation). A blank *query_text*
        yields an empty list.

    Raises:
        sqlite3.OperationalError: If FTS5 rejects a non-blank *query_text*.
    """
    # An empty MATCH expression is a syntax error in FTS5; a blank search
    # simply has no hits.
    if not query_text or not query_text.strip():
        return []

    query = """
        SELECT s.*, fts.rank
        FROM symbols_fts fts
        JOIN symbols s ON s.id = fts.rowid
        WHERE symbols_fts MATCH ?
        ORDER BY fts.rank
        LIMIT ?
    """
    rows = conn.execute(query, (query_text, limit)).fetchall()
    return [dict(r) for r in rows]
481
-
482
-
483
- # ---------------------------------------------------------------------------
484
- # Statistics
485
- # ---------------------------------------------------------------------------
486
-
487
-
488
def get_stats(conn: sqlite3.Connection) -> dict:
    """Count the rows of each core table.

    Returns a dict with the keys file_count, symbol_count, edge_count
    (rows in symbol_edges), reference_count (rows in refs) and
    file_edge_count.
    """
    # (result key, table name) pairs, in the order they are reported.
    # Table names are fixed literals, never caller input.
    counted_tables = (
        ("file_count", "files"),
        ("symbol_count", "symbols"),
        ("edge_count", "symbol_edges"),
        ("reference_count", "refs"),
        ("file_edge_count", "file_edges"),
    )
    return {
        key: conn.execute(f"SELECT COUNT(*) FROM {table}").fetchone()[0]
        for key, table in counted_tables
    }
502
-
503
-
504
- # ---------------------------------------------------------------------------
505
- # Utility helpers
506
- # ---------------------------------------------------------------------------
507
-
508
-
509
def hash_content(content: str) -> str:
    """Hash *content* (encoded as UTF-8) with SHA-256 and return the hex digest.

    Handy for detecting whether a file's text has changed.
    """
    hasher = hashlib.sha256()
    hasher.update(content.encode("utf-8"))
    return hasher.hexdigest()
512
-
513
-
514
- # ---------------------------------------------------------------------------
515
- # Smoke-test entrypoint
516
- # ---------------------------------------------------------------------------
517
-
518
if __name__ == "__main__":
    # Self-contained smoke test: builds a throwaway database in a temporary
    # directory and exercises every public helper of this module once.
    import tempfile

    print("Running db.py smoke tests ...")

    with tempfile.TemporaryDirectory() as tmp:
        conn = get_connection(tmp)
        # Always close the connection before the temporary directory is
        # removed; an open database handle can block deletion of the
        # database files on some platforms (notably Windows).
        try:
            # ---- add files ----
            fid_parser = add_file(conn, "src/parser.py", "python", 1000.0, hash="abc123", line_count=50)
            fid_lexer = add_file(conn, "src/lexer.py", "python", 1001.0, line_count=30)
            fid_index = add_file(conn, "src/index.py", "python", 1002.0)
            conn.commit()

            assert get_file_by_path(conn, "src/parser.py")["id"] == fid_parser, "get_file_by_path failed"
            assert get_file_by_path(conn, "nonexistent.py") is None, "get_file_by_path should return None"
            print("  [PASS] File CRUD")

            # ---- add symbols ----
            # parser.py: parse_file (lines 10-40)
            sid_parse = add_symbol(
                conn, fid_parser, "parse_file", "function", 10, line_end=40,
                qualified_name="parser.parse_file",
                signature="def parse_file(path: str) -> AST",
            )
            # lexer.py: tokenize (lines 1-20)
            sid_tokenize = add_symbol(
                conn, fid_lexer, "tokenize", "function", 1, line_end=20,
                qualified_name="lexer.tokenize",
                signature="def tokenize(src: str) -> list",
            )
            # index.py: build_index (lines 5-60) — calls parse_file and tokenize
            sid_build = add_symbol(
                conn, fid_index, "build_index", "function", 5, line_end=60,
                qualified_name="index.build_index",
                signature="def build_index(root: str) -> None",
            )
            conn.commit()

            assert get_symbol_by_id(conn, sid_parse)["name"] == "parse_file", "get_symbol_by_id failed"
            assert len(get_symbol_by_name(conn, "tokenize")) == 1, "get_symbol_by_name failed"
            assert len(get_symbols_by_file(conn, fid_parser)) == 1, "get_symbols_by_file failed"
            print("  [PASS] Symbol CRUD")

            # ---- add references ----
            # build_index calls parse_file at line 15 and tokenize at line 25
            add_reference(conn, fid_index, "parse_file", 15, kind="call")
            add_reference(conn, fid_index, "tokenize", 25, kind="call")
            # parse_file calls tokenize at line 20
            add_reference(conn, fid_parser, "tokenize", 20, kind="call")
            conn.commit()

            refs_index = get_references_by_file(conn, fid_index)
            assert len(refs_index) == 2, f"expected 2 refs in index.py, got {len(refs_index)}"
            print("  [PASS] Reference CRUD")

            # ---- rebuild file edges ----
            rebuild_file_edges(conn)
            conn.commit()

            fe_count = conn.execute("SELECT COUNT(*) FROM file_edges").fetchone()[0]
            assert fe_count >= 2, f"expected >= 2 file edges, got {fe_count}"

            # index.py -> parser.py edge should exist
            fe = conn.execute(
                "SELECT * FROM file_edges WHERE source_file_id=? AND target_file_id=?",
                (fid_index, fid_parser),
            ).fetchone()
            assert fe is not None, "file edge index->parser missing"
            # index.py -> lexer.py edge should exist
            fe2 = conn.execute(
                "SELECT * FROM file_edges WHERE source_file_id=? AND target_file_id=?",
                (fid_index, fid_lexer),
            ).fetchone()
            assert fe2 is not None, "file edge index->lexer missing"
            print("  [PASS] rebuild_file_edges")

            # ---- rebuild symbol edges ----
            rebuild_symbol_edges(conn)
            conn.commit()

            se_count = conn.execute("SELECT COUNT(*) FROM symbol_edges").fetchone()[0]
            assert se_count >= 2, f"expected >= 2 symbol edges, got {se_count}"

            # build_index -> parse_file edge should exist
            se = conn.execute(
                "SELECT * FROM symbol_edges WHERE source_symbol_id=? AND target_symbol_id=?",
                (sid_build, sid_parse),
            ).fetchone()
            assert se is not None, "symbol edge build_index->parse_file missing"

            # build_index -> tokenize edge should exist
            se2 = conn.execute(
                "SELECT * FROM symbol_edges WHERE source_symbol_id=? AND target_symbol_id=?",
                (sid_build, sid_tokenize),
            ).fetchone()
            assert se2 is not None, "symbol edge build_index->tokenize missing"

            # parse_file -> tokenize edge should exist
            se3 = conn.execute(
                "SELECT * FROM symbol_edges WHERE source_symbol_id=? AND target_symbol_id=?",
                (sid_parse, sid_tokenize),
            ).fetchone()
            assert se3 is not None, "symbol edge parse_file->tokenize missing"
            print("  [PASS] rebuild_symbol_edges")

            # ---- transitive deps via symbol_edges ----
            deps = get_transitive_deps(conn, sid_build)
            dep_ids = {d["id"] for d in deps}
            assert sid_parse in dep_ids, f"transitive deps missing parse_file: {dep_ids}"
            assert sid_tokenize in dep_ids, f"transitive deps missing tokenize: {dep_ids}"
            print("  [PASS] get_transitive_deps")

            # ---- reverse deps via symbol_edges ----
            rdeps = get_reverse_deps(conn, sid_tokenize)
            rdep_ids = {d["id"] for d in rdeps}
            assert sid_parse in rdep_ids, f"reverse deps missing parse_file: {rdep_ids}"
            assert sid_build in rdep_ids, f"reverse deps missing build_index: {rdep_ids}"
            print("  [PASS] get_reverse_deps")

            # ---- FTS search ----
            results = fts_search(conn, "parse")
            assert any(r["name"] == "parse_file" for r in results), "FTS search for 'parse' failed"

            results_sig = fts_search(conn, "tokenize")
            assert any(r["name"] == "tokenize" for r in results_sig), "FTS search for 'tokenize' failed"

            results_qn = fts_search(conn, "index")
            assert any(r["name"] == "build_index" for r in results_qn), "FTS qualified_name search failed"
            print("  [PASS] FTS search")

            # ---- stats ----
            stats = get_stats(conn)
            assert stats["file_count"] == 3, f"expected 3 files, got {stats['file_count']}"
            assert stats["symbol_count"] == 3, f"expected 3 symbols, got {stats['symbol_count']}"
            assert stats["reference_count"] == 3, f"expected 3 refs, got {stats['reference_count']}"
            assert stats["edge_count"] >= 2, f"expected >= 2 symbol edges, got {stats['edge_count']}"
            assert stats["file_edge_count"] >= 2, f"expected >= 2 file edges, got {stats['file_edge_count']}"
            print("  [PASS] get_stats")

            # ---- add_edge (manual symbol edge) ----
            add_edge(conn, sid_parse, sid_build, "test_edge")
            conn.commit()
            manual_edge = conn.execute(
                "SELECT * FROM symbol_edges WHERE source_symbol_id=? AND target_symbol_id=? AND kind=?",
                (sid_parse, sid_build, "test_edge"),
            ).fetchone()
            assert manual_edge is not None, "add_edge failed"
            # duplicate should be ignored
            add_edge(conn, sid_parse, sid_build, "test_edge")
            conn.commit()
            print("  [PASS] add_edge (manual)")

            # ---- CASCADE deletes ----
            # Remove lexer.py file -> tokenize symbol and its edges should cascade
            sym_count_before = conn.execute("SELECT COUNT(*) FROM symbols").fetchone()[0]

            remove_file(conn, "src/lexer.py")
            conn.commit()

            assert get_file_by_path(conn, "src/lexer.py") is None, "file not removed"
            assert get_symbol_by_id(conn, sid_tokenize) is None, "symbol not cascaded on file delete"

            sym_count_after = conn.execute("SELECT COUNT(*) FROM symbols").fetchone()[0]
            assert sym_count_after == sym_count_before - 1, f"expected {sym_count_before - 1} symbols, got {sym_count_after}"

            # Symbol edges involving tokenize should be gone
            edges_to_tokenize = conn.execute(
                "SELECT COUNT(*) FROM symbol_edges WHERE target_symbol_id=? OR source_symbol_id=?",
                (sid_tokenize, sid_tokenize),
            ).fetchone()[0]
            assert edges_to_tokenize == 0, f"expected 0 edges involving deleted symbol, got {edges_to_tokenize}"
            print("  [PASS] CASCADE deletes (remove_file)")

            # ---- remove_symbols_by_file (without removing file) ----
            remove_symbols_by_file(conn, "src/parser.py")
            conn.commit()
            assert get_symbol_by_id(conn, sid_parse) is None, "remove_symbols_by_file failed"
            # File itself should still exist
            assert get_file_by_path(conn, "src/parser.py") is not None, "file should still exist after remove_symbols_by_file"
            print("  [PASS] remove_symbols_by_file")

            # ---- final stats ----
            final_stats = get_stats(conn)
            print(f"\n  Final stats: {final_stats}")

            # ---- hash_content utility ----
            h = hash_content("hello world")
            assert len(h) == 64, "hash_content should return 64-char hex string"
            assert h == hash_content("hello world"), "hash_content should be deterministic"
            print("  [PASS] hash_content")
        finally:
            conn.close()

    print("\nAll smoke tests passed.")
1
+ """
2
+ db.py — SQLite database module for ftm-map.
3
+
4
+ Manages a 5-table schema (files, symbols, refs, file_edges, symbol_edges)
5
+ plus FTS5 for full-text search over symbols. Provides CRUD operations,
6
+ materialized edge rebuilding, and graph traversal queries.
7
+
8
+ Schema overview:
9
+ files — tracked source files with metadata
10
+ symbols — indexed code symbols (functions, classes, methods, etc.)
11
+ refs — unresolved references (calls, imports) keyed by symbol name
12
+ file_edges — materialized file-level dependency graph
13
+ symbol_edges — materialized symbol-level dependency graph
14
+ symbols_fts — FTS5 virtual table for BM25-ranked search
15
+ """
16
+
17
+ import hashlib
18
+ import os
19
+ import sqlite3
20
+ from pathlib import Path
21
+ from typing import Optional
22
+
23
+ # ---------------------------------------------------------------------------
24
+ # Constants
25
+ # ---------------------------------------------------------------------------
26
+
27
# Directory (relative to the project root) that holds the map database.
DB_DIR = ".ftm-map"
# Database file location, also relative to the project root.
DB_PATH = str(Path(DB_DIR) / "map.db")
29
+
30
+ # ---------------------------------------------------------------------------
31
+ # Schema DDL
32
+ # ---------------------------------------------------------------------------
33
+
34
# DDL script executed by _init_schema(). Every statement uses IF NOT EXISTS,
# so running it against an already-initialised database is harmless.
#
# Foreign keys: rows owned by a file (symbols, refs, file_edges) and edges
# owned by a symbol (symbol_edges) are declared ON DELETE CASCADE;
# symbols.parent_id is ON DELETE SET NULL.
# NOTE(review): symbol_edges.file_id is the only foreign key without an
# ON DELETE clause — confirm that is intentional.
#
# symbols_fts is declared with content=symbols, so rows are added to and
# removed from it explicitly by the helpers in this module.
_SCHEMA = """
CREATE TABLE IF NOT EXISTS files (
    id INTEGER PRIMARY KEY,
    path TEXT NOT NULL UNIQUE,
    lang TEXT,
    mtime REAL NOT NULL,
    hash TEXT,
    line_count INTEGER
);

CREATE TABLE IF NOT EXISTS symbols (
    id INTEGER PRIMARY KEY,
    file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
    name TEXT NOT NULL,
    qualified_name TEXT,
    kind TEXT NOT NULL,
    line_start INTEGER NOT NULL,
    line_end INTEGER,
    signature TEXT,
    parent_id INTEGER REFERENCES symbols(id) ON DELETE SET NULL
);

CREATE TABLE IF NOT EXISTS refs (
    id INTEGER PRIMARY KEY,
    file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
    symbol_name TEXT NOT NULL,
    line INTEGER NOT NULL,
    kind TEXT DEFAULT 'call'
);

CREATE TABLE IF NOT EXISTS file_edges (
    source_file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
    target_file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
    weight REAL DEFAULT 1.0,
    PRIMARY KEY (source_file_id, target_file_id)
);

CREATE TABLE IF NOT EXISTS symbol_edges (
    source_symbol_id INTEGER NOT NULL REFERENCES symbols(id) ON DELETE CASCADE,
    target_symbol_id INTEGER NOT NULL REFERENCES symbols(id) ON DELETE CASCADE,
    kind TEXT NOT NULL,
    file_id INTEGER REFERENCES files(id),
    line INTEGER,
    PRIMARY KEY (source_symbol_id, target_symbol_id, kind)
);

CREATE INDEX IF NOT EXISTS idx_symbols_file ON symbols(file_id);
CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name);
CREATE INDEX IF NOT EXISTS idx_symbols_parent ON symbols(parent_id);
CREATE INDEX IF NOT EXISTS idx_refs_file ON refs(file_id);
CREATE INDEX IF NOT EXISTS idx_refs_symbol_name ON refs(symbol_name);
CREATE INDEX IF NOT EXISTS idx_file_edges_target ON file_edges(target_file_id);
CREATE INDEX IF NOT EXISTS idx_symbol_edges_target ON symbol_edges(target_symbol_id);

CREATE VIRTUAL TABLE IF NOT EXISTS symbols_fts USING fts5(
    name, qualified_name, signature,
    content=symbols, content_rowid=id,
    tokenize='porter'
);
"""
94
+
95
+ # ---------------------------------------------------------------------------
96
+ # Connection management
97
+ # ---------------------------------------------------------------------------
98
+
99
+
100
def get_connection(project_root: str) -> sqlite3.Connection:
    """Return a connection to the project's map database.

    Creates .ftm-map/ and initialises the schema if they do not exist yet.
    WAL journal mode and foreign-key enforcement are switched on, and rows
    are returned as sqlite3.Row.

    Args:
        project_root: Directory under which the database directory lives.

    Returns:
        An open, schema-initialised connection. The caller owns it and is
        responsible for closing it.

    Raises:
        OSError: If the database directory cannot be created.
        sqlite3.Error: If the database cannot be opened or set up. The
            connection is closed before the error propagates.
    """
    db_path = os.path.join(project_root, DB_PATH)
    os.makedirs(os.path.dirname(db_path), exist_ok=True)

    conn = sqlite3.connect(db_path)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("PRAGMA foreign_keys=ON")
        conn.row_factory = sqlite3.Row

        _init_schema(conn)
    except Exception:
        # Do not leak a half-configured connection (and its file handle)
        # when a PRAGMA or the schema script fails.
        conn.close()
        raise
    return conn
116
+
117
+
118
def _init_schema(conn: sqlite3.Connection) -> None:
    """Run the module's DDL script on *conn* and commit.

    Creates the tables, indexes and the FTS5 virtual table when they are
    not present yet.
    """
    ddl_cursor = conn.cursor()
    ddl_cursor.executescript(_SCHEMA)
    conn.commit()
122
+
123
+
124
+ # ---------------------------------------------------------------------------
125
+ # File CRUD
126
+ # ---------------------------------------------------------------------------
127
+
128
+
129
def add_file(
    conn: sqlite3.Connection,
    path: str,
    lang: str,
    mtime: float,
    hash: Optional[str] = None,
    line_count: Optional[int] = None,
) -> int:
    """Record a source file in the files table and return its new id.

    The insert is not committed here; the caller controls the transaction.
    """
    insert_sql = """
        INSERT INTO files (path, lang, mtime, hash, line_count)
        VALUES (?, ?, ?, ?, ?)
        """
    values = (path, lang, mtime, hash, line_count)
    return conn.execute(insert_sql, values).lastrowid
146
+
147
+
148
def get_file_by_path(conn: sqlite3.Connection, path: str) -> Optional[dict]:
    """Look up the files row whose path equals *path*.

    Returns the row as a dict, or None when no such file is recorded.
    """
    match = conn.execute("SELECT * FROM files WHERE path=?", (path,)).fetchone()
    if match is None:
        return None
    return dict(match)
152
+
153
+
154
def remove_file(conn: sqlite3.Connection, path: str) -> None:
    """Delete the file at *path* together with everything that hangs off it.

    The FTS5 rows of the file's symbols are deleted first, while the symbol
    rows still exist, because the content= table is not cleaned up by
    cascaded deletes. Removing the files row then cascades to symbols,
    refs, file_edges and symbol_edges. Unknown paths are ignored.
    """
    record = get_file_by_path(conn, path)
    if record is None:
        return

    target_id = record["id"]

    # Collect the ids up front, then drop the matching FTS entries.
    fts_params = [
        (symbol_row["id"],)
        for symbol_row in conn.execute(
            "SELECT id FROM symbols WHERE file_id=?", (target_id,)
        )
    ]
    conn.executemany("DELETE FROM symbols_fts WHERE rowid=?", fts_params)

    # CASCADE handles symbols, refs, file_edges, symbol_edges
    conn.execute("DELETE FROM files WHERE id=?", (target_id,))
176
+
177
+
178
+ # ---------------------------------------------------------------------------
179
+ # Symbol CRUD
180
+ # ---------------------------------------------------------------------------
181
+
182
+
183
def add_symbol(
    conn: sqlite3.Connection,
    file_id: int,
    name: str,
    kind: str,
    line_start: int,
    line_end: Optional[int] = None,
    qualified_name: Optional[str] = None,
    signature: Optional[str] = None,
    parent_id: Optional[int] = None,
) -> int:
    """Store a symbol and mirror its searchable text into the FTS5 index.

    Returns the id of the newly inserted symbols row. Nothing is committed
    here; the caller controls the transaction.
    """
    symbol_values = (
        file_id, name, qualified_name, kind, line_start, line_end, signature, parent_id,
    )
    new_id = conn.execute(
        """
        INSERT INTO symbols
            (file_id, name, qualified_name, kind, line_start, line_end, signature, parent_id)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """,
        symbol_values,
    ).lastrowid

    # The FTS5 content= table is not populated automatically; insert the
    # matching row by hand. Missing text fields are indexed as "".
    fts_values = (new_id, name, qualified_name or "", signature or "")
    conn.execute(
        "INSERT INTO symbols_fts(rowid, name, qualified_name, signature) VALUES (?, ?, ?, ?)",
        fts_values,
    )

    return new_id
215
+
216
+
217
def get_symbol_by_id(conn: sqlite3.Connection, symbol_id: int) -> Optional[dict]:
    """Fetch the symbols row with primary key *symbol_id*.

    Returns the row as a dict, or None when there is no such symbol.
    """
    found = conn.execute("SELECT * FROM symbols WHERE id=?", (symbol_id,)).fetchone()
    if found is None:
        return None
    return dict(found)
221
+
222
+
223
def get_symbol_by_name(conn: sqlite3.Connection, name: str) -> list:
    """List every symbol whose name equals *name*.

    Names are not unique, so zero, one or many dicts may come back.
    """
    cursor = conn.execute("SELECT * FROM symbols WHERE name=?", (name,))
    return list(map(dict, cursor))
227
+
228
+
229
def get_symbols_by_file(conn: sqlite3.Connection, file_id: int) -> list:
    """List the symbols defined in file *file_id*, ordered by line_start."""
    sql = "SELECT * FROM symbols WHERE file_id=? ORDER BY line_start"
    return list(map(dict, conn.execute(sql, (file_id,))))
236
+
237
+
238
def remove_symbols_by_file(conn: sqlite3.Connection, file_path: str) -> None:
    """Drop every symbol of the file at *file_path*, keeping the file row.

    FTS entries are removed first, then the symbols rows (CASCADE takes
    care of symbol_edges). Unknown paths are ignored.
    """
    record = get_file_by_path(conn, file_path)
    if record is None:
        return

    owner_id = record["id"]

    # Clean up FTS entries while the symbol rows still exist.
    fts_params = [
        (symbol_row["id"],)
        for symbol_row in conn.execute(
            "SELECT id FROM symbols WHERE file_id=?", (owner_id,)
        )
    ]
    conn.executemany("DELETE FROM symbols_fts WHERE rowid=?", fts_params)

    conn.execute("DELETE FROM symbols WHERE file_id=?", (owner_id,))
259
+
260
+
261
+ # ---------------------------------------------------------------------------
262
+ # Reference CRUD
263
+ # ---------------------------------------------------------------------------
264
+
265
+
266
def add_reference(
    conn: sqlite3.Connection,
    file_id: int,
    symbol_name: str,
    line: int,
    kind: str = "call",
) -> int:
    """Record that *symbol_name* is referenced at *line* of file *file_id*.

    Returns the id of the new refs row. The insert is not committed here.
    """
    values = (file_id, symbol_name, line, kind)
    inserted = conn.execute(
        "INSERT INTO refs (file_id, symbol_name, line, kind) VALUES (?, ?, ?, ?)",
        values,
    )
    return inserted.lastrowid
279
+
280
+
281
def get_references_by_file(conn: sqlite3.Connection, file_id: int) -> list:
    """List the refs rows recorded for file *file_id*, ordered by line."""
    sql = "SELECT * FROM refs WHERE file_id=? ORDER BY line"
    return list(map(dict, conn.execute(sql, (file_id,))))
288
+
289
+
290
+ # ---------------------------------------------------------------------------
291
+ # Edge CRUD
292
+ # ---------------------------------------------------------------------------
293
+
294
+
295
def add_edge(
    conn: sqlite3.Connection,
    source_id: int,
    target_id: int,
    kind: str,
) -> None:
    """Add a directed symbol edge from *source_id* to *target_id*.

    Uses INSERT OR IGNORE, so inserting an edge that already exists is a
    no-op rather than an error.
    """
    edge = (source_id, target_id, kind)
    conn.execute(
        "INSERT OR IGNORE INTO symbol_edges (source_symbol_id, target_symbol_id, kind) VALUES (?, ?, ?)",
        edge,
    )
306
+
307
+
308
+ # ---------------------------------------------------------------------------
309
+ # Materialized edge rebuilding
310
+ # ---------------------------------------------------------------------------
311
+
312
+
313
def rebuild_file_edges(conn: sqlite3.Connection) -> None:
    """Recompute file_edges from scratch using refs and symbols.

    Each ref is matched by name against symbols; whenever the defining file
    differs from the referencing file, an edge referencing-file ->
    defining-file is stored. Repeated pairs collapse into a single edge
    with weight 1.0. Nothing is committed here.
    """
    wipe_sql = "DELETE FROM file_edges"
    populate_sql = """
        INSERT OR IGNORE INTO file_edges (source_file_id, target_file_id, weight)
        SELECT DISTINCT r.file_id, s.file_id, 1.0
        FROM refs r
        JOIN symbols s ON s.name = r.symbol_name
        WHERE r.file_id != s.file_id
        """
    for statement in (wipe_sql, populate_sql):
        conn.execute(statement)
331
+
332
+
333
def rebuild_symbol_edges(conn: sqlite3.Connection) -> None:
    """Rebuild the symbol_edges table from refs and symbols.

    For each ref, finds the target symbol (by name match) and the nearest
    enclosing definition in the referencing file (the symbol whose line range
    contains the ref line). Creates a symbol_edge from the enclosing symbol
    to the target symbol.

    All existing symbol_edges rows are deleted first, including any that
    were inserted manually. Nothing is committed here.
    """
    conn.execute("DELETE FROM symbol_edges")

    # Find matching ref -> target symbol, with nearest enclosing source symbol.
    # The enclosing symbol is the one in the same file as the ref whose
    # line_start <= ref.line and (line_end >= ref.line OR line_end IS NULL),
    # ordered by line_start DESC to get the nearest (innermost) enclosure.
    #
    # NOTE(review): picking the innermost enclosure relies on SQLite's
    # handling of bare columns in an aggregate query that uses MAX(): src.id
    # and src.line_start are expected to come from the row holding the
    # maximum line_start in each (ref, target) group. Confirm against the
    # SQLite documentation before porting this query to another engine.
    #
    # The WHERE clause drops candidate sources equal to the target before
    # grouping, and refs with no enclosing symbol in their file produce no
    # edge (inner join on src).
    conn.execute(
        """
        INSERT OR IGNORE INTO symbol_edges (source_symbol_id, target_symbol_id, kind, file_id, line)
        SELECT src.id, tgt.id, r.kind, r.file_id, r.line
        FROM refs r
        JOIN symbols tgt ON tgt.name = r.symbol_name
        JOIN symbols src ON src.file_id = r.file_id
            AND src.line_start <= r.line
            AND (src.line_end >= r.line OR src.line_end IS NULL)
        WHERE src.id != tgt.id
        GROUP BY r.id, tgt.id
        HAVING src.line_start = MAX(src.line_start)
        """
    )
361
+
362
+
363
+ # ---------------------------------------------------------------------------
364
+ # Graph traversal — recursive CTEs
365
+ # ---------------------------------------------------------------------------
366
+
367
+
368
def get_transitive_deps(
    conn: sqlite3.Connection, symbol_id: int, max_depth: int = 10
) -> list:
    """Return all symbols this symbol transitively depends on (forward closure).

    Walks symbol_edges forwards: first the direct targets of *symbol_id*
    (depth 0), then the targets of those, and so on.

    Cycle prevention: every partial walk carries the ids it has visited as a
    delimiter-wrapped string (",10,42,"). A candidate is skipped only when
    ",<id>," occurs in that string, so id 1 is not mistaken for a visit to
    id 10 or 21. The seed is not part of the visited string, so a seed that
    sits on a cycle is reported as one of its own dependencies.

    Args:
        conn: Open connection whose rows can be passed to dict()
            (e.g. row_factory = sqlite3.Row).
        symbol_id: Id of the seed symbol.
        max_depth: Largest depth value that may be produced.

    Returns:
        One dict per reachable symbol with keys id, name, kind, file_id and
        depth (the smallest depth at which the symbol was reached), ordered
        by depth (shallowest first) and then id.
    """
    query = """
        WITH RECURSIVE dep_chain(id, name, kind, file_id, depth, path) AS (
            -- Base: direct dependencies of the seed symbol
            SELECT s.id,
                   s.name,
                   s.kind,
                   s.file_id,
                   0,
                   ',' || CAST(s.id AS TEXT) || ','
            FROM symbol_edges e
            JOIN symbols s ON s.id = e.target_symbol_id
            WHERE e.source_symbol_id = ?

            UNION ALL

            -- Recursive: dependencies of already-visited nodes
            SELECT s.id,
                   s.name,
                   s.kind,
                   s.file_id,
                   dc.depth + 1,
                   dc.path || CAST(s.id AS TEXT) || ','
            FROM dep_chain dc
            JOIN symbol_edges e ON e.source_symbol_id = dc.id
            JOIN symbols s ON s.id = e.target_symbol_id
            WHERE dc.depth < ?
              -- cycle guard: match whole ids only, never digit substrings
              AND INSTR(dc.path, ',' || CAST(s.id AS TEXT) || ',') = 0
        )
        -- One row per symbol, keeping its shallowest depth.
        SELECT id, name, kind, file_id, MIN(depth) AS depth
        FROM dep_chain
        GROUP BY id
        ORDER BY depth, id
    """
    rows = conn.execute(query, (symbol_id, max_depth)).fetchall()
    return [dict(r) for r in rows]
413
+
414
+
415
def get_reverse_deps(
    conn: sqlite3.Connection, symbol_id: int, max_depth: int = 10
) -> list:
    """Return all symbols that transitively depend on this symbol (blast radius).

    Walks symbol_edges backwards: first the direct callers/importers of
    *symbol_id* (depth 0), then the callers of those, and so on.

    Cycle prevention: every partial walk carries the ids it has visited as a
    delimiter-wrapped string (",10,42,"). A candidate is skipped only when
    ",<id>," occurs in that string, so id 1 is not mistaken for a visit to
    id 10 or 21. The seed is not part of the visited string, so a seed that
    sits on a cycle is reported as one of its own dependents.

    Args:
        conn: Open connection whose rows can be passed to dict()
            (e.g. row_factory = sqlite3.Row).
        symbol_id: Id of the seed symbol.
        max_depth: Largest depth value that may be produced.

    Returns:
        One dict per reachable symbol with keys id, name, kind, file_id and
        depth (the smallest depth at which the symbol was reached), ordered
        by depth and then id.
    """
    query = """
        WITH RECURSIVE rev_chain(id, name, kind, file_id, depth, path) AS (
            -- Base: direct dependents of the seed symbol
            SELECT s.id,
                   s.name,
                   s.kind,
                   s.file_id,
                   0,
                   ',' || CAST(s.id AS TEXT) || ','
            FROM symbol_edges e
            JOIN symbols s ON s.id = e.source_symbol_id
            WHERE e.target_symbol_id = ?

            UNION ALL

            -- Recursive: dependents of already-visited nodes
            SELECT s.id,
                   s.name,
                   s.kind,
                   s.file_id,
                   rc.depth + 1,
                   rc.path || CAST(s.id AS TEXT) || ','
            FROM rev_chain rc
            JOIN symbol_edges e ON e.target_symbol_id = rc.id
            JOIN symbols s ON s.id = e.source_symbol_id
            WHERE rc.depth < ?
              -- cycle guard: match whole ids only, never digit substrings
              AND INSTR(rc.path, ',' || CAST(s.id AS TEXT) || ',') = 0
        )
        -- One row per symbol, keeping its shallowest depth.
        SELECT id, name, kind, file_id, MIN(depth) AS depth
        FROM rev_chain
        GROUP BY id
        ORDER BY depth, id
    """
    rows = conn.execute(query, (symbol_id, max_depth)).fetchall()
    return [dict(r) for r in rows]
457
+
458
+
459
+ # ---------------------------------------------------------------------------
460
+ # Full-text search
461
+ # ---------------------------------------------------------------------------
462
+
463
+
464
def fts_search(conn: sqlite3.Connection, query_text: str, limit: int = 10) -> list:
    """BM25-ranked full-text search over symbol names, qualified names, and signatures.

    Args:
        conn: Open connection whose rows can be passed to dict()
            (e.g. row_factory = sqlite3.Row).
        query_text: FTS5 MATCH expression. It is handed to FTS5 unchanged, so
            FTS5 query syntax applies.
        limit: Maximum number of rows to return.

    Returns:
        Up to *limit* symbol dicts with an additional 'rank' field, best
        match first. Lower rank values indicate better matches (BM25 scores
        are negative in SQLite's fts5 implementation). A blank *query_text*
        yields an empty list.

    Raises:
        sqlite3.OperationalError: If FTS5 rejects a non-blank *query_text*.
    """
    # An empty MATCH expression is a syntax error in FTS5; a blank search
    # simply has no hits.
    if not query_text or not query_text.strip():
        return []

    query = """
        SELECT s.*, fts.rank
        FROM symbols_fts fts
        JOIN symbols s ON s.id = fts.rowid
        WHERE symbols_fts MATCH ?
        ORDER BY fts.rank
        LIMIT ?
    """
    rows = conn.execute(query, (query_text, limit)).fetchall()
    return [dict(r) for r in rows]
481
+
482
+
483
+ # ---------------------------------------------------------------------------
484
+ # Statistics
485
+ # ---------------------------------------------------------------------------
486
+
487
+
488
def get_stats(conn: sqlite3.Connection) -> dict:
    """Count the rows of each core table.

    Returns a dict with the keys file_count, symbol_count, edge_count
    (rows in symbol_edges), reference_count (rows in refs) and
    file_edge_count.
    """
    # (result key, table name) pairs, in the order they are reported.
    # Table names are fixed literals, never caller input.
    counted_tables = (
        ("file_count", "files"),
        ("symbol_count", "symbols"),
        ("edge_count", "symbol_edges"),
        ("reference_count", "refs"),
        ("file_edge_count", "file_edges"),
    )
    return {
        key: conn.execute(f"SELECT COUNT(*) FROM {table}").fetchone()[0]
        for key, table in counted_tables
    }
502
+
503
+
504
+ # ---------------------------------------------------------------------------
505
+ # Utility helpers
506
+ # ---------------------------------------------------------------------------
507
+
508
+
509
def hash_content(content: str) -> str:
    """Hash *content* (encoded as UTF-8) with SHA-256 and return the hex digest.

    Handy for detecting whether a file's text has changed.
    """
    hasher = hashlib.sha256()
    hasher.update(content.encode("utf-8"))
    return hasher.hexdigest()
512
+
513
+
514
+ # ---------------------------------------------------------------------------
515
+ # Smoke-test entrypoint
516
+ # ---------------------------------------------------------------------------
517
+
518
+ if __name__ == "__main__":
519
+ import tempfile
520
+
521
+ print("Running db.py smoke tests ...")
522
+
523
+ with tempfile.TemporaryDirectory() as tmp:
524
+ conn = get_connection(tmp)
525
+
526
+ # ---- add files ----
527
+ fid_parser = add_file(conn, "src/parser.py", "python", 1000.0, hash="abc123", line_count=50)
528
+ fid_lexer = add_file(conn, "src/lexer.py", "python", 1001.0, line_count=30)
529
+ fid_index = add_file(conn, "src/index.py", "python", 1002.0)
530
+ conn.commit()
531
+
532
+ assert get_file_by_path(conn, "src/parser.py")["id"] == fid_parser, "get_file_by_path failed"
533
+ assert get_file_by_path(conn, "nonexistent.py") is None, "get_file_by_path should return None"
534
+ print(" [PASS] File CRUD")
535
+
536
+ # ---- add symbols ----
537
+ # parser.py: parse_file (lines 10-40)
538
+ sid_parse = add_symbol(
539
+ conn, fid_parser, "parse_file", "function", 10, line_end=40,
540
+ qualified_name="parser.parse_file",
541
+ signature="def parse_file(path: str) -> AST",
542
+ )
543
+ # lexer.py: tokenize (lines 1-20)
544
+ sid_tokenize = add_symbol(
545
+ conn, fid_lexer, "tokenize", "function", 1, line_end=20,
546
+ qualified_name="lexer.tokenize",
547
+ signature="def tokenize(src: str) -> list",
548
+ )
549
+ # index.py: build_index (lines 5-60) — calls parse_file and tokenize
550
+ sid_build = add_symbol(
551
+ conn, fid_index, "build_index", "function", 5, line_end=60,
552
+ qualified_name="index.build_index",
553
+ signature="def build_index(root: str) -> None",
554
+ )
555
+ conn.commit()
556
+
557
+ assert get_symbol_by_id(conn, sid_parse)["name"] == "parse_file", "get_symbol_by_id failed"
558
+ assert len(get_symbol_by_name(conn, "tokenize")) == 1, "get_symbol_by_name failed"
559
+ assert len(get_symbols_by_file(conn, fid_parser)) == 1, "get_symbols_by_file failed"
560
+ print(" [PASS] Symbol CRUD")
561
+
562
+ # ---- add references ----
563
+ # build_index calls parse_file at line 15 and tokenize at line 25
564
+ ref1 = add_reference(conn, fid_index, "parse_file", 15, kind="call")
565
+ ref2 = add_reference(conn, fid_index, "tokenize", 25, kind="call")
566
+ # parse_file calls tokenize at line 20
567
+ ref3 = add_reference(conn, fid_parser, "tokenize", 20, kind="call")
568
+ conn.commit()
569
+
570
+ refs_index = get_references_by_file(conn, fid_index)
571
+ assert len(refs_index) == 2, f"expected 2 refs in index.py, got {len(refs_index)}"
572
+ print(" [PASS] Reference CRUD")
573
+
574
+ # ---- rebuild file edges ----
575
+ rebuild_file_edges(conn)
576
+ conn.commit()
577
+
578
+ fe_count = conn.execute("SELECT COUNT(*) FROM file_edges").fetchone()[0]
579
+ assert fe_count >= 2, f"expected >= 2 file edges, got {fe_count}"
580
+
581
+ # index.py -> parser.py edge should exist
582
+ fe = conn.execute(
583
+ "SELECT * FROM file_edges WHERE source_file_id=? AND target_file_id=?",
584
+ (fid_index, fid_parser),
585
+ ).fetchone()
586
+ assert fe is not None, "file edge index->parser missing"
587
+ # index.py -> lexer.py edge should exist
588
+ fe2 = conn.execute(
589
+ "SELECT * FROM file_edges WHERE source_file_id=? AND target_file_id=?",
590
+ (fid_index, fid_lexer),
591
+ ).fetchone()
592
+ assert fe2 is not None, "file edge index->lexer missing"
593
+ print(" [PASS] rebuild_file_edges")
594
+
595
+ # ---- rebuild symbol edges ----
596
+ rebuild_symbol_edges(conn)
597
+ conn.commit()
598
+
599
+ se_count = conn.execute("SELECT COUNT(*) FROM symbol_edges").fetchone()[0]
600
+ assert se_count >= 2, f"expected >= 2 symbol edges, got {se_count}"
601
+
602
+ # build_index -> parse_file edge should exist
603
+ se = conn.execute(
604
+ "SELECT * FROM symbol_edges WHERE source_symbol_id=? AND target_symbol_id=?",
605
+ (sid_build, sid_parse),
606
+ ).fetchone()
607
+ assert se is not None, "symbol edge build_index->parse_file missing"
608
+
609
+ # build_index -> tokenize edge should exist
610
+ se2 = conn.execute(
611
+ "SELECT * FROM symbol_edges WHERE source_symbol_id=? AND target_symbol_id=?",
612
+ (sid_build, sid_tokenize),
613
+ ).fetchone()
614
+ assert se2 is not None, "symbol edge build_index->tokenize missing"
615
+
616
+ # parse_file -> tokenize edge should exist
617
+ se3 = conn.execute(
618
+ "SELECT * FROM symbol_edges WHERE source_symbol_id=? AND target_symbol_id=?",
619
+ (sid_parse, sid_tokenize),
620
+ ).fetchone()
621
+ assert se3 is not None, "symbol edge parse_file->tokenize missing"
622
+ print(" [PASS] rebuild_symbol_edges")
623
+
624
+ # ---- transitive deps via symbol_edges ----
625
+ deps = get_transitive_deps(conn, sid_build)
626
+ dep_ids = {d["id"] for d in deps}
627
+ assert sid_parse in dep_ids, f"transitive deps missing parse_file: {dep_ids}"
628
+ assert sid_tokenize in dep_ids, f"transitive deps missing tokenize: {dep_ids}"
629
+ print(" [PASS] get_transitive_deps")
630
+
631
+ # ---- reverse deps via symbol_edges ----
632
+ rdeps = get_reverse_deps(conn, sid_tokenize)
633
+ rdep_ids = {d["id"] for d in rdeps}
634
+ assert sid_parse in rdep_ids, f"reverse deps missing parse_file: {rdep_ids}"
635
+ assert sid_build in rdep_ids, f"reverse deps missing build_index: {rdep_ids}"
636
+ print(" [PASS] get_reverse_deps")
637
+
638
+ # ---- FTS search ----
639
+ results = fts_search(conn, "parse")
640
+ assert any(r["name"] == "parse_file" for r in results), "FTS search for 'parse' failed"
641
+
642
+ results_sig = fts_search(conn, "tokenize")
643
+ assert any(r["name"] == "tokenize" for r in results_sig), "FTS search for 'tokenize' failed"
644
+
645
+ results_qn = fts_search(conn, "index")
646
+ assert any(r["name"] == "build_index" for r in results_qn), "FTS qualified_name search failed"
647
+ print(" [PASS] FTS search")
648
+
649
+ # ---- stats ----
650
+ stats = get_stats(conn)
651
+ assert stats["file_count"] == 3, f"expected 3 files, got {stats['file_count']}"
652
+ assert stats["symbol_count"] == 3, f"expected 3 symbols, got {stats['symbol_count']}"
653
+ assert stats["reference_count"] == 3, f"expected 3 refs, got {stats['reference_count']}"
654
+ assert stats["edge_count"] >= 2, f"expected >= 2 symbol edges, got {stats['edge_count']}"
655
+ assert stats["file_edge_count"] >= 2, f"expected >= 2 file edges, got {stats['file_edge_count']}"
656
+ print(" [PASS] get_stats")
657
+
658
+ # ---- add_edge (manual symbol edge) ----
659
+ add_edge(conn, sid_parse, sid_build, "test_edge")
660
+ conn.commit()
661
+ manual_edge = conn.execute(
662
+ "SELECT * FROM symbol_edges WHERE source_symbol_id=? AND target_symbol_id=? AND kind=?",
663
+ (sid_parse, sid_build, "test_edge"),
664
+ ).fetchone()
665
+ assert manual_edge is not None, "add_edge failed"
666
+ # duplicate should be ignored
667
+ add_edge(conn, sid_parse, sid_build, "test_edge")
668
+ conn.commit()
669
+ print(" [PASS] add_edge (manual)")
670
+
671
+ # ---- CASCADE deletes ----
672
+ # Remove lexer.py file -> tokenize symbol, refs to tokenize, and edges should cascade
673
+ sym_count_before = conn.execute("SELECT COUNT(*) FROM symbols").fetchone()[0]
674
+ ref_count_before = conn.execute("SELECT COUNT(*) FROM refs").fetchone()[0]
675
+
676
+ remove_file(conn, "src/lexer.py")
677
+ conn.commit()
678
+
679
+ assert get_file_by_path(conn, "src/lexer.py") is None, "file not removed"
680
+ assert get_symbol_by_id(conn, sid_tokenize) is None, "symbol not cascaded on file delete"
681
+
682
+ sym_count_after = conn.execute("SELECT COUNT(*) FROM symbols").fetchone()[0]
683
+ assert sym_count_after == sym_count_before - 1, f"expected {sym_count_before - 1} symbols, got {sym_count_after}"
684
+
685
+ # Refs in lexer.py should be gone (there were none, but verify no error)
686
+ # Symbol edges involving tokenize should be gone
687
+ edges_to_tokenize = conn.execute(
688
+ "SELECT COUNT(*) FROM symbol_edges WHERE target_symbol_id=? OR source_symbol_id=?",
689
+ (sid_tokenize, sid_tokenize),
690
+ ).fetchone()[0]
691
+ assert edges_to_tokenize == 0, f"expected 0 edges involving deleted symbol, got {edges_to_tokenize}"
692
+ print(" [PASS] CASCADE deletes (remove_file)")
693
+
694
+ # ---- remove_symbols_by_file (without removing file) ----
695
+ remove_symbols_by_file(conn, "src/parser.py")
696
+ conn.commit()
697
+ assert get_symbol_by_id(conn, sid_parse) is None, "remove_symbols_by_file failed"
698
+ # File itself should still exist
699
+ assert get_file_by_path(conn, "src/parser.py") is not None, "file should still exist after remove_symbols_by_file"
700
+ print(" [PASS] remove_symbols_by_file")
701
+
702
+ # ---- final stats ----
703
+ final_stats = get_stats(conn)
704
+ print(f"\n Final stats: {final_stats}")
705
+
706
+ # ---- hash_content utility ----
707
+ h = hash_content("hello world")
708
+ assert len(h) == 64, "hash_content should return 64-char hex string"
709
+ assert h == hash_content("hello world"), "hash_content should be deterministic"
710
+ print(" [PASS] hash_content")
711
+
712
+ print("\nAll smoke tests passed.")