feed-the-machine 1.3.1 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. package/README.md +82 -180
  2. package/ftm-git/SKILL.md +0 -1
  3. package/ftm-map/SKILL.md +46 -14
  4. package/ftm-map/scripts/db.py +439 -118
  5. package/ftm-map/scripts/index.py +128 -54
  6. package/ftm-map/scripts/parser.py +89 -320
  7. package/ftm-map/scripts/queries/go-tags.scm +20 -0
  8. package/ftm-map/scripts/queries/javascript-tags.scm +19 -7
  9. package/ftm-map/scripts/queries/python-tags.scm +22 -8
  10. package/ftm-map/scripts/queries/ruby-tags.scm +19 -0
  11. package/ftm-map/scripts/queries/rust-tags.scm +37 -0
  12. package/ftm-map/scripts/queries/typescript-tags.scm +20 -8
  13. package/ftm-map/scripts/query.py +176 -24
  14. package/ftm-map/scripts/ranker.py +377 -0
  15. package/ftm-map/scripts/requirements.txt +3 -0
  16. package/ftm-map/scripts/setup.sh +11 -0
  17. package/ftm-map/scripts/test_db.py +355 -115
  18. package/ftm-map/scripts/test_parser.py +169 -101
  19. package/ftm-map/scripts/test_query.py +178 -61
  20. package/ftm-map/scripts/test_ranker.py +199 -0
  21. package/ftm-map/scripts/views.py +107 -61
  22. package/ftm-mind/references/event-registry.md +0 -10
  23. package/hooks/ftm-blackboard-enforcer.sh +1 -4
  24. package/package.json +1 -1
  25. package/ftm-inbox/backend/__pycache__/main.cpython-314.pyc +0 -0
  26. package/ftm-inbox/backend/planner/__pycache__/__init__.cpython-314.pyc +0 -0
  27. package/ftm-inbox/backend/planner/__pycache__/generator.cpython-314.pyc +0 -0
  28. package/ftm-inbox/backend/planner/__pycache__/schema.cpython-314.pyc +0 -0
  29. package/ftm-inbox/backend/routes/__pycache__/plan.cpython-314.pyc +0 -0
  30. package/ftm-map/scripts/tests/fixtures/__init__.py +0 -0
  31. package/ftm-map/scripts/tests/fixtures/sample_project/api.ts +0 -16
  32. package/ftm-map/scripts/tests/fixtures/sample_project/auth.py +0 -15
  33. package/ftm-map/scripts/tests/fixtures/sample_project/utils.js +0 -16
@@ -1,14 +1,17 @@
1
1
  """
2
2
  db.py — SQLite database module for ftm-map.
3
3
 
4
- Manages the symbols/edges/FTS5 schema and provides CRUD operations for the
5
- code graph. This is a library module — import it from index.py, query.py,
6
- and views.py.
4
+ Manages a 5-table schema (files, symbols, refs, file_edges, symbol_edges)
5
+ plus FTS5 for full-text search over symbols. Provides CRUD operations,
6
+ materialized edge rebuilding, and graph traversal queries.
7
7
 
8
8
  Schema overview:
9
+ files — tracked source files with metadata
9
10
  symbols — indexed code symbols (functions, classes, methods, etc.)
10
- edges — directed dependency relationships between symbols
11
- symbols_fts — FTS5 virtual table for full-text search (BM25-ranked)
11
+ refs — unresolved references (calls, imports) keyed by symbol name
12
+ file_edges — materialized file-level dependency graph
13
+ symbol_edges — materialized symbol-level dependency graph
14
+ symbols_fts — FTS5 virtual table for BM25-ranked search
12
15
  """
13
16
 
14
17
  import hashlib
@@ -29,32 +32,63 @@ DB_PATH = os.path.join(DB_DIR, "map.db")
29
32
  # ---------------------------------------------------------------------------
30
33
 
31
34
  _SCHEMA = """
35
+ CREATE TABLE IF NOT EXISTS files (
36
+ id INTEGER PRIMARY KEY,
37
+ path TEXT NOT NULL UNIQUE,
38
+ lang TEXT,
39
+ mtime REAL NOT NULL,
40
+ hash TEXT,
41
+ line_count INTEGER
42
+ );
43
+
32
44
  CREATE TABLE IF NOT EXISTS symbols (
33
- id INTEGER PRIMARY KEY,
34
- name TEXT NOT NULL,
35
- kind TEXT NOT NULL, -- 'function','class','method','variable','import','module'
36
- file_path TEXT NOT NULL,
37
- start_line INTEGER,
38
- end_line INTEGER,
39
- signature TEXT,
40
- doc_comment TEXT,
41
- content_hash TEXT -- hash of symbol body for change detection
45
+ id INTEGER PRIMARY KEY,
46
+ file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
47
+ name TEXT NOT NULL,
48
+ qualified_name TEXT,
49
+ kind TEXT NOT NULL,
50
+ line_start INTEGER NOT NULL,
51
+ line_end INTEGER,
52
+ signature TEXT,
53
+ parent_id INTEGER REFERENCES symbols(id) ON DELETE SET NULL
42
54
  );
43
55
 
44
- CREATE TABLE IF NOT EXISTS edges (
45
- source_id INTEGER NOT NULL REFERENCES symbols(id) ON DELETE CASCADE,
46
- target_id INTEGER NOT NULL REFERENCES symbols(id) ON DELETE CASCADE,
47
- kind TEXT NOT NULL, -- 'calls','imports','extends','implements','uses'
48
- PRIMARY KEY (source_id, target_id, kind)
56
+ CREATE TABLE IF NOT EXISTS refs (
57
+ id INTEGER PRIMARY KEY,
58
+ file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
59
+ symbol_name TEXT NOT NULL,
60
+ line INTEGER NOT NULL,
61
+ kind TEXT DEFAULT 'call'
49
62
  );
50
63
 
51
- CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target_id);
52
- CREATE INDEX IF NOT EXISTS idx_symbols_file ON symbols(file_path);
53
- CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name);
64
+ CREATE TABLE IF NOT EXISTS file_edges (
65
+ source_file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
66
+ target_file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
67
+ weight REAL DEFAULT 1.0,
68
+ PRIMARY KEY (source_file_id, target_file_id)
69
+ );
70
+
71
+ CREATE TABLE IF NOT EXISTS symbol_edges (
72
+ source_symbol_id INTEGER NOT NULL REFERENCES symbols(id) ON DELETE CASCADE,
73
+ target_symbol_id INTEGER NOT NULL REFERENCES symbols(id) ON DELETE CASCADE,
74
+ kind TEXT NOT NULL,
75
+ file_id INTEGER REFERENCES files(id),
76
+ line INTEGER,
77
+ PRIMARY KEY (source_symbol_id, target_symbol_id, kind)
78
+ );
79
+
80
+ CREATE INDEX IF NOT EXISTS idx_symbols_file ON symbols(file_id);
81
+ CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name);
82
+ CREATE INDEX IF NOT EXISTS idx_symbols_parent ON symbols(parent_id);
83
+ CREATE INDEX IF NOT EXISTS idx_refs_file ON refs(file_id);
84
+ CREATE INDEX IF NOT EXISTS idx_refs_symbol_name ON refs(symbol_name);
85
+ CREATE INDEX IF NOT EXISTS idx_file_edges_target ON file_edges(target_file_id);
86
+ CREATE INDEX IF NOT EXISTS idx_symbol_edges_target ON symbol_edges(target_symbol_id);
54
87
 
55
88
  CREATE VIRTUAL TABLE IF NOT EXISTS symbols_fts USING fts5(
56
- name, signature, doc_comment, file_path,
57
- content=symbols, content_rowid=id
89
+ name, qualified_name, signature,
90
+ content=symbols, content_rowid=id,
91
+ tokenize='porter'
58
92
  );
59
93
  """
60
94
 
@@ -87,6 +121,60 @@ def _init_schema(conn: sqlite3.Connection) -> None:
87
121
  conn.commit()
88
122
 
89
123
 
124
+ # ---------------------------------------------------------------------------
125
+ # File CRUD
126
+ # ---------------------------------------------------------------------------
127
+
128
+
129
+ def add_file(
130
+ conn: sqlite3.Connection,
131
+ path: str,
132
+ lang: str,
133
+ mtime: float,
134
+ hash: Optional[str] = None,
135
+ line_count: Optional[int] = None,
136
+ ) -> int:
137
+ """Insert a file row. Returns the new file id."""
138
+ cursor = conn.execute(
139
+ """
140
+ INSERT INTO files (path, lang, mtime, hash, line_count)
141
+ VALUES (?, ?, ?, ?, ?)
142
+ """,
143
+ (path, lang, mtime, hash, line_count),
144
+ )
145
+ return cursor.lastrowid
146
+
147
+
148
+ def get_file_by_path(conn: sqlite3.Connection, path: str) -> Optional[dict]:
149
+ """Return a file row as a dict, or None if not found."""
150
+ row = conn.execute("SELECT * FROM files WHERE path=?", (path,)).fetchone()
151
+ return dict(row) if row else None
152
+
153
+
154
+ def remove_file(conn: sqlite3.Connection, path: str) -> None:
155
+ """Delete a file and cascade to symbols, refs, and edges.
156
+
157
+ FTS5 rows are removed explicitly before the symbol rows because the
158
+ content= table does not handle cascaded deletes automatically.
159
+ """
160
+ file_row = get_file_by_path(conn, path)
161
+ if file_row is None:
162
+ return
163
+
164
+ file_id = file_row["id"]
165
+
166
+ # Clean up FTS entries for symbols in this file
167
+ sym_ids = [
168
+ row["id"]
169
+ for row in conn.execute("SELECT id FROM symbols WHERE file_id=?", (file_id,))
170
+ ]
171
+ for sid in sym_ids:
172
+ conn.execute("DELETE FROM symbols_fts WHERE rowid=?", (sid,))
173
+
174
+ # CASCADE handles symbols, refs, file_edges, symbol_edges
175
+ conn.execute("DELETE FROM files WHERE id=?", (file_id,))
176
+
177
+
90
178
  # ---------------------------------------------------------------------------
91
179
  # Symbol CRUD
92
180
  # ---------------------------------------------------------------------------
@@ -94,14 +182,14 @@ def _init_schema(conn: sqlite3.Connection) -> None:
94
182
 
95
183
  def add_symbol(
96
184
  conn: sqlite3.Connection,
185
+ file_id: int,
97
186
  name: str,
98
187
  kind: str,
99
- file_path: str,
100
- start_line: Optional[int],
101
- end_line: Optional[int],
188
+ line_start: int,
189
+ line_end: Optional[int] = None,
190
+ qualified_name: Optional[str] = None,
102
191
  signature: Optional[str] = None,
103
- doc_comment: Optional[str] = None,
104
- content_hash: Optional[str] = None,
192
+ parent_id: Optional[int] = None,
105
193
  ) -> int:
106
194
  """Insert a symbol row and keep the FTS5 index in sync.
107
195
 
@@ -110,48 +198,92 @@ def add_symbol(
110
198
  cursor = conn.execute(
111
199
  """
112
200
  INSERT INTO symbols
113
- (name, kind, file_path, start_line, end_line, signature, doc_comment, content_hash)
114
- VALUES (?,?,?,?,?,?,?,?)
201
+ (file_id, name, qualified_name, kind, line_start, line_end, signature, parent_id)
202
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?)
115
203
  """,
116
- (name, kind, file_path, start_line, end_line, signature, doc_comment, content_hash),
204
+ (file_id, name, qualified_name, kind, line_start, line_end, signature, parent_id),
117
205
  )
118
206
  symbol_id = cursor.lastrowid
119
207
 
120
208
  # FTS5 content= tables require manual insert so BM25 ranking stays accurate.
121
209
  conn.execute(
122
- "INSERT INTO symbols_fts(rowid, name, signature, doc_comment, file_path) VALUES (?,?,?,?,?)",
123
- (symbol_id, name, signature or "", doc_comment or "", file_path),
210
+ "INSERT INTO symbols_fts(rowid, name, qualified_name, signature) VALUES (?, ?, ?, ?)",
211
+ (symbol_id, name, qualified_name or "", signature or ""),
124
212
  )
125
213
 
126
214
  return symbol_id
127
215
 
128
216
 
217
+ def get_symbol_by_id(conn: sqlite3.Connection, symbol_id: int) -> Optional[dict]:
218
+ """Return a symbol row as a dict, or None if not found."""
219
+ row = conn.execute("SELECT * FROM symbols WHERE id=?", (symbol_id,)).fetchone()
220
+ return dict(row) if row else None
221
+
222
+
223
+ def get_symbol_by_name(conn: sqlite3.Connection, name: str) -> list:
224
+ """Return all symbols matching *name* (name is not guaranteed unique)."""
225
+ rows = conn.execute("SELECT * FROM symbols WHERE name=?", (name,)).fetchall()
226
+ return [dict(r) for r in rows]
227
+
228
+
229
+ def get_symbols_by_file(conn: sqlite3.Connection, file_id: int) -> list:
230
+ """Return all symbols belonging to a given file."""
231
+ rows = conn.execute(
232
+ "SELECT * FROM symbols WHERE file_id=? ORDER BY line_start",
233
+ (file_id,),
234
+ ).fetchall()
235
+ return [dict(r) for r in rows]
236
+
237
+
129
238
  def remove_symbols_by_file(conn: sqlite3.Connection, file_path: str) -> None:
130
- """Delete all symbols (and their edges) for a given file.
239
+ """Delete all symbols for a given file path.
131
240
 
132
- FTS5 rows are removed explicitly before the symbol rows because the
133
- content= table does not handle cascaded deletes automatically.
134
- ON DELETE CASCADE handles edge cleanup via the symbols foreign key.
241
+ Finds the file_id from the path, cleans up FTS entries, then deletes
242
+ the symbols (CASCADE handles symbol_edges).
135
243
  """
136
- ids = [
244
+ file_row = get_file_by_path(conn, file_path)
245
+ if file_row is None:
246
+ return
247
+
248
+ file_id = file_row["id"]
249
+
250
+ # Clean up FTS entries
251
+ sym_ids = [
137
252
  row["id"]
138
- for row in conn.execute("SELECT id FROM symbols WHERE file_path=?", (file_path,))
253
+ for row in conn.execute("SELECT id FROM symbols WHERE file_id=?", (file_id,))
139
254
  ]
140
- for sid in ids:
255
+ for sid in sym_ids:
141
256
  conn.execute("DELETE FROM symbols_fts WHERE rowid=?", (sid,))
142
257
 
143
- conn.execute("DELETE FROM symbols WHERE file_path=?", (file_path,))
258
+ conn.execute("DELETE FROM symbols WHERE file_id=?", (file_id,))
144
259
 
145
260
 
146
- def get_symbol_by_id(conn: sqlite3.Connection, symbol_id: int) -> Optional[dict]:
147
- """Return a symbol row as a dict, or None if not found."""
148
- row = conn.execute("SELECT * FROM symbols WHERE id=?", (symbol_id,)).fetchone()
149
- return dict(row) if row else None
261
+ # ---------------------------------------------------------------------------
262
+ # Reference CRUD
263
+ # ---------------------------------------------------------------------------
150
264
 
151
265
 
152
- def get_symbol_by_name(conn: sqlite3.Connection, name: str) -> list:
153
- """Return all symbols matching *name* (name is not guaranteed unique)."""
154
- rows = conn.execute("SELECT * FROM symbols WHERE name=?", (name,)).fetchall()
266
+ def add_reference(
267
+ conn: sqlite3.Connection,
268
+ file_id: int,
269
+ symbol_name: str,
270
+ line: int,
271
+ kind: str = "call",
272
+ ) -> int:
273
+ """Insert a reference row. Returns the new ref id."""
274
+ cursor = conn.execute(
275
+ "INSERT INTO refs (file_id, symbol_name, line, kind) VALUES (?, ?, ?, ?)",
276
+ (file_id, symbol_name, line, kind),
277
+ )
278
+ return cursor.lastrowid
279
+
280
+
281
+ def get_references_by_file(conn: sqlite3.Connection, file_id: int) -> list:
282
+ """Return all references in a given file."""
283
+ rows = conn.execute(
284
+ "SELECT * FROM refs WHERE file_id=? ORDER BY line",
285
+ (file_id,),
286
+ ).fetchall()
155
287
  return [dict(r) for r in rows]
156
288
 
157
289
 
@@ -160,14 +292,74 @@ def get_symbol_by_name(conn: sqlite3.Connection, name: str) -> list:
160
292
  # ---------------------------------------------------------------------------
161
293
 
162
294
 
163
- def add_edge(conn: sqlite3.Connection, source_id: int, target_id: int, kind: str) -> None:
164
- """Insert a directed edge. Silently ignored if the edge already exists."""
295
+ def add_edge(
296
+ conn: sqlite3.Connection,
297
+ source_id: int,
298
+ target_id: int,
299
+ kind: str,
300
+ ) -> None:
301
+ """Insert a directed symbol edge. Silently ignored if the edge already exists."""
165
302
  conn.execute(
166
- "INSERT OR IGNORE INTO edges (source_id, target_id, kind) VALUES (?,?,?)",
303
+ "INSERT OR IGNORE INTO symbol_edges (source_symbol_id, target_symbol_id, kind) VALUES (?, ?, ?)",
167
304
  (source_id, target_id, kind),
168
305
  )
169
306
 
170
307
 
308
+ # ---------------------------------------------------------------------------
309
+ # Materialized edge rebuilding
310
+ # ---------------------------------------------------------------------------
311
+
312
+
313
+ def rebuild_file_edges(conn: sqlite3.Connection) -> None:
314
+ """Rebuild the file_edges table from refs and symbols.
315
+
316
+ For each ref in refs, finds which file defines a symbol with that name,
317
+ then creates a file_edge from the referencing file to the defining file.
318
+ Duplicate edges are collapsed; weight defaults to 1.0.
319
+ """
320
+ conn.execute("DELETE FROM file_edges")
321
+
322
+ conn.execute(
323
+ """
324
+ INSERT OR IGNORE INTO file_edges (source_file_id, target_file_id, weight)
325
+ SELECT DISTINCT r.file_id, s.file_id, 1.0
326
+ FROM refs r
327
+ JOIN symbols s ON s.name = r.symbol_name
328
+ WHERE r.file_id != s.file_id
329
+ """
330
+ )
331
+
332
+
333
+ def rebuild_symbol_edges(conn: sqlite3.Connection) -> None:
334
+ """Rebuild the symbol_edges table from refs and symbols.
335
+
336
+ For each ref, finds the target symbol (by name match) and the nearest
337
+ enclosing definition in the referencing file (the symbol whose line range
338
+ contains the ref line). Creates a symbol_edge from the enclosing symbol
339
+ to the target symbol.
340
+ """
341
+ conn.execute("DELETE FROM symbol_edges")
342
+
343
+ # Find matching ref -> target symbol, with nearest enclosing source symbol.
344
+ # The enclosing symbol is the one in the same file as the ref whose
345
+ # line_start <= ref.line and (line_end >= ref.line OR line_end IS NULL),
346
+ # ordered by line_start DESC to get the nearest (innermost) enclosure.
347
+ conn.execute(
348
+ """
349
+ INSERT OR IGNORE INTO symbol_edges (source_symbol_id, target_symbol_id, kind, file_id, line)
350
+ SELECT src.id, tgt.id, r.kind, r.file_id, r.line
351
+ FROM refs r
352
+ JOIN symbols tgt ON tgt.name = r.symbol_name
353
+ JOIN symbols src ON src.file_id = r.file_id
354
+ AND src.line_start <= r.line
355
+ AND (src.line_end >= r.line OR src.line_end IS NULL)
356
+ WHERE src.id != tgt.id
357
+ GROUP BY r.id, tgt.id
358
+ HAVING src.line_start = MAX(src.line_start)
359
+ """
360
+ )
361
+
362
+
171
363
  # ---------------------------------------------------------------------------
172
364
  # Graph traversal — recursive CTEs
173
365
  # ---------------------------------------------------------------------------
@@ -185,17 +377,17 @@ def get_transitive_deps(
185
377
  Results are ordered by traversal depth (shallowest first) and deduplicated.
186
378
  """
187
379
  query = """
188
- WITH RECURSIVE dep_chain(id, name, kind, file_path, depth, path) AS (
380
+ WITH RECURSIVE dep_chain(id, name, kind, file_id, depth, path) AS (
189
381
  -- Base: direct dependencies of the seed symbol
190
382
  SELECT s.id,
191
383
  s.name,
192
384
  s.kind,
193
- s.file_path,
385
+ s.file_id,
194
386
  0,
195
387
  CAST(s.id AS TEXT)
196
- FROM edges e
197
- JOIN symbols s ON s.id = e.target_id
198
- WHERE e.source_id = ?
388
+ FROM symbol_edges e
389
+ JOIN symbols s ON s.id = e.target_symbol_id
390
+ WHERE e.source_symbol_id = ?
199
391
 
200
392
  UNION ALL
201
393
 
@@ -203,16 +395,16 @@ def get_transitive_deps(
203
395
  SELECT s.id,
204
396
  s.name,
205
397
  s.kind,
206
- s.file_path,
398
+ s.file_id,
207
399
  dc.depth + 1,
208
400
  dc.path || ',' || CAST(s.id AS TEXT)
209
401
  FROM dep_chain dc
210
- JOIN edges e ON e.source_id = dc.id
211
- JOIN symbols s ON s.id = e.target_id
402
+ JOIN symbol_edges e ON e.source_symbol_id = dc.id
403
+ JOIN symbols s ON s.id = e.target_symbol_id
212
404
  WHERE dc.depth < ?
213
405
  AND INSTR(dc.path, CAST(s.id AS TEXT)) = 0 -- cycle guard
214
406
  )
215
- SELECT DISTINCT id, name, kind, file_path, depth
407
+ SELECT DISTINCT id, name, kind, file_id, depth
216
408
  FROM dep_chain
217
409
  ORDER BY depth
218
410
  """
@@ -225,21 +417,21 @@ def get_reverse_deps(
225
417
  ) -> list:
226
418
  """Return all symbols that transitively depend on this symbol (blast radius).
227
419
 
228
- Traverses edges in reverse (callers/importers of the seed symbol).
420
+ Traverses symbol_edges in reverse (callers/importers of the seed symbol).
229
421
  Same cycle-prevention strategy as get_transitive_deps.
230
422
  """
231
423
  query = """
232
- WITH RECURSIVE rev_chain(id, name, kind, file_path, depth, path) AS (
424
+ WITH RECURSIVE rev_chain(id, name, kind, file_id, depth, path) AS (
233
425
  -- Base: direct dependents of the seed symbol
234
426
  SELECT s.id,
235
427
  s.name,
236
428
  s.kind,
237
- s.file_path,
429
+ s.file_id,
238
430
  0,
239
431
  CAST(s.id AS TEXT)
240
- FROM edges e
241
- JOIN symbols s ON s.id = e.source_id
242
- WHERE e.target_id = ?
432
+ FROM symbol_edges e
433
+ JOIN symbols s ON s.id = e.source_symbol_id
434
+ WHERE e.target_symbol_id = ?
243
435
 
244
436
  UNION ALL
245
437
 
@@ -247,16 +439,16 @@ def get_reverse_deps(
247
439
  SELECT s.id,
248
440
  s.name,
249
441
  s.kind,
250
- s.file_path,
442
+ s.file_id,
251
443
  rc.depth + 1,
252
444
  rc.path || ',' || CAST(s.id AS TEXT)
253
445
  FROM rev_chain rc
254
- JOIN edges e ON e.target_id = rc.id
255
- JOIN symbols s ON s.id = e.source_id
446
+ JOIN symbol_edges e ON e.target_symbol_id = rc.id
447
+ JOIN symbols s ON s.id = e.source_symbol_id
256
448
  WHERE rc.depth < ?
257
449
  AND INSTR(rc.path, CAST(s.id AS TEXT)) = 0 -- cycle guard
258
450
  )
259
- SELECT DISTINCT id, name, kind, file_path, depth
451
+ SELECT DISTINCT id, name, kind, file_id, depth
260
452
  FROM rev_chain
261
453
  ORDER BY depth
262
454
  """
@@ -270,7 +462,7 @@ def get_reverse_deps(
270
462
 
271
463
 
272
464
  def fts_search(conn: sqlite3.Connection, query_text: str, limit: int = 10) -> list:
273
- """BM25-ranked full-text search over symbol names, signatures, and doc comments.
465
+ """BM25-ranked full-text search over symbol names, qualified names, and signatures.
274
466
 
275
467
  Returns up to *limit* symbol dicts with an additional 'rank' field.
276
468
  Lower rank values indicate better matches (BM25 scores are negative in
@@ -295,15 +487,17 @@ def fts_search(conn: sqlite3.Connection, query_text: str, limit: int = 10) -> li
295
487
 
296
488
  def get_stats(conn: sqlite3.Connection) -> dict:
297
489
  """Return high-level database statistics."""
298
- symbols_count = conn.execute("SELECT COUNT(*) FROM symbols").fetchone()[0]
299
- edges_count = conn.execute("SELECT COUNT(*) FROM edges").fetchone()[0]
300
- files_count = conn.execute(
301
- "SELECT COUNT(DISTINCT file_path) FROM symbols"
302
- ).fetchone()[0]
490
+ file_count = conn.execute("SELECT COUNT(*) FROM files").fetchone()[0]
491
+ symbol_count = conn.execute("SELECT COUNT(*) FROM symbols").fetchone()[0]
492
+ edge_count = conn.execute("SELECT COUNT(*) FROM symbol_edges").fetchone()[0]
493
+ reference_count = conn.execute("SELECT COUNT(*) FROM refs").fetchone()[0]
494
+ file_edge_count = conn.execute("SELECT COUNT(*) FROM file_edges").fetchone()[0]
303
495
  return {
304
- "symbols": symbols_count,
305
- "edges": edges_count,
306
- "files": files_count,
496
+ "file_count": file_count,
497
+ "symbol_count": symbol_count,
498
+ "edge_count": edge_count,
499
+ "reference_count": reference_count,
500
+ "file_edge_count": file_edge_count,
307
501
  }
308
502
 
309
503
 
@@ -324,68 +518,195 @@ def hash_content(content: str) -> str:
324
518
  if __name__ == "__main__":
325
519
  import tempfile
326
520
 
327
- print("Running db.py smoke tests …")
521
+ print("Running db.py smoke tests ...")
328
522
 
329
523
  with tempfile.TemporaryDirectory() as tmp:
330
524
  conn = get_connection(tmp)
331
525
 
332
- # ---- symbols ----
333
- sid_a = add_symbol(
334
- conn, "parse_file", "function", "src/parser.py", 10, 40,
526
+ # ---- add files ----
527
+ fid_parser = add_file(conn, "src/parser.py", "python", 1000.0, hash="abc123", line_count=50)
528
+ fid_lexer = add_file(conn, "src/lexer.py", "python", 1001.0, line_count=30)
529
+ fid_index = add_file(conn, "src/index.py", "python", 1002.0)
530
+ conn.commit()
531
+
532
+ assert get_file_by_path(conn, "src/parser.py")["id"] == fid_parser, "get_file_by_path failed"
533
+ assert get_file_by_path(conn, "nonexistent.py") is None, "get_file_by_path should return None"
534
+ print(" [PASS] File CRUD")
535
+
536
+ # ---- add symbols ----
537
+ # parser.py: parse_file (lines 10-40)
538
+ sid_parse = add_symbol(
539
+ conn, fid_parser, "parse_file", "function", 10, line_end=40,
540
+ qualified_name="parser.parse_file",
335
541
  signature="def parse_file(path: str) -> AST",
336
- doc_comment="Parse a source file and return its AST.",
337
- content_hash=hash_content("def parse_file(): pass"),
338
542
  )
339
- sid_b = add_symbol(
340
- conn, "tokenize", "function", "src/lexer.py", 1, 20,
543
+ # lexer.py: tokenize (lines 1-20)
544
+ sid_tokenize = add_symbol(
545
+ conn, fid_lexer, "tokenize", "function", 1, line_end=20,
546
+ qualified_name="lexer.tokenize",
341
547
  signature="def tokenize(src: str) -> list",
342
548
  )
343
- sid_c = add_symbol(
344
- conn, "build_index", "function", "src/index.py", 5, 60,
549
+ # index.py: build_index (lines 5-60) — calls parse_file and tokenize
550
+ sid_build = add_symbol(
551
+ conn, fid_index, "build_index", "function", 5, line_end=60,
552
+ qualified_name="index.build_index",
345
553
  signature="def build_index(root: str) -> None",
346
- doc_comment="Build the code graph index for a project.",
347
554
  )
555
+ conn.commit()
348
556
 
349
- assert get_symbol_by_id(conn, sid_a)["name"] == "parse_file", "get_symbol_by_id failed"
557
+ assert get_symbol_by_id(conn, sid_parse)["name"] == "parse_file", "get_symbol_by_id failed"
350
558
  assert len(get_symbol_by_name(conn, "tokenize")) == 1, "get_symbol_by_name failed"
559
+ assert len(get_symbols_by_file(conn, fid_parser)) == 1, "get_symbols_by_file failed"
560
+ print(" [PASS] Symbol CRUD")
561
+
562
+ # ---- add references ----
563
+ # build_index calls parse_file at line 15 and tokenize at line 25
564
+ ref1 = add_reference(conn, fid_index, "parse_file", 15, kind="call")
565
+ ref2 = add_reference(conn, fid_index, "tokenize", 25, kind="call")
566
+ # parse_file calls tokenize at line 20
567
+ ref3 = add_reference(conn, fid_parser, "tokenize", 20, kind="call")
568
+ conn.commit()
351
569
 
352
- # ---- edges ----
353
- # build_index → parse_file → tokenize
354
- add_edge(conn, sid_c, sid_a, "calls")
355
- add_edge(conn, sid_a, sid_b, "calls")
356
- add_edge(conn, sid_c, sid_a, "calls") # duplicate — should be ignored
570
+ refs_index = get_references_by_file(conn, fid_index)
571
+ assert len(refs_index) == 2, f"expected 2 refs in index.py, got {len(refs_index)}"
572
+ print(" [PASS] Reference CRUD")
357
573
 
574
+ # ---- rebuild file edges ----
575
+ rebuild_file_edges(conn)
358
576
  conn.commit()
359
577
 
360
- # ---- transitive deps ----
361
- deps = get_transitive_deps(conn, sid_c)
578
+ fe_count = conn.execute("SELECT COUNT(*) FROM file_edges").fetchone()[0]
579
+ assert fe_count >= 2, f"expected >= 2 file edges, got {fe_count}"
580
+
581
+ # index.py -> parser.py edge should exist
582
+ fe = conn.execute(
583
+ "SELECT * FROM file_edges WHERE source_file_id=? AND target_file_id=?",
584
+ (fid_index, fid_parser),
585
+ ).fetchone()
586
+ assert fe is not None, "file edge index->parser missing"
587
+ # index.py -> lexer.py edge should exist
588
+ fe2 = conn.execute(
589
+ "SELECT * FROM file_edges WHERE source_file_id=? AND target_file_id=?",
590
+ (fid_index, fid_lexer),
591
+ ).fetchone()
592
+ assert fe2 is not None, "file edge index->lexer missing"
593
+ print(" [PASS] rebuild_file_edges")
594
+
595
+ # ---- rebuild symbol edges ----
596
+ rebuild_symbol_edges(conn)
597
+ conn.commit()
598
+
599
+ se_count = conn.execute("SELECT COUNT(*) FROM symbol_edges").fetchone()[0]
600
+ assert se_count >= 2, f"expected >= 2 symbol edges, got {se_count}"
601
+
602
+ # build_index -> parse_file edge should exist
603
+ se = conn.execute(
604
+ "SELECT * FROM symbol_edges WHERE source_symbol_id=? AND target_symbol_id=?",
605
+ (sid_build, sid_parse),
606
+ ).fetchone()
607
+ assert se is not None, "symbol edge build_index->parse_file missing"
608
+
609
+ # build_index -> tokenize edge should exist
610
+ se2 = conn.execute(
611
+ "SELECT * FROM symbol_edges WHERE source_symbol_id=? AND target_symbol_id=?",
612
+ (sid_build, sid_tokenize),
613
+ ).fetchone()
614
+ assert se2 is not None, "symbol edge build_index->tokenize missing"
615
+
616
+ # parse_file -> tokenize edge should exist
617
+ se3 = conn.execute(
618
+ "SELECT * FROM symbol_edges WHERE source_symbol_id=? AND target_symbol_id=?",
619
+ (sid_parse, sid_tokenize),
620
+ ).fetchone()
621
+ assert se3 is not None, "symbol edge parse_file->tokenize missing"
622
+ print(" [PASS] rebuild_symbol_edges")
623
+
624
+ # ---- transitive deps via symbol_edges ----
625
+ deps = get_transitive_deps(conn, sid_build)
362
626
  dep_ids = {d["id"] for d in deps}
363
- assert sid_a in dep_ids and sid_b in dep_ids, f"transitive deps wrong: {dep_ids}"
627
+ assert sid_parse in dep_ids, f"transitive deps missing parse_file: {dep_ids}"
628
+ assert sid_tokenize in dep_ids, f"transitive deps missing tokenize: {dep_ids}"
629
+ print(" [PASS] get_transitive_deps")
364
630
 
365
- # ---- reverse deps ----
366
- rdeps = get_reverse_deps(conn, sid_b)
631
+ # ---- reverse deps via symbol_edges ----
632
+ rdeps = get_reverse_deps(conn, sid_tokenize)
367
633
  rdep_ids = {d["id"] for d in rdeps}
368
- assert sid_a in rdep_ids and sid_c in rdep_ids, f"reverse deps wrong: {rdep_ids}"
634
+ assert sid_parse in rdep_ids, f"reverse deps missing parse_file: {rdep_ids}"
635
+ assert sid_build in rdep_ids, f"reverse deps missing build_index: {rdep_ids}"
636
+ print(" [PASS] get_reverse_deps")
369
637
 
370
638
  # ---- FTS search ----
371
639
  results = fts_search(conn, "parse")
372
- assert any(r["name"] == "parse_file" for r in results), "FTS search failed"
640
+ assert any(r["name"] == "parse_file" for r in results), "FTS search for 'parse' failed"
373
641
 
374
- results_doc = fts_search(conn, "index")
375
- assert any(r["name"] == "build_index" for r in results_doc), "FTS doc_comment search failed"
642
+ results_sig = fts_search(conn, "tokenize")
643
+ assert any(r["name"] == "tokenize" for r in results_sig), "FTS search for 'tokenize' failed"
376
644
 
377
- # ---- remove by file ----
378
- remove_symbols_by_file(conn, "src/lexer.py")
379
- conn.commit()
380
- assert get_symbol_by_id(conn, sid_b) is None, "remove_symbols_by_file failed"
645
+ results_qn = fts_search(conn, "index")
646
+ assert any(r["name"] == "build_index" for r in results_qn), "FTS qualified_name search failed"
647
+ print(" [PASS] FTS search")
381
648
 
382
649
  # ---- stats ----
383
650
  stats = get_stats(conn)
384
- assert stats["symbols"] == 2, f"expected 2 symbols after removal, got {stats['symbols']}"
385
- assert stats["files"] == 2, f"expected 2 files, got {stats['files']}"
386
- # edge from parse_file → tokenize should be gone via CASCADE
387
- edge_count = conn.execute("SELECT COUNT(*) FROM edges").fetchone()[0]
388
- assert edge_count == 1, f"expected 1 edge after file removal, got {edge_count}"
389
-
390
- print("All smoke tests passed.")
391
- print(f"Stats: {get_stats(conn)}")
651
+ assert stats["file_count"] == 3, f"expected 3 files, got {stats['file_count']}"
652
+ assert stats["symbol_count"] == 3, f"expected 3 symbols, got {stats['symbol_count']}"
653
+ assert stats["reference_count"] == 3, f"expected 3 refs, got {stats['reference_count']}"
654
+ assert stats["edge_count"] >= 2, f"expected >= 2 symbol edges, got {stats['edge_count']}"
655
+ assert stats["file_edge_count"] >= 2, f"expected >= 2 file edges, got {stats['file_edge_count']}"
656
+ print(" [PASS] get_stats")
657
+
658
+ # ---- add_edge (manual symbol edge) ----
659
+ add_edge(conn, sid_parse, sid_build, "test_edge")
660
+ conn.commit()
661
+ manual_edge = conn.execute(
662
+ "SELECT * FROM symbol_edges WHERE source_symbol_id=? AND target_symbol_id=? AND kind=?",
663
+ (sid_parse, sid_build, "test_edge"),
664
+ ).fetchone()
665
+ assert manual_edge is not None, "add_edge failed"
666
+ # duplicate should be ignored
667
+ add_edge(conn, sid_parse, sid_build, "test_edge")
668
+ conn.commit()
669
+ print(" [PASS] add_edge (manual)")
670
+
671
+ # ---- CASCADE deletes ----
672
+ # Remove lexer.py file -> tokenize symbol, refs to tokenize, and edges should cascade
673
+ sym_count_before = conn.execute("SELECT COUNT(*) FROM symbols").fetchone()[0]
674
+ ref_count_before = conn.execute("SELECT COUNT(*) FROM refs").fetchone()[0]
675
+
676
+ remove_file(conn, "src/lexer.py")
677
+ conn.commit()
678
+
679
+ assert get_file_by_path(conn, "src/lexer.py") is None, "file not removed"
680
+ assert get_symbol_by_id(conn, sid_tokenize) is None, "symbol not cascaded on file delete"
681
+
682
+ sym_count_after = conn.execute("SELECT COUNT(*) FROM symbols").fetchone()[0]
683
+ assert sym_count_after == sym_count_before - 1, f"expected {sym_count_before - 1} symbols, got {sym_count_after}"
684
+
685
+ # Refs in lexer.py should be gone (there were none, but verify no error)
686
+ # Symbol edges involving tokenize should be gone
687
+ edges_to_tokenize = conn.execute(
688
+ "SELECT COUNT(*) FROM symbol_edges WHERE target_symbol_id=? OR source_symbol_id=?",
689
+ (sid_tokenize, sid_tokenize),
690
+ ).fetchone()[0]
691
+ assert edges_to_tokenize == 0, f"expected 0 edges involving deleted symbol, got {edges_to_tokenize}"
692
+ print(" [PASS] CASCADE deletes (remove_file)")
693
+
694
+ # ---- remove_symbols_by_file (without removing file) ----
695
+ remove_symbols_by_file(conn, "src/parser.py")
696
+ conn.commit()
697
+ assert get_symbol_by_id(conn, sid_parse) is None, "remove_symbols_by_file failed"
698
+ # File itself should still exist
699
+ assert get_file_by_path(conn, "src/parser.py") is not None, "file should still exist after remove_symbols_by_file"
700
+ print(" [PASS] remove_symbols_by_file")
701
+
702
+ # ---- final stats ----
703
+ final_stats = get_stats(conn)
704
+ print(f"\n Final stats: {final_stats}")
705
+
706
+ # ---- hash_content utility ----
707
+ h = hash_content("hello world")
708
+ assert len(h) == 64, "hash_content should return 64-char hex string"
709
+ assert h == hash_content("hello world"), "hash_content should be deterministic"
710
+ print(" [PASS] hash_content")
711
+
712
+ print("\nAll smoke tests passed.")