feed-the-machine 1.3.1 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +82 -180
- package/ftm-git/SKILL.md +0 -1
- package/ftm-map/SKILL.md +46 -14
- package/ftm-map/scripts/db.py +439 -118
- package/ftm-map/scripts/index.py +128 -54
- package/ftm-map/scripts/parser.py +89 -320
- package/ftm-map/scripts/queries/go-tags.scm +20 -0
- package/ftm-map/scripts/queries/javascript-tags.scm +19 -7
- package/ftm-map/scripts/queries/python-tags.scm +22 -8
- package/ftm-map/scripts/queries/ruby-tags.scm +19 -0
- package/ftm-map/scripts/queries/rust-tags.scm +37 -0
- package/ftm-map/scripts/queries/typescript-tags.scm +20 -8
- package/ftm-map/scripts/query.py +176 -24
- package/ftm-map/scripts/ranker.py +377 -0
- package/ftm-map/scripts/requirements.txt +3 -0
- package/ftm-map/scripts/setup.sh +11 -0
- package/ftm-map/scripts/test_db.py +355 -115
- package/ftm-map/scripts/test_parser.py +169 -101
- package/ftm-map/scripts/test_query.py +178 -61
- package/ftm-map/scripts/test_ranker.py +199 -0
- package/ftm-map/scripts/views.py +107 -61
- package/ftm-mind/references/event-registry.md +0 -10
- package/hooks/ftm-blackboard-enforcer.sh +1 -4
- package/package.json +1 -1
- package/ftm-inbox/backend/__pycache__/main.cpython-314.pyc +0 -0
- package/ftm-inbox/backend/planner/__pycache__/__init__.cpython-314.pyc +0 -0
- package/ftm-inbox/backend/planner/__pycache__/generator.cpython-314.pyc +0 -0
- package/ftm-inbox/backend/planner/__pycache__/schema.cpython-314.pyc +0 -0
- package/ftm-inbox/backend/routes/__pycache__/plan.cpython-314.pyc +0 -0
- package/ftm-map/scripts/tests/fixtures/__init__.py +0 -0
- package/ftm-map/scripts/tests/fixtures/sample_project/api.ts +0 -16
- package/ftm-map/scripts/tests/fixtures/sample_project/auth.py +0 -15
- package/ftm-map/scripts/tests/fixtures/sample_project/utils.js +0 -16
package/ftm-map/scripts/db.py
CHANGED
|
@@ -1,14 +1,17 @@
|
|
|
1
1
|
"""
|
|
2
2
|
db.py — SQLite database module for ftm-map.
|
|
3
3
|
|
|
4
|
-
Manages
|
|
5
|
-
|
|
6
|
-
and
|
|
4
|
+
Manages a 5-table schema (files, symbols, refs, file_edges, symbol_edges)
|
|
5
|
+
plus FTS5 for full-text search over symbols. Provides CRUD operations,
|
|
6
|
+
materialized edge rebuilding, and graph traversal queries.
|
|
7
7
|
|
|
8
8
|
Schema overview:
|
|
9
|
+
files — tracked source files with metadata
|
|
9
10
|
symbols — indexed code symbols (functions, classes, methods, etc.)
|
|
10
|
-
|
|
11
|
-
|
|
11
|
+
refs — unresolved references (calls, imports) keyed by symbol name
|
|
12
|
+
file_edges — materialized file-level dependency graph
|
|
13
|
+
symbol_edges — materialized symbol-level dependency graph
|
|
14
|
+
symbols_fts — FTS5 virtual table for BM25-ranked search
|
|
12
15
|
"""
|
|
13
16
|
|
|
14
17
|
import hashlib
|
|
@@ -29,32 +32,63 @@ DB_PATH = os.path.join(DB_DIR, "map.db")
|
|
|
29
32
|
# ---------------------------------------------------------------------------
|
|
30
33
|
|
|
31
34
|
_SCHEMA = """
|
|
35
|
+
CREATE TABLE IF NOT EXISTS files (
|
|
36
|
+
id INTEGER PRIMARY KEY,
|
|
37
|
+
path TEXT NOT NULL UNIQUE,
|
|
38
|
+
lang TEXT,
|
|
39
|
+
mtime REAL NOT NULL,
|
|
40
|
+
hash TEXT,
|
|
41
|
+
line_count INTEGER
|
|
42
|
+
);
|
|
43
|
+
|
|
32
44
|
CREATE TABLE IF NOT EXISTS symbols (
|
|
33
|
-
id
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
45
|
+
id INTEGER PRIMARY KEY,
|
|
46
|
+
file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
|
|
47
|
+
name TEXT NOT NULL,
|
|
48
|
+
qualified_name TEXT,
|
|
49
|
+
kind TEXT NOT NULL,
|
|
50
|
+
line_start INTEGER NOT NULL,
|
|
51
|
+
line_end INTEGER,
|
|
52
|
+
signature TEXT,
|
|
53
|
+
parent_id INTEGER REFERENCES symbols(id) ON DELETE SET NULL
|
|
42
54
|
);
|
|
43
55
|
|
|
44
|
-
CREATE TABLE IF NOT EXISTS
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
56
|
+
CREATE TABLE IF NOT EXISTS refs (
|
|
57
|
+
id INTEGER PRIMARY KEY,
|
|
58
|
+
file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
|
|
59
|
+
symbol_name TEXT NOT NULL,
|
|
60
|
+
line INTEGER NOT NULL,
|
|
61
|
+
kind TEXT DEFAULT 'call'
|
|
49
62
|
);
|
|
50
63
|
|
|
51
|
-
CREATE
|
|
52
|
-
|
|
53
|
-
|
|
64
|
+
CREATE TABLE IF NOT EXISTS file_edges (
|
|
65
|
+
source_file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
|
|
66
|
+
target_file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
|
|
67
|
+
weight REAL DEFAULT 1.0,
|
|
68
|
+
PRIMARY KEY (source_file_id, target_file_id)
|
|
69
|
+
);
|
|
70
|
+
|
|
71
|
+
CREATE TABLE IF NOT EXISTS symbol_edges (
|
|
72
|
+
source_symbol_id INTEGER NOT NULL REFERENCES symbols(id) ON DELETE CASCADE,
|
|
73
|
+
target_symbol_id INTEGER NOT NULL REFERENCES symbols(id) ON DELETE CASCADE,
|
|
74
|
+
kind TEXT NOT NULL,
|
|
75
|
+
file_id INTEGER REFERENCES files(id),
|
|
76
|
+
line INTEGER,
|
|
77
|
+
PRIMARY KEY (source_symbol_id, target_symbol_id, kind)
|
|
78
|
+
);
|
|
79
|
+
|
|
80
|
+
CREATE INDEX IF NOT EXISTS idx_symbols_file ON symbols(file_id);
|
|
81
|
+
CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name);
|
|
82
|
+
CREATE INDEX IF NOT EXISTS idx_symbols_parent ON symbols(parent_id);
|
|
83
|
+
CREATE INDEX IF NOT EXISTS idx_refs_file ON refs(file_id);
|
|
84
|
+
CREATE INDEX IF NOT EXISTS idx_refs_symbol_name ON refs(symbol_name);
|
|
85
|
+
CREATE INDEX IF NOT EXISTS idx_file_edges_target ON file_edges(target_file_id);
|
|
86
|
+
CREATE INDEX IF NOT EXISTS idx_symbol_edges_target ON symbol_edges(target_symbol_id);
|
|
54
87
|
|
|
55
88
|
CREATE VIRTUAL TABLE IF NOT EXISTS symbols_fts USING fts5(
|
|
56
|
-
name,
|
|
57
|
-
content=symbols, content_rowid=id
|
|
89
|
+
name, qualified_name, signature,
|
|
90
|
+
content=symbols, content_rowid=id,
|
|
91
|
+
tokenize='porter'
|
|
58
92
|
);
|
|
59
93
|
"""
|
|
60
94
|
|
|
@@ -87,6 +121,60 @@ def _init_schema(conn: sqlite3.Connection) -> None:
|
|
|
87
121
|
conn.commit()
|
|
88
122
|
|
|
89
123
|
|
|
124
|
+
# ---------------------------------------------------------------------------
|
|
125
|
+
# File CRUD
|
|
126
|
+
# ---------------------------------------------------------------------------
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def add_file(
|
|
130
|
+
conn: sqlite3.Connection,
|
|
131
|
+
path: str,
|
|
132
|
+
lang: str,
|
|
133
|
+
mtime: float,
|
|
134
|
+
hash: Optional[str] = None,
|
|
135
|
+
line_count: Optional[int] = None,
|
|
136
|
+
) -> int:
|
|
137
|
+
"""Insert a file row. Returns the new file id."""
|
|
138
|
+
cursor = conn.execute(
|
|
139
|
+
"""
|
|
140
|
+
INSERT INTO files (path, lang, mtime, hash, line_count)
|
|
141
|
+
VALUES (?, ?, ?, ?, ?)
|
|
142
|
+
""",
|
|
143
|
+
(path, lang, mtime, hash, line_count),
|
|
144
|
+
)
|
|
145
|
+
return cursor.lastrowid
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def get_file_by_path(conn: sqlite3.Connection, path: str) -> Optional[dict]:
|
|
149
|
+
"""Return a file row as a dict, or None if not found."""
|
|
150
|
+
row = conn.execute("SELECT * FROM files WHERE path=?", (path,)).fetchone()
|
|
151
|
+
return dict(row) if row else None
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def remove_file(conn: sqlite3.Connection, path: str) -> None:
|
|
155
|
+
"""Delete a file and cascade to symbols, refs, and edges.
|
|
156
|
+
|
|
157
|
+
FTS5 rows are removed explicitly before the symbol rows because the
|
|
158
|
+
content= table does not handle cascaded deletes automatically.
|
|
159
|
+
"""
|
|
160
|
+
file_row = get_file_by_path(conn, path)
|
|
161
|
+
if file_row is None:
|
|
162
|
+
return
|
|
163
|
+
|
|
164
|
+
file_id = file_row["id"]
|
|
165
|
+
|
|
166
|
+
# Clean up FTS entries for symbols in this file
|
|
167
|
+
sym_ids = [
|
|
168
|
+
row["id"]
|
|
169
|
+
for row in conn.execute("SELECT id FROM symbols WHERE file_id=?", (file_id,))
|
|
170
|
+
]
|
|
171
|
+
for sid in sym_ids:
|
|
172
|
+
conn.execute("DELETE FROM symbols_fts WHERE rowid=?", (sid,))
|
|
173
|
+
|
|
174
|
+
# CASCADE handles symbols, refs, file_edges, symbol_edges
|
|
175
|
+
conn.execute("DELETE FROM files WHERE id=?", (file_id,))
|
|
176
|
+
|
|
177
|
+
|
|
90
178
|
# ---------------------------------------------------------------------------
|
|
91
179
|
# Symbol CRUD
|
|
92
180
|
# ---------------------------------------------------------------------------
|
|
@@ -94,14 +182,14 @@ def _init_schema(conn: sqlite3.Connection) -> None:
|
|
|
94
182
|
|
|
95
183
|
def add_symbol(
|
|
96
184
|
conn: sqlite3.Connection,
|
|
185
|
+
file_id: int,
|
|
97
186
|
name: str,
|
|
98
187
|
kind: str,
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
188
|
+
line_start: int,
|
|
189
|
+
line_end: Optional[int] = None,
|
|
190
|
+
qualified_name: Optional[str] = None,
|
|
102
191
|
signature: Optional[str] = None,
|
|
103
|
-
|
|
104
|
-
content_hash: Optional[str] = None,
|
|
192
|
+
parent_id: Optional[int] = None,
|
|
105
193
|
) -> int:
|
|
106
194
|
"""Insert a symbol row and keep the FTS5 index in sync.
|
|
107
195
|
|
|
@@ -110,48 +198,92 @@ def add_symbol(
|
|
|
110
198
|
cursor = conn.execute(
|
|
111
199
|
"""
|
|
112
200
|
INSERT INTO symbols
|
|
113
|
-
(name,
|
|
114
|
-
VALUES (
|
|
201
|
+
(file_id, name, qualified_name, kind, line_start, line_end, signature, parent_id)
|
|
202
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
115
203
|
""",
|
|
116
|
-
(name,
|
|
204
|
+
(file_id, name, qualified_name, kind, line_start, line_end, signature, parent_id),
|
|
117
205
|
)
|
|
118
206
|
symbol_id = cursor.lastrowid
|
|
119
207
|
|
|
120
208
|
# FTS5 content= tables require manual insert so BM25 ranking stays accurate.
|
|
121
209
|
conn.execute(
|
|
122
|
-
"INSERT INTO symbols_fts(rowid, name,
|
|
123
|
-
(symbol_id, name,
|
|
210
|
+
"INSERT INTO symbols_fts(rowid, name, qualified_name, signature) VALUES (?, ?, ?, ?)",
|
|
211
|
+
(symbol_id, name, qualified_name or "", signature or ""),
|
|
124
212
|
)
|
|
125
213
|
|
|
126
214
|
return symbol_id
|
|
127
215
|
|
|
128
216
|
|
|
217
|
+
def get_symbol_by_id(conn: sqlite3.Connection, symbol_id: int) -> Optional[dict]:
|
|
218
|
+
"""Return a symbol row as a dict, or None if not found."""
|
|
219
|
+
row = conn.execute("SELECT * FROM symbols WHERE id=?", (symbol_id,)).fetchone()
|
|
220
|
+
return dict(row) if row else None
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def get_symbol_by_name(conn: sqlite3.Connection, name: str) -> list:
|
|
224
|
+
"""Return all symbols matching *name* (name is not guaranteed unique)."""
|
|
225
|
+
rows = conn.execute("SELECT * FROM symbols WHERE name=?", (name,)).fetchall()
|
|
226
|
+
return [dict(r) for r in rows]
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def get_symbols_by_file(conn: sqlite3.Connection, file_id: int) -> list:
|
|
230
|
+
"""Return all symbols belonging to a given file."""
|
|
231
|
+
rows = conn.execute(
|
|
232
|
+
"SELECT * FROM symbols WHERE file_id=? ORDER BY line_start",
|
|
233
|
+
(file_id,),
|
|
234
|
+
).fetchall()
|
|
235
|
+
return [dict(r) for r in rows]
|
|
236
|
+
|
|
237
|
+
|
|
129
238
|
def remove_symbols_by_file(conn: sqlite3.Connection, file_path: str) -> None:
|
|
130
|
-
"""Delete all symbols
|
|
239
|
+
"""Delete all symbols for a given file path.
|
|
131
240
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
ON DELETE CASCADE handles edge cleanup via the symbols foreign key.
|
|
241
|
+
Finds the file_id from the path, cleans up FTS entries, then deletes
|
|
242
|
+
the symbols (CASCADE handles symbol_edges).
|
|
135
243
|
"""
|
|
136
|
-
|
|
244
|
+
file_row = get_file_by_path(conn, file_path)
|
|
245
|
+
if file_row is None:
|
|
246
|
+
return
|
|
247
|
+
|
|
248
|
+
file_id = file_row["id"]
|
|
249
|
+
|
|
250
|
+
# Clean up FTS entries
|
|
251
|
+
sym_ids = [
|
|
137
252
|
row["id"]
|
|
138
|
-
for row in conn.execute("SELECT id FROM symbols WHERE
|
|
253
|
+
for row in conn.execute("SELECT id FROM symbols WHERE file_id=?", (file_id,))
|
|
139
254
|
]
|
|
140
|
-
for sid in
|
|
255
|
+
for sid in sym_ids:
|
|
141
256
|
conn.execute("DELETE FROM symbols_fts WHERE rowid=?", (sid,))
|
|
142
257
|
|
|
143
|
-
conn.execute("DELETE FROM symbols WHERE
|
|
258
|
+
conn.execute("DELETE FROM symbols WHERE file_id=?", (file_id,))
|
|
144
259
|
|
|
145
260
|
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
return dict(row) if row else None
|
|
261
|
+
# ---------------------------------------------------------------------------
|
|
262
|
+
# Reference CRUD
|
|
263
|
+
# ---------------------------------------------------------------------------
|
|
150
264
|
|
|
151
265
|
|
|
152
|
-
def
|
|
153
|
-
|
|
154
|
-
|
|
266
|
+
def add_reference(
|
|
267
|
+
conn: sqlite3.Connection,
|
|
268
|
+
file_id: int,
|
|
269
|
+
symbol_name: str,
|
|
270
|
+
line: int,
|
|
271
|
+
kind: str = "call",
|
|
272
|
+
) -> int:
|
|
273
|
+
"""Insert a reference row. Returns the new ref id."""
|
|
274
|
+
cursor = conn.execute(
|
|
275
|
+
"INSERT INTO refs (file_id, symbol_name, line, kind) VALUES (?, ?, ?, ?)",
|
|
276
|
+
(file_id, symbol_name, line, kind),
|
|
277
|
+
)
|
|
278
|
+
return cursor.lastrowid
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def get_references_by_file(conn: sqlite3.Connection, file_id: int) -> list:
|
|
282
|
+
"""Return all references in a given file."""
|
|
283
|
+
rows = conn.execute(
|
|
284
|
+
"SELECT * FROM refs WHERE file_id=? ORDER BY line",
|
|
285
|
+
(file_id,),
|
|
286
|
+
).fetchall()
|
|
155
287
|
return [dict(r) for r in rows]
|
|
156
288
|
|
|
157
289
|
|
|
@@ -160,14 +292,74 @@ def get_symbol_by_name(conn: sqlite3.Connection, name: str) -> list:
|
|
|
160
292
|
# ---------------------------------------------------------------------------
|
|
161
293
|
|
|
162
294
|
|
|
163
|
-
def add_edge(
|
|
164
|
-
|
|
295
|
+
def add_edge(
|
|
296
|
+
conn: sqlite3.Connection,
|
|
297
|
+
source_id: int,
|
|
298
|
+
target_id: int,
|
|
299
|
+
kind: str,
|
|
300
|
+
) -> None:
|
|
301
|
+
"""Insert a directed symbol edge. Silently ignored if the edge already exists."""
|
|
165
302
|
conn.execute(
|
|
166
|
-
"INSERT OR IGNORE INTO
|
|
303
|
+
"INSERT OR IGNORE INTO symbol_edges (source_symbol_id, target_symbol_id, kind) VALUES (?, ?, ?)",
|
|
167
304
|
(source_id, target_id, kind),
|
|
168
305
|
)
|
|
169
306
|
|
|
170
307
|
|
|
308
|
+
# ---------------------------------------------------------------------------
|
|
309
|
+
# Materialized edge rebuilding
|
|
310
|
+
# ---------------------------------------------------------------------------
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def rebuild_file_edges(conn: sqlite3.Connection) -> None:
|
|
314
|
+
"""Rebuild the file_edges table from refs and symbols.
|
|
315
|
+
|
|
316
|
+
For each ref in refs, finds which file defines a symbol with that name,
|
|
317
|
+
then creates a file_edge from the referencing file to the defining file.
|
|
318
|
+
Duplicate edges are collapsed; weight defaults to 1.0.
|
|
319
|
+
"""
|
|
320
|
+
conn.execute("DELETE FROM file_edges")
|
|
321
|
+
|
|
322
|
+
conn.execute(
|
|
323
|
+
"""
|
|
324
|
+
INSERT OR IGNORE INTO file_edges (source_file_id, target_file_id, weight)
|
|
325
|
+
SELECT DISTINCT r.file_id, s.file_id, 1.0
|
|
326
|
+
FROM refs r
|
|
327
|
+
JOIN symbols s ON s.name = r.symbol_name
|
|
328
|
+
WHERE r.file_id != s.file_id
|
|
329
|
+
"""
|
|
330
|
+
)
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
def rebuild_symbol_edges(conn: sqlite3.Connection) -> None:
|
|
334
|
+
"""Rebuild the symbol_edges table from refs and symbols.
|
|
335
|
+
|
|
336
|
+
For each ref, finds the target symbol (by name match) and the nearest
|
|
337
|
+
enclosing definition in the referencing file (the symbol whose line range
|
|
338
|
+
contains the ref line). Creates a symbol_edge from the enclosing symbol
|
|
339
|
+
to the target symbol.
|
|
340
|
+
"""
|
|
341
|
+
conn.execute("DELETE FROM symbol_edges")
|
|
342
|
+
|
|
343
|
+
# Find matching ref -> target symbol, with nearest enclosing source symbol.
|
|
344
|
+
# The enclosing symbol is the one in the same file as the ref whose
|
|
345
|
+
# line_start <= ref.line and (line_end >= ref.line OR line_end IS NULL),
|
|
346
|
+
# ordered by line_start DESC to get the nearest (innermost) enclosure.
|
|
347
|
+
conn.execute(
|
|
348
|
+
"""
|
|
349
|
+
INSERT OR IGNORE INTO symbol_edges (source_symbol_id, target_symbol_id, kind, file_id, line)
|
|
350
|
+
SELECT src.id, tgt.id, r.kind, r.file_id, r.line
|
|
351
|
+
FROM refs r
|
|
352
|
+
JOIN symbols tgt ON tgt.name = r.symbol_name
|
|
353
|
+
JOIN symbols src ON src.file_id = r.file_id
|
|
354
|
+
AND src.line_start <= r.line
|
|
355
|
+
AND (src.line_end >= r.line OR src.line_end IS NULL)
|
|
356
|
+
WHERE src.id != tgt.id
|
|
357
|
+
GROUP BY r.id, tgt.id
|
|
358
|
+
HAVING src.line_start = MAX(src.line_start)
|
|
359
|
+
"""
|
|
360
|
+
)
|
|
361
|
+
|
|
362
|
+
|
|
171
363
|
# ---------------------------------------------------------------------------
|
|
172
364
|
# Graph traversal — recursive CTEs
|
|
173
365
|
# ---------------------------------------------------------------------------
|
|
@@ -185,17 +377,17 @@ def get_transitive_deps(
|
|
|
185
377
|
Results are ordered by traversal depth (shallowest first) and deduplicated.
|
|
186
378
|
"""
|
|
187
379
|
query = """
|
|
188
|
-
WITH RECURSIVE dep_chain(id, name, kind,
|
|
380
|
+
WITH RECURSIVE dep_chain(id, name, kind, file_id, depth, path) AS (
|
|
189
381
|
-- Base: direct dependencies of the seed symbol
|
|
190
382
|
SELECT s.id,
|
|
191
383
|
s.name,
|
|
192
384
|
s.kind,
|
|
193
|
-
s.
|
|
385
|
+
s.file_id,
|
|
194
386
|
0,
|
|
195
387
|
CAST(s.id AS TEXT)
|
|
196
|
-
FROM
|
|
197
|
-
JOIN symbols s ON s.id = e.
|
|
198
|
-
WHERE e.
|
|
388
|
+
FROM symbol_edges e
|
|
389
|
+
JOIN symbols s ON s.id = e.target_symbol_id
|
|
390
|
+
WHERE e.source_symbol_id = ?
|
|
199
391
|
|
|
200
392
|
UNION ALL
|
|
201
393
|
|
|
@@ -203,16 +395,16 @@ def get_transitive_deps(
|
|
|
203
395
|
SELECT s.id,
|
|
204
396
|
s.name,
|
|
205
397
|
s.kind,
|
|
206
|
-
s.
|
|
398
|
+
s.file_id,
|
|
207
399
|
dc.depth + 1,
|
|
208
400
|
dc.path || ',' || CAST(s.id AS TEXT)
|
|
209
401
|
FROM dep_chain dc
|
|
210
|
-
JOIN
|
|
211
|
-
JOIN symbols s
|
|
402
|
+
JOIN symbol_edges e ON e.source_symbol_id = dc.id
|
|
403
|
+
JOIN symbols s ON s.id = e.target_symbol_id
|
|
212
404
|
WHERE dc.depth < ?
|
|
213
405
|
AND INSTR(dc.path, CAST(s.id AS TEXT)) = 0 -- cycle guard
|
|
214
406
|
)
|
|
215
|
-
SELECT DISTINCT id, name, kind,
|
|
407
|
+
SELECT DISTINCT id, name, kind, file_id, depth
|
|
216
408
|
FROM dep_chain
|
|
217
409
|
ORDER BY depth
|
|
218
410
|
"""
|
|
@@ -225,21 +417,21 @@ def get_reverse_deps(
|
|
|
225
417
|
) -> list:
|
|
226
418
|
"""Return all symbols that transitively depend on this symbol (blast radius).
|
|
227
419
|
|
|
228
|
-
Traverses
|
|
420
|
+
Traverses symbol_edges in reverse (callers/importers of the seed symbol).
|
|
229
421
|
Same cycle-prevention strategy as get_transitive_deps.
|
|
230
422
|
"""
|
|
231
423
|
query = """
|
|
232
|
-
WITH RECURSIVE rev_chain(id, name, kind,
|
|
424
|
+
WITH RECURSIVE rev_chain(id, name, kind, file_id, depth, path) AS (
|
|
233
425
|
-- Base: direct dependents of the seed symbol
|
|
234
426
|
SELECT s.id,
|
|
235
427
|
s.name,
|
|
236
428
|
s.kind,
|
|
237
|
-
s.
|
|
429
|
+
s.file_id,
|
|
238
430
|
0,
|
|
239
431
|
CAST(s.id AS TEXT)
|
|
240
|
-
FROM
|
|
241
|
-
JOIN symbols s ON s.id = e.
|
|
242
|
-
WHERE e.
|
|
432
|
+
FROM symbol_edges e
|
|
433
|
+
JOIN symbols s ON s.id = e.source_symbol_id
|
|
434
|
+
WHERE e.target_symbol_id = ?
|
|
243
435
|
|
|
244
436
|
UNION ALL
|
|
245
437
|
|
|
@@ -247,16 +439,16 @@ def get_reverse_deps(
|
|
|
247
439
|
SELECT s.id,
|
|
248
440
|
s.name,
|
|
249
441
|
s.kind,
|
|
250
|
-
s.
|
|
442
|
+
s.file_id,
|
|
251
443
|
rc.depth + 1,
|
|
252
444
|
rc.path || ',' || CAST(s.id AS TEXT)
|
|
253
445
|
FROM rev_chain rc
|
|
254
|
-
JOIN
|
|
255
|
-
JOIN symbols s
|
|
446
|
+
JOIN symbol_edges e ON e.target_symbol_id = rc.id
|
|
447
|
+
JOIN symbols s ON s.id = e.source_symbol_id
|
|
256
448
|
WHERE rc.depth < ?
|
|
257
449
|
AND INSTR(rc.path, CAST(s.id AS TEXT)) = 0 -- cycle guard
|
|
258
450
|
)
|
|
259
|
-
SELECT DISTINCT id, name, kind,
|
|
451
|
+
SELECT DISTINCT id, name, kind, file_id, depth
|
|
260
452
|
FROM rev_chain
|
|
261
453
|
ORDER BY depth
|
|
262
454
|
"""
|
|
@@ -270,7 +462,7 @@ def get_reverse_deps(
|
|
|
270
462
|
|
|
271
463
|
|
|
272
464
|
def fts_search(conn: sqlite3.Connection, query_text: str, limit: int = 10) -> list:
|
|
273
|
-
"""BM25-ranked full-text search over symbol names,
|
|
465
|
+
"""BM25-ranked full-text search over symbol names, qualified names, and signatures.
|
|
274
466
|
|
|
275
467
|
Returns up to *limit* symbol dicts with an additional 'rank' field.
|
|
276
468
|
Lower rank values indicate better matches (BM25 scores are negative in
|
|
@@ -295,15 +487,17 @@ def fts_search(conn: sqlite3.Connection, query_text: str, limit: int = 10) -> li
|
|
|
295
487
|
|
|
296
488
|
def get_stats(conn: sqlite3.Connection) -> dict:
|
|
297
489
|
"""Return high-level database statistics."""
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
).fetchone()[0]
|
|
490
|
+
file_count = conn.execute("SELECT COUNT(*) FROM files").fetchone()[0]
|
|
491
|
+
symbol_count = conn.execute("SELECT COUNT(*) FROM symbols").fetchone()[0]
|
|
492
|
+
edge_count = conn.execute("SELECT COUNT(*) FROM symbol_edges").fetchone()[0]
|
|
493
|
+
reference_count = conn.execute("SELECT COUNT(*) FROM refs").fetchone()[0]
|
|
494
|
+
file_edge_count = conn.execute("SELECT COUNT(*) FROM file_edges").fetchone()[0]
|
|
303
495
|
return {
|
|
304
|
-
"
|
|
305
|
-
"
|
|
306
|
-
"
|
|
496
|
+
"file_count": file_count,
|
|
497
|
+
"symbol_count": symbol_count,
|
|
498
|
+
"edge_count": edge_count,
|
|
499
|
+
"reference_count": reference_count,
|
|
500
|
+
"file_edge_count": file_edge_count,
|
|
307
501
|
}
|
|
308
502
|
|
|
309
503
|
|
|
@@ -324,68 +518,195 @@ def hash_content(content: str) -> str:
|
|
|
324
518
|
if __name__ == "__main__":
|
|
325
519
|
import tempfile
|
|
326
520
|
|
|
327
|
-
print("Running db.py smoke tests
|
|
521
|
+
print("Running db.py smoke tests ...")
|
|
328
522
|
|
|
329
523
|
with tempfile.TemporaryDirectory() as tmp:
|
|
330
524
|
conn = get_connection(tmp)
|
|
331
525
|
|
|
332
|
-
# ----
|
|
333
|
-
|
|
334
|
-
|
|
526
|
+
# ---- add files ----
|
|
527
|
+
fid_parser = add_file(conn, "src/parser.py", "python", 1000.0, hash="abc123", line_count=50)
|
|
528
|
+
fid_lexer = add_file(conn, "src/lexer.py", "python", 1001.0, line_count=30)
|
|
529
|
+
fid_index = add_file(conn, "src/index.py", "python", 1002.0)
|
|
530
|
+
conn.commit()
|
|
531
|
+
|
|
532
|
+
assert get_file_by_path(conn, "src/parser.py")["id"] == fid_parser, "get_file_by_path failed"
|
|
533
|
+
assert get_file_by_path(conn, "nonexistent.py") is None, "get_file_by_path should return None"
|
|
534
|
+
print(" [PASS] File CRUD")
|
|
535
|
+
|
|
536
|
+
# ---- add symbols ----
|
|
537
|
+
# parser.py: parse_file (lines 10-40)
|
|
538
|
+
sid_parse = add_symbol(
|
|
539
|
+
conn, fid_parser, "parse_file", "function", 10, line_end=40,
|
|
540
|
+
qualified_name="parser.parse_file",
|
|
335
541
|
signature="def parse_file(path: str) -> AST",
|
|
336
|
-
doc_comment="Parse a source file and return its AST.",
|
|
337
|
-
content_hash=hash_content("def parse_file(): pass"),
|
|
338
542
|
)
|
|
339
|
-
|
|
340
|
-
|
|
543
|
+
# lexer.py: tokenize (lines 1-20)
|
|
544
|
+
sid_tokenize = add_symbol(
|
|
545
|
+
conn, fid_lexer, "tokenize", "function", 1, line_end=20,
|
|
546
|
+
qualified_name="lexer.tokenize",
|
|
341
547
|
signature="def tokenize(src: str) -> list",
|
|
342
548
|
)
|
|
343
|
-
|
|
344
|
-
|
|
549
|
+
# index.py: build_index (lines 5-60) — calls parse_file and tokenize
|
|
550
|
+
sid_build = add_symbol(
|
|
551
|
+
conn, fid_index, "build_index", "function", 5, line_end=60,
|
|
552
|
+
qualified_name="index.build_index",
|
|
345
553
|
signature="def build_index(root: str) -> None",
|
|
346
|
-
doc_comment="Build the code graph index for a project.",
|
|
347
554
|
)
|
|
555
|
+
conn.commit()
|
|
348
556
|
|
|
349
|
-
assert get_symbol_by_id(conn,
|
|
557
|
+
assert get_symbol_by_id(conn, sid_parse)["name"] == "parse_file", "get_symbol_by_id failed"
|
|
350
558
|
assert len(get_symbol_by_name(conn, "tokenize")) == 1, "get_symbol_by_name failed"
|
|
559
|
+
assert len(get_symbols_by_file(conn, fid_parser)) == 1, "get_symbols_by_file failed"
|
|
560
|
+
print(" [PASS] Symbol CRUD")
|
|
561
|
+
|
|
562
|
+
# ---- add references ----
|
|
563
|
+
# build_index calls parse_file at line 15 and tokenize at line 25
|
|
564
|
+
ref1 = add_reference(conn, fid_index, "parse_file", 15, kind="call")
|
|
565
|
+
ref2 = add_reference(conn, fid_index, "tokenize", 25, kind="call")
|
|
566
|
+
# parse_file calls tokenize at line 20
|
|
567
|
+
ref3 = add_reference(conn, fid_parser, "tokenize", 20, kind="call")
|
|
568
|
+
conn.commit()
|
|
351
569
|
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
add_edge(conn, sid_a, sid_b, "calls")
|
|
356
|
-
add_edge(conn, sid_c, sid_a, "calls") # duplicate — should be ignored
|
|
570
|
+
refs_index = get_references_by_file(conn, fid_index)
|
|
571
|
+
assert len(refs_index) == 2, f"expected 2 refs in index.py, got {len(refs_index)}"
|
|
572
|
+
print(" [PASS] Reference CRUD")
|
|
357
573
|
|
|
574
|
+
# ---- rebuild file edges ----
|
|
575
|
+
rebuild_file_edges(conn)
|
|
358
576
|
conn.commit()
|
|
359
577
|
|
|
360
|
-
|
|
361
|
-
|
|
578
|
+
fe_count = conn.execute("SELECT COUNT(*) FROM file_edges").fetchone()[0]
|
|
579
|
+
assert fe_count >= 2, f"expected >= 2 file edges, got {fe_count}"
|
|
580
|
+
|
|
581
|
+
# index.py -> parser.py edge should exist
|
|
582
|
+
fe = conn.execute(
|
|
583
|
+
"SELECT * FROM file_edges WHERE source_file_id=? AND target_file_id=?",
|
|
584
|
+
(fid_index, fid_parser),
|
|
585
|
+
).fetchone()
|
|
586
|
+
assert fe is not None, "file edge index->parser missing"
|
|
587
|
+
# index.py -> lexer.py edge should exist
|
|
588
|
+
fe2 = conn.execute(
|
|
589
|
+
"SELECT * FROM file_edges WHERE source_file_id=? AND target_file_id=?",
|
|
590
|
+
(fid_index, fid_lexer),
|
|
591
|
+
).fetchone()
|
|
592
|
+
assert fe2 is not None, "file edge index->lexer missing"
|
|
593
|
+
print(" [PASS] rebuild_file_edges")
|
|
594
|
+
|
|
595
|
+
# ---- rebuild symbol edges ----
|
|
596
|
+
rebuild_symbol_edges(conn)
|
|
597
|
+
conn.commit()
|
|
598
|
+
|
|
599
|
+
se_count = conn.execute("SELECT COUNT(*) FROM symbol_edges").fetchone()[0]
|
|
600
|
+
assert se_count >= 2, f"expected >= 2 symbol edges, got {se_count}"
|
|
601
|
+
|
|
602
|
+
# build_index -> parse_file edge should exist
|
|
603
|
+
se = conn.execute(
|
|
604
|
+
"SELECT * FROM symbol_edges WHERE source_symbol_id=? AND target_symbol_id=?",
|
|
605
|
+
(sid_build, sid_parse),
|
|
606
|
+
).fetchone()
|
|
607
|
+
assert se is not None, "symbol edge build_index->parse_file missing"
|
|
608
|
+
|
|
609
|
+
# build_index -> tokenize edge should exist
|
|
610
|
+
se2 = conn.execute(
|
|
611
|
+
"SELECT * FROM symbol_edges WHERE source_symbol_id=? AND target_symbol_id=?",
|
|
612
|
+
(sid_build, sid_tokenize),
|
|
613
|
+
).fetchone()
|
|
614
|
+
assert se2 is not None, "symbol edge build_index->tokenize missing"
|
|
615
|
+
|
|
616
|
+
# parse_file -> tokenize edge should exist
|
|
617
|
+
se3 = conn.execute(
|
|
618
|
+
"SELECT * FROM symbol_edges WHERE source_symbol_id=? AND target_symbol_id=?",
|
|
619
|
+
(sid_parse, sid_tokenize),
|
|
620
|
+
).fetchone()
|
|
621
|
+
assert se3 is not None, "symbol edge parse_file->tokenize missing"
|
|
622
|
+
print(" [PASS] rebuild_symbol_edges")
|
|
623
|
+
|
|
624
|
+
# ---- transitive deps via symbol_edges ----
|
|
625
|
+
deps = get_transitive_deps(conn, sid_build)
|
|
362
626
|
dep_ids = {d["id"] for d in deps}
|
|
363
|
-
assert
|
|
627
|
+
assert sid_parse in dep_ids, f"transitive deps missing parse_file: {dep_ids}"
|
|
628
|
+
assert sid_tokenize in dep_ids, f"transitive deps missing tokenize: {dep_ids}"
|
|
629
|
+
print(" [PASS] get_transitive_deps")
|
|
364
630
|
|
|
365
|
-
# ---- reverse deps ----
|
|
366
|
-
rdeps = get_reverse_deps(conn,
|
|
631
|
+
# ---- reverse deps via symbol_edges ----
|
|
632
|
+
rdeps = get_reverse_deps(conn, sid_tokenize)
|
|
367
633
|
rdep_ids = {d["id"] for d in rdeps}
|
|
368
|
-
assert
|
|
634
|
+
assert sid_parse in rdep_ids, f"reverse deps missing parse_file: {rdep_ids}"
|
|
635
|
+
assert sid_build in rdep_ids, f"reverse deps missing build_index: {rdep_ids}"
|
|
636
|
+
print(" [PASS] get_reverse_deps")
|
|
369
637
|
|
|
370
638
|
# ---- FTS search ----
|
|
371
639
|
results = fts_search(conn, "parse")
|
|
372
|
-
assert any(r["name"] == "parse_file" for r in results), "FTS search failed"
|
|
640
|
+
assert any(r["name"] == "parse_file" for r in results), "FTS search for 'parse' failed"
|
|
373
641
|
|
|
374
|
-
|
|
375
|
-
assert any(r["name"] == "
|
|
642
|
+
results_sig = fts_search(conn, "tokenize")
|
|
643
|
+
assert any(r["name"] == "tokenize" for r in results_sig), "FTS search for 'tokenize' failed"
|
|
376
644
|
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
assert get_symbol_by_id(conn, sid_b) is None, "remove_symbols_by_file failed"
|
|
645
|
+
results_qn = fts_search(conn, "index")
|
|
646
|
+
assert any(r["name"] == "build_index" for r in results_qn), "FTS qualified_name search failed"
|
|
647
|
+
print(" [PASS] FTS search")
|
|
381
648
|
|
|
382
649
|
# ---- stats ----
|
|
383
650
|
stats = get_stats(conn)
|
|
384
|
-
assert stats["
|
|
385
|
-
assert stats["
|
|
386
|
-
|
|
387
|
-
edge_count
|
|
388
|
-
assert
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
651
|
+
assert stats["file_count"] == 3, f"expected 3 files, got {stats['file_count']}"
|
|
652
|
+
assert stats["symbol_count"] == 3, f"expected 3 symbols, got {stats['symbol_count']}"
|
|
653
|
+
assert stats["reference_count"] == 3, f"expected 3 refs, got {stats['reference_count']}"
|
|
654
|
+
assert stats["edge_count"] >= 2, f"expected >= 2 symbol edges, got {stats['edge_count']}"
|
|
655
|
+
assert stats["file_edge_count"] >= 2, f"expected >= 2 file edges, got {stats['file_edge_count']}"
|
|
656
|
+
print(" [PASS] get_stats")
|
|
657
|
+
|
|
658
|
+
# ---- add_edge (manual symbol edge) ----
|
|
659
|
+
add_edge(conn, sid_parse, sid_build, "test_edge")
|
|
660
|
+
conn.commit()
|
|
661
|
+
manual_edge = conn.execute(
|
|
662
|
+
"SELECT * FROM symbol_edges WHERE source_symbol_id=? AND target_symbol_id=? AND kind=?",
|
|
663
|
+
(sid_parse, sid_build, "test_edge"),
|
|
664
|
+
).fetchone()
|
|
665
|
+
assert manual_edge is not None, "add_edge failed"
|
|
666
|
+
# duplicate should be ignored
|
|
667
|
+
add_edge(conn, sid_parse, sid_build, "test_edge")
|
|
668
|
+
conn.commit()
|
|
669
|
+
print(" [PASS] add_edge (manual)")
|
|
670
|
+
|
|
671
|
+
# ---- CASCADE deletes ----
|
|
672
|
+
# Remove lexer.py file -> tokenize symbol, refs to tokenize, and edges should cascade
|
|
673
|
+
sym_count_before = conn.execute("SELECT COUNT(*) FROM symbols").fetchone()[0]
|
|
674
|
+
ref_count_before = conn.execute("SELECT COUNT(*) FROM refs").fetchone()[0]
|
|
675
|
+
|
|
676
|
+
remove_file(conn, "src/lexer.py")
|
|
677
|
+
conn.commit()
|
|
678
|
+
|
|
679
|
+
assert get_file_by_path(conn, "src/lexer.py") is None, "file not removed"
|
|
680
|
+
assert get_symbol_by_id(conn, sid_tokenize) is None, "symbol not cascaded on file delete"
|
|
681
|
+
|
|
682
|
+
sym_count_after = conn.execute("SELECT COUNT(*) FROM symbols").fetchone()[0]
|
|
683
|
+
assert sym_count_after == sym_count_before - 1, f"expected {sym_count_before - 1} symbols, got {sym_count_after}"
|
|
684
|
+
|
|
685
|
+
# Refs in lexer.py should be gone (there were none, but verify no error)
|
|
686
|
+
# Symbol edges involving tokenize should be gone
|
|
687
|
+
edges_to_tokenize = conn.execute(
|
|
688
|
+
"SELECT COUNT(*) FROM symbol_edges WHERE target_symbol_id=? OR source_symbol_id=?",
|
|
689
|
+
(sid_tokenize, sid_tokenize),
|
|
690
|
+
).fetchone()[0]
|
|
691
|
+
assert edges_to_tokenize == 0, f"expected 0 edges involving deleted symbol, got {edges_to_tokenize}"
|
|
692
|
+
print(" [PASS] CASCADE deletes (remove_file)")
|
|
693
|
+
|
|
694
|
+
# ---- remove_symbols_by_file (without removing file) ----
|
|
695
|
+
remove_symbols_by_file(conn, "src/parser.py")
|
|
696
|
+
conn.commit()
|
|
697
|
+
assert get_symbol_by_id(conn, sid_parse) is None, "remove_symbols_by_file failed"
|
|
698
|
+
# File itself should still exist
|
|
699
|
+
assert get_file_by_path(conn, "src/parser.py") is not None, "file should still exist after remove_symbols_by_file"
|
|
700
|
+
print(" [PASS] remove_symbols_by_file")
|
|
701
|
+
|
|
702
|
+
# ---- final stats ----
|
|
703
|
+
final_stats = get_stats(conn)
|
|
704
|
+
print(f"\n Final stats: {final_stats}")
|
|
705
|
+
|
|
706
|
+
# ---- hash_content utility ----
|
|
707
|
+
h = hash_content("hello world")
|
|
708
|
+
assert len(h) == 64, "hash_content should return 64-char hex string"
|
|
709
|
+
assert h == hash_content("hello world"), "hash_content should be deterministic"
|
|
710
|
+
print(" [PASS] hash_content")
|
|
711
|
+
|
|
712
|
+
print("\nAll smoke tests passed.")
|