feed-the-machine 1.3.1 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +82 -180
- package/ftm-git/SKILL.md +0 -1
- package/ftm-map/SKILL.md +46 -14
- package/ftm-map/scripts/db.py +439 -118
- package/ftm-map/scripts/index.py +128 -54
- package/ftm-map/scripts/parser.py +89 -320
- package/ftm-map/scripts/queries/go-tags.scm +20 -0
- package/ftm-map/scripts/queries/javascript-tags.scm +19 -7
- package/ftm-map/scripts/queries/python-tags.scm +22 -8
- package/ftm-map/scripts/queries/ruby-tags.scm +19 -0
- package/ftm-map/scripts/queries/rust-tags.scm +37 -0
- package/ftm-map/scripts/queries/typescript-tags.scm +20 -8
- package/ftm-map/scripts/query.py +176 -24
- package/ftm-map/scripts/ranker.py +377 -0
- package/ftm-map/scripts/requirements.txt +3 -0
- package/ftm-map/scripts/setup.sh +11 -0
- package/ftm-map/scripts/test_db.py +355 -115
- package/ftm-map/scripts/test_parser.py +169 -101
- package/ftm-map/scripts/test_query.py +178 -61
- package/ftm-map/scripts/test_ranker.py +199 -0
- package/ftm-map/scripts/views.py +107 -61
- package/ftm-mind/references/event-registry.md +0 -10
- package/hooks/ftm-blackboard-enforcer.sh +1 -4
- package/package.json +1 -1
- package/ftm-inbox/backend/__pycache__/main.cpython-314.pyc +0 -0
- package/ftm-inbox/backend/planner/__pycache__/__init__.cpython-314.pyc +0 -0
- package/ftm-inbox/backend/planner/__pycache__/generator.cpython-314.pyc +0 -0
- package/ftm-inbox/backend/planner/__pycache__/schema.cpython-314.pyc +0 -0
- package/ftm-inbox/backend/routes/__pycache__/plan.cpython-314.pyc +0 -0
- package/ftm-map/scripts/tests/fixtures/__init__.py +0 -0
- package/ftm-map/scripts/tests/fixtures/sample_project/api.ts +0 -16
- package/ftm-map/scripts/tests/fixtures/sample_project/auth.py +0 -15
- package/ftm-map/scripts/tests/fixtures/sample_project/utils.js +0 -16
package/ftm-map/scripts/index.py
CHANGED
|
@@ -1,12 +1,21 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
|
-
"""ftm-map indexer: builds the code knowledge graph from source files.
|
|
2
|
+
"""ftm-map indexer: builds the code knowledge graph from source files.
|
|
3
|
+
|
|
4
|
+
Two-phase indexing:
|
|
5
|
+
Phase 1 — Parse each file with tree-sitter, insert file/symbol/ref rows.
|
|
6
|
+
Phase 2 — Materialize file_edges with Aider-style weight heuristics and
|
|
7
|
+
symbol_edges via enclosing-scope resolution.
|
|
8
|
+
"""
|
|
3
9
|
|
|
4
10
|
import argparse
|
|
5
11
|
import json
|
|
12
|
+
import math
|
|
6
13
|
import os
|
|
14
|
+
import re
|
|
7
15
|
import subprocess
|
|
8
16
|
import sys
|
|
9
17
|
import time
|
|
18
|
+
from collections import Counter
|
|
10
19
|
from datetime import datetime, timezone
|
|
11
20
|
from pathlib import Path
|
|
12
21
|
|
|
@@ -15,13 +24,14 @@ sys.path.insert(0, os.path.dirname(__file__))
|
|
|
15
24
|
|
|
16
25
|
from db import (
|
|
17
26
|
get_connection,
|
|
27
|
+
add_file,
|
|
18
28
|
add_symbol,
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
get_symbol_by_name,
|
|
29
|
+
add_reference,
|
|
30
|
+
remove_file,
|
|
22
31
|
get_stats,
|
|
32
|
+
rebuild_symbol_edges,
|
|
23
33
|
)
|
|
24
|
-
from parser import
|
|
34
|
+
from parser import get_tags, detect_language, EXTENSION_MAP, compute_content_hash
|
|
25
35
|
|
|
26
36
|
META_REGISTRY = os.path.expanduser("~/.claude/ftm-state/maps/index.json")
|
|
27
37
|
|
|
@@ -67,60 +77,121 @@ def discover_files(project_root: str) -> list[str]:
|
|
|
67
77
|
|
|
68
78
|
|
|
69
79
|
def index_files(conn, files: list[str], project_root: str) -> dict:
|
|
70
|
-
"""Parse and insert symbols
|
|
80
|
+
"""Parse and insert files, symbols, references, then materialize edges.
|
|
71
81
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
82
|
+
Phase 1 — For each file: read source, compute hash, insert file row,
|
|
83
|
+
extract def/ref tags via tree-sitter, insert symbol and ref rows.
|
|
84
|
+
Phase 2 — Build file_edges with Aider weight heuristics (long descriptive
|
|
85
|
+
names 10x, private 0.1x, overloaded 0.1x, sqrt-dampened counts).
|
|
86
|
+
Then rebuild symbol_edges via enclosing-scope resolution.
|
|
77
87
|
|
|
78
|
-
Returns a dict with
|
|
88
|
+
Returns a dict with symbols, references, file_edges, symbol_edges counts.
|
|
79
89
|
"""
|
|
80
90
|
total_symbols = 0
|
|
81
|
-
|
|
91
|
+
total_refs = 0
|
|
82
92
|
|
|
83
|
-
#
|
|
93
|
+
# ------------------------------------------------------------------
|
|
94
|
+
# Phase 1: parse each file and insert rows
|
|
95
|
+
# ------------------------------------------------------------------
|
|
84
96
|
for fpath in files:
|
|
85
97
|
if not os.path.exists(fpath):
|
|
86
98
|
print(f"Warning: file not found, skipping: {fpath}", file=sys.stderr)
|
|
87
99
|
continue
|
|
88
100
|
|
|
89
101
|
rel_path = os.path.relpath(fpath, project_root)
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
for sym in symbols:
|
|
93
|
-
add_symbol(
|
|
94
|
-
conn,
|
|
95
|
-
name=sym.name,
|
|
96
|
-
kind=sym.kind,
|
|
97
|
-
file_path=rel_path,
|
|
98
|
-
start_line=sym.start_line,
|
|
99
|
-
end_line=sym.end_line,
|
|
100
|
-
signature=sym.signature,
|
|
101
|
-
doc_comment=sym.doc_comment,
|
|
102
|
-
content_hash=sym.content_hash,
|
|
103
|
-
)
|
|
104
|
-
total_symbols += 1
|
|
102
|
+
lang = detect_language(fpath)
|
|
103
|
+
mtime = os.path.getmtime(fpath)
|
|
105
104
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
105
|
+
# Stream-friendly: read once, extract metadata, then release
|
|
106
|
+
try:
|
|
107
|
+
with open(fpath, "r", encoding="utf-8", errors="replace") as fh:
|
|
108
|
+
source = fh.read()
|
|
109
|
+
except (IOError, OSError) as exc:
|
|
110
|
+
print(f"Warning: Cannot read {fpath}: {exc}", file=sys.stderr)
|
|
109
111
|
continue
|
|
110
112
|
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
source_rows = get_symbol_by_name(conn, rel.source_name)
|
|
114
|
-
target_rows = get_symbol_by_name(conn, rel.target_name)
|
|
113
|
+
line_count = source.count("\n") + 1
|
|
114
|
+
content_hash = compute_content_hash(source)
|
|
115
115
|
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
116
|
+
# Insert file record
|
|
117
|
+
file_id = add_file(
|
|
118
|
+
conn, rel_path, lang, mtime,
|
|
119
|
+
hash=content_hash, line_count=line_count,
|
|
120
|
+
)
|
|
119
121
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
+
# Extract def/ref tags via tree-sitter
|
|
123
|
+
tags = get_tags(fpath, rel_path)
|
|
124
|
+
|
|
125
|
+
for tag in tags:
|
|
126
|
+
if tag.kind == "def":
|
|
127
|
+
add_symbol(conn, file_id, tag.name, "definition", tag.line, signature=None)
|
|
128
|
+
total_symbols += 1
|
|
129
|
+
elif tag.kind == "ref":
|
|
130
|
+
add_reference(conn, file_id, tag.name, tag.line, kind="call")
|
|
131
|
+
total_refs += 1
|
|
132
|
+
|
|
133
|
+
# ------------------------------------------------------------------
|
|
134
|
+
# Phase 2: materialize edges
|
|
135
|
+
# ------------------------------------------------------------------
|
|
136
|
+
|
|
137
|
+
# Build defines map: ident -> set of file_ids that define it
|
|
138
|
+
defines = {}
|
|
139
|
+
for row in conn.execute("SELECT name, file_id FROM symbols").fetchall():
|
|
140
|
+
defines.setdefault(row["name"], set()).add(row["file_id"])
|
|
141
|
+
|
|
142
|
+
# Build references map: ident -> list of file_ids that reference it
|
|
143
|
+
references_map = {}
|
|
144
|
+
for row in conn.execute("SELECT symbol_name, file_id FROM refs").fetchall():
|
|
145
|
+
references_map.setdefault(row["symbol_name"], []).append(row["file_id"])
|
|
146
|
+
|
|
147
|
+
# Materialize file_edges with Aider weight heuristics
|
|
148
|
+
conn.execute("DELETE FROM file_edges")
|
|
149
|
+
|
|
150
|
+
for ident, ref_file_ids in references_map.items():
|
|
151
|
+
definers = defines.get(ident, set())
|
|
152
|
+
if not definers:
|
|
153
|
+
continue
|
|
122
154
|
|
|
123
|
-
|
|
155
|
+
# Aider weight heuristics
|
|
156
|
+
mul = 1.0
|
|
157
|
+
# Long descriptive names (camelCase or snake_case, >= 8 chars) weighted higher
|
|
158
|
+
if len(ident) >= 8 and re.match(r"[a-z_]+[A-Z]|[a-z]+_[a-z]", ident):
|
|
159
|
+
mul *= 10
|
|
160
|
+
# Private names weighted lower
|
|
161
|
+
if ident.startswith("_"):
|
|
162
|
+
mul *= 0.1
|
|
163
|
+
# Overloaded names (defined in many files) weighted lower
|
|
164
|
+
if len(definers) >= 5:
|
|
165
|
+
mul *= 0.1
|
|
166
|
+
|
|
167
|
+
# Count refs per file, then create weighted edges
|
|
168
|
+
ref_counts = Counter(ref_file_ids)
|
|
169
|
+
|
|
170
|
+
for ref_file_id, count in ref_counts.items():
|
|
171
|
+
weight = mul * math.sqrt(count)
|
|
172
|
+
for def_file_id in definers:
|
|
173
|
+
if ref_file_id != def_file_id: # No self-edges
|
|
174
|
+
conn.execute(
|
|
175
|
+
"""INSERT INTO file_edges (source_file_id, target_file_id, weight)
|
|
176
|
+
VALUES (?, ?, ?)
|
|
177
|
+
ON CONFLICT(source_file_id, target_file_id)
|
|
178
|
+
DO UPDATE SET weight = MAX(weight, excluded.weight)""",
|
|
179
|
+
(ref_file_id, def_file_id, weight),
|
|
180
|
+
)
|
|
181
|
+
|
|
182
|
+
# Materialize symbol_edges via enclosing-scope resolution
|
|
183
|
+
rebuild_symbol_edges(conn)
|
|
184
|
+
|
|
185
|
+
# Gather edge counts
|
|
186
|
+
file_edge_count = conn.execute("SELECT COUNT(*) FROM file_edges").fetchone()[0]
|
|
187
|
+
symbol_edge_count = conn.execute("SELECT COUNT(*) FROM symbol_edges").fetchone()[0]
|
|
188
|
+
|
|
189
|
+
return {
|
|
190
|
+
"symbols": total_symbols,
|
|
191
|
+
"references": total_refs,
|
|
192
|
+
"file_edges": file_edge_count,
|
|
193
|
+
"symbol_edges": symbol_edge_count,
|
|
194
|
+
}
|
|
124
195
|
|
|
125
196
|
|
|
126
197
|
# ---------------------------------------------------------------------------
|
|
@@ -143,16 +214,14 @@ def bootstrap(project_root: str) -> None:
|
|
|
143
214
|
|
|
144
215
|
conn = get_connection(abs_root)
|
|
145
216
|
try:
|
|
146
|
-
# Full rebuild — clear
|
|
147
|
-
# FTS5 rows must be removed before symbol rows
|
|
148
|
-
# table does not cascade deletes.
|
|
217
|
+
# Full rebuild — clear all tables. CASCADE handles symbols, refs, edges.
|
|
218
|
+
# FTS5 rows must be removed before symbol rows (content= table).
|
|
149
219
|
symbol_ids = [
|
|
150
220
|
row[0] for row in conn.execute("SELECT id FROM symbols").fetchall()
|
|
151
221
|
]
|
|
152
222
|
for sid in symbol_ids:
|
|
153
223
|
conn.execute("DELETE FROM symbols_fts WHERE rowid=?", (sid,))
|
|
154
|
-
conn.execute("DELETE FROM
|
|
155
|
-
conn.execute("DELETE FROM edges")
|
|
224
|
+
conn.execute("DELETE FROM files")
|
|
156
225
|
|
|
157
226
|
stats = index_files(conn, files, abs_root)
|
|
158
227
|
conn.commit()
|
|
@@ -162,7 +231,9 @@ def bootstrap(project_root: str) -> None:
|
|
|
162
231
|
"mode": "bootstrap",
|
|
163
232
|
"files_parsed": len(files),
|
|
164
233
|
"symbols": stats["symbols"],
|
|
165
|
-
"
|
|
234
|
+
"references": stats["references"],
|
|
235
|
+
"file_edges": stats["file_edges"],
|
|
236
|
+
"symbol_edges": stats["symbol_edges"],
|
|
166
237
|
"duration_s": round(duration, 2),
|
|
167
238
|
}
|
|
168
239
|
print(json.dumps(result))
|
|
@@ -185,8 +256,9 @@ def incremental(project_root: str, files_str: str) -> None:
|
|
|
185
256
|
"""Incremental update: re-index only the specified files.
|
|
186
257
|
|
|
187
258
|
*files_str* is a comma-separated list of file paths (relative or absolute).
|
|
188
|
-
Old symbol/edge data for each file is
|
|
189
|
-
entries do not accumulate.
|
|
259
|
+
Old file/symbol/ref/edge data for each file is cascade-deleted via
|
|
260
|
+
remove_file() before re-parsing so stale entries do not accumulate.
|
|
261
|
+
All edges are rebuilt since changes can ripple across files.
|
|
190
262
|
"""
|
|
191
263
|
abs_root = os.path.abspath(project_root)
|
|
192
264
|
start = time.time()
|
|
@@ -198,10 +270,10 @@ def incremental(project_root: str, files_str: str) -> None:
|
|
|
198
270
|
|
|
199
271
|
conn = get_connection(abs_root)
|
|
200
272
|
try:
|
|
201
|
-
# Remove stale data for all targeted files
|
|
273
|
+
# Remove stale data for all targeted files (cascading delete).
|
|
202
274
|
for fpath in abs_files:
|
|
203
275
|
rel_path = os.path.relpath(fpath, abs_root)
|
|
204
|
-
|
|
276
|
+
remove_file(conn, rel_path)
|
|
205
277
|
|
|
206
278
|
existing_files = [f for f in abs_files if os.path.exists(f)]
|
|
207
279
|
if not existing_files:
|
|
@@ -221,11 +293,13 @@ def incremental(project_root: str, files_str: str) -> None:
|
|
|
221
293
|
"mode": "incremental",
|
|
222
294
|
"files_parsed": len(existing_files),
|
|
223
295
|
"symbols": stats["symbols"],
|
|
224
|
-
"
|
|
296
|
+
"references": stats["references"],
|
|
297
|
+
"file_edges": stats["file_edges"],
|
|
298
|
+
"symbol_edges": stats["symbol_edges"],
|
|
225
299
|
"duration_s": round(duration, 2),
|
|
226
300
|
}
|
|
227
301
|
print(json.dumps(result))
|
|
228
|
-
update_meta_registry(abs_root, db_stats["
|
|
302
|
+
update_meta_registry(abs_root, db_stats["symbol_count"])
|
|
229
303
|
except Exception as exc: # noqa: BLE001
|
|
230
304
|
print(f"Error during incremental update: {exc}", file=sys.stderr)
|
|
231
305
|
conn.rollback()
|