feed-the-machine 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/ftm-git/SKILL.md +0 -1
  2. package/ftm-map/SKILL.md +46 -14
  3. package/ftm-map/scripts/db.py +439 -118
  4. package/ftm-map/scripts/index.py +128 -54
  5. package/ftm-map/scripts/parser.py +89 -320
  6. package/ftm-map/scripts/queries/go-tags.scm +20 -0
  7. package/ftm-map/scripts/queries/javascript-tags.scm +19 -7
  8. package/ftm-map/scripts/queries/python-tags.scm +22 -8
  9. package/ftm-map/scripts/queries/ruby-tags.scm +19 -0
  10. package/ftm-map/scripts/queries/rust-tags.scm +37 -0
  11. package/ftm-map/scripts/queries/typescript-tags.scm +20 -8
  12. package/ftm-map/scripts/query.py +176 -24
  13. package/ftm-map/scripts/ranker.py +377 -0
  14. package/ftm-map/scripts/requirements.txt +3 -0
  15. package/ftm-map/scripts/setup.sh +11 -0
  16. package/ftm-map/scripts/test_db.py +355 -115
  17. package/ftm-map/scripts/test_parser.py +169 -101
  18. package/ftm-map/scripts/test_query.py +178 -61
  19. package/ftm-map/scripts/test_ranker.py +199 -0
  20. package/ftm-map/scripts/views.py +107 -61
  21. package/ftm-mind/references/event-registry.md +0 -10
  22. package/hooks/ftm-blackboard-enforcer.sh +1 -4
  23. package/package.json +1 -1
  24. package/ftm-inbox/backend/__pycache__/main.cpython-314.pyc +0 -0
  25. package/ftm-inbox/backend/planner/__pycache__/__init__.cpython-314.pyc +0 -0
  26. package/ftm-inbox/backend/planner/__pycache__/generator.cpython-314.pyc +0 -0
  27. package/ftm-inbox/backend/planner/__pycache__/schema.cpython-314.pyc +0 -0
  28. package/ftm-inbox/backend/routes/__pycache__/plan.cpython-314.pyc +0 -0
  29. package/ftm-map/scripts/tests/fixtures/__init__.py +0 -0
  30. package/ftm-map/scripts/tests/fixtures/sample_project/api.ts +0 -16
  31. package/ftm-map/scripts/tests/fixtures/sample_project/auth.py +0 -15
  32. package/ftm-map/scripts/tests/fixtures/sample_project/utils.js +0 -16
@@ -1,12 +1,21 @@
1
1
  #!/usr/bin/env python3
2
- """ftm-map indexer: builds the code knowledge graph from source files."""
2
+ """ftm-map indexer: builds the code knowledge graph from source files.
3
+
4
+ Two-phase indexing:
5
+ Phase 1 — Parse each file with tree-sitter, insert file/symbol/ref rows.
6
+ Phase 2 — Materialize file_edges with Aider-style weight heuristics and
7
+ symbol_edges via enclosing-scope resolution.
8
+ """
3
9
 
4
10
  import argparse
5
11
  import json
12
+ import math
6
13
  import os
14
+ import re
7
15
  import subprocess
8
16
  import sys
9
17
  import time
18
+ from collections import Counter
10
19
  from datetime import datetime, timezone
11
20
  from pathlib import Path
12
21
 
@@ -15,13 +24,14 @@ sys.path.insert(0, os.path.dirname(__file__))
15
24
 
16
25
  from db import (
17
26
  get_connection,
27
+ add_file,
18
28
  add_symbol,
19
- remove_symbols_by_file,
20
- add_edge,
21
- get_symbol_by_name,
29
+ add_reference,
30
+ remove_file,
22
31
  get_stats,
32
+ rebuild_symbol_edges,
23
33
  )
24
- from parser import parse_file, extract_relationships, EXTENSION_MAP
34
+ from parser import get_tags, detect_language, EXTENSION_MAP, compute_content_hash
25
35
 
26
36
  META_REGISTRY = os.path.expanduser("~/.claude/ftm-state/maps/index.json")
27
37
 
@@ -67,60 +77,121 @@ def discover_files(project_root: str) -> list[str]:
67
77
 
68
78
 
69
79
  def index_files(conn, files: list[str], project_root: str) -> dict:
70
- """Parse and insert symbols + edges for a list of absolute file paths.
80
+ """Parse and insert files, symbols, references, then materialize edges.
71
81
 
72
- Two-phase approach:
73
- Phase 1 parse every file and insert all symbols so that the
74
- symbol table is fully populated before edge resolution.
75
- Phase 2 extract relationships and resolve source/target names to
76
- existing symbol IDs. Unknown targets are silently skipped.
82
+ Phase 1 — For each file: read source, compute hash, insert file row,
83
+ extract def/ref tags via tree-sitter, insert symbol and ref rows.
84
+ Phase 2 — Build file_edges with Aider weight heuristics (long descriptive
85
+ names 10x, private 0.1x, overloaded 0.1x, sqrt-dampened counts).
86
+ Then rebuild symbol_edges via enclosing-scope resolution.
77
87
 
78
- Returns a dict with 'symbols' and 'edges' counts.
88
+ Returns a dict with symbols, references, file_edges, symbol_edges counts.
79
89
  """
80
90
  total_symbols = 0
81
- total_edges = 0
91
+ total_refs = 0
82
92
 
83
- # Phase 1: insert all symbols first so cross-file edges can be resolved.
93
+ # ------------------------------------------------------------------
94
+ # Phase 1: parse each file and insert rows
95
+ # ------------------------------------------------------------------
84
96
  for fpath in files:
85
97
  if not os.path.exists(fpath):
86
98
  print(f"Warning: file not found, skipping: {fpath}", file=sys.stderr)
87
99
  continue
88
100
 
89
101
  rel_path = os.path.relpath(fpath, project_root)
90
- symbols = parse_file(fpath)
91
-
92
- for sym in symbols:
93
- add_symbol(
94
- conn,
95
- name=sym.name,
96
- kind=sym.kind,
97
- file_path=rel_path,
98
- start_line=sym.start_line,
99
- end_line=sym.end_line,
100
- signature=sym.signature,
101
- doc_comment=sym.doc_comment,
102
- content_hash=sym.content_hash,
103
- )
104
- total_symbols += 1
102
+ lang = detect_language(fpath)
103
+ mtime = os.path.getmtime(fpath)
105
104
 
106
- # Phase 2: resolve and insert edges.
107
- for fpath in files:
108
- if not os.path.exists(fpath):
105
+ # Stream-friendly: read once, extract metadata, then release
106
+ try:
107
+ with open(fpath, "r", encoding="utf-8", errors="replace") as fh:
108
+ source = fh.read()
109
+ except (IOError, OSError) as exc:
110
+ print(f"Warning: Cannot read {fpath}: {exc}", file=sys.stderr)
109
111
  continue
110
112
 
111
- rels = extract_relationships(fpath)
112
- for rel in rels:
113
- source_rows = get_symbol_by_name(conn, rel.source_name)
114
- target_rows = get_symbol_by_name(conn, rel.target_name)
113
+ line_count = source.count("\n") + 1
114
+ content_hash = compute_content_hash(source)
115
115
 
116
- # Skip if either end of the relationship is unresolvable.
117
- if not source_rows or not target_rows:
118
- continue
116
+ # Insert file record
117
+ file_id = add_file(
118
+ conn, rel_path, lang, mtime,
119
+ hash=content_hash, line_count=line_count,
120
+ )
119
121
 
120
- add_edge(conn, source_rows[0]["id"], target_rows[0]["id"], rel.kind)
121
- total_edges += 1
122
+ # Extract def/ref tags via tree-sitter
123
+ tags = get_tags(fpath, rel_path)
124
+
125
+ for tag in tags:
126
+ if tag.kind == "def":
127
+ add_symbol(conn, file_id, tag.name, "definition", tag.line, signature=None)
128
+ total_symbols += 1
129
+ elif tag.kind == "ref":
130
+ add_reference(conn, file_id, tag.name, tag.line, kind="call")
131
+ total_refs += 1
132
+
133
+ # ------------------------------------------------------------------
134
+ # Phase 2: materialize edges
135
+ # ------------------------------------------------------------------
136
+
137
+ # Build defines map: ident -> set of file_ids that define it
138
+ defines = {}
139
+ for row in conn.execute("SELECT name, file_id FROM symbols").fetchall():
140
+ defines.setdefault(row["name"], set()).add(row["file_id"])
141
+
142
+ # Build references map: ident -> list of file_ids that reference it
143
+ references_map = {}
144
+ for row in conn.execute("SELECT symbol_name, file_id FROM refs").fetchall():
145
+ references_map.setdefault(row["symbol_name"], []).append(row["file_id"])
146
+
147
+ # Materialize file_edges with Aider weight heuristics
148
+ conn.execute("DELETE FROM file_edges")
149
+
150
+ for ident, ref_file_ids in references_map.items():
151
+ definers = defines.get(ident, set())
152
+ if not definers:
153
+ continue
122
154
 
123
- return {"symbols": total_symbols, "edges": total_edges}
155
+ # Aider weight heuristics
156
+ mul = 1.0
157
+ # Long descriptive names (camelCase or snake_case, >= 8 chars) weighted higher
158
+ if len(ident) >= 8 and re.match(r"[a-z_]+[A-Z]|[a-z]+_[a-z]", ident):
159
+ mul *= 10
160
+ # Private names weighted lower
161
+ if ident.startswith("_"):
162
+ mul *= 0.1
163
+ # Overloaded names (defined in many files) weighted lower
164
+ if len(definers) >= 5:
165
+ mul *= 0.1
166
+
167
+ # Count refs per file, then create weighted edges
168
+ ref_counts = Counter(ref_file_ids)
169
+
170
+ for ref_file_id, count in ref_counts.items():
171
+ weight = mul * math.sqrt(count)
172
+ for def_file_id in definers:
173
+ if ref_file_id != def_file_id: # No self-edges
174
+ conn.execute(
175
+ """INSERT INTO file_edges (source_file_id, target_file_id, weight)
176
+ VALUES (?, ?, ?)
177
+ ON CONFLICT(source_file_id, target_file_id)
178
+ DO UPDATE SET weight = MAX(weight, excluded.weight)""",
179
+ (ref_file_id, def_file_id, weight),
180
+ )
181
+
182
+ # Materialize symbol_edges via enclosing-scope resolution
183
+ rebuild_symbol_edges(conn)
184
+
185
+ # Gather edge counts
186
+ file_edge_count = conn.execute("SELECT COUNT(*) FROM file_edges").fetchone()[0]
187
+ symbol_edge_count = conn.execute("SELECT COUNT(*) FROM symbol_edges").fetchone()[0]
188
+
189
+ return {
190
+ "symbols": total_symbols,
191
+ "references": total_refs,
192
+ "file_edges": file_edge_count,
193
+ "symbol_edges": symbol_edge_count,
194
+ }
124
195
 
125
196
 
126
197
  # ---------------------------------------------------------------------------
@@ -143,16 +214,14 @@ def bootstrap(project_root: str) -> None:
143
214
 
144
215
  conn = get_connection(abs_root)
145
216
  try:
146
- # Full rebuild — clear existing content first.
147
- # FTS5 rows must be removed before symbol rows because the content=
148
- # table does not cascade deletes.
217
+ # Full rebuild — clear all tables. CASCADE handles symbols, refs, edges.
218
+ # FTS5 rows must be removed before symbol rows (content= table).
149
219
  symbol_ids = [
150
220
  row[0] for row in conn.execute("SELECT id FROM symbols").fetchall()
151
221
  ]
152
222
  for sid in symbol_ids:
153
223
  conn.execute("DELETE FROM symbols_fts WHERE rowid=?", (sid,))
154
- conn.execute("DELETE FROM symbols")
155
- conn.execute("DELETE FROM edges")
224
+ conn.execute("DELETE FROM files")
156
225
 
157
226
  stats = index_files(conn, files, abs_root)
158
227
  conn.commit()
@@ -162,7 +231,9 @@ def bootstrap(project_root: str) -> None:
162
231
  "mode": "bootstrap",
163
232
  "files_parsed": len(files),
164
233
  "symbols": stats["symbols"],
165
- "edges": stats["edges"],
234
+ "references": stats["references"],
235
+ "file_edges": stats["file_edges"],
236
+ "symbol_edges": stats["symbol_edges"],
166
237
  "duration_s": round(duration, 2),
167
238
  }
168
239
  print(json.dumps(result))
@@ -185,8 +256,9 @@ def incremental(project_root: str, files_str: str) -> None:
185
256
  """Incremental update: re-index only the specified files.
186
257
 
187
258
  *files_str* is a comma-separated list of file paths (relative or absolute).
188
- Old symbol/edge data for each file is removed before re-parsing so stale
189
- entries do not accumulate.
259
+ Old file/symbol/ref/edge data for each file is cascade-deleted via
260
+ remove_file() before re-parsing so stale entries do not accumulate.
261
+ All edges are rebuilt since changes can ripple across files.
190
262
  """
191
263
  abs_root = os.path.abspath(project_root)
192
264
  start = time.time()
@@ -198,10 +270,10 @@ def incremental(project_root: str, files_str: str) -> None:
198
270
 
199
271
  conn = get_connection(abs_root)
200
272
  try:
201
- # Remove stale data for all targeted files before re-parsing.
273
+ # Remove stale data for all targeted files (cascading delete).
202
274
  for fpath in abs_files:
203
275
  rel_path = os.path.relpath(fpath, abs_root)
204
- remove_symbols_by_file(conn, rel_path)
276
+ remove_file(conn, rel_path)
205
277
 
206
278
  existing_files = [f for f in abs_files if os.path.exists(f)]
207
279
  if not existing_files:
@@ -221,11 +293,13 @@ def incremental(project_root: str, files_str: str) -> None:
221
293
  "mode": "incremental",
222
294
  "files_parsed": len(existing_files),
223
295
  "symbols": stats["symbols"],
224
- "edges": stats["edges"],
296
+ "references": stats["references"],
297
+ "file_edges": stats["file_edges"],
298
+ "symbol_edges": stats["symbol_edges"],
225
299
  "duration_s": round(duration, 2),
226
300
  }
227
301
  print(json.dumps(result))
228
- update_meta_registry(abs_root, db_stats["symbols"])
302
+ update_meta_registry(abs_root, db_stats["symbol_count"])
229
303
  except Exception as exc: # noqa: BLE001
230
304
  print(f"Error during incremental update: {exc}", file=sys.stderr)
231
305
  conn.rollback()