feed-the-machine 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (224) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +170 -170
  3. package/bin/generate-manifest.mjs +463 -463
  4. package/bin/install.mjs +491 -491
  5. package/docs/HOOKS.md +243 -243
  6. package/docs/INBOX.md +233 -233
  7. package/ftm/SKILL.md +122 -122
  8. package/ftm-audit/SKILL.md +623 -541
  9. package/ftm-audit/references/protocols/PROJECT-PATTERNS.md +91 -91
  10. package/ftm-audit/references/protocols/RUNTIME-WIRING.md +66 -66
  11. package/ftm-audit/references/protocols/WIRING-CONTRACTS.md +135 -135
  12. package/ftm-audit/references/strategies/AUTO-FIX-STRATEGIES.md +69 -69
  13. package/ftm-audit/references/templates/REPORT-FORMAT.md +96 -96
  14. package/ftm-audit/scripts/run-knip.sh +23 -23
  15. package/ftm-audit.yml +2 -2
  16. package/ftm-brainstorm/SKILL.md +498 -498
  17. package/ftm-brainstorm/evals/evals.json +100 -100
  18. package/ftm-brainstorm/evals/promptfoo.yaml +109 -109
  19. package/ftm-brainstorm/references/agent-prompts.md +224 -224
  20. package/ftm-brainstorm/references/plan-template.md +121 -121
  21. package/ftm-brainstorm.yml +2 -2
  22. package/ftm-browse/SKILL.md +454 -454
  23. package/ftm-browse/daemon/browser-manager.ts +206 -206
  24. package/ftm-browse/daemon/bun.lock +30 -30
  25. package/ftm-browse/daemon/cli.ts +347 -347
  26. package/ftm-browse/daemon/commands.ts +410 -410
  27. package/ftm-browse/daemon/main.ts +357 -357
  28. package/ftm-browse/daemon/package.json +17 -17
  29. package/ftm-browse/daemon/server.ts +189 -189
  30. package/ftm-browse/daemon/snapshot.ts +519 -519
  31. package/ftm-browse/daemon/tsconfig.json +22 -22
  32. package/ftm-browse.yml +4 -4
  33. package/ftm-capture/SKILL.md +370 -370
  34. package/ftm-capture.yml +4 -4
  35. package/ftm-codex-gate/SKILL.md +361 -361
  36. package/ftm-codex-gate.yml +2 -2
  37. package/ftm-config/SKILL.md +345 -345
  38. package/ftm-config.default.yml +82 -80
  39. package/ftm-config.yml +2 -2
  40. package/ftm-council/SKILL.md +416 -416
  41. package/ftm-council/references/prompts/CLAUDE-INVESTIGATION.md +60 -60
  42. package/ftm-council/references/prompts/CODEX-INVESTIGATION.md +58 -58
  43. package/ftm-council/references/prompts/GEMINI-INVESTIGATION.md +58 -58
  44. package/ftm-council/references/prompts/REBUTTAL-TEMPLATE.md +57 -57
  45. package/ftm-council/references/protocols/PREREQUISITES.md +47 -47
  46. package/ftm-council/references/protocols/STEP-0-FRAMING.md +46 -46
  47. package/ftm-council.yml +2 -2
  48. package/ftm-dashboard/SKILL.md +163 -163
  49. package/ftm-dashboard.yml +4 -4
  50. package/ftm-debug/SKILL.md +1037 -1037
  51. package/ftm-debug/references/phases/PHASE-0-INTAKE.md +58 -58
  52. package/ftm-debug/references/phases/PHASE-1-TRIAGE.md +46 -46
  53. package/ftm-debug/references/phases/PHASE-2-WAR-ROOM-AGENTS.md +279 -279
  54. package/ftm-debug/references/phases/PHASE-3-TO-6-EXECUTION.md +436 -436
  55. package/ftm-debug/references/protocols/BLACKBOARD.md +86 -86
  56. package/ftm-debug/references/protocols/EDGE-CASES.md +103 -103
  57. package/ftm-debug.yml +2 -2
  58. package/ftm-diagram/SKILL.md +277 -277
  59. package/ftm-diagram.yml +2 -2
  60. package/ftm-executor/SKILL.md +777 -767
  61. package/ftm-executor/references/STYLE-TEMPLATE.md +73 -73
  62. package/ftm-executor/references/phases/PHASE-0-VERIFICATION.md +62 -62
  63. package/ftm-executor/references/phases/PHASE-2-AGENT-ASSEMBLY.md +34 -34
  64. package/ftm-executor/references/phases/PHASE-3-WORKTREES.md +38 -38
  65. package/ftm-executor/references/phases/PHASE-4-5-AUDIT.md +72 -72
  66. package/ftm-executor/references/phases/PHASE-4-DISPATCH.md +66 -66
  67. package/ftm-executor/references/phases/PHASE-5-5-CODEX-GATE.md +73 -73
  68. package/ftm-executor/references/protocols/DOCUMENTATION-BOOTSTRAP.md +36 -36
  69. package/ftm-executor/references/protocols/MODEL-PROFILE.md +59 -44
  70. package/ftm-executor/references/protocols/PROGRESS-TRACKING.md +66 -66
  71. package/ftm-executor/runtime/ftm-runtime.mjs +252 -252
  72. package/ftm-executor/runtime/package.json +8 -8
  73. package/ftm-executor.yml +2 -2
  74. package/ftm-git/SKILL.md +441 -441
  75. package/ftm-git/evals/evals.json +26 -26
  76. package/ftm-git/evals/promptfoo.yaml +75 -75
  77. package/ftm-git/hooks/post-commit-experience.sh +92 -92
  78. package/ftm-git/references/patterns/SECRET-PATTERNS.md +104 -104
  79. package/ftm-git/references/protocols/REMEDIATION.md +139 -139
  80. package/ftm-git/scripts/pre-commit-secrets.sh +110 -110
  81. package/ftm-git.yml +2 -2
  82. package/ftm-inbox/backend/adapters/_retry.py +64 -64
  83. package/ftm-inbox/backend/adapters/base.py +230 -230
  84. package/ftm-inbox/backend/adapters/freshservice.py +104 -104
  85. package/ftm-inbox/backend/adapters/gmail.py +125 -125
  86. package/ftm-inbox/backend/adapters/jira.py +136 -136
  87. package/ftm-inbox/backend/adapters/registry.py +192 -192
  88. package/ftm-inbox/backend/adapters/slack.py +110 -110
  89. package/ftm-inbox/backend/db/connection.py +54 -54
  90. package/ftm-inbox/backend/db/schema.py +78 -78
  91. package/ftm-inbox/backend/executor/__init__.py +7 -7
  92. package/ftm-inbox/backend/executor/engine.py +149 -149
  93. package/ftm-inbox/backend/executor/step_runner.py +98 -98
  94. package/ftm-inbox/backend/main.py +103 -103
  95. package/ftm-inbox/backend/models/__init__.py +1 -1
  96. package/ftm-inbox/backend/models/unified_task.py +36 -36
  97. package/ftm-inbox/backend/planner/__init__.py +6 -6
  98. package/ftm-inbox/backend/planner/generator.py +127 -127
  99. package/ftm-inbox/backend/planner/schema.py +34 -34
  100. package/ftm-inbox/backend/requirements.txt +5 -5
  101. package/ftm-inbox/backend/routes/execute.py +186 -186
  102. package/ftm-inbox/backend/routes/health.py +52 -52
  103. package/ftm-inbox/backend/routes/inbox.py +68 -68
  104. package/ftm-inbox/backend/routes/plan.py +271 -271
  105. package/ftm-inbox/bin/launchagent.mjs +91 -91
  106. package/ftm-inbox/bin/setup.mjs +188 -188
  107. package/ftm-inbox/bin/start.sh +10 -10
  108. package/ftm-inbox/bin/status.sh +17 -17
  109. package/ftm-inbox/bin/stop.sh +8 -8
  110. package/ftm-inbox/config.example.yml +55 -55
  111. package/ftm-inbox/package-lock.json +2898 -2898
  112. package/ftm-inbox/package.json +26 -26
  113. package/ftm-inbox/postcss.config.js +6 -6
  114. package/ftm-inbox/src/app.css +199 -199
  115. package/ftm-inbox/src/app.html +18 -18
  116. package/ftm-inbox/src/lib/api.ts +166 -166
  117. package/ftm-inbox/src/lib/components/ExecutionLog.svelte +81 -81
  118. package/ftm-inbox/src/lib/components/InboxFeed.svelte +143 -143
  119. package/ftm-inbox/src/lib/components/PlanStep.svelte +271 -271
  120. package/ftm-inbox/src/lib/components/PlanView.svelte +206 -206
  121. package/ftm-inbox/src/lib/components/StreamPanel.svelte +99 -99
  122. package/ftm-inbox/src/lib/components/TaskCard.svelte +190 -190
  123. package/ftm-inbox/src/lib/components/ui/EmptyState.svelte +63 -63
  124. package/ftm-inbox/src/lib/components/ui/KawaiiCard.svelte +86 -86
  125. package/ftm-inbox/src/lib/components/ui/PillButton.svelte +106 -106
  126. package/ftm-inbox/src/lib/components/ui/StatusBadge.svelte +67 -67
  127. package/ftm-inbox/src/lib/components/ui/StreamDrawer.svelte +149 -149
  128. package/ftm-inbox/src/lib/components/ui/ThemeToggle.svelte +80 -80
  129. package/ftm-inbox/src/lib/theme.ts +47 -47
  130. package/ftm-inbox/src/routes/+layout.svelte +76 -76
  131. package/ftm-inbox/src/routes/+page.svelte +401 -401
  132. package/ftm-inbox/svelte.config.js +12 -12
  133. package/ftm-inbox/tailwind.config.ts +63 -63
  134. package/ftm-inbox/tsconfig.json +13 -13
  135. package/ftm-inbox/vite.config.ts +6 -6
  136. package/ftm-intent/SKILL.md +241 -241
  137. package/ftm-intent.yml +2 -2
  138. package/ftm-manifest.json +3794 -3794
  139. package/ftm-map/SKILL.md +291 -291
  140. package/ftm-map/scripts/db.py +712 -712
  141. package/ftm-map/scripts/index.py +415 -415
  142. package/ftm-map/scripts/parser.py +224 -224
  143. package/ftm-map/scripts/queries/go-tags.scm +20 -20
  144. package/ftm-map/scripts/queries/javascript-tags.scm +35 -35
  145. package/ftm-map/scripts/queries/python-tags.scm +31 -31
  146. package/ftm-map/scripts/queries/ruby-tags.scm +19 -19
  147. package/ftm-map/scripts/queries/rust-tags.scm +37 -37
  148. package/ftm-map/scripts/queries/typescript-tags.scm +41 -41
  149. package/ftm-map/scripts/query.py +301 -301
  150. package/ftm-map/scripts/ranker.py +377 -377
  151. package/ftm-map/scripts/requirements.txt +5 -5
  152. package/ftm-map/scripts/setup-hooks.sh +27 -27
  153. package/ftm-map/scripts/setup.sh +56 -56
  154. package/ftm-map/scripts/test_db.py +364 -364
  155. package/ftm-map/scripts/test_parser.py +174 -174
  156. package/ftm-map/scripts/test_query.py +183 -183
  157. package/ftm-map/scripts/test_ranker.py +199 -199
  158. package/ftm-map/scripts/views.py +591 -591
  159. package/ftm-map.yml +2 -2
  160. package/ftm-mind/SKILL.md +1943 -1943
  161. package/ftm-mind/evals/promptfoo.yaml +142 -142
  162. package/ftm-mind/references/blackboard-schema.md +328 -328
  163. package/ftm-mind/references/complexity-guide.md +110 -110
  164. package/ftm-mind/references/event-registry.md +319 -319
  165. package/ftm-mind/references/mcp-inventory.md +296 -296
  166. package/ftm-mind/references/protocols/COMPLEXITY-SIZING.md +72 -72
  167. package/ftm-mind/references/protocols/MCP-HEURISTICS.md +32 -32
  168. package/ftm-mind/references/protocols/PLAN-APPROVAL.md +80 -80
  169. package/ftm-mind/references/reflexion-protocol.md +249 -249
  170. package/ftm-mind/references/routing/SCENARIOS.md +22 -22
  171. package/ftm-mind/references/routing-scenarios.md +35 -35
  172. package/ftm-mind.yml +2 -2
  173. package/ftm-pause/SKILL.md +395 -395
  174. package/ftm-pause/references/protocols/SKILL-RESTORE-PROTOCOLS.md +186 -186
  175. package/ftm-pause/references/protocols/VALIDATION.md +80 -80
  176. package/ftm-pause.yml +2 -2
  177. package/ftm-researcher/SKILL.md +275 -275
  178. package/ftm-researcher/evals/agent-diversity.yaml +17 -17
  179. package/ftm-researcher/evals/synthesis-quality.yaml +12 -12
  180. package/ftm-researcher/evals/trigger-accuracy.yaml +39 -39
  181. package/ftm-researcher/references/adaptive-search.md +116 -116
  182. package/ftm-researcher/references/agent-prompts.md +193 -193
  183. package/ftm-researcher/references/council-integration.md +193 -193
  184. package/ftm-researcher/references/output-format.md +203 -203
  185. package/ftm-researcher/references/synthesis-pipeline.md +165 -165
  186. package/ftm-researcher/scripts/score_credibility.py +234 -234
  187. package/ftm-researcher/scripts/validate_research.py +92 -92
  188. package/ftm-researcher.yml +2 -2
  189. package/ftm-resume/SKILL.md +518 -518
  190. package/ftm-resume/references/protocols/VALIDATION.md +172 -172
  191. package/ftm-resume.yml +2 -2
  192. package/ftm-retro/SKILL.md +380 -380
  193. package/ftm-retro/references/protocols/SCORING-RUBRICS.md +89 -89
  194. package/ftm-retro/references/templates/REPORT-FORMAT.md +109 -109
  195. package/ftm-retro.yml +2 -2
  196. package/ftm-routine/SKILL.md +170 -170
  197. package/ftm-routine.yml +4 -4
  198. package/ftm-state/blackboard/capabilities.json +5 -5
  199. package/ftm-state/blackboard/capabilities.schema.json +27 -27
  200. package/ftm-state/blackboard/context.json +23 -23
  201. package/ftm-state/blackboard/experiences/index.json +9 -9
  202. package/ftm-state/blackboard/patterns.json +6 -6
  203. package/ftm-state/schemas/context.schema.json +130 -130
  204. package/ftm-state/schemas/experience-index.schema.json +77 -77
  205. package/ftm-state/schemas/experience.schema.json +78 -78
  206. package/ftm-state/schemas/patterns.schema.json +44 -44
  207. package/ftm-upgrade/SKILL.md +194 -194
  208. package/ftm-upgrade/scripts/check-version.sh +76 -76
  209. package/ftm-upgrade/scripts/upgrade.sh +143 -143
  210. package/ftm-upgrade.yml +2 -2
  211. package/ftm-verify.yml +2 -2
  212. package/ftm.yml +2 -2
  213. package/hooks/ftm-blackboard-enforcer.sh +93 -93
  214. package/hooks/ftm-discovery-reminder.sh +90 -90
  215. package/hooks/ftm-drafts-gate.sh +61 -61
  216. package/hooks/ftm-event-logger.mjs +107 -107
  217. package/hooks/ftm-map-autodetect.sh +79 -79
  218. package/hooks/ftm-pending-sync-check.sh +22 -22
  219. package/hooks/ftm-plan-gate.sh +92 -92
  220. package/hooks/ftm-post-commit-trigger.sh +57 -57
  221. package/hooks/settings-template.json +81 -81
  222. package/install.sh +363 -363
  223. package/package.json +84 -84
  224. package/uninstall.sh +25 -25
@@ -1,377 +1,377 @@
1
- """
2
- ranker.py -- PageRank-based context selection engine for ftm-map.
3
-
4
- Implements Aider-style personalized PageRank over the file-level dependency graph
5
- with task-aware personalization and token-budget binary search.
6
- """
7
-
8
- import math
9
- import os
10
- import sys
11
-
12
- sys.path.insert(0, os.path.dirname(__file__))
13
-
14
- import numpy as np
15
- import scipy.sparse as sp
16
-
17
- # Try fast-pagerank first, fall back to scipy power iteration
18
- try:
19
- from fast_pagerank import pagerank_power
20
- HAS_FAST_PAGERANK = True
21
- except ImportError:
22
- HAS_FAST_PAGERANK = False
23
-
24
-
25
def build_adjacency_matrix(conn):
    """Assemble the symmetric file-to-file adjacency matrix from ``file_edges``.

    Every row of ``files`` is given a matrix position, assigned in ascending
    ``id`` order. Edges whose endpoints are not both present in ``files`` are
    dropped. The directed weight matrix is added to its own transpose, so
    each edge counts in both directions.

    Returns:
        ``(matrix, file_id_to_idx, idx_to_file_id)``. ``matrix`` is ``None``
        (with two empty dicts) when ``files`` is empty; otherwise it is an
        ``n x n`` scipy sparse matrix, all-zero when ``file_edges`` is empty.
    """
    file_rows = conn.execute("SELECT id FROM files ORDER BY id").fetchall()
    if not file_rows:
        return None, {}, {}

    # Position -> id first, then invert it for the id -> position lookup.
    idx_to_file_id = dict(enumerate(r['id'] for r in file_rows))
    file_id_to_idx = {fid: pos for pos, fid in idx_to_file_id.items()}
    size = len(idx_to_file_id)

    edge_rows = conn.execute(
        "SELECT source_file_id, target_file_id, weight FROM file_edges"
    ).fetchall()
    if not edge_rows:
        return sp.csr_matrix((size, size)), file_id_to_idx, idx_to_file_id

    # Translate endpoints to matrix positions; keep only fully-resolved edges.
    resolved = [
        (
            file_id_to_idx.get(e['source_file_id']),
            file_id_to_idx.get(e['target_file_id']),
            e['weight'],
        )
        for e in edge_rows
    ]
    kept = [t for t in resolved if t[0] is not None and t[1] is not None]
    src_positions = [t[0] for t in kept]
    tgt_positions = [t[1] for t in kept]
    weights = [t[2] for t in kept]

    directed = sp.csr_matrix(
        (weights, (src_positions, tgt_positions)), shape=(size, size)
    )
    # Symmetrize: PageRank is run on the undirected graph.
    return directed + directed.T, file_id_to_idx, idx_to_file_id
65
-
66
-
67
def build_personalization(
    conn, seed_files=None, seed_keywords=None, seed_symbols=None, file_id_to_idx=None
):
    """Build the personalization (teleport) vector for PageRank.

    Every file starts at weight 1 and is multiplied by a boost for each
    channel that selects it; boosts compound across channels and seeds:

    - seed_files: a file whose path matches exactly gets 100x
    - seed_keywords: a file with a symbol matching the FTS5 query gets 30x,
      applied once per file per keyword no matter how many symbols match
    - seed_symbols: files defining the symbol get 80x, files referencing
      it get 40x

    Args:
        conn: DB connection whose rows support access by column name.
        seed_files: iterable of file paths, or None.
        seed_keywords: iterable of FTS5 query strings, or None.
        seed_symbols: iterable of exact symbol names, or None.
        file_id_to_idx: mapping of file id -> vector position.

    Returns:
        numpy array normalized to sum to 1.0, or None when
        ``file_id_to_idx`` is missing or empty.
    """
    # The default of None used to crash on len(None); treat it like "no files".
    if not file_id_to_idx:
        return None

    pers = np.ones(len(file_id_to_idx))  # Base: uniform weight of 1

    def boost(file_ids, factor):
        """Multiply the weight of every known file in *file_ids* by *factor*."""
        for fid in file_ids:
            idx = file_id_to_idx.get(fid)
            if idx is not None:
                pers[idx] *= factor

    # Channel 1: Seed files (100x)
    for fpath in seed_files or ():
        file_row = conn.execute(
            "SELECT id FROM files WHERE path=?", (fpath,)
        ).fetchone()
        if file_row:
            boost([file_row['id']], 100)

    # Channel 2: Seed keywords via FTS5 (30x)
    for kw in seed_keywords or ():
        try:
            fts_results = conn.execute(
                "SELECT s.file_id FROM symbols_fts fts "
                "JOIN symbols s ON s.id = fts.rowid "
                "WHERE symbols_fts MATCH ? LIMIT 50",
                (kw,),
            ).fetchall()
        except Exception:
            # Deliberately best-effort: FTS query syntax errors (or a missing
            # FTS table) must not abort ranking.
            continue
        # The query returns one row per matching symbol. Collapse to distinct
        # files so a file with k hits gets 30x, not 30**k (which could
        # overflow to inf and turn the normalized vector into NaN).
        boost({row['file_id'] for row in fts_results}, 30)

    # Channel 3: Seed symbols (80x defining, 40x referencing)
    for sym_name in seed_symbols or ():
        def_files = conn.execute(
            "SELECT DISTINCT file_id FROM symbols WHERE name=?", (sym_name,)
        ).fetchall()
        boost((row['file_id'] for row in def_files), 80)

        ref_files = conn.execute(
            "SELECT DISTINCT file_id FROM refs WHERE symbol_name=?", (sym_name,)
        ).fetchall()
        boost((row['file_id'] for row in ref_files), 40)

    # Normalize to sum to 1
    total = pers.sum()
    if total > 0:
        pers /= total

    return pers
136
-
137
-
138
def run_pagerank(adj_matrix, personalization=None, damping=0.85, max_iter=100, tol=1e-6):
    """Run PageRank on the adjacency matrix.

    Uses fast-pagerank when it is importable and a personalization vector is
    given; otherwise (or if that call raises) runs a power iteration built on
    scipy sparse matrices.

    Args:
        adj_matrix: square scipy sparse adjacency matrix.
        personalization: optional teleport weights, one per node. It is
            rescaled to sum to 1; an all-zero vector is treated as uniform.
        damping: probability of following an edge instead of teleporting.
        max_iter: iteration cap, honoured by both implementations.
        tol: convergence threshold (L1 norm in the fallback iteration).

    Returns:
        numpy array of scores indexed by matrix position (empty for a
        0 x 0 matrix).
    """
    n = adj_matrix.shape[0]
    if n == 0:
        return np.array([])

    if HAS_FAST_PAGERANK and personalization is not None:
        try:
            # max_iter is forwarded so the cap applies on this path as well.
            return pagerank_power(
                adj_matrix,
                p=damping,
                personalize=personalization,
                max_iter=max_iter,
                tol=tol,
            )
        except Exception:
            pass  # Fall through to scipy implementation

    # Scipy power iteration fallback.
    # Column sums are computed once and reused for both the dangling-node
    # mask and the column normalization.
    col_sums = np.asarray(adj_matrix.sum(axis=0)).ravel().astype(float)
    dangling_mask = col_sums == 0
    safe_sums = np.where(dangling_mask, 1.0, col_sums)  # avoid division by zero

    # Transition matrix: M[i,j] = A[i,j] / col_sum[j]
    M = adj_matrix @ sp.diags(1.0 / safe_sums)

    # Teleport vector: rescale to a distribution so the scores stay one too.
    v = None
    if personalization is not None:
        v = np.asarray(personalization, dtype=float).ravel()
        total = v.sum()
        v = v / total if total > 0 else None
    if v is None:
        v = np.full(n, 1.0 / n)

    scores = v.copy()
    has_dangling = dangling_mask.any()

    for _ in range(max_iter):
        prev = scores

        # Rank held by dangling nodes is redistributed along the teleport vector.
        dangling_sum = scores[dangling_mask].sum() if has_dangling else 0
        scores = damping * (M @ scores) + damping * dangling_sum * v + (1 - damping) * v

        # Check convergence via L1 norm
        if np.abs(scores - prev).sum() < tol:
            break

    return scores
189
-
190
-
191
def rank_files(conn, seed_files=None, seed_keywords=None, seed_symbols=None):
    """Rank all files by PageRank score with optional personalization.

    Args:
        conn: DB connection whose rows support access by column name.
        seed_files: file paths to boost, or None.
        seed_keywords: FTS5 keyword queries to boost, or None.
        seed_symbols: symbol names to boost, or None.

    Returns:
        List of ``(file_path, score)`` tuples sorted by score, highest
        first; empty when there are no files.
    """
    adj, fid_to_idx, idx_to_fid = build_adjacency_matrix(conn)
    if adj is None or adj.shape[0] == 0:
        return []

    pers = build_personalization(
        conn, seed_files, seed_keywords, seed_symbols, fid_to_idx
    )
    scores = run_pagerank(adj, pers)

    # Fetch every path in one query instead of issuing one SELECT per file.
    path_by_id = {
        row['id']: row['path']
        for row in conn.execute("SELECT id, path FROM files").fetchall()
    }

    # Map scores back to file paths, skipping ids that no longer have a row.
    results = []
    for idx, score in enumerate(scores):
        file_id = idx_to_fid[idx]
        if file_id in path_by_id:
            results.append((path_by_id[file_id], float(score)))

    results.sort(key=lambda item: item[1], reverse=True)
    return results
217
-
218
-
219
def fit_to_budget(ranked_files, conn, token_budget):
    """Select the top-ranked files whose symbols fit within the token budget.

    Files are taken in ranked order for as long as the running token total
    stays within ``token_budget`` plus a 15% tolerance; selection stops at
    the first file that would exceed it. Token estimation: ~25 tokens per
    symbol entry. Paths with no row in ``files`` are skipped at no cost.

    If even the first file exceeds the tolerance the result is empty, so the
    returned total never exceeds ``token_budget * 1.15``.

    Args:
        ranked_files: list of ``(path, score)`` tuples, best first.
        conn: DB connection whose rows support access by column name.
        token_budget: target number of tokens.

    Returns:
        ``(result_list, total_tokens)`` where result_list contains dicts:
        ``{path, score, symbols: [name, ...], tokens}``.
    """
    if not ranked_files or token_budget <= 0:
        return [], 0

    tokens_per_symbol = 25
    tolerance_budget = token_budget * 1.15

    # Cumulative cost only grows along the ranking, so a single forward pass
    # finds the longest prefix that fits; no repeated re-estimation of the
    # same prefix is needed.
    result = []
    total_tokens = 0
    for fpath, score in ranked_files:
        file_row = conn.execute(
            "SELECT id FROM files WHERE path=?", (fpath,)
        ).fetchone()
        if not file_row:
            continue
        syms = conn.execute(
            "SELECT name FROM symbols WHERE file_id=? ORDER BY line_start",
            (file_row['id'],),
        ).fetchall()
        sym_names = [s['name'] for s in syms]
        entry_tokens = len(sym_names) * tokens_per_symbol
        if total_tokens + entry_tokens > tolerance_budget:
            break
        total_tokens += entry_tokens
        result.append({
            "path": fpath,
            "score": round(score, 6),
            "symbols": sym_names,
            "tokens": entry_tokens,
        })

    return result, total_tokens
295
-
296
-
297
- # ---------------------------------------------------------------------------
298
- # Smoke test
299
- # ---------------------------------------------------------------------------
300
-
301
if __name__ == "__main__":
    # Smoke test: builds a three-file graph in a throwaway database and
    # exercises ranking, personalization and budget fitting end to end.
    import tempfile
    from db import (
        get_connection,
        add_file,
        add_symbol,
        add_reference,
        rebuild_file_edges,
    )

    print("Running ranker.py smoke tests ...")

    with tempfile.TemporaryDirectory() as tmp:
        conn = get_connection(tmp)
        try:
            # Create a small graph: 3 files with cross-references
            f1 = add_file(conn, "src/auth.py", "python", 1.0, line_count=50)
            f2 = add_file(conn, "src/api.py", "python", 1.0, line_count=100)
            f3 = add_file(conn, "src/utils.py", "python", 1.0, line_count=30)

            # Symbols
            add_symbol(
                conn, f1, "authenticate", "definition", 1, 20,
                signature="def authenticate(req)",
            )
            add_symbol(conn, f1, "verify_token", "definition", 25, 40)
            add_symbol(
                conn, f2, "handle_request", "definition", 1, 50,
                signature="def handle_request(req)",
            )
            add_symbol(conn, f3, "format_date", "definition", 1, 10)
            add_symbol(conn, f3, "parse_config", "definition", 15, 25)

            # References: api.py references auth.py functions, and utils.py
            add_reference(conn, f2, "authenticate", 10)
            add_reference(conn, f2, "verify_token", 15)
            add_reference(conn, f2, "format_date", 20)
            add_reference(conn, f2, "parse_config", 25)
            # auth.py also references utils
            add_reference(conn, f1, "parse_config", 30)

            # Materialize edges
            rebuild_file_edges(conn)
            conn.commit()

            # Test 1: Uniform PageRank
            results = rank_files(conn)
            print(f" Uniform PageRank: {len(results)} files ranked")
            for path, score in results:
                print(f" {path}: {score:.6f}")
            assert len(results) == 3

            # Test 2: Personalized -- seed auth.py
            results_pers = rank_files(conn, seed_files=["src/auth.py"])
            print(f" Personalized (seed auth.py): {len(results_pers)} files")
            for path, score in results_pers:
                print(f" {path}: {score:.6f}")
            # auth.py should be ranked higher with personalization
            auth_score = next(s for p, s in results_pers if p == "src/auth.py")
            auth_uniform = next(s for p, s in results if p == "src/auth.py")
            print(f" Auth personalized boost: {auth_score:.6f} vs {auth_uniform:.6f}")
            # Enforce the expectation above instead of only printing it.
            assert auth_score > auth_uniform

            # Test 3: Budget fitting
            budget_result, total_tokens = fit_to_budget(results, conn, 200)
            print(f" Budget fit (200 tokens): {len(budget_result)} files, {total_tokens} tokens")
            assert total_tokens <= 200 * 1.15  # 15% tolerance

            # Test 4: Keyword personalization
            results_kw = rank_files(conn, seed_keywords=["authenticate"])
            print(f" Keyword personalized: {len(results_kw)} files")

            # Test 5: Symbol personalization
            results_sym = rank_files(conn, seed_symbols=["authenticate"])
            print(f" Symbol personalized: {len(results_sym)} files")
        finally:
            # Release the connection before the temporary directory is
            # removed (assumes get_connection returns an object with
            # close(), as sqlite3 connections do -- TODO confirm).
            conn.close()

    print("\nAll ranker smoke tests passed.")
1
+ """
2
+ ranker.py -- PageRank-based context selection engine for ftm-map.
3
+
4
+ Implements Aider-style personalized PageRank over the file-level dependency graph
5
+ with task-aware personalization and token-budget binary search.
6
+ """
7
+
8
+ import math
9
+ import os
10
+ import sys
11
+
12
+ sys.path.insert(0, os.path.dirname(__file__))
13
+
14
+ import numpy as np
15
+ import scipy.sparse as sp
16
+
17
+ # Try fast-pagerank first, fall back to scipy power iteration
18
+ try:
19
+ from fast_pagerank import pagerank_power
20
+ HAS_FAST_PAGERANK = True
21
+ except ImportError:
22
+ HAS_FAST_PAGERANK = False
23
+
24
+
25
def build_adjacency_matrix(conn):
    """Assemble the symmetric file-to-file adjacency matrix from ``file_edges``.

    Every row of ``files`` is given a matrix position, assigned in ascending
    ``id`` order. Edges whose endpoints are not both present in ``files`` are
    dropped. The directed weight matrix is added to its own transpose, so
    each edge counts in both directions.

    Returns:
        ``(matrix, file_id_to_idx, idx_to_file_id)``. ``matrix`` is ``None``
        (with two empty dicts) when ``files`` is empty; otherwise it is an
        ``n x n`` scipy sparse matrix, all-zero when ``file_edges`` is empty.
    """
    file_rows = conn.execute("SELECT id FROM files ORDER BY id").fetchall()
    if not file_rows:
        return None, {}, {}

    # Position -> id first, then invert it for the id -> position lookup.
    idx_to_file_id = dict(enumerate(r['id'] for r in file_rows))
    file_id_to_idx = {fid: pos for pos, fid in idx_to_file_id.items()}
    size = len(idx_to_file_id)

    edge_rows = conn.execute(
        "SELECT source_file_id, target_file_id, weight FROM file_edges"
    ).fetchall()
    if not edge_rows:
        return sp.csr_matrix((size, size)), file_id_to_idx, idx_to_file_id

    # Translate endpoints to matrix positions; keep only fully-resolved edges.
    resolved = [
        (
            file_id_to_idx.get(e['source_file_id']),
            file_id_to_idx.get(e['target_file_id']),
            e['weight'],
        )
        for e in edge_rows
    ]
    kept = [t for t in resolved if t[0] is not None and t[1] is not None]
    src_positions = [t[0] for t in kept]
    tgt_positions = [t[1] for t in kept]
    weights = [t[2] for t in kept]

    directed = sp.csr_matrix(
        (weights, (src_positions, tgt_positions)), shape=(size, size)
    )
    # Symmetrize: PageRank is run on the undirected graph.
    return directed + directed.T, file_id_to_idx, idx_to_file_id
65
+
66
+
67
def build_personalization(
    conn, seed_files=None, seed_keywords=None, seed_symbols=None, file_id_to_idx=None
):
    """Build the personalization (teleport) vector for PageRank.

    Every file starts at weight 1 and is multiplied by a boost for each
    channel that selects it; boosts compound across channels and seeds:

    - seed_files: a file whose path matches exactly gets 100x
    - seed_keywords: a file with a symbol matching the FTS5 query gets 30x,
      applied once per file per keyword no matter how many symbols match
    - seed_symbols: files defining the symbol get 80x, files referencing
      it get 40x

    Args:
        conn: DB connection whose rows support access by column name.
        seed_files: iterable of file paths, or None.
        seed_keywords: iterable of FTS5 query strings, or None.
        seed_symbols: iterable of exact symbol names, or None.
        file_id_to_idx: mapping of file id -> vector position.

    Returns:
        numpy array normalized to sum to 1.0, or None when
        ``file_id_to_idx`` is missing or empty.
    """
    # The default of None used to crash on len(None); treat it like "no files".
    if not file_id_to_idx:
        return None

    pers = np.ones(len(file_id_to_idx))  # Base: uniform weight of 1

    def boost(file_ids, factor):
        """Multiply the weight of every known file in *file_ids* by *factor*."""
        for fid in file_ids:
            idx = file_id_to_idx.get(fid)
            if idx is not None:
                pers[idx] *= factor

    # Channel 1: Seed files (100x)
    for fpath in seed_files or ():
        file_row = conn.execute(
            "SELECT id FROM files WHERE path=?", (fpath,)
        ).fetchone()
        if file_row:
            boost([file_row['id']], 100)

    # Channel 2: Seed keywords via FTS5 (30x)
    for kw in seed_keywords or ():
        try:
            fts_results = conn.execute(
                "SELECT s.file_id FROM symbols_fts fts "
                "JOIN symbols s ON s.id = fts.rowid "
                "WHERE symbols_fts MATCH ? LIMIT 50",
                (kw,),
            ).fetchall()
        except Exception:
            # Deliberately best-effort: FTS query syntax errors (or a missing
            # FTS table) must not abort ranking.
            continue
        # The query returns one row per matching symbol. Collapse to distinct
        # files so a file with k hits gets 30x, not 30**k (which could
        # overflow to inf and turn the normalized vector into NaN).
        boost({row['file_id'] for row in fts_results}, 30)

    # Channel 3: Seed symbols (80x defining, 40x referencing)
    for sym_name in seed_symbols or ():
        def_files = conn.execute(
            "SELECT DISTINCT file_id FROM symbols WHERE name=?", (sym_name,)
        ).fetchall()
        boost((row['file_id'] for row in def_files), 80)

        ref_files = conn.execute(
            "SELECT DISTINCT file_id FROM refs WHERE symbol_name=?", (sym_name,)
        ).fetchall()
        boost((row['file_id'] for row in ref_files), 40)

    # Normalize to sum to 1
    total = pers.sum()
    if total > 0:
        pers /= total

    return pers
136
+
137
+
138
def run_pagerank(adj_matrix, personalization=None, damping=0.85, max_iter=100, tol=1e-6):
    """Run PageRank on the adjacency matrix.

    Uses fast-pagerank when it is importable and a personalization vector is
    given; otherwise (or if that call raises) runs a power iteration built on
    scipy sparse matrices.

    Args:
        adj_matrix: square scipy sparse adjacency matrix.
        personalization: optional teleport weights, one per node. It is
            rescaled to sum to 1; an all-zero vector is treated as uniform.
        damping: probability of following an edge instead of teleporting.
        max_iter: iteration cap, honoured by both implementations.
        tol: convergence threshold (L1 norm in the fallback iteration).

    Returns:
        numpy array of scores indexed by matrix position (empty for a
        0 x 0 matrix).
    """
    n = adj_matrix.shape[0]
    if n == 0:
        return np.array([])

    if HAS_FAST_PAGERANK and personalization is not None:
        try:
            # max_iter is forwarded so the cap applies on this path as well.
            return pagerank_power(
                adj_matrix,
                p=damping,
                personalize=personalization,
                max_iter=max_iter,
                tol=tol,
            )
        except Exception:
            pass  # Fall through to scipy implementation

    # Scipy power iteration fallback.
    # Column sums are computed once and reused for both the dangling-node
    # mask and the column normalization.
    col_sums = np.asarray(adj_matrix.sum(axis=0)).ravel().astype(float)
    dangling_mask = col_sums == 0
    safe_sums = np.where(dangling_mask, 1.0, col_sums)  # avoid division by zero

    # Transition matrix: M[i,j] = A[i,j] / col_sum[j]
    M = adj_matrix @ sp.diags(1.0 / safe_sums)

    # Teleport vector: rescale to a distribution so the scores stay one too.
    v = None
    if personalization is not None:
        v = np.asarray(personalization, dtype=float).ravel()
        total = v.sum()
        v = v / total if total > 0 else None
    if v is None:
        v = np.full(n, 1.0 / n)

    scores = v.copy()
    has_dangling = dangling_mask.any()

    for _ in range(max_iter):
        prev = scores

        # Rank held by dangling nodes is redistributed along the teleport vector.
        dangling_sum = scores[dangling_mask].sum() if has_dangling else 0
        scores = damping * (M @ scores) + damping * dangling_sum * v + (1 - damping) * v

        # Check convergence via L1 norm
        if np.abs(scores - prev).sum() < tol:
            break

    return scores
189
+
190
+
191
def rank_files(conn, seed_files=None, seed_keywords=None, seed_symbols=None):
    """Rank all files by PageRank score with optional personalization.

    Args:
        conn: DB connection whose rows support access by column name.
        seed_files: file paths to boost, or None.
        seed_keywords: FTS5 keyword queries to boost, or None.
        seed_symbols: symbol names to boost, or None.

    Returns:
        List of ``(file_path, score)`` tuples sorted by score, highest
        first; empty when there are no files.
    """
    adj, fid_to_idx, idx_to_fid = build_adjacency_matrix(conn)
    if adj is None or adj.shape[0] == 0:
        return []

    pers = build_personalization(
        conn, seed_files, seed_keywords, seed_symbols, fid_to_idx
    )
    scores = run_pagerank(adj, pers)

    # Fetch every path in one query instead of issuing one SELECT per file.
    path_by_id = {
        row['id']: row['path']
        for row in conn.execute("SELECT id, path FROM files").fetchall()
    }

    # Map scores back to file paths, skipping ids that no longer have a row.
    results = []
    for idx, score in enumerate(scores):
        file_id = idx_to_fid[idx]
        if file_id in path_by_id:
            results.append((path_by_id[file_id], float(score)))

    results.sort(key=lambda item: item[1], reverse=True)
    return results
217
+
218
+
219
def fit_to_budget(ranked_files, conn, token_budget, tokens_per_symbol=25, tolerance=0.15):
    """Select the highest-ranked files whose symbol listing fits a token budget.

    Files are taken in ranking order. Every symbol of a file is costed at
    ``tokens_per_symbol`` tokens, and files are added until the next one would
    push the running total above ``token_budget * (1 + tolerance)``. Because
    the cost of a prefix never decreases as it grows, this single pass picks
    the same prefix a binary search followed by a tolerance extension would,
    while querying each file only once.

    Args:
        ranked_files: Iterable of (path, score) pairs, best first.
        conn: Database connection whose rows support access by column name.
        token_budget: Target number of tokens; values <= 0 select nothing.
        tokens_per_symbol: Estimated cost of one symbol entry.
        tolerance: Fraction by which the budget may be overshot.

    Returns:
        (result_list, total_tokens) where result_list contains dicts:
        [{path, score, symbols: [name, ...], tokens}]. Paths missing from the
        ``files`` table are skipped at no cost. The list is empty when even
        the top-ranked file exceeds the budget plus tolerance.
    """
    if not ranked_files or token_budget <= 0:
        return [], 0

    ceiling = token_budget * (1 + tolerance)

    result = []
    total_tokens = 0
    for fpath, score in ranked_files:
        file_row = conn.execute(
            "SELECT id FROM files WHERE path=?", (fpath,)
        ).fetchone()
        if not file_row:
            continue

        syms = conn.execute(
            "SELECT name FROM symbols WHERE file_id=? ORDER BY line_start",
            (file_row['id'],),
        ).fetchall()
        sym_names = [s['name'] for s in syms]
        entry_tokens = len(sym_names) * tokens_per_symbol

        # Stop at the first file that does not fit: costs are non-negative,
        # so no longer prefix can fit either.
        if total_tokens + entry_tokens > ceiling:
            break

        total_tokens += entry_tokens
        result.append({
            "path": fpath,
            "score": round(score, 6),
            "symbols": sym_names,
            "tokens": entry_tokens,
        })

    return result, total_tokens
295
+
296
+
297
+ # ---------------------------------------------------------------------------
298
+ # Smoke test
299
+ # ---------------------------------------------------------------------------
300
+
301
if __name__ == "__main__":
    # Smoke test: builds a three-file graph in a throwaway database and
    # exercises ranking (uniform and seeded) plus budget fitting.
    import tempfile
    from db import (
        get_connection,
        add_file,
        add_symbol,
        add_reference,
        rebuild_file_edges,
    )

    print("Running ranker.py smoke tests ...")

    with tempfile.TemporaryDirectory() as tmp:
        conn = get_connection(tmp)
        try:
            # Create a small graph: 3 files with cross-references
            f1 = add_file(conn, "src/auth.py", "python", 1.0, line_count=50)
            f2 = add_file(conn, "src/api.py", "python", 1.0, line_count=100)
            f3 = add_file(conn, "src/utils.py", "python", 1.0, line_count=30)

            # Symbols
            add_symbol(
                conn, f1, "authenticate", "definition", 1, 20,
                signature="def authenticate(req)",
            )
            add_symbol(conn, f1, "verify_token", "definition", 25, 40)
            add_symbol(
                conn, f2, "handle_request", "definition", 1, 50,
                signature="def handle_request(req)",
            )
            add_symbol(conn, f3, "format_date", "definition", 1, 10)
            add_symbol(conn, f3, "parse_config", "definition", 15, 25)

            # References: api.py references auth.py functions, and utils.py
            add_reference(conn, f2, "authenticate", 10)
            add_reference(conn, f2, "verify_token", 15)
            add_reference(conn, f2, "format_date", 20)
            add_reference(conn, f2, "parse_config", 25)
            # auth.py also references utils
            add_reference(conn, f1, "parse_config", 30)

            # Materialize edges
            rebuild_file_edges(conn)
            conn.commit()

            # Test 1: Uniform PageRank
            results = rank_files(conn)
            print(f" Uniform PageRank: {len(results)} files ranked")
            for path, score in results:
                print(f" {path}: {score:.6f}")
            assert len(results) == 3

            # Test 2: Personalized -- seed auth.py
            results_pers = rank_files(conn, seed_files=["src/auth.py"])
            print(f" Personalized (seed auth.py): {len(results_pers)} files")
            for path, score in results_pers:
                print(f" {path}: {score:.6f}")
            # auth.py should be ranked higher with personalization; the two
            # scores are only printed for inspection, not asserted.
            auth_score = next(s for p, s in results_pers if p == "src/auth.py")
            auth_uniform = next(s for p, s in results if p == "src/auth.py")
            print(f" Auth personalized boost: {auth_score:.6f} vs {auth_uniform:.6f}")

            # Test 3: Budget fitting
            budget_result, total_tokens = fit_to_budget(results, conn, 200)
            print(f" Budget fit (200 tokens): {len(budget_result)} files, {total_tokens} tokens")
            assert total_tokens <= 200 * 1.15  # 15% tolerance

            # Test 4: Keyword personalization
            results_kw = rank_files(conn, seed_keywords=["authenticate"])
            print(f" Keyword personalized: {len(results_kw)} files")

            # Test 5: Symbol personalization
            results_sym = rank_files(conn, seed_symbols=["authenticate"])
            print(f" Symbol personalized: {len(results_sym)} files")
        finally:
            # Release the connection before the temporary directory is
            # removed; presumably the database file lives under tmp, and an
            # open handle can block its deletion on some platforms.
            conn.close()

    print("\nAll ranker smoke tests passed.")