feed-the-machine 1.6.0 → 1.7.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (269)
  1. package/LICENSE +21 -21
  2. package/README.md +170 -170
  3. package/bin/brain.py +1340 -0
  4. package/bin/convert_claude_skills_to_codex.py +490 -0
  5. package/bin/generate-manifest.mjs +463 -463
  6. package/bin/harden_codex_skills.py +141 -0
  7. package/bin/install.mjs +491 -491
  8. package/bin/migrate-eng-buddy-data.py +875 -0
  9. package/bin/playbook_engine/__init__.py +1 -0
  10. package/bin/playbook_engine/conftest.py +8 -0
  11. package/bin/playbook_engine/extractor.py +33 -0
  12. package/bin/playbook_engine/manager.py +102 -0
  13. package/bin/playbook_engine/models.py +84 -0
  14. package/bin/playbook_engine/registry.py +35 -0
  15. package/bin/playbook_engine/test_extractor.py +72 -0
  16. package/bin/playbook_engine/test_integration.py +129 -0
  17. package/bin/playbook_engine/test_manager.py +85 -0
  18. package/bin/playbook_engine/test_models.py +166 -0
  19. package/bin/playbook_engine/test_registry.py +67 -0
  20. package/bin/playbook_engine/test_tracer.py +86 -0
  21. package/bin/playbook_engine/tracer.py +93 -0
  22. package/bin/tasks_db.py +456 -0
  23. package/docs/HOOKS.md +243 -243
  24. package/docs/INBOX.md +233 -233
  25. package/ftm/SKILL.md +125 -122
  26. package/ftm-audit/SKILL.md +623 -623
  27. package/ftm-audit/references/protocols/PROJECT-PATTERNS.md +91 -91
  28. package/ftm-audit/references/protocols/RUNTIME-WIRING.md +66 -66
  29. package/ftm-audit/references/protocols/WIRING-CONTRACTS.md +135 -135
  30. package/ftm-audit/references/strategies/AUTO-FIX-STRATEGIES.md +69 -69
  31. package/ftm-audit/references/templates/REPORT-FORMAT.md +96 -96
  32. package/ftm-audit/scripts/run-knip.sh +23 -23
  33. package/ftm-audit.yml +2 -2
  34. package/ftm-brainstorm/SKILL.md +1003 -498
  35. package/ftm-brainstorm/evals/evals.json +180 -100
  36. package/ftm-brainstorm/evals/promptfoo.yaml +109 -109
  37. package/ftm-brainstorm/references/agent-prompts.md +552 -224
  38. package/ftm-brainstorm/references/plan-template.md +209 -121
  39. package/ftm-brainstorm.yml +2 -2
  40. package/ftm-browse/SKILL.md +454 -454
  41. package/ftm-browse/daemon/browser-manager.ts +206 -206
  42. package/ftm-browse/daemon/bun.lock +30 -30
  43. package/ftm-browse/daemon/cli.ts +347 -347
  44. package/ftm-browse/daemon/commands.ts +410 -410
  45. package/ftm-browse/daemon/main.ts +357 -357
  46. package/ftm-browse/daemon/package.json +17 -17
  47. package/ftm-browse/daemon/server.ts +189 -189
  48. package/ftm-browse/daemon/snapshot.ts +519 -519
  49. package/ftm-browse/daemon/tsconfig.json +22 -22
  50. package/ftm-browse.yml +4 -4
  51. package/ftm-capture/SKILL.md +370 -370
  52. package/ftm-capture.yml +4 -4
  53. package/ftm-codex-gate/SKILL.md +361 -361
  54. package/ftm-codex-gate.yml +2 -2
  55. package/ftm-config/SKILL.md +422 -345
  56. package/ftm-config.default.yml +125 -82
  57. package/ftm-config.yml +44 -2
  58. package/ftm-council/SKILL.md +416 -416
  59. package/ftm-council/references/prompts/CLAUDE-INVESTIGATION.md +60 -60
  60. package/ftm-council/references/prompts/CODEX-INVESTIGATION.md +58 -58
  61. package/ftm-council/references/prompts/GEMINI-INVESTIGATION.md +58 -58
  62. package/ftm-council/references/prompts/REBUTTAL-TEMPLATE.md +57 -57
  63. package/ftm-council/references/protocols/PREREQUISITES.md +47 -47
  64. package/ftm-council/references/protocols/STEP-0-FRAMING.md +46 -46
  65. package/ftm-council.yml +2 -2
  66. package/ftm-dashboard/SKILL.md +163 -163
  67. package/ftm-dashboard.yml +4 -4
  68. package/ftm-debug/SKILL.md +1037 -1037
  69. package/ftm-debug/references/phases/PHASE-0-INTAKE.md +58 -58
  70. package/ftm-debug/references/phases/PHASE-1-TRIAGE.md +46 -46
  71. package/ftm-debug/references/phases/PHASE-2-WAR-ROOM-AGENTS.md +279 -279
  72. package/ftm-debug/references/phases/PHASE-3-TO-6-EXECUTION.md +436 -436
  73. package/ftm-debug/references/protocols/BLACKBOARD.md +86 -86
  74. package/ftm-debug/references/protocols/EDGE-CASES.md +103 -103
  75. package/ftm-debug.yml +2 -2
  76. package/ftm-diagram/SKILL.md +277 -277
  77. package/ftm-diagram.yml +2 -2
  78. package/ftm-executor/SKILL.md +777 -777
  79. package/ftm-executor/references/STYLE-TEMPLATE.md +73 -73
  80. package/ftm-executor/references/phases/PHASE-0-VERIFICATION.md +62 -62
  81. package/ftm-executor/references/phases/PHASE-2-AGENT-ASSEMBLY.md +34 -34
  82. package/ftm-executor/references/phases/PHASE-3-WORKTREES.md +38 -38
  83. package/ftm-executor/references/phases/PHASE-4-5-AUDIT.md +72 -72
  84. package/ftm-executor/references/phases/PHASE-4-DISPATCH.md +66 -66
  85. package/ftm-executor/references/phases/PHASE-5-5-CODEX-GATE.md +73 -73
  86. package/ftm-executor/references/protocols/DOCUMENTATION-BOOTSTRAP.md +36 -36
  87. package/ftm-executor/references/protocols/MODEL-PROFILE.md +59 -59
  88. package/ftm-executor/references/protocols/PROGRESS-TRACKING.md +66 -66
  89. package/ftm-executor/runtime/ftm-runtime.mjs +252 -252
  90. package/ftm-executor/runtime/package.json +8 -8
  91. package/ftm-executor.yml +2 -2
  92. package/ftm-git/SKILL.md +441 -441
  93. package/ftm-git/evals/evals.json +26 -26
  94. package/ftm-git/evals/promptfoo.yaml +75 -75
  95. package/ftm-git/hooks/post-commit-experience.sh +92 -92
  96. package/ftm-git/references/patterns/SECRET-PATTERNS.md +104 -104
  97. package/ftm-git/references/protocols/REMEDIATION.md +139 -139
  98. package/ftm-git/scripts/pre-commit-secrets.sh +110 -110
  99. package/ftm-git.yml +2 -2
  100. package/ftm-inbox/backend/__pycache__/main.cpython-314.pyc +0 -0
  101. package/ftm-inbox/backend/adapters/_retry.py +64 -64
  102. package/ftm-inbox/backend/adapters/base.py +230 -230
  103. package/ftm-inbox/backend/adapters/freshservice.py +104 -104
  104. package/ftm-inbox/backend/adapters/gmail.py +125 -125
  105. package/ftm-inbox/backend/adapters/jira.py +136 -136
  106. package/ftm-inbox/backend/adapters/registry.py +192 -192
  107. package/ftm-inbox/backend/adapters/slack.py +110 -110
  108. package/ftm-inbox/backend/db/connection.py +54 -54
  109. package/ftm-inbox/backend/db/schema.py +78 -78
  110. package/ftm-inbox/backend/executor/__init__.py +7 -7
  111. package/ftm-inbox/backend/executor/engine.py +149 -149
  112. package/ftm-inbox/backend/executor/step_runner.py +98 -98
  113. package/ftm-inbox/backend/main.py +103 -103
  114. package/ftm-inbox/backend/models/__init__.py +1 -1
  115. package/ftm-inbox/backend/models/unified_task.py +36 -36
  116. package/ftm-inbox/backend/planner/__init__.py +6 -6
  117. package/ftm-inbox/backend/planner/__pycache__/__init__.cpython-314.pyc +0 -0
  118. package/ftm-inbox/backend/planner/__pycache__/generator.cpython-314.pyc +0 -0
  119. package/ftm-inbox/backend/planner/__pycache__/schema.cpython-314.pyc +0 -0
  120. package/ftm-inbox/backend/planner/generator.py +127 -127
  121. package/ftm-inbox/backend/planner/schema.py +34 -34
  122. package/ftm-inbox/backend/requirements.txt +5 -5
  123. package/ftm-inbox/backend/routes/__pycache__/plan.cpython-314.pyc +0 -0
  124. package/ftm-inbox/backend/routes/execute.py +186 -186
  125. package/ftm-inbox/backend/routes/health.py +52 -52
  126. package/ftm-inbox/backend/routes/inbox.py +68 -68
  127. package/ftm-inbox/backend/routes/plan.py +271 -271
  128. package/ftm-inbox/bin/launchagent.mjs +91 -91
  129. package/ftm-inbox/bin/setup.mjs +188 -188
  130. package/ftm-inbox/bin/start.sh +10 -10
  131. package/ftm-inbox/bin/status.sh +17 -17
  132. package/ftm-inbox/bin/stop.sh +8 -8
  133. package/ftm-inbox/config.example.yml +55 -55
  134. package/ftm-inbox/package-lock.json +2898 -2898
  135. package/ftm-inbox/package.json +26 -26
  136. package/ftm-inbox/postcss.config.js +6 -6
  137. package/ftm-inbox/src/app.css +199 -199
  138. package/ftm-inbox/src/app.html +18 -18
  139. package/ftm-inbox/src/lib/api.ts +166 -166
  140. package/ftm-inbox/src/lib/components/ExecutionLog.svelte +81 -81
  141. package/ftm-inbox/src/lib/components/InboxFeed.svelte +143 -143
  142. package/ftm-inbox/src/lib/components/PlanStep.svelte +271 -271
  143. package/ftm-inbox/src/lib/components/PlanView.svelte +206 -206
  144. package/ftm-inbox/src/lib/components/StreamPanel.svelte +99 -99
  145. package/ftm-inbox/src/lib/components/TaskCard.svelte +190 -190
  146. package/ftm-inbox/src/lib/components/ui/EmptyState.svelte +63 -63
  147. package/ftm-inbox/src/lib/components/ui/KawaiiCard.svelte +86 -86
  148. package/ftm-inbox/src/lib/components/ui/PillButton.svelte +106 -106
  149. package/ftm-inbox/src/lib/components/ui/StatusBadge.svelte +67 -67
  150. package/ftm-inbox/src/lib/components/ui/StreamDrawer.svelte +149 -149
  151. package/ftm-inbox/src/lib/components/ui/ThemeToggle.svelte +80 -80
  152. package/ftm-inbox/src/lib/theme.ts +47 -47
  153. package/ftm-inbox/src/routes/+layout.svelte +76 -76
  154. package/ftm-inbox/src/routes/+page.svelte +401 -401
  155. package/ftm-inbox/svelte.config.js +12 -12
  156. package/ftm-inbox/tailwind.config.ts +63 -63
  157. package/ftm-inbox/tsconfig.json +13 -13
  158. package/ftm-inbox/vite.config.ts +6 -6
  159. package/ftm-intent/SKILL.md +241 -241
  160. package/ftm-intent.yml +2 -2
  161. package/ftm-manifest.json +3794 -3794
  162. package/ftm-map/SKILL.md +291 -291
  163. package/ftm-map/scripts/db.py +712 -712
  164. package/ftm-map/scripts/index.py +415 -415
  165. package/ftm-map/scripts/parser.py +224 -224
  166. package/ftm-map/scripts/queries/go-tags.scm +20 -20
  167. package/ftm-map/scripts/queries/javascript-tags.scm +35 -35
  168. package/ftm-map/scripts/queries/python-tags.scm +31 -31
  169. package/ftm-map/scripts/queries/ruby-tags.scm +19 -19
  170. package/ftm-map/scripts/queries/rust-tags.scm +37 -37
  171. package/ftm-map/scripts/queries/typescript-tags.scm +41 -41
  172. package/ftm-map/scripts/query.py +301 -301
  173. package/ftm-map/scripts/ranker.py +377 -377
  174. package/ftm-map/scripts/requirements.txt +5 -5
  175. package/ftm-map/scripts/setup-hooks.sh +27 -27
  176. package/ftm-map/scripts/setup.sh +56 -56
  177. package/ftm-map/scripts/test_db.py +364 -364
  178. package/ftm-map/scripts/test_parser.py +174 -174
  179. package/ftm-map/scripts/test_query.py +183 -183
  180. package/ftm-map/scripts/test_ranker.py +199 -199
  181. package/ftm-map/scripts/views.py +591 -591
  182. package/ftm-map.yml +2 -2
  183. package/ftm-mind/SKILL.md +201 -1943
  184. package/ftm-mind/evals/promptfoo.yaml +142 -142
  185. package/ftm-mind/references/blackboard-protocol.md +110 -0
  186. package/ftm-mind/references/blackboard-schema.md +328 -328
  187. package/ftm-mind/references/complexity-guide.md +110 -110
  188. package/ftm-mind/references/complexity-sizing.md +138 -0
  189. package/ftm-mind/references/decide-act-protocol.md +172 -0
  190. package/ftm-mind/references/direct-execution.md +51 -0
  191. package/ftm-mind/references/environment-discovery.md +77 -0
  192. package/ftm-mind/references/event-registry.md +319 -319
  193. package/ftm-mind/references/mcp-inventory.md +300 -296
  194. package/ftm-mind/references/ops-routing.md +47 -0
  195. package/ftm-mind/references/orient-protocol.md +234 -0
  196. package/ftm-mind/references/personality.md +40 -0
  197. package/ftm-mind/references/protocols/COMPLEXITY-SIZING.md +72 -72
  198. package/ftm-mind/references/protocols/MCP-HEURISTICS.md +32 -32
  199. package/ftm-mind/references/protocols/PLAN-APPROVAL.md +80 -80
  200. package/ftm-mind/references/reflexion-protocol.md +249 -249
  201. package/ftm-mind/references/routing/SCENARIOS.md +22 -22
  202. package/ftm-mind/references/routing-scenarios.md +35 -35
  203. package/ftm-mind.yml +2 -2
  204. package/ftm-ops.yml +4 -0
  205. package/ftm-pause/SKILL.md +395 -395
  206. package/ftm-pause/references/protocols/SKILL-RESTORE-PROTOCOLS.md +186 -186
  207. package/ftm-pause/references/protocols/VALIDATION.md +80 -80
  208. package/ftm-pause.yml +2 -2
  209. package/ftm-researcher/SKILL.md +275 -275
  210. package/ftm-researcher/evals/agent-diversity.yaml +17 -17
  211. package/ftm-researcher/evals/synthesis-quality.yaml +12 -12
  212. package/ftm-researcher/evals/trigger-accuracy.yaml +39 -39
  213. package/ftm-researcher/references/adaptive-search.md +116 -116
  214. package/ftm-researcher/references/agent-prompts.md +193 -193
  215. package/ftm-researcher/references/council-integration.md +193 -193
  216. package/ftm-researcher/references/output-format.md +203 -203
  217. package/ftm-researcher/references/synthesis-pipeline.md +165 -165
  218. package/ftm-researcher/scripts/score_credibility.py +234 -234
  219. package/ftm-researcher/scripts/validate_research.py +92 -92
  220. package/ftm-researcher.yml +2 -2
  221. package/ftm-resume/SKILL.md +518 -518
  222. package/ftm-resume/references/protocols/VALIDATION.md +172 -172
  223. package/ftm-resume.yml +2 -2
  224. package/ftm-retro/SKILL.md +380 -380
  225. package/ftm-retro/references/protocols/SCORING-RUBRICS.md +89 -89
  226. package/ftm-retro/references/templates/REPORT-FORMAT.md +109 -109
  227. package/ftm-retro.yml +2 -2
  228. package/ftm-routine/SKILL.md +170 -170
  229. package/ftm-routine.yml +4 -4
  230. package/ftm-state/blackboard/capabilities.json +5 -5
  231. package/ftm-state/blackboard/capabilities.schema.json +27 -27
  232. package/ftm-state/blackboard/context.json +37 -23
  233. package/ftm-state/blackboard/experiences/doom-statusline-fix.json +26 -0
  234. package/ftm-state/blackboard/experiences/hackathon-pages-site.json +26 -0
  235. package/ftm-state/blackboard/experiences/hindsight-sso-kickoff.json +42 -0
  236. package/ftm-state/blackboard/experiences/index.json +58 -9
  237. package/ftm-state/blackboard/experiences/learning-ragnarok-api-access.json +23 -0
  238. package/ftm-state/blackboard/experiences/nordlayer-members-auto-assign.json +26 -0
  239. package/ftm-state/blackboard/experiences/saml2aws-stale-session-fix.json +41 -0
  240. package/ftm-state/blackboard/patterns.json +6 -6
  241. package/ftm-state/schemas/context.schema.json +130 -130
  242. package/ftm-state/schemas/experience-index.schema.json +77 -77
  243. package/ftm-state/schemas/experience.schema.json +78 -78
  244. package/ftm-state/schemas/patterns.schema.json +44 -44
  245. package/ftm-upgrade/SKILL.md +194 -194
  246. package/ftm-upgrade/scripts/check-version.sh +76 -76
  247. package/ftm-upgrade/scripts/upgrade.sh +143 -143
  248. package/ftm-upgrade.yml +2 -2
  249. package/ftm-verify.yml +2 -2
  250. package/ftm.yml +2 -2
  251. package/hooks/ftm-auto-log.sh +137 -0
  252. package/hooks/ftm-blackboard-enforcer.sh +93 -93
  253. package/hooks/ftm-discovery-reminder.sh +90 -90
  254. package/hooks/ftm-drafts-gate.sh +61 -61
  255. package/hooks/ftm-event-logger.mjs +107 -107
  256. package/hooks/ftm-install-hooks.sh +240 -0
  257. package/hooks/ftm-learning-capture.sh +117 -0
  258. package/hooks/ftm-map-autodetect.sh +79 -79
  259. package/hooks/ftm-pending-sync-check.sh +22 -22
  260. package/hooks/ftm-plan-gate.sh +92 -92
  261. package/hooks/ftm-post-commit-trigger.sh +57 -57
  262. package/hooks/ftm-post-compaction.sh +138 -0
  263. package/hooks/ftm-pre-compaction.sh +147 -0
  264. package/hooks/ftm-session-end.sh +52 -0
  265. package/hooks/ftm-session-snapshot.sh +213 -0
  266. package/hooks/settings-template.json +81 -81
  267. package/install.sh +363 -363
  268. package/package.json +84 -84
  269. package/uninstall.sh +25 -25
@@ -1,377 +1,377 @@
1
- """
2
- ranker.py -- PageRank-based context selection engine for ftm-map.
3
-
4
- Implements Aider-style personalized PageRank over the file-level dependency graph
5
- with task-aware personalization and token-budget binary search.
6
- """
7
-
8
- import math
9
- import os
10
- import sys
11
-
12
- sys.path.insert(0, os.path.dirname(__file__))
13
-
14
- import numpy as np
15
- import scipy.sparse as sp
16
-
17
- # Try fast-pagerank first, fall back to scipy power iteration
18
- try:
19
- from fast_pagerank import pagerank_power
20
- HAS_FAST_PAGERANK = True
21
- except ImportError:
22
- HAS_FAST_PAGERANK = False
23
-
24
-
25
- def build_adjacency_matrix(conn):
26
- """Build undirected sparse adjacency matrix from file_edges.
27
-
28
- Returns (matrix, file_id_to_idx, idx_to_file_id) where:
29
- - matrix is a scipy CSR sparse matrix (undirected: A + A.T)
30
- - file_id_to_idx maps file_id -> matrix index
31
- - idx_to_file_id maps matrix index -> file_id
32
- """
33
- # Get all files
34
- files = conn.execute("SELECT id FROM files ORDER BY id").fetchall()
35
- if not files:
36
- return None, {}, {}
37
-
38
- file_ids = [row['id'] for row in files]
39
- file_id_to_idx = {fid: i for i, fid in enumerate(file_ids)}
40
- idx_to_file_id = {i: fid for i, fid in enumerate(file_ids)}
41
- n = len(file_ids)
42
-
43
- # Get edges
44
- edges = conn.execute(
45
- "SELECT source_file_id, target_file_id, weight FROM file_edges"
46
- ).fetchall()
47
-
48
- if not edges:
49
- return sp.csr_matrix((n, n)), file_id_to_idx, idx_to_file_id
50
-
51
- rows, cols, data = [], [], []
52
- for edge in edges:
53
- src_idx = file_id_to_idx.get(edge['source_file_id'])
54
- tgt_idx = file_id_to_idx.get(edge['target_file_id'])
55
- if src_idx is not None and tgt_idx is not None:
56
- rows.append(src_idx)
57
- cols.append(tgt_idx)
58
- data.append(edge['weight'])
59
-
60
- # Build directed matrix, then symmetrize for undirected PageRank
61
- A = sp.csr_matrix((data, (rows, cols)), shape=(n, n))
62
- A_undirected = A + A.T # Symmetrize
63
-
64
- return A_undirected, file_id_to_idx, idx_to_file_id
65
-
66
-
67
- def build_personalization(
68
- conn, seed_files=None, seed_keywords=None, seed_symbols=None, file_id_to_idx=None
69
- ):
70
- """Build personalization vector for PageRank.
71
-
72
- Three channels:
73
- - seed_files: file paths get 100x weight
74
- - seed_keywords: FTS5 matches get 30x weight
75
- - seed_symbols: symbol name matches - defining file gets 80x, referencing files get 40x
76
-
77
- Returns normalized numpy array (sums to 1.0).
78
- """
79
- n = len(file_id_to_idx)
80
- if n == 0:
81
- return None
82
-
83
- pers = np.ones(n) # Base: uniform weight of 1
84
-
85
- # Channel 1: Seed files (100x)
86
- if seed_files:
87
- for fpath in seed_files:
88
- file_row = conn.execute(
89
- "SELECT id FROM files WHERE path=?", (fpath,)
90
- ).fetchone()
91
- if file_row and file_row['id'] in file_id_to_idx:
92
- idx = file_id_to_idx[file_row['id']]
93
- pers[idx] *= 100
94
-
95
- # Channel 2: Seed keywords via FTS5 (30x)
96
- if seed_keywords:
97
- for kw in seed_keywords:
98
- try:
99
- fts_results = conn.execute(
100
- "SELECT s.file_id FROM symbols_fts fts "
101
- "JOIN symbols s ON s.id = fts.rowid "
102
- "WHERE symbols_fts MATCH ? LIMIT 50",
103
- (kw,),
104
- ).fetchall()
105
- for row in fts_results:
106
- if row['file_id'] in file_id_to_idx:
107
- pers[file_id_to_idx[row['file_id']]] *= 30
108
- except Exception:
109
- pass # FTS query syntax errors are non-fatal
110
-
111
- # Channel 3: Seed symbols (80x defining, 40x referencing)
112
- if seed_symbols:
113
- for sym_name in seed_symbols:
114
- # Defining files get 80x
115
- def_files = conn.execute(
116
- "SELECT DISTINCT file_id FROM symbols WHERE name=?", (sym_name,)
117
- ).fetchall()
118
- for row in def_files:
119
- if row['file_id'] in file_id_to_idx:
120
- pers[file_id_to_idx[row['file_id']]] *= 80
121
-
122
- # Referencing files get 40x
123
- ref_files = conn.execute(
124
- "SELECT DISTINCT file_id FROM refs WHERE symbol_name=?", (sym_name,)
125
- ).fetchall()
126
- for row in ref_files:
127
- if row['file_id'] in file_id_to_idx:
128
- pers[file_id_to_idx[row['file_id']]] *= 40
129
-
130
- # Normalize to sum to 1
131
- total = pers.sum()
132
- if total > 0:
133
- pers /= total
134
-
135
- return pers
136
-
137
-
138
- def run_pagerank(adj_matrix, personalization=None, damping=0.85, max_iter=100, tol=1e-6):
139
- """Run PageRank on the adjacency matrix.
140
-
141
- Uses fast-pagerank if available, otherwise scipy power iteration.
142
- Returns numpy array of scores indexed by matrix position.
143
- """
144
- n = adj_matrix.shape[0]
145
- if n == 0:
146
- return np.array([])
147
-
148
- if HAS_FAST_PAGERANK and personalization is not None:
149
- try:
150
- scores = pagerank_power(
151
- adj_matrix, p=damping, personalize=personalization, tol=tol
152
- )
153
- return scores
154
- except Exception:
155
- pass # Fall through to scipy implementation
156
-
157
- # Scipy power iteration fallback
158
- # Normalize adjacency matrix columns (column-stochastic transition matrix)
159
- col_sums = np.array(adj_matrix.sum(axis=0)).flatten()
160
- col_sums[col_sums == 0] = 1 # Avoid division by zero for dangling nodes
161
-
162
- # Transition matrix: M[i,j] = A[i,j] / col_sum[j]
163
- D_inv = sp.diags(1.0 / col_sums)
164
- M = adj_matrix @ D_inv
165
-
166
- # Initialize personalization / teleport vector
167
- if personalization is not None:
168
- v = personalization.copy()
169
- else:
170
- v = np.ones(n) / n
171
-
172
- scores = v.copy()
173
-
174
- # Dangling nodes: columns with zero outgoing weight
175
- dangling_mask = np.array(adj_matrix.sum(axis=0)).flatten() == 0
176
-
177
- for _ in range(max_iter):
178
- prev = scores.copy()
179
-
180
- # PageRank iteration with dangling-node redistribution
181
- dangling_sum = scores[dangling_mask].sum() if dangling_mask.any() else 0
182
- scores = damping * (M @ scores) + damping * dangling_sum * v + (1 - damping) * v
183
-
184
- # Check convergence via L1 norm
185
- if np.abs(scores - prev).sum() < tol:
186
- break
187
-
188
- return scores
189
-
190
-
191
- def rank_files(conn, seed_files=None, seed_keywords=None, seed_symbols=None):
192
- """Rank all files by structural importance with personalization.
193
-
194
- Returns sorted list of (file_path, score) tuples, highest score first.
195
- """
196
- adj, fid_to_idx, idx_to_fid = build_adjacency_matrix(conn)
197
- if adj is None or adj.shape[0] == 0:
198
- return []
199
-
200
- pers = build_personalization(
201
- conn, seed_files, seed_keywords, seed_symbols, fid_to_idx
202
- )
203
- scores = run_pagerank(adj, pers)
204
-
205
- # Map scores back to file paths
206
- results = []
207
- for idx, score in enumerate(scores):
208
- file_id = idx_to_fid[idx]
209
- file_row = conn.execute(
210
- "SELECT path FROM files WHERE id=?", (file_id,)
211
- ).fetchone()
212
- if file_row:
213
- results.append((file_row['path'], float(score)))
214
-
215
- results.sort(key=lambda x: x[1], reverse=True)
216
- return results
217
-
218
-
219
- def fit_to_budget(ranked_files, conn, token_budget):
220
- """Select files + key symbols that fit within token budget.
221
-
222
- Uses binary search with 15% tolerance (Aider's approach).
223
- Token estimation: ~25 tokens per tag/symbol entry.
224
-
225
- Returns (result_list, total_tokens) where result_list contains dicts:
226
- [{path, score, symbols: [name, ...], tokens}]
227
- """
228
- if not ranked_files or token_budget <= 0:
229
- return [], 0
230
-
231
- def estimate_tokens(file_list):
232
- """Estimate tokens for a list of files based on their symbol count."""
233
- total = 0
234
- for fpath, _ in file_list:
235
- file_row = conn.execute(
236
- "SELECT id, line_count FROM files WHERE path=?", (fpath,)
237
- ).fetchone()
238
- if not file_row:
239
- continue
240
- syms = conn.execute(
241
- "SELECT name, signature FROM symbols WHERE file_id=? ORDER BY line_start",
242
- (file_row['id'],),
243
- ).fetchall()
244
- for _sym in syms:
245
- # ~25 tokens per tag entry (Aider's estimate)
246
- total += 25
247
- return total
248
-
249
- # Binary search: find max number of files that fits within budget
250
- lo, hi = 1, len(ranked_files)
251
- best = 1
252
-
253
- while lo <= hi:
254
- mid = (lo + hi) // 2
255
- tokens = estimate_tokens(ranked_files[:mid])
256
- if tokens <= token_budget:
257
- best = mid
258
- lo = mid + 1
259
- else:
260
- hi = mid - 1
261
-
262
- # Allow 15% tolerance -- greedily add more files if within tolerance
263
- tolerance_budget = token_budget * 1.15
264
- while best < len(ranked_files):
265
- tokens = estimate_tokens(ranked_files[: best + 1])
266
- if tokens <= tolerance_budget:
267
- best += 1
268
- else:
269
- break
270
-
271
- # Build output with symbols for each selected file
272
- result = []
273
- total_tokens = 0
274
- for fpath, score in ranked_files[:best]:
275
- file_row = conn.execute(
276
- "SELECT id FROM files WHERE path=?", (fpath,)
277
- ).fetchone()
278
- if not file_row:
279
- continue
280
- syms = conn.execute(
281
- "SELECT name FROM symbols WHERE file_id=? ORDER BY line_start",
282
- (file_row['id'],),
283
- ).fetchall()
284
- sym_names = [s['name'] for s in syms]
285
- entry_tokens = len(sym_names) * 25
286
- total_tokens += entry_tokens
287
- result.append({
288
- "path": fpath,
289
- "score": round(score, 6),
290
- "symbols": sym_names,
291
- "tokens": entry_tokens,
292
- })
293
-
294
- return result, total_tokens
295
-
296
-
297
- # ---------------------------------------------------------------------------
298
- # Smoke test
299
- # ---------------------------------------------------------------------------
300
-
301
- if __name__ == "__main__":
302
- import tempfile
303
- from db import (
304
- get_connection,
305
- add_file,
306
- add_symbol,
307
- add_reference,
308
- rebuild_file_edges,
309
- rebuild_symbol_edges,
310
- )
311
-
312
- print("Running ranker.py smoke tests ...")
313
-
314
- with tempfile.TemporaryDirectory() as tmp:
315
- conn = get_connection(tmp)
316
-
317
- # Create a small graph: 3 files with cross-references
318
- f1 = add_file(conn, "src/auth.py", "python", 1.0, line_count=50)
319
- f2 = add_file(conn, "src/api.py", "python", 1.0, line_count=100)
320
- f3 = add_file(conn, "src/utils.py", "python", 1.0, line_count=30)
321
-
322
- # Symbols
323
- add_symbol(
324
- conn, f1, "authenticate", "definition", 1, 20,
325
- signature="def authenticate(req)",
326
- )
327
- add_symbol(conn, f1, "verify_token", "definition", 25, 40)
328
- add_symbol(
329
- conn, f2, "handle_request", "definition", 1, 50,
330
- signature="def handle_request(req)",
331
- )
332
- add_symbol(conn, f3, "format_date", "definition", 1, 10)
333
- add_symbol(conn, f3, "parse_config", "definition", 15, 25)
334
-
335
- # References: api.py references auth.py functions, and utils.py
336
- add_reference(conn, f2, "authenticate", 10)
337
- add_reference(conn, f2, "verify_token", 15)
338
- add_reference(conn, f2, "format_date", 20)
339
- add_reference(conn, f2, "parse_config", 25)
340
- # auth.py also references utils
341
- add_reference(conn, f1, "parse_config", 30)
342
-
343
- # Materialize edges
344
- rebuild_file_edges(conn)
345
- conn.commit()
346
-
347
- # Test 1: Uniform PageRank
348
- results = rank_files(conn)
349
- print(f" Uniform PageRank: {len(results)} files ranked")
350
- for path, score in results:
351
- print(f" {path}: {score:.6f}")
352
- assert len(results) == 3
353
-
354
- # Test 2: Personalized -- seed auth.py
355
- results_pers = rank_files(conn, seed_files=["src/auth.py"])
356
- print(f" Personalized (seed auth.py): {len(results_pers)} files")
357
- for path, score in results_pers:
358
- print(f" {path}: {score:.6f}")
359
- # auth.py should be ranked higher with personalization
360
- auth_score = next(s for p, s in results_pers if p == "src/auth.py")
361
- auth_uniform = next(s for p, s in results if p == "src/auth.py")
362
- print(f" Auth personalized boost: {auth_score:.6f} vs {auth_uniform:.6f}")
363
-
364
- # Test 3: Budget fitting
365
- budget_result, total_tokens = fit_to_budget(results, conn, 200)
366
- print(f" Budget fit (200 tokens): {len(budget_result)} files, {total_tokens} tokens")
367
- assert total_tokens <= 200 * 1.15 # 15% tolerance
368
-
369
- # Test 4: Keyword personalization
370
- results_kw = rank_files(conn, seed_keywords=["authenticate"])
371
- print(f" Keyword personalized: {len(results_kw)} files")
372
-
373
- # Test 5: Symbol personalization
374
- results_sym = rank_files(conn, seed_symbols=["authenticate"])
375
- print(f" Symbol personalized: {len(results_sym)} files")
376
-
377
- print("\nAll ranker smoke tests passed.")
1
+ """
2
+ ranker.py -- PageRank-based context selection engine for ftm-map.
3
+
4
+ Implements Aider-style personalized PageRank over the file-level dependency graph
5
+ with task-aware personalization and token-budget binary search.
6
+ """
7
+
8
+ import math
9
+ import os
10
+ import sys
11
+
12
+ sys.path.insert(0, os.path.dirname(__file__))
13
+
14
+ import numpy as np
15
+ import scipy.sparse as sp
16
+
17
+ # Try fast-pagerank first, fall back to scipy power iteration
18
+ try:
19
+ from fast_pagerank import pagerank_power
20
+ HAS_FAST_PAGERANK = True
21
+ except ImportError:
22
+ HAS_FAST_PAGERANK = False
23
+
24
+
25
+ def build_adjacency_matrix(conn):
26
+ """Build undirected sparse adjacency matrix from file_edges.
27
+
28
+ Returns (matrix, file_id_to_idx, idx_to_file_id) where:
29
+ - matrix is a scipy CSR sparse matrix (undirected: A + A.T)
30
+ - file_id_to_idx maps file_id -> matrix index
31
+ - idx_to_file_id maps matrix index -> file_id
32
+ """
33
+ # Get all files
34
+ files = conn.execute("SELECT id FROM files ORDER BY id").fetchall()
35
+ if not files:
36
+ return None, {}, {}
37
+
38
+ file_ids = [row['id'] for row in files]
39
+ file_id_to_idx = {fid: i for i, fid in enumerate(file_ids)}
40
+ idx_to_file_id = {i: fid for i, fid in enumerate(file_ids)}
41
+ n = len(file_ids)
42
+
43
+ # Get edges
44
+ edges = conn.execute(
45
+ "SELECT source_file_id, target_file_id, weight FROM file_edges"
46
+ ).fetchall()
47
+
48
+ if not edges:
49
+ return sp.csr_matrix((n, n)), file_id_to_idx, idx_to_file_id
50
+
51
+ rows, cols, data = [], [], []
52
+ for edge in edges:
53
+ src_idx = file_id_to_idx.get(edge['source_file_id'])
54
+ tgt_idx = file_id_to_idx.get(edge['target_file_id'])
55
+ if src_idx is not None and tgt_idx is not None:
56
+ rows.append(src_idx)
57
+ cols.append(tgt_idx)
58
+ data.append(edge['weight'])
59
+
60
+ # Build directed matrix, then symmetrize for undirected PageRank
61
+ A = sp.csr_matrix((data, (rows, cols)), shape=(n, n))
62
+ A_undirected = A + A.T # Symmetrize
63
+
64
+ return A_undirected, file_id_to_idx, idx_to_file_id
65
+
66
+
67
def build_personalization(
    conn, seed_files=None, seed_keywords=None, seed_symbols=None, file_id_to_idx=None
):
    """Build the personalization (teleport) vector for PageRank.

    Every file starts with a base weight of 1. Each seed channel multiplies
    the weight of the files it matches:

    - seed_files: a file whose path is listed gets 100x.
    - seed_keywords: a file owning a symbol matched by the FTS5 query gets
      30x, applied once per keyword no matter how many of its symbols match.
    - seed_symbols: files defining the symbol get 80x, files referencing it
      get 40x.

    Boosts from different seeds and channels stack multiplicatively.

    Args:
        conn: Database connection; rows must support access by column name.
        seed_files: Optional iterable of file paths.
        seed_keywords: Optional iterable of FTS5 MATCH query strings.
        seed_symbols: Optional iterable of exact symbol names.
        file_id_to_idx: Mapping of file id -> matrix index.

    Returns:
        numpy array of length ``len(file_id_to_idx)`` that sums to 1.0, or
        None when ``file_id_to_idx`` is missing or empty.
    """
    import sqlite3  # local import: only needed to recognise failed FTS queries

    # Covers both the empty mapping and the default None (len(None) would raise).
    if not file_id_to_idx:
        return None

    n = len(file_id_to_idx)
    pers = np.ones(n)  # Base: uniform weight of 1

    def boost(file_ids, factor):
        """Multiply the weight of every known file id by ``factor``."""
        for file_id in file_ids:
            idx = file_id_to_idx.get(file_id)
            if idx is not None:
                pers[idx] *= factor

    # Channel 1: Seed files (100x)
    for fpath in seed_files or ():
        file_row = conn.execute(
            "SELECT id FROM files WHERE path=?", (fpath,)
        ).fetchone()
        if file_row is not None:
            boost([file_row['id']], 100)

    # Channel 2: Seed keywords via FTS5 (30x)
    for kw in seed_keywords or ():
        try:
            fts_results = conn.execute(
                "SELECT s.file_id FROM symbols_fts fts "
                "JOIN symbols s ON s.id = fts.rowid "
                "WHERE symbols_fts MATCH ? LIMIT 50",
                (kw,),
            ).fetchall()
        except sqlite3.Error:
            # FTS query syntax errors are non-fatal: skip this keyword.
            continue
        # De-duplicate so a file with k matching symbols is boosted once,
        # not 30**k (which could overflow and poison the normalization).
        boost({row['file_id'] for row in fts_results}, 30)

    # Channel 3: Seed symbols (80x defining, 40x referencing)
    for sym_name in seed_symbols or ():
        def_files = conn.execute(
            "SELECT DISTINCT file_id FROM symbols WHERE name=?", (sym_name,)
        ).fetchall()
        boost((row['file_id'] for row in def_files), 80)

        ref_files = conn.execute(
            "SELECT DISTINCT file_id FROM refs WHERE symbol_name=?", (sym_name,)
        ).fetchall()
        boost((row['file_id'] for row in ref_files), 40)

    # Normalize to sum to 1; fall back to uniform if the total is unusable.
    total = pers.sum()
    if not np.isfinite(total) or total <= 0:
        return np.full(n, 1.0 / n)
    return pers / total
136
+
137
+
138
def run_pagerank(adj_matrix, personalization=None, damping=0.85, max_iter=100, tol=1e-6):
    """Run PageRank on the adjacency matrix.

    When fast-pagerank is available and a personalization vector is given,
    ``pagerank_power`` is tried first. Otherwise, or if that call raises, a
    power iteration over a column-normalized transition matrix is used.

    Args:
        adj_matrix: Square (n x n) adjacency matrix.
        personalization: Optional length-n teleport weights. The fallback
            normalizes them to sum to 1; a zero or non-finite sum falls back
            to the uniform distribution.
        damping: Damping factor applied to the link-following term.
        max_iter: Maximum number of power iterations in the fallback.
        tol: Convergence threshold on the L1 change between iterations.

    Returns:
        numpy array of scores indexed by matrix position (empty when n == 0).
    """
    n = adj_matrix.shape[0]
    if n == 0:
        return np.array([])

    if HAS_FAST_PAGERANK and personalization is not None:
        try:
            return pagerank_power(
                adj_matrix, p=damping, personalize=personalization, tol=tol
            )
        except Exception:
            pass  # Deliberate best-effort: fall through to scipy implementation

    # Teleport vector: normalized personalization, or uniform.
    v = None
    if personalization is not None:
        weights = np.asarray(personalization, dtype=float).ravel()
        weight_total = weights.sum()
        if np.isfinite(weight_total) and weight_total > 0:
            v = weights / weight_total  # new array; caller's vector is untouched
    if v is None:
        v = np.full(n, 1.0 / n)

    # Column sums are computed once and reused for both the dangling-node
    # mask and the normalization.
    col_sums = np.asarray(adj_matrix.sum(axis=0)).ravel()
    dangling_mask = col_sums == 0
    has_dangling = bool(dangling_mask.any())
    safe_sums = np.where(dangling_mask, 1.0, col_sums)  # avoid division by zero

    # Transition matrix: M[i,j] = A[i,j] / col_sum[j]
    M = adj_matrix @ sp.diags(1.0 / safe_sums)

    scores = v.copy()
    for _ in range(max_iter):
        prev = scores

        # Score held by dangling nodes is redistributed along the teleport vector.
        dangling_sum = prev[dangling_mask].sum() if has_dangling else 0.0
        scores = damping * (M @ prev) + (damping * dangling_sum + (1 - damping)) * v

        # Check convergence via L1 norm
        if np.abs(scores - prev).sum() < tol:
            break

    return scores
189
+
190
+
191
def rank_files(conn, seed_files=None, seed_keywords=None, seed_symbols=None):
    """Rank all files by PageRank score, optionally personalized by seeds.

    Args:
        conn: Database connection; rows must support access by column name.
        seed_files: Optional file paths forwarded to build_personalization.
        seed_keywords: Optional keywords forwarded to build_personalization.
        seed_symbols: Optional symbol names forwarded to build_personalization.

    Returns:
        List of (file_path, score) tuples sorted by score, highest first.
        Empty when the adjacency matrix is missing or has no rows.
    """
    adj, fid_to_idx, idx_to_fid = build_adjacency_matrix(conn)
    if adj is None or adj.shape[0] == 0:
        return []

    pers = build_personalization(
        conn, seed_files, seed_keywords, seed_symbols, fid_to_idx
    )
    scores = run_pagerank(adj, pers)

    # Resolve every id -> path in one query instead of one query per file.
    path_by_id = {
        row['id']: row['path']
        for row in conn.execute("SELECT id, path FROM files").fetchall()
    }

    # Map scores back to file paths, skipping ids no longer in the files table.
    results = []
    for idx, score in enumerate(scores):
        file_id = idx_to_fid[idx]
        if file_id in path_by_id:
            results.append((path_by_id[file_id], float(score)))

    results.sort(key=lambda item: item[1], reverse=True)
    return results
217
+
218
+
219
def fit_to_budget(ranked_files, conn, token_budget):
    """Select the longest prefix of ranked files that fits the token budget.

    Token estimation: ~25 tokens per symbol entry (Aider's estimate). A 15%
    tolerance is allowed, so the selection may use up to 1.15 * token_budget.
    Files are taken strictly in rank order; selection stops at the first file
    that would push the running total over the tolerance. Paths that are not
    in the files table are skipped and cost nothing.

    Each file is queried once, so the cost is linear in the number of files
    examined. The result may be empty when even the top-ranked file does not
    fit within the tolerance.

    Args:
        ranked_files: Sequence of (file_path, score) tuples, best first.
        conn: Database connection; rows must support access by column name.
        token_budget: Target token count; values <= 0 select nothing.

    Returns:
        (result_list, total_tokens) where result_list contains dicts:
        [{path, score, symbols: [name, ...], tokens}]
    """
    if not ranked_files or token_budget <= 0:
        return [], 0

    tokens_per_symbol = 25
    tolerance_budget = token_budget * 1.15

    result = []
    total_tokens = 0
    for fpath, score in ranked_files:
        file_row = conn.execute(
            "SELECT id FROM files WHERE path=?", (fpath,)
        ).fetchone()
        if file_row is None:
            continue

        sym_rows = conn.execute(
            "SELECT name FROM symbols WHERE file_id=? ORDER BY line_start",
            (file_row['id'],),
        ).fetchall()
        sym_names = [row['name'] for row in sym_rows]
        entry_tokens = len(sym_names) * tokens_per_symbol

        # Totals only grow, so the first overshoot ends the prefix.
        if total_tokens + entry_tokens > tolerance_budget:
            break

        total_tokens += entry_tokens
        result.append({
            "path": fpath,
            "score": round(score, 6),
            "symbols": sym_names,
            "tokens": entry_tokens,
        })

    return result, total_tokens
295
+
296
+
297
+ # ---------------------------------------------------------------------------
298
+ # Smoke test
299
+ # ---------------------------------------------------------------------------
300
+
301
if __name__ == "__main__":
    # Smoke test: build a three-file graph in a throwaway database and
    # exercise ranking, each personalization channel, and budget fitting.
    import tempfile
    from db import (
        get_connection,
        add_file,
        add_symbol,
        add_reference,
        rebuild_file_edges,
    )

    print("Running ranker.py smoke tests ...")

    with tempfile.TemporaryDirectory() as tmp:
        conn = get_connection(tmp)
        try:
            # Create a small graph: 3 files with cross-references
            f1 = add_file(conn, "src/auth.py", "python", 1.0, line_count=50)
            f2 = add_file(conn, "src/api.py", "python", 1.0, line_count=100)
            f3 = add_file(conn, "src/utils.py", "python", 1.0, line_count=30)

            # Symbols
            add_symbol(
                conn, f1, "authenticate", "definition", 1, 20,
                signature="def authenticate(req)",
            )
            add_symbol(conn, f1, "verify_token", "definition", 25, 40)
            add_symbol(
                conn, f2, "handle_request", "definition", 1, 50,
                signature="def handle_request(req)",
            )
            add_symbol(conn, f3, "format_date", "definition", 1, 10)
            add_symbol(conn, f3, "parse_config", "definition", 15, 25)

            # References: api.py references auth.py functions, and utils.py
            add_reference(conn, f2, "authenticate", 10)
            add_reference(conn, f2, "verify_token", 15)
            add_reference(conn, f2, "format_date", 20)
            add_reference(conn, f2, "parse_config", 25)
            # auth.py also references utils
            add_reference(conn, f1, "parse_config", 30)

            # Materialize edges
            rebuild_file_edges(conn)
            conn.commit()

            # Test 1: Uniform PageRank
            results = rank_files(conn)
            print(f" Uniform PageRank: {len(results)} files ranked")
            for path, score in results:
                print(f" {path}: {score:.6f}")
            assert len(results) == 3

            # Test 2: Personalized -- seed auth.py
            results_pers = rank_files(conn, seed_files=["src/auth.py"])
            print(f" Personalized (seed auth.py): {len(results_pers)} files")
            for path, score in results_pers:
                print(f" {path}: {score:.6f}")
            # auth.py should be ranked higher with personalization
            auth_score = next(s for p, s in results_pers if p == "src/auth.py")
            auth_uniform = next(s for p, s in results if p == "src/auth.py")
            print(f" Auth personalized boost: {auth_score:.6f} vs {auth_uniform:.6f}")
            # Enforce the expectation instead of only printing it.
            assert auth_score > auth_uniform

            # Test 3: Budget fitting
            budget_result, total_tokens = fit_to_budget(results, conn, 200)
            print(f" Budget fit (200 tokens): {len(budget_result)} files, {total_tokens} tokens")
            assert total_tokens <= 200 * 1.15  # 15% tolerance

            # Test 4: Keyword personalization
            results_kw = rank_files(conn, seed_keywords=["authenticate"])
            print(f" Keyword personalized: {len(results_kw)} files")

            # Test 5: Symbol personalization
            results_sym = rank_files(conn, seed_symbols=["authenticate"])
            print(f" Symbol personalized: {len(results_sym)} files")
        finally:
            # Close before the temp directory is removed so no open database
            # handle blocks cleanup.
            # NOTE(review): assumes get_connection returns an object with
            # close() (it already exposes execute/commit) -- confirm.
            conn.close()

    print("\nAll ranker smoke tests passed.")