feed-the-machine 1.6.1 → 1.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (272) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +262 -170
  3. package/bin/__pycache__/tasks_db.cpython-314.pyc +0 -0
  4. package/bin/brain.py +1340 -0
  5. package/bin/convert_claude_skills_to_codex.py +490 -0
  6. package/bin/generate-manifest.mjs +463 -463
  7. package/bin/harden_codex_skills.py +141 -0
  8. package/bin/install.mjs +491 -491
  9. package/bin/migrate-eng-buddy-data.py +875 -0
  10. package/bin/playbook_engine/__init__.py +1 -0
  11. package/bin/playbook_engine/conftest.py +8 -0
  12. package/bin/playbook_engine/extractor.py +33 -0
  13. package/bin/playbook_engine/manager.py +102 -0
  14. package/bin/playbook_engine/models.py +84 -0
  15. package/bin/playbook_engine/registry.py +35 -0
  16. package/bin/playbook_engine/test_extractor.py +72 -0
  17. package/bin/playbook_engine/test_integration.py +129 -0
  18. package/bin/playbook_engine/test_manager.py +85 -0
  19. package/bin/playbook_engine/test_models.py +166 -0
  20. package/bin/playbook_engine/test_registry.py +67 -0
  21. package/bin/playbook_engine/test_tracer.py +86 -0
  22. package/bin/playbook_engine/tracer.py +93 -0
  23. package/bin/tasks_db.py +456 -0
  24. package/docs/HOOKS.md +243 -243
  25. package/docs/INBOX.md +233 -233
  26. package/ftm/SKILL.md +125 -122
  27. package/ftm-audit/SKILL.md +673 -623
  28. package/ftm-audit/references/protocols/PROJECT-PATTERNS.md +91 -91
  29. package/ftm-audit/references/protocols/RUNTIME-WIRING.md +66 -66
  30. package/ftm-audit/references/protocols/WIRING-CONTRACTS.md +135 -135
  31. package/ftm-audit/references/strategies/AUTO-FIX-STRATEGIES.md +69 -69
  32. package/ftm-audit/references/templates/REPORT-FORMAT.md +96 -96
  33. package/ftm-audit/scripts/run-knip.sh +23 -23
  34. package/ftm-audit.yml +2 -2
  35. package/ftm-brainstorm/SKILL.md +1003 -498
  36. package/ftm-brainstorm/evals/evals.json +180 -100
  37. package/ftm-brainstorm/evals/promptfoo.yaml +109 -109
  38. package/ftm-brainstorm/references/agent-prompts.md +552 -224
  39. package/ftm-brainstorm/references/plan-template.md +209 -121
  40. package/ftm-brainstorm.yml +2 -2
  41. package/ftm-browse/SKILL.md +454 -454
  42. package/ftm-browse/daemon/browser-manager.ts +206 -206
  43. package/ftm-browse/daemon/bun.lock +30 -30
  44. package/ftm-browse/daemon/cli.ts +347 -347
  45. package/ftm-browse/daemon/commands.ts +410 -410
  46. package/ftm-browse/daemon/main.ts +357 -357
  47. package/ftm-browse/daemon/package.json +17 -17
  48. package/ftm-browse/daemon/server.ts +189 -189
  49. package/ftm-browse/daemon/snapshot.ts +519 -519
  50. package/ftm-browse/daemon/tsconfig.json +22 -22
  51. package/ftm-browse.yml +4 -4
  52. package/ftm-capture/SKILL.md +370 -370
  53. package/ftm-capture.yml +4 -4
  54. package/ftm-codex-gate/SKILL.md +361 -361
  55. package/ftm-codex-gate.yml +2 -2
  56. package/ftm-config/SKILL.md +422 -345
  57. package/ftm-config.default.yml +125 -82
  58. package/ftm-config.yml +44 -2
  59. package/ftm-council/SKILL.md +416 -416
  60. package/ftm-council/references/prompts/CLAUDE-INVESTIGATION.md +60 -60
  61. package/ftm-council/references/prompts/CODEX-INVESTIGATION.md +58 -58
  62. package/ftm-council/references/prompts/GEMINI-INVESTIGATION.md +58 -58
  63. package/ftm-council/references/prompts/REBUTTAL-TEMPLATE.md +57 -57
  64. package/ftm-council/references/protocols/PREREQUISITES.md +47 -47
  65. package/ftm-council/references/protocols/STEP-0-FRAMING.md +46 -46
  66. package/ftm-council-chat.yml +2 -0
  67. package/ftm-council.yml +2 -2
  68. package/ftm-dashboard/SKILL.md +163 -163
  69. package/ftm-dashboard.yml +4 -4
  70. package/ftm-debug/SKILL.md +1037 -1037
  71. package/ftm-debug/references/phases/PHASE-0-INTAKE.md +58 -58
  72. package/ftm-debug/references/phases/PHASE-1-TRIAGE.md +46 -46
  73. package/ftm-debug/references/phases/PHASE-2-WAR-ROOM-AGENTS.md +279 -279
  74. package/ftm-debug/references/phases/PHASE-3-TO-6-EXECUTION.md +436 -436
  75. package/ftm-debug/references/protocols/BLACKBOARD.md +86 -86
  76. package/ftm-debug/references/protocols/EDGE-CASES.md +103 -103
  77. package/ftm-debug.yml +2 -2
  78. package/ftm-diagram/SKILL.md +277 -277
  79. package/ftm-diagram.yml +2 -2
  80. package/ftm-executor/SKILL.md +777 -777
  81. package/ftm-executor/references/STYLE-TEMPLATE.md +73 -73
  82. package/ftm-executor/references/phases/PHASE-0-VERIFICATION.md +62 -62
  83. package/ftm-executor/references/phases/PHASE-2-AGENT-ASSEMBLY.md +34 -34
  84. package/ftm-executor/references/phases/PHASE-3-WORKTREES.md +38 -38
  85. package/ftm-executor/references/phases/PHASE-4-5-AUDIT.md +81 -72
  86. package/ftm-executor/references/phases/PHASE-4-DISPATCH.md +66 -66
  87. package/ftm-executor/references/phases/PHASE-5-5-CODEX-GATE.md +73 -73
  88. package/ftm-executor/references/protocols/DOCUMENTATION-BOOTSTRAP.md +36 -36
  89. package/ftm-executor/references/protocols/MODEL-PROFILE.md +59 -59
  90. package/ftm-executor/references/protocols/PROGRESS-TRACKING.md +66 -66
  91. package/ftm-executor/runtime/ftm-runtime.mjs +252 -252
  92. package/ftm-executor/runtime/package.json +8 -8
  93. package/ftm-executor.yml +2 -2
  94. package/ftm-git/SKILL.md +441 -441
  95. package/ftm-git/evals/evals.json +26 -26
  96. package/ftm-git/evals/promptfoo.yaml +75 -75
  97. package/ftm-git/hooks/post-commit-experience.sh +92 -92
  98. package/ftm-git/references/patterns/SECRET-PATTERNS.md +104 -104
  99. package/ftm-git/references/protocols/REMEDIATION.md +139 -139
  100. package/ftm-git/scripts/pre-commit-secrets.sh +110 -110
  101. package/ftm-git.yml +2 -2
  102. package/ftm-inbox/backend/__pycache__/main.cpython-314.pyc +0 -0
  103. package/ftm-inbox/backend/adapters/_retry.py +64 -64
  104. package/ftm-inbox/backend/adapters/base.py +230 -230
  105. package/ftm-inbox/backend/adapters/freshservice.py +104 -104
  106. package/ftm-inbox/backend/adapters/gmail.py +125 -125
  107. package/ftm-inbox/backend/adapters/jira.py +136 -136
  108. package/ftm-inbox/backend/adapters/registry.py +192 -192
  109. package/ftm-inbox/backend/adapters/slack.py +110 -110
  110. package/ftm-inbox/backend/db/connection.py +54 -54
  111. package/ftm-inbox/backend/db/schema.py +78 -78
  112. package/ftm-inbox/backend/executor/__init__.py +7 -7
  113. package/ftm-inbox/backend/executor/engine.py +149 -149
  114. package/ftm-inbox/backend/executor/step_runner.py +98 -98
  115. package/ftm-inbox/backend/main.py +103 -103
  116. package/ftm-inbox/backend/models/__init__.py +1 -1
  117. package/ftm-inbox/backend/models/unified_task.py +36 -36
  118. package/ftm-inbox/backend/planner/__init__.py +6 -6
  119. package/ftm-inbox/backend/planner/__pycache__/__init__.cpython-314.pyc +0 -0
  120. package/ftm-inbox/backend/planner/__pycache__/generator.cpython-314.pyc +0 -0
  121. package/ftm-inbox/backend/planner/__pycache__/schema.cpython-314.pyc +0 -0
  122. package/ftm-inbox/backend/planner/generator.py +127 -127
  123. package/ftm-inbox/backend/planner/schema.py +34 -34
  124. package/ftm-inbox/backend/requirements.txt +5 -5
  125. package/ftm-inbox/backend/routes/__pycache__/plan.cpython-314.pyc +0 -0
  126. package/ftm-inbox/backend/routes/execute.py +186 -186
  127. package/ftm-inbox/backend/routes/health.py +52 -52
  128. package/ftm-inbox/backend/routes/inbox.py +68 -68
  129. package/ftm-inbox/backend/routes/plan.py +271 -271
  130. package/ftm-inbox/bin/launchagent.mjs +91 -91
  131. package/ftm-inbox/bin/setup.mjs +188 -188
  132. package/ftm-inbox/bin/start.sh +10 -10
  133. package/ftm-inbox/bin/status.sh +17 -17
  134. package/ftm-inbox/bin/stop.sh +8 -8
  135. package/ftm-inbox/config.example.yml +55 -55
  136. package/ftm-inbox/package-lock.json +2898 -2898
  137. package/ftm-inbox/package.json +26 -26
  138. package/ftm-inbox/postcss.config.js +6 -6
  139. package/ftm-inbox/src/app.css +199 -199
  140. package/ftm-inbox/src/app.html +18 -18
  141. package/ftm-inbox/src/lib/api.ts +166 -166
  142. package/ftm-inbox/src/lib/components/ExecutionLog.svelte +81 -81
  143. package/ftm-inbox/src/lib/components/InboxFeed.svelte +143 -143
  144. package/ftm-inbox/src/lib/components/PlanStep.svelte +271 -271
  145. package/ftm-inbox/src/lib/components/PlanView.svelte +206 -206
  146. package/ftm-inbox/src/lib/components/StreamPanel.svelte +99 -99
  147. package/ftm-inbox/src/lib/components/TaskCard.svelte +190 -190
  148. package/ftm-inbox/src/lib/components/ui/EmptyState.svelte +63 -63
  149. package/ftm-inbox/src/lib/components/ui/KawaiiCard.svelte +86 -86
  150. package/ftm-inbox/src/lib/components/ui/PillButton.svelte +106 -106
  151. package/ftm-inbox/src/lib/components/ui/StatusBadge.svelte +67 -67
  152. package/ftm-inbox/src/lib/components/ui/StreamDrawer.svelte +149 -149
  153. package/ftm-inbox/src/lib/components/ui/ThemeToggle.svelte +80 -80
  154. package/ftm-inbox/src/lib/theme.ts +47 -47
  155. package/ftm-inbox/src/routes/+layout.svelte +76 -76
  156. package/ftm-inbox/src/routes/+page.svelte +401 -401
  157. package/ftm-inbox/svelte.config.js +12 -12
  158. package/ftm-inbox/tailwind.config.ts +63 -63
  159. package/ftm-inbox/tsconfig.json +13 -13
  160. package/ftm-inbox/vite.config.ts +6 -6
  161. package/ftm-intent/SKILL.md +241 -241
  162. package/ftm-intent.yml +2 -2
  163. package/ftm-manifest.json +3794 -3794
  164. package/ftm-map/SKILL.md +291 -291
  165. package/ftm-map/scripts/db.py +712 -712
  166. package/ftm-map/scripts/index.py +415 -415
  167. package/ftm-map/scripts/parser.py +224 -224
  168. package/ftm-map/scripts/queries/go-tags.scm +20 -20
  169. package/ftm-map/scripts/queries/javascript-tags.scm +35 -35
  170. package/ftm-map/scripts/queries/python-tags.scm +31 -31
  171. package/ftm-map/scripts/queries/ruby-tags.scm +19 -19
  172. package/ftm-map/scripts/queries/rust-tags.scm +37 -37
  173. package/ftm-map/scripts/queries/typescript-tags.scm +41 -41
  174. package/ftm-map/scripts/query.py +301 -301
  175. package/ftm-map/scripts/ranker.py +377 -377
  176. package/ftm-map/scripts/requirements.txt +5 -5
  177. package/ftm-map/scripts/setup-hooks.sh +27 -27
  178. package/ftm-map/scripts/setup.sh +56 -56
  179. package/ftm-map/scripts/test_db.py +364 -364
  180. package/ftm-map/scripts/test_parser.py +174 -174
  181. package/ftm-map/scripts/test_query.py +183 -183
  182. package/ftm-map/scripts/test_ranker.py +199 -199
  183. package/ftm-map/scripts/views.py +591 -591
  184. package/ftm-map.yml +2 -2
  185. package/ftm-mind/SKILL.md +201 -1943
  186. package/ftm-mind/evals/promptfoo.yaml +142 -142
  187. package/ftm-mind/references/blackboard-protocol.md +110 -0
  188. package/ftm-mind/references/blackboard-schema.md +328 -328
  189. package/ftm-mind/references/complexity-guide.md +110 -110
  190. package/ftm-mind/references/complexity-sizing.md +138 -0
  191. package/ftm-mind/references/decide-act-protocol.md +172 -0
  192. package/ftm-mind/references/direct-execution.md +51 -0
  193. package/ftm-mind/references/environment-discovery.md +77 -0
  194. package/ftm-mind/references/event-registry.md +319 -319
  195. package/ftm-mind/references/mcp-inventory.md +300 -296
  196. package/ftm-mind/references/ops-routing.md +47 -0
  197. package/ftm-mind/references/orient-protocol.md +234 -0
  198. package/ftm-mind/references/personality.md +40 -0
  199. package/ftm-mind/references/protocols/COMPLEXITY-SIZING.md +72 -72
  200. package/ftm-mind/references/protocols/MCP-HEURISTICS.md +32 -32
  201. package/ftm-mind/references/protocols/PLAN-APPROVAL.md +80 -80
  202. package/ftm-mind/references/reflexion-protocol.md +249 -249
  203. package/ftm-mind/references/routing/SCENARIOS.md +22 -22
  204. package/ftm-mind/references/routing-scenarios.md +35 -35
  205. package/ftm-mind.yml +2 -2
  206. package/ftm-ops.yml +4 -0
  207. package/ftm-pause/SKILL.md +395 -395
  208. package/ftm-pause/references/protocols/SKILL-RESTORE-PROTOCOLS.md +186 -186
  209. package/ftm-pause/references/protocols/VALIDATION.md +80 -80
  210. package/ftm-pause.yml +2 -2
  211. package/ftm-researcher/SKILL.md +275 -275
  212. package/ftm-researcher/evals/agent-diversity.yaml +17 -17
  213. package/ftm-researcher/evals/synthesis-quality.yaml +12 -12
  214. package/ftm-researcher/evals/trigger-accuracy.yaml +39 -39
  215. package/ftm-researcher/references/adaptive-search.md +116 -116
  216. package/ftm-researcher/references/agent-prompts.md +193 -193
  217. package/ftm-researcher/references/council-integration.md +193 -193
  218. package/ftm-researcher/references/output-format.md +203 -203
  219. package/ftm-researcher/references/synthesis-pipeline.md +165 -165
  220. package/ftm-researcher/scripts/score_credibility.py +234 -234
  221. package/ftm-researcher/scripts/validate_research.py +92 -92
  222. package/ftm-researcher.yml +2 -2
  223. package/ftm-resume/SKILL.md +518 -518
  224. package/ftm-resume/references/protocols/VALIDATION.md +172 -172
  225. package/ftm-resume.yml +2 -2
  226. package/ftm-retro/SKILL.md +380 -380
  227. package/ftm-retro/references/protocols/SCORING-RUBRICS.md +89 -89
  228. package/ftm-retro/references/templates/REPORT-FORMAT.md +109 -109
  229. package/ftm-retro.yml +2 -2
  230. package/ftm-routine/SKILL.md +170 -170
  231. package/ftm-routine.yml +4 -4
  232. package/ftm-state/blackboard/capabilities.json +5 -5
  233. package/ftm-state/blackboard/capabilities.schema.json +27 -27
  234. package/ftm-state/blackboard/context.json +37 -23
  235. package/ftm-state/blackboard/experiences/doom-statusline-fix.json +26 -0
  236. package/ftm-state/blackboard/experiences/hackathon-pages-site.json +26 -0
  237. package/ftm-state/blackboard/experiences/hindsight-sso-kickoff.json +42 -0
  238. package/ftm-state/blackboard/experiences/index.json +58 -9
  239. package/ftm-state/blackboard/experiences/learning-ragnarok-api-access.json +23 -0
  240. package/ftm-state/blackboard/experiences/nordlayer-members-auto-assign.json +26 -0
  241. package/ftm-state/blackboard/experiences/saml2aws-stale-session-fix.json +41 -0
  242. package/ftm-state/blackboard/patterns.json +6 -6
  243. package/ftm-state/schemas/context.schema.json +130 -130
  244. package/ftm-state/schemas/experience-index.schema.json +77 -77
  245. package/ftm-state/schemas/experience.schema.json +78 -78
  246. package/ftm-state/schemas/patterns.schema.json +44 -44
  247. package/ftm-upgrade/SKILL.md +194 -194
  248. package/ftm-upgrade/scripts/check-version.sh +76 -76
  249. package/ftm-upgrade/scripts/upgrade.sh +143 -143
  250. package/ftm-upgrade.yml +2 -2
  251. package/ftm-verify.yml +2 -2
  252. package/ftm.yml +2 -2
  253. package/hooks/ftm-auto-log.sh +137 -0
  254. package/hooks/ftm-blackboard-enforcer.sh +93 -93
  255. package/hooks/ftm-discovery-reminder.sh +90 -90
  256. package/hooks/ftm-drafts-gate.sh +61 -61
  257. package/hooks/ftm-event-logger.mjs +107 -107
  258. package/hooks/ftm-install-hooks.sh +240 -0
  259. package/hooks/ftm-learning-capture.sh +117 -0
  260. package/hooks/ftm-map-autodetect.sh +79 -79
  261. package/hooks/ftm-pending-sync-check.sh +22 -22
  262. package/hooks/ftm-plan-gate.sh +92 -92
  263. package/hooks/ftm-post-commit-trigger.sh +57 -57
  264. package/hooks/ftm-post-compaction.sh +138 -0
  265. package/hooks/ftm-pre-compaction.sh +147 -0
  266. package/hooks/ftm-session-end.sh +52 -0
  267. package/hooks/ftm-session-snapshot.sh +213 -0
  268. package/hooks/ftm-task-loader.sh +100 -0
  269. package/hooks/settings-template.json +91 -81
  270. package/install.sh +363 -363
  271. package/package.json +84 -84
  272. package/uninstall.sh +25 -25
@@ -1,377 +1,377 @@
1
- """
2
- ranker.py -- PageRank-based context selection engine for ftm-map.
3
-
4
- Implements Aider-style personalized PageRank over the file-level dependency graph
5
- with task-aware personalization and token-budget binary search.
6
- """
7
-
8
- import math
9
- import os
10
- import sys
11
-
12
- sys.path.insert(0, os.path.dirname(__file__))
13
-
14
- import numpy as np
15
- import scipy.sparse as sp
16
-
17
- # Try fast-pagerank first, fall back to scipy power iteration
18
- try:
19
- from fast_pagerank import pagerank_power
20
- HAS_FAST_PAGERANK = True
21
- except ImportError:
22
- HAS_FAST_PAGERANK = False
23
-
24
-
25
def build_adjacency_matrix(conn):
    """Build an undirected sparse adjacency matrix from the ``file_edges`` table.

    Args:
        conn: Database connection whose rows support access by column name
            (``row['id']``) and which exposes ``files`` and ``file_edges``
            tables.

    Returns:
        Tuple ``(matrix, file_id_to_idx, idx_to_file_id)`` where:

        - ``matrix`` is a float64 scipy CSR matrix holding ``A + A.T`` (the
          directed edge weights symmetrized), or ``None`` when ``files`` is
          empty,
        - ``file_id_to_idx`` maps file_id -> matrix index,
        - ``idx_to_file_id`` maps matrix index -> file_id.
    """
    files = conn.execute("SELECT id FROM files ORDER BY id").fetchall()
    if not files:
        return None, {}, {}

    file_ids = [row['id'] for row in files]
    file_id_to_idx = {fid: i for i, fid in enumerate(file_ids)}
    idx_to_file_id = dict(enumerate(file_ids))
    n = len(file_ids)

    edges = conn.execute(
        "SELECT source_file_id, target_file_id, weight FROM file_edges"
    ).fetchall()

    rows, cols, data = [], [], []
    for edge in edges:
        src_idx = file_id_to_idx.get(edge['source_file_id'])
        tgt_idx = file_id_to_idx.get(edge['target_file_id'])
        # Edges that touch a file id missing from ``files`` are ignored.
        if src_idx is not None and tgt_idx is not None:
            rows.append(src_idx)
            cols.append(tgt_idx)
            data.append(edge['weight'])

    if not data:
        # No usable edges: an all-zero matrix of the right shape.
        return sp.csr_matrix((n, n), dtype=np.float64), file_id_to_idx, idx_to_file_id

    # Force float64 so the dtype does not depend on how SQLite typed the
    # weight column (integer weights would otherwise give an integer matrix,
    # while the no-edge case above is always float).
    A = sp.csr_matrix((data, (rows, cols)), shape=(n, n), dtype=np.float64)
    A_undirected = A + A.T  # Symmetrize for undirected PageRank

    return A_undirected, file_id_to_idx, idx_to_file_id
65
-
66
-
67
def build_personalization(
    conn, seed_files=None, seed_keywords=None, seed_symbols=None, file_id_to_idx=None
):
    """Build the personalization (teleport) vector for PageRank.

    Every file starts at weight 1 and is multiplied by a boost for each
    channel that selects it:

    - seed_files: files whose path is listed get 100x
    - seed_keywords: files with at least one FTS5 match for a keyword get 30x
      (once per keyword, regardless of how many symbols in the file match)
    - seed_symbols: files defining the symbol get 80x, files referencing it
      get 40x

    Boosts from different channels, keywords and symbols multiply together.

    Args:
        conn: Database connection with name-addressable rows.
        seed_files: Optional iterable of file paths.
        seed_keywords: Optional iterable of FTS5 query strings.
        seed_symbols: Optional iterable of symbol names.
        file_id_to_idx: Mapping file_id -> vector index.

    Returns:
        Numpy array normalized to sum to 1.0, or ``None`` when
        ``file_id_to_idx`` is ``None`` or empty.
    """
    import sqlite3  # local import: only used to recognise FTS query failures

    # ``None`` is the declared default; treat it like an empty mapping instead
    # of crashing on ``len(None)``.
    if not file_id_to_idx:
        return None

    pers = np.ones(len(file_id_to_idx))  # Base: uniform weight of 1

    def boost(file_id, factor):
        """Multiply the weight of ``file_id`` by ``factor`` if it is indexed."""
        idx = file_id_to_idx.get(file_id)
        if idx is not None:
            pers[idx] *= factor

    # Channel 1: Seed files (100x)
    for fpath in seed_files or ():
        file_row = conn.execute(
            "SELECT id FROM files WHERE path=?", (fpath,)
        ).fetchone()
        if file_row:
            boost(file_row['id'], 100)

    # Channel 2: Seed keywords via FTS5 (30x)
    for kw in seed_keywords or ():
        try:
            fts_results = conn.execute(
                "SELECT s.file_id FROM symbols_fts fts "
                "JOIN symbols s ON s.id = fts.rowid "
                "WHERE symbols_fts MATCH ? LIMIT 50",
                (kw,),
            ).fetchall()
        except sqlite3.Error:
            continue  # FTS query syntax errors are non-fatal
        # The query yields one row per matching symbol; collapse to distinct
        # files so a file with k hits is boosted 30x, not 30**k.
        for file_id in {row['file_id'] for row in fts_results}:
            boost(file_id, 30)

    # Channel 3: Seed symbols (80x defining, 40x referencing)
    for sym_name in seed_symbols or ():
        def_files = conn.execute(
            "SELECT DISTINCT file_id FROM symbols WHERE name=?", (sym_name,)
        ).fetchall()
        for row in def_files:
            boost(row['file_id'], 80)

        ref_files = conn.execute(
            "SELECT DISTINCT file_id FROM refs WHERE symbol_name=?", (sym_name,)
        ).fetchall()
        for row in ref_files:
            boost(row['file_id'], 40)

    # Normalize to sum to 1
    total = pers.sum()
    if total > 0:
        pers /= total

    return pers
136
-
137
-
138
def run_pagerank(adj_matrix, personalization=None, damping=0.85, max_iter=100, tol=1e-6):
    """Run (personalized) PageRank on the adjacency matrix.

    When a personalization vector is given and fast-pagerank is importable,
    ``pagerank_power`` is tried first; any failure there falls back to the
    built-in power iteration below.

    Args:
        adj_matrix: Square adjacency matrix (scipy sparse).
        personalization: Optional teleport weights, one per node. The
            fallback normalizes it to sum to 1; ``None`` means uniform.
        damping: Probability of following an edge instead of teleporting.
        max_iter: Maximum number of power iterations.
        tol: Convergence threshold on the L1 change between iterations.

    Returns:
        Numpy array of scores indexed by matrix position (empty for an empty
        matrix).
    """
    n = adj_matrix.shape[0]
    if n == 0:
        return np.array([])

    if personalization is not None and HAS_FAST_PAGERANK:
        try:
            return pagerank_power(
                adj_matrix,
                p=damping,
                personalize=personalization,
                max_iter=max_iter,
                tol=tol,
            )
        except Exception:
            pass  # Deliberate best-effort: fall through to the scipy implementation

    # Scipy power iteration fallback.
    # Column sums are computed once and used both for the dangling-node mask
    # and for normalizing into a column-stochastic transition matrix.
    col_sums = np.asarray(adj_matrix.sum(axis=0), dtype=float).ravel()
    dangling_mask = col_sums == 0
    has_dangling = bool(dangling_mask.any())

    # Transition matrix: M[i,j] = A[i,j] / col_sum[j]; dangling columns are
    # divided by 1 so they stay all-zero instead of producing NaN.
    safe_sums = np.where(dangling_mask, 1.0, col_sums)
    M = adj_matrix @ sp.diags(1.0 / safe_sums)

    # Teleport vector: accept any array-like and make sure it sums to 1 so the
    # scores remain a probability distribution.
    uniform = np.full(n, 1.0 / n)
    if personalization is not None:
        v = np.asarray(personalization, dtype=float).ravel()
        v_total = v.sum()
        v = v / v_total if v_total > 0 else uniform
    else:
        v = uniform

    scores = v.copy()

    for _ in range(max_iter):
        prev = scores

        # Mass sitting on dangling nodes is redistributed via the teleport vector.
        dangling_sum = prev[dangling_mask].sum() if has_dangling else 0.0
        scores = damping * (M @ prev) + damping * dangling_sum * v + (1 - damping) * v

        # Check convergence via L1 norm
        if np.abs(scores - prev).sum() < tol:
            break

    return scores
189
-
190
-
191
def rank_files(conn, seed_files=None, seed_keywords=None, seed_symbols=None):
    """Rank all files by structural importance with personalization.

    Builds the file adjacency matrix, derives the personalization vector from
    the given seeds, runs PageRank and maps the scores back to file paths.

    Args:
        conn: Database connection with name-addressable rows.
        seed_files: Optional iterable of file paths to boost.
        seed_keywords: Optional iterable of FTS5 query strings to boost.
        seed_symbols: Optional iterable of symbol names to boost.

    Returns:
        List of ``(file_path, score)`` tuples sorted by score, highest first;
        empty when there are no files.
    """
    adj, fid_to_idx, idx_to_fid = build_adjacency_matrix(conn)
    if adj is None or adj.shape[0] == 0:
        return []

    pers = build_personalization(
        conn, seed_files, seed_keywords, seed_symbols, fid_to_idx
    )
    scores = run_pagerank(adj, pers)

    # Fetch every path in one query instead of issuing one SELECT per file.
    path_by_id = {
        row['id']: row['path']
        for row in conn.execute("SELECT id, path FROM files").fetchall()
    }

    results = []
    for idx, score in enumerate(scores):
        file_id = idx_to_fid[idx]
        if file_id in path_by_id:
            results.append((path_by_id[file_id], float(score)))

    results.sort(key=lambda item: item[1], reverse=True)
    return results
217
-
218
-
219
def fit_to_budget(ranked_files, conn, token_budget):
    """Select the top-ranked files whose symbol listings fit the token budget.

    Files are taken in rank order for as long as the running total stays
    within ``token_budget`` plus a 15% tolerance (Aider's approach). Because
    the running total never decreases, this single pass picks the same prefix
    a binary search over prefix lengths would, while querying each file at
    most once. If even the first file exceeds the tolerated budget, nothing
    is selected.

    Token estimation: ~25 tokens per tag/symbol entry.

    Args:
        ranked_files: Sequence of ``(path, score)`` tuples, best first.
        conn: Database connection with name-addressable rows.
        token_budget: Target number of tokens.

    Returns:
        ``(result_list, total_tokens)`` where result_list contains dicts
        ``{path, score, symbols: [name, ...], tokens}``. Paths that are not
        present in ``files`` are skipped.
    """
    if not ranked_files or token_budget <= 0:
        return [], 0

    tokens_per_symbol = 25  # ~25 tokens per tag entry (Aider's estimate)
    tolerance_budget = token_budget * 1.15

    result = []
    total_tokens = 0
    for fpath, score in ranked_files:
        file_row = conn.execute(
            "SELECT id FROM files WHERE path=?", (fpath,)
        ).fetchone()
        if not file_row:
            continue  # unknown path: costs nothing and is not reported

        syms = conn.execute(
            "SELECT name FROM symbols WHERE file_id=? ORDER BY line_start",
            (file_row['id'],),
        ).fetchall()
        sym_names = [s['name'] for s in syms]
        entry_tokens = len(sym_names) * tokens_per_symbol

        # Stop at the first file that would push the total past the tolerated
        # budget; every longer prefix would exceed it as well.
        if total_tokens + entry_tokens > tolerance_budget:
            break

        total_tokens += entry_tokens
        result.append({
            "path": fpath,
            "score": round(score, 6),
            "symbols": sym_names,
            "tokens": entry_tokens,
        })

    return result, total_tokens
295
-
296
-
297
- # ---------------------------------------------------------------------------
298
- # Smoke test
299
- # ---------------------------------------------------------------------------
300
-
301
if __name__ == "__main__":
    # Self-contained smoke run: builds a throwaway three-file index and
    # exercises ranking, personalization and budget fitting end to end.
    import tempfile
    from db import (
        get_connection,
        add_file,
        add_symbol,
        add_reference,
        rebuild_file_edges,
        rebuild_symbol_edges,
    )

    def _print_ranking(ranking):
        """Print one ``path: score`` line per ranked file."""
        for ranked_path, ranked_score in ranking:
            print(f" {ranked_path}: {ranked_score:.6f}")

    print("Running ranker.py smoke tests ...")

    with tempfile.TemporaryDirectory() as workdir:
        conn = get_connection(workdir)

        # Three files that reference each other.
        auth_id = add_file(conn, "src/auth.py", "python", 1.0, line_count=50)
        api_id = add_file(conn, "src/api.py", "python", 1.0, line_count=100)
        utils_id = add_file(conn, "src/utils.py", "python", 1.0, line_count=30)

        # Definitions per file.
        add_symbol(
            conn, auth_id, "authenticate", "definition", 1, 20,
            signature="def authenticate(req)",
        )
        add_symbol(conn, auth_id, "verify_token", "definition", 25, 40)
        add_symbol(
            conn, api_id, "handle_request", "definition", 1, 50,
            signature="def handle_request(req)",
        )
        add_symbol(conn, utils_id, "format_date", "definition", 1, 10)
        add_symbol(conn, utils_id, "parse_config", "definition", 15, 25)

        # api.py uses symbols from auth.py and utils.py ...
        add_reference(conn, api_id, "authenticate", 10)
        add_reference(conn, api_id, "verify_token", 15)
        add_reference(conn, api_id, "format_date", 20)
        add_reference(conn, api_id, "parse_config", 25)
        # ... and auth.py uses one symbol from utils.py.
        add_reference(conn, auth_id, "parse_config", 30)

        # Turn the references into file-level edges.
        rebuild_file_edges(conn)
        conn.commit()

        # Test 1: ranking without any seeds.
        results = rank_files(conn)
        print(f" Uniform PageRank: {len(results)} files ranked")
        _print_ranking(results)
        assert len(results) == 3

        # Test 2: ranking seeded with auth.py; the two auth.py scores are
        # printed side by side for comparison.
        results_pers = rank_files(conn, seed_files=["src/auth.py"])
        print(f" Personalized (seed auth.py): {len(results_pers)} files")
        _print_ranking(results_pers)
        auth_score = next(s for p, s in results_pers if p == "src/auth.py")
        auth_uniform = next(s for p, s in results if p == "src/auth.py")
        print(f" Auth personalized boost: {auth_score:.6f} vs {auth_uniform:.6f}")

        # Test 3: the selection must stay within budget plus 15% tolerance.
        budget_result, total_tokens = fit_to_budget(results, conn, 200)
        print(f" Budget fit (200 tokens): {len(budget_result)} files, {total_tokens} tokens")
        assert total_tokens <= 200 * 1.15  # 15% tolerance

        # Test 4: ranking seeded with a keyword.
        results_kw = rank_files(conn, seed_keywords=["authenticate"])
        print(f" Keyword personalized: {len(results_kw)} files")

        # Test 5: ranking seeded with a symbol name.
        results_sym = rank_files(conn, seed_symbols=["authenticate"])
        print(f" Symbol personalized: {len(results_sym)} files")

    print("\nAll ranker smoke tests passed.")
1
+ """
2
+ ranker.py -- PageRank-based context selection engine for ftm-map.
3
+
4
+ Implements Aider-style personalized PageRank over the file-level dependency graph
5
+ with task-aware personalization and token-budget binary search.
6
+ """
7
+
8
+ import math
9
+ import os
10
+ import sys
11
+
12
+ sys.path.insert(0, os.path.dirname(__file__))
13
+
14
+ import numpy as np
15
+ import scipy.sparse as sp
16
+
17
+ # Try fast-pagerank first, fall back to scipy power iteration
18
+ try:
19
+ from fast_pagerank import pagerank_power
20
+ HAS_FAST_PAGERANK = True
21
+ except ImportError:
22
+ HAS_FAST_PAGERANK = False
23
+
24
+
25
def build_adjacency_matrix(conn):
    """Build an undirected sparse adjacency matrix from the ``file_edges`` table.

    Args:
        conn: Database connection whose rows support access by column name
            (``row['id']``) and which exposes ``files`` and ``file_edges``
            tables.

    Returns:
        Tuple ``(matrix, file_id_to_idx, idx_to_file_id)`` where:

        - ``matrix`` is a float64 scipy CSR matrix holding ``A + A.T`` (the
          directed edge weights symmetrized), or ``None`` when ``files`` is
          empty,
        - ``file_id_to_idx`` maps file_id -> matrix index,
        - ``idx_to_file_id`` maps matrix index -> file_id.
    """
    files = conn.execute("SELECT id FROM files ORDER BY id").fetchall()
    if not files:
        return None, {}, {}

    file_ids = [row['id'] for row in files]
    file_id_to_idx = {fid: i for i, fid in enumerate(file_ids)}
    idx_to_file_id = dict(enumerate(file_ids))
    n = len(file_ids)

    edges = conn.execute(
        "SELECT source_file_id, target_file_id, weight FROM file_edges"
    ).fetchall()

    rows, cols, data = [], [], []
    for edge in edges:
        src_idx = file_id_to_idx.get(edge['source_file_id'])
        tgt_idx = file_id_to_idx.get(edge['target_file_id'])
        # Edges that touch a file id missing from ``files`` are ignored.
        if src_idx is not None and tgt_idx is not None:
            rows.append(src_idx)
            cols.append(tgt_idx)
            data.append(edge['weight'])

    if not data:
        # No usable edges: an all-zero matrix of the right shape.
        return sp.csr_matrix((n, n), dtype=np.float64), file_id_to_idx, idx_to_file_id

    # Force float64 so the dtype does not depend on how SQLite typed the
    # weight column (integer weights would otherwise give an integer matrix,
    # while the no-edge case above is always float).
    A = sp.csr_matrix((data, (rows, cols)), shape=(n, n), dtype=np.float64)
    A_undirected = A + A.T  # Symmetrize for undirected PageRank

    return A_undirected, file_id_to_idx, idx_to_file_id
65
+
66
+
67
def build_personalization(
    conn, seed_files=None, seed_keywords=None, seed_symbols=None, file_id_to_idx=None
):
    """Build the personalization (teleport) vector for PageRank.

    Every file starts at weight 1 and is multiplied by a boost for each
    channel that selects it:

    - seed_files: files whose path is listed get 100x
    - seed_keywords: files with at least one FTS5 match for a keyword get 30x
      (once per keyword, regardless of how many symbols in the file match)
    - seed_symbols: files defining the symbol get 80x, files referencing it
      get 40x

    Boosts from different channels, keywords and symbols multiply together.

    Args:
        conn: Database connection with name-addressable rows.
        seed_files: Optional iterable of file paths.
        seed_keywords: Optional iterable of FTS5 query strings.
        seed_symbols: Optional iterable of symbol names.
        file_id_to_idx: Mapping file_id -> vector index.

    Returns:
        Numpy array normalized to sum to 1.0, or ``None`` when
        ``file_id_to_idx`` is ``None`` or empty.
    """
    import sqlite3  # local import: only used to recognise FTS query failures

    # ``None`` is the declared default; treat it like an empty mapping instead
    # of crashing on ``len(None)``.
    if not file_id_to_idx:
        return None

    pers = np.ones(len(file_id_to_idx))  # Base: uniform weight of 1

    def boost(file_id, factor):
        """Multiply the weight of ``file_id`` by ``factor`` if it is indexed."""
        idx = file_id_to_idx.get(file_id)
        if idx is not None:
            pers[idx] *= factor

    # Channel 1: Seed files (100x)
    for fpath in seed_files or ():
        file_row = conn.execute(
            "SELECT id FROM files WHERE path=?", (fpath,)
        ).fetchone()
        if file_row:
            boost(file_row['id'], 100)

    # Channel 2: Seed keywords via FTS5 (30x)
    for kw in seed_keywords or ():
        try:
            fts_results = conn.execute(
                "SELECT s.file_id FROM symbols_fts fts "
                "JOIN symbols s ON s.id = fts.rowid "
                "WHERE symbols_fts MATCH ? LIMIT 50",
                (kw,),
            ).fetchall()
        except sqlite3.Error:
            continue  # FTS query syntax errors are non-fatal
        # The query yields one row per matching symbol; collapse to distinct
        # files so a file with k hits is boosted 30x, not 30**k.
        for file_id in {row['file_id'] for row in fts_results}:
            boost(file_id, 30)

    # Channel 3: Seed symbols (80x defining, 40x referencing)
    for sym_name in seed_symbols or ():
        def_files = conn.execute(
            "SELECT DISTINCT file_id FROM symbols WHERE name=?", (sym_name,)
        ).fetchall()
        for row in def_files:
            boost(row['file_id'], 80)

        ref_files = conn.execute(
            "SELECT DISTINCT file_id FROM refs WHERE symbol_name=?", (sym_name,)
        ).fetchall()
        for row in ref_files:
            boost(row['file_id'], 40)

    # Normalize to sum to 1
    total = pers.sum()
    if total > 0:
        pers /= total

    return pers
136
+
137
+
138
def run_pagerank(adj_matrix, personalization=None, damping=0.85, max_iter=100, tol=1e-6):
    """Run PageRank on the adjacency matrix.

    Uses fast-pagerank when it is available and a personalization vector is
    supplied; otherwise (or if that call fails) falls back to a scipy power
    iteration.

    Args:
        adj_matrix: Square scipy sparse matrix of edge weights.
        personalization: Optional teleport vector with one entry per node;
            a uniform vector is used when omitted.
        damping: Probability of following an edge instead of teleporting.
        max_iter: Maximum number of power-iteration steps.
        tol: Stop once the L1 change between two iterations drops below this.

    Returns:
        numpy array of scores indexed by matrix position (empty for an empty
        matrix).
    """
    n = adj_matrix.shape[0]
    if n == 0:
        return np.array([])

    # Personalization is checked first so the uniform case never needs to
    # consult the optional-dependency flag.
    if personalization is not None and HAS_FAST_PAGERANK:
        try:
            # Forward max_iter as well so both code paths honour the caller's
            # iteration cap.
            return pagerank_power(
                adj_matrix,
                p=damping,
                max_iter=max_iter,
                personalize=personalization,
                tol=tol,
            )
        except Exception:
            pass  # Fall through to scipy implementation

    # Scipy power iteration fallback
    col_sums = np.asarray(adj_matrix.sum(axis=0)).ravel()

    # Dangling nodes: columns with zero outgoing weight
    dangling_mask = col_sums == 0
    has_dangling = bool(dangling_mask.any())

    # Transition matrix: M[i,j] = A[i,j] / col_sum[j]. Dangling columns are
    # divided by 1 instead of 0; they are all-zero anyway.
    safe_sums = np.where(dangling_mask, 1, col_sums)
    M = adj_matrix @ sp.diags(1.0 / safe_sums)

    # Teleport vector (copied so the caller's array is never mutated)
    if personalization is not None:
        v = np.array(personalization, dtype=float)
    else:
        v = np.ones(n) / n

    scores = v.copy()

    for _ in range(max_iter):
        prev = scores

        # Mass sitting on dangling nodes is redistributed along the teleport
        # vector so the total score is conserved.
        dangling_sum = scores[dangling_mask].sum() if has_dangling else 0
        scores = damping * (M @ prev) + damping * dangling_sum * v + (1 - damping) * v

        # Check convergence via L1 norm
        if np.abs(scores - prev).sum() < tol:
            break

    return scores
189
+
190
+
191
def rank_files(conn, seed_files=None, seed_keywords=None, seed_symbols=None):
    """Rank all files by structural importance with personalization.

    Builds the file adjacency matrix, derives a personalization vector from
    the seeds, runs PageRank, and maps the resulting scores back to paths.

    Args:
        conn: Database connection; rows must support access by column name.
        seed_files: Optional iterable of file paths to boost.
        seed_keywords: Optional iterable of FTS5 keyword queries to boost.
        seed_symbols: Optional iterable of symbol names to boost.

    Returns:
        Sorted list of (file_path, score) tuples, highest score first. Empty
        when there is no graph to rank.
    """
    adj, fid_to_idx, idx_to_fid = build_adjacency_matrix(conn)
    if adj is None or adj.shape[0] == 0:
        return []

    pers = build_personalization(
        conn, seed_files, seed_keywords, seed_symbols, fid_to_idx
    )
    scores = run_pagerank(adj, pers)

    # Load every id -> path pair in one query instead of one query per file.
    path_by_id = {
        row['id']: row['path']
        for row in conn.execute("SELECT id, path FROM files").fetchall()
    }

    # Map scores back to file paths; ids without a files row are dropped.
    results = []
    for idx, score in enumerate(scores):
        file_id = idx_to_fid[idx]
        if file_id in path_by_id:
            results.append((path_by_id[file_id], float(score)))

    results.sort(key=lambda x: x[1], reverse=True)
    return results
217
+
218
+
219
def fit_to_budget(ranked_files, conn, token_budget):
    """Select files + key symbols that fit within token budget.

    Files are taken in ranked order for as long as the running token total
    stays within the budget plus a 15% tolerance (Aider's approach).
    Token estimation: ~25 tokens per tag/symbol entry.

    Because the running total never decreases, a single pass selects exactly
    the same prefix as a binary search followed by a greedy tolerance
    extension, while querying each file only once.

    NOTE: the first ranked file is always kept, even when it alone exceeds
    the tolerance budget. Paths with no row in ``files`` contribute no
    tokens and produce no output entry.

    Args:
        ranked_files: List of (file_path, score) tuples, best first.
        conn: Database connection; rows must support access by column name.
        token_budget: Target number of tokens; values <= 0 select nothing.

    Returns:
        (result_list, total_tokens) where result_list contains dicts:
        [{path, score, symbols: [name, ...], tokens}]
    """
    if not ranked_files or token_budget <= 0:
        return [], 0

    tokens_per_symbol = 25  # ~25 tokens per tag entry (Aider's estimate)
    tolerance_budget = token_budget * 1.15

    result = []
    total_tokens = 0
    for position, (fpath, score) in enumerate(ranked_files):
        file_row = conn.execute(
            "SELECT id FROM files WHERE path=?", (fpath,)
        ).fetchone()
        if not file_row:
            continue

        syms = conn.execute(
            "SELECT name FROM symbols WHERE file_id=? ORDER BY line_start",
            (file_row['id'],),
        ).fetchall()
        sym_names = [s['name'] for s in syms]
        entry_tokens = len(sym_names) * tokens_per_symbol

        # Stop at the first file that would push the total past the
        # tolerance; the top-ranked file (position 0) is exempt.
        if position > 0 and total_tokens + entry_tokens > tolerance_budget:
            break

        total_tokens += entry_tokens
        result.append({
            "path": fpath,
            "score": round(score, 6),
            "symbols": sym_names,
            "tokens": entry_tokens,
        })

    return result, total_tokens
295
+
296
+
297
+ # ---------------------------------------------------------------------------
298
+ # Smoke test
299
+ # ---------------------------------------------------------------------------
300
+
301
if __name__ == "__main__":
    # Smoke test: builds a tiny three-file graph in a throwaway database and
    # exercises ranking, personalization and budget fitting end to end.
    import tempfile
    from db import (
        get_connection,
        add_file,
        add_symbol,
        add_reference,
        rebuild_file_edges,
    )

    print("Running ranker.py smoke tests ...")

    with tempfile.TemporaryDirectory() as tmp:
        conn = get_connection(tmp)
        try:
            # Create a small graph: 3 files with cross-references
            f1 = add_file(conn, "src/auth.py", "python", 1.0, line_count=50)
            f2 = add_file(conn, "src/api.py", "python", 1.0, line_count=100)
            f3 = add_file(conn, "src/utils.py", "python", 1.0, line_count=30)

            # Symbols
            add_symbol(
                conn, f1, "authenticate", "definition", 1, 20,
                signature="def authenticate(req)",
            )
            add_symbol(conn, f1, "verify_token", "definition", 25, 40)
            add_symbol(
                conn, f2, "handle_request", "definition", 1, 50,
                signature="def handle_request(req)",
            )
            add_symbol(conn, f3, "format_date", "definition", 1, 10)
            add_symbol(conn, f3, "parse_config", "definition", 15, 25)

            # References: api.py references auth.py functions, and utils.py
            add_reference(conn, f2, "authenticate", 10)
            add_reference(conn, f2, "verify_token", 15)
            add_reference(conn, f2, "format_date", 20)
            add_reference(conn, f2, "parse_config", 25)
            # auth.py also references utils
            add_reference(conn, f1, "parse_config", 30)

            # Materialize edges
            rebuild_file_edges(conn)
            conn.commit()

            # Test 1: Uniform PageRank
            results = rank_files(conn)
            print(f"  Uniform PageRank: {len(results)} files ranked")
            for path, score in results:
                print(f"    {path}: {score:.6f}")
            assert len(results) == 3

            # Test 2: Personalized -- seed auth.py
            results_pers = rank_files(conn, seed_files=["src/auth.py"])
            print(f"  Personalized (seed auth.py): {len(results_pers)} files")
            for path, score in results_pers:
                print(f"    {path}: {score:.6f}")
            # auth.py should be ranked higher with personalization
            auth_score = next(s for p, s in results_pers if p == "src/auth.py")
            auth_uniform = next(s for p, s in results if p == "src/auth.py")
            print(f"  Auth personalized boost: {auth_score:.6f} vs {auth_uniform:.6f}")
            assert auth_score > auth_uniform

            # Test 3: Budget fitting
            budget_result, total_tokens = fit_to_budget(results, conn, 200)
            print(f"  Budget fit (200 tokens): {len(budget_result)} files, {total_tokens} tokens")
            assert total_tokens <= 200 * 1.15  # 15% tolerance

            # Test 4: Keyword personalization
            results_kw = rank_files(conn, seed_keywords=["authenticate"])
            print(f"  Keyword personalized: {len(results_kw)} files")

            # Test 5: Symbol personalization
            results_sym = rank_files(conn, seed_symbols=["authenticate"])
            print(f"  Symbol personalized: {len(results_sym)} files")
        finally:
            # Release the database before the temporary directory is removed;
            # an open handle can block the cleanup on some platforms.
            conn.close()

    print("\nAll ranker smoke tests passed.")