feed-the-machine 1.6.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (269)
  1. package/LICENSE +21 -21
  2. package/README.md +170 -170
  3. package/bin/brain.py +1340 -0
  4. package/bin/convert_claude_skills_to_codex.py +490 -0
  5. package/bin/generate-manifest.mjs +463 -463
  6. package/bin/harden_codex_skills.py +141 -0
  7. package/bin/install.mjs +491 -491
  8. package/bin/migrate-eng-buddy-data.py +875 -0
  9. package/bin/playbook_engine/__init__.py +1 -0
  10. package/bin/playbook_engine/conftest.py +8 -0
  11. package/bin/playbook_engine/extractor.py +33 -0
  12. package/bin/playbook_engine/manager.py +102 -0
  13. package/bin/playbook_engine/models.py +84 -0
  14. package/bin/playbook_engine/registry.py +35 -0
  15. package/bin/playbook_engine/test_extractor.py +72 -0
  16. package/bin/playbook_engine/test_integration.py +129 -0
  17. package/bin/playbook_engine/test_manager.py +85 -0
  18. package/bin/playbook_engine/test_models.py +166 -0
  19. package/bin/playbook_engine/test_registry.py +67 -0
  20. package/bin/playbook_engine/test_tracer.py +86 -0
  21. package/bin/playbook_engine/tracer.py +93 -0
  22. package/bin/tasks_db.py +456 -0
  23. package/docs/HOOKS.md +243 -243
  24. package/docs/INBOX.md +233 -233
  25. package/ftm/SKILL.md +125 -122
  26. package/ftm-audit/SKILL.md +623 -623
  27. package/ftm-audit/references/protocols/PROJECT-PATTERNS.md +91 -91
  28. package/ftm-audit/references/protocols/RUNTIME-WIRING.md +66 -66
  29. package/ftm-audit/references/protocols/WIRING-CONTRACTS.md +135 -135
  30. package/ftm-audit/references/strategies/AUTO-FIX-STRATEGIES.md +69 -69
  31. package/ftm-audit/references/templates/REPORT-FORMAT.md +96 -96
  32. package/ftm-audit/scripts/run-knip.sh +23 -23
  33. package/ftm-audit.yml +2 -2
  34. package/ftm-brainstorm/SKILL.md +1003 -498
  35. package/ftm-brainstorm/evals/evals.json +180 -100
  36. package/ftm-brainstorm/evals/promptfoo.yaml +109 -109
  37. package/ftm-brainstorm/references/agent-prompts.md +552 -224
  38. package/ftm-brainstorm/references/plan-template.md +209 -121
  39. package/ftm-brainstorm.yml +2 -2
  40. package/ftm-browse/SKILL.md +454 -454
  41. package/ftm-browse/daemon/browser-manager.ts +206 -206
  42. package/ftm-browse/daemon/bun.lock +30 -30
  43. package/ftm-browse/daemon/cli.ts +347 -347
  44. package/ftm-browse/daemon/commands.ts +410 -410
  45. package/ftm-browse/daemon/main.ts +357 -357
  46. package/ftm-browse/daemon/package.json +17 -17
  47. package/ftm-browse/daemon/server.ts +189 -189
  48. package/ftm-browse/daemon/snapshot.ts +519 -519
  49. package/ftm-browse/daemon/tsconfig.json +22 -22
  50. package/ftm-browse.yml +4 -4
  51. package/ftm-capture/SKILL.md +370 -370
  52. package/ftm-capture.yml +4 -4
  53. package/ftm-codex-gate/SKILL.md +361 -361
  54. package/ftm-codex-gate.yml +2 -2
  55. package/ftm-config/SKILL.md +422 -345
  56. package/ftm-config.default.yml +125 -82
  57. package/ftm-config.yml +44 -2
  58. package/ftm-council/SKILL.md +416 -416
  59. package/ftm-council/references/prompts/CLAUDE-INVESTIGATION.md +60 -60
  60. package/ftm-council/references/prompts/CODEX-INVESTIGATION.md +58 -58
  61. package/ftm-council/references/prompts/GEMINI-INVESTIGATION.md +58 -58
  62. package/ftm-council/references/prompts/REBUTTAL-TEMPLATE.md +57 -57
  63. package/ftm-council/references/protocols/PREREQUISITES.md +47 -47
  64. package/ftm-council/references/protocols/STEP-0-FRAMING.md +46 -46
  65. package/ftm-council.yml +2 -2
  66. package/ftm-dashboard/SKILL.md +163 -163
  67. package/ftm-dashboard.yml +4 -4
  68. package/ftm-debug/SKILL.md +1037 -1037
  69. package/ftm-debug/references/phases/PHASE-0-INTAKE.md +58 -58
  70. package/ftm-debug/references/phases/PHASE-1-TRIAGE.md +46 -46
  71. package/ftm-debug/references/phases/PHASE-2-WAR-ROOM-AGENTS.md +279 -279
  72. package/ftm-debug/references/phases/PHASE-3-TO-6-EXECUTION.md +436 -436
  73. package/ftm-debug/references/protocols/BLACKBOARD.md +86 -86
  74. package/ftm-debug/references/protocols/EDGE-CASES.md +103 -103
  75. package/ftm-debug.yml +2 -2
  76. package/ftm-diagram/SKILL.md +277 -277
  77. package/ftm-diagram.yml +2 -2
  78. package/ftm-executor/SKILL.md +777 -777
  79. package/ftm-executor/references/STYLE-TEMPLATE.md +73 -73
  80. package/ftm-executor/references/phases/PHASE-0-VERIFICATION.md +62 -62
  81. package/ftm-executor/references/phases/PHASE-2-AGENT-ASSEMBLY.md +34 -34
  82. package/ftm-executor/references/phases/PHASE-3-WORKTREES.md +38 -38
  83. package/ftm-executor/references/phases/PHASE-4-5-AUDIT.md +72 -72
  84. package/ftm-executor/references/phases/PHASE-4-DISPATCH.md +66 -66
  85. package/ftm-executor/references/phases/PHASE-5-5-CODEX-GATE.md +73 -73
  86. package/ftm-executor/references/protocols/DOCUMENTATION-BOOTSTRAP.md +36 -36
  87. package/ftm-executor/references/protocols/MODEL-PROFILE.md +59 -59
  88. package/ftm-executor/references/protocols/PROGRESS-TRACKING.md +66 -66
  89. package/ftm-executor/runtime/ftm-runtime.mjs +252 -252
  90. package/ftm-executor/runtime/package.json +8 -8
  91. package/ftm-executor.yml +2 -2
  92. package/ftm-git/SKILL.md +441 -441
  93. package/ftm-git/evals/evals.json +26 -26
  94. package/ftm-git/evals/promptfoo.yaml +75 -75
  95. package/ftm-git/hooks/post-commit-experience.sh +92 -92
  96. package/ftm-git/references/patterns/SECRET-PATTERNS.md +104 -104
  97. package/ftm-git/references/protocols/REMEDIATION.md +139 -139
  98. package/ftm-git/scripts/pre-commit-secrets.sh +110 -110
  99. package/ftm-git.yml +2 -2
  100. package/ftm-inbox/backend/__pycache__/main.cpython-314.pyc +0 -0
  101. package/ftm-inbox/backend/adapters/_retry.py +64 -64
  102. package/ftm-inbox/backend/adapters/base.py +230 -230
  103. package/ftm-inbox/backend/adapters/freshservice.py +104 -104
  104. package/ftm-inbox/backend/adapters/gmail.py +125 -125
  105. package/ftm-inbox/backend/adapters/jira.py +136 -136
  106. package/ftm-inbox/backend/adapters/registry.py +192 -192
  107. package/ftm-inbox/backend/adapters/slack.py +110 -110
  108. package/ftm-inbox/backend/db/connection.py +54 -54
  109. package/ftm-inbox/backend/db/schema.py +78 -78
  110. package/ftm-inbox/backend/executor/__init__.py +7 -7
  111. package/ftm-inbox/backend/executor/engine.py +149 -149
  112. package/ftm-inbox/backend/executor/step_runner.py +98 -98
  113. package/ftm-inbox/backend/main.py +103 -103
  114. package/ftm-inbox/backend/models/__init__.py +1 -1
  115. package/ftm-inbox/backend/models/unified_task.py +36 -36
  116. package/ftm-inbox/backend/planner/__init__.py +6 -6
  117. package/ftm-inbox/backend/planner/__pycache__/__init__.cpython-314.pyc +0 -0
  118. package/ftm-inbox/backend/planner/__pycache__/generator.cpython-314.pyc +0 -0
  119. package/ftm-inbox/backend/planner/__pycache__/schema.cpython-314.pyc +0 -0
  120. package/ftm-inbox/backend/planner/generator.py +127 -127
  121. package/ftm-inbox/backend/planner/schema.py +34 -34
  122. package/ftm-inbox/backend/requirements.txt +5 -5
  123. package/ftm-inbox/backend/routes/__pycache__/plan.cpython-314.pyc +0 -0
  124. package/ftm-inbox/backend/routes/execute.py +186 -186
  125. package/ftm-inbox/backend/routes/health.py +52 -52
  126. package/ftm-inbox/backend/routes/inbox.py +68 -68
  127. package/ftm-inbox/backend/routes/plan.py +271 -271
  128. package/ftm-inbox/bin/launchagent.mjs +91 -91
  129. package/ftm-inbox/bin/setup.mjs +188 -188
  130. package/ftm-inbox/bin/start.sh +10 -10
  131. package/ftm-inbox/bin/status.sh +17 -17
  132. package/ftm-inbox/bin/stop.sh +8 -8
  133. package/ftm-inbox/config.example.yml +55 -55
  134. package/ftm-inbox/package-lock.json +2898 -2898
  135. package/ftm-inbox/package.json +26 -26
  136. package/ftm-inbox/postcss.config.js +6 -6
  137. package/ftm-inbox/src/app.css +199 -199
  138. package/ftm-inbox/src/app.html +18 -18
  139. package/ftm-inbox/src/lib/api.ts +166 -166
  140. package/ftm-inbox/src/lib/components/ExecutionLog.svelte +81 -81
  141. package/ftm-inbox/src/lib/components/InboxFeed.svelte +143 -143
  142. package/ftm-inbox/src/lib/components/PlanStep.svelte +271 -271
  143. package/ftm-inbox/src/lib/components/PlanView.svelte +206 -206
  144. package/ftm-inbox/src/lib/components/StreamPanel.svelte +99 -99
  145. package/ftm-inbox/src/lib/components/TaskCard.svelte +190 -190
  146. package/ftm-inbox/src/lib/components/ui/EmptyState.svelte +63 -63
  147. package/ftm-inbox/src/lib/components/ui/KawaiiCard.svelte +86 -86
  148. package/ftm-inbox/src/lib/components/ui/PillButton.svelte +106 -106
  149. package/ftm-inbox/src/lib/components/ui/StatusBadge.svelte +67 -67
  150. package/ftm-inbox/src/lib/components/ui/StreamDrawer.svelte +149 -149
  151. package/ftm-inbox/src/lib/components/ui/ThemeToggle.svelte +80 -80
  152. package/ftm-inbox/src/lib/theme.ts +47 -47
  153. package/ftm-inbox/src/routes/+layout.svelte +76 -76
  154. package/ftm-inbox/src/routes/+page.svelte +401 -401
  155. package/ftm-inbox/svelte.config.js +12 -12
  156. package/ftm-inbox/tailwind.config.ts +63 -63
  157. package/ftm-inbox/tsconfig.json +13 -13
  158. package/ftm-inbox/vite.config.ts +6 -6
  159. package/ftm-intent/SKILL.md +241 -241
  160. package/ftm-intent.yml +2 -2
  161. package/ftm-manifest.json +3794 -3794
  162. package/ftm-map/SKILL.md +291 -291
  163. package/ftm-map/scripts/db.py +712 -712
  164. package/ftm-map/scripts/index.py +415 -415
  165. package/ftm-map/scripts/parser.py +224 -224
  166. package/ftm-map/scripts/queries/go-tags.scm +20 -20
  167. package/ftm-map/scripts/queries/javascript-tags.scm +35 -35
  168. package/ftm-map/scripts/queries/python-tags.scm +31 -31
  169. package/ftm-map/scripts/queries/ruby-tags.scm +19 -19
  170. package/ftm-map/scripts/queries/rust-tags.scm +37 -37
  171. package/ftm-map/scripts/queries/typescript-tags.scm +41 -41
  172. package/ftm-map/scripts/query.py +301 -301
  173. package/ftm-map/scripts/ranker.py +377 -377
  174. package/ftm-map/scripts/requirements.txt +5 -5
  175. package/ftm-map/scripts/setup-hooks.sh +27 -27
  176. package/ftm-map/scripts/setup.sh +56 -56
  177. package/ftm-map/scripts/test_db.py +364 -364
  178. package/ftm-map/scripts/test_parser.py +174 -174
  179. package/ftm-map/scripts/test_query.py +183 -183
  180. package/ftm-map/scripts/test_ranker.py +199 -199
  181. package/ftm-map/scripts/views.py +591 -591
  182. package/ftm-map.yml +2 -2
  183. package/ftm-mind/SKILL.md +201 -1943
  184. package/ftm-mind/evals/promptfoo.yaml +142 -142
  185. package/ftm-mind/references/blackboard-protocol.md +110 -0
  186. package/ftm-mind/references/blackboard-schema.md +328 -328
  187. package/ftm-mind/references/complexity-guide.md +110 -110
  188. package/ftm-mind/references/complexity-sizing.md +138 -0
  189. package/ftm-mind/references/decide-act-protocol.md +172 -0
  190. package/ftm-mind/references/direct-execution.md +51 -0
  191. package/ftm-mind/references/environment-discovery.md +77 -0
  192. package/ftm-mind/references/event-registry.md +319 -319
  193. package/ftm-mind/references/mcp-inventory.md +300 -296
  194. package/ftm-mind/references/ops-routing.md +47 -0
  195. package/ftm-mind/references/orient-protocol.md +234 -0
  196. package/ftm-mind/references/personality.md +40 -0
  197. package/ftm-mind/references/protocols/COMPLEXITY-SIZING.md +72 -72
  198. package/ftm-mind/references/protocols/MCP-HEURISTICS.md +32 -32
  199. package/ftm-mind/references/protocols/PLAN-APPROVAL.md +80 -80
  200. package/ftm-mind/references/reflexion-protocol.md +249 -249
  201. package/ftm-mind/references/routing/SCENARIOS.md +22 -22
  202. package/ftm-mind/references/routing-scenarios.md +35 -35
  203. package/ftm-mind.yml +2 -2
  204. package/ftm-ops.yml +4 -0
  205. package/ftm-pause/SKILL.md +395 -395
  206. package/ftm-pause/references/protocols/SKILL-RESTORE-PROTOCOLS.md +186 -186
  207. package/ftm-pause/references/protocols/VALIDATION.md +80 -80
  208. package/ftm-pause.yml +2 -2
  209. package/ftm-researcher/SKILL.md +275 -275
  210. package/ftm-researcher/evals/agent-diversity.yaml +17 -17
  211. package/ftm-researcher/evals/synthesis-quality.yaml +12 -12
  212. package/ftm-researcher/evals/trigger-accuracy.yaml +39 -39
  213. package/ftm-researcher/references/adaptive-search.md +116 -116
  214. package/ftm-researcher/references/agent-prompts.md +193 -193
  215. package/ftm-researcher/references/council-integration.md +193 -193
  216. package/ftm-researcher/references/output-format.md +203 -203
  217. package/ftm-researcher/references/synthesis-pipeline.md +165 -165
  218. package/ftm-researcher/scripts/score_credibility.py +234 -234
  219. package/ftm-researcher/scripts/validate_research.py +92 -92
  220. package/ftm-researcher.yml +2 -2
  221. package/ftm-resume/SKILL.md +518 -518
  222. package/ftm-resume/references/protocols/VALIDATION.md +172 -172
  223. package/ftm-resume.yml +2 -2
  224. package/ftm-retro/SKILL.md +380 -380
  225. package/ftm-retro/references/protocols/SCORING-RUBRICS.md +89 -89
  226. package/ftm-retro/references/templates/REPORT-FORMAT.md +109 -109
  227. package/ftm-retro.yml +2 -2
  228. package/ftm-routine/SKILL.md +170 -170
  229. package/ftm-routine.yml +4 -4
  230. package/ftm-state/blackboard/capabilities.json +5 -5
  231. package/ftm-state/blackboard/capabilities.schema.json +27 -27
  232. package/ftm-state/blackboard/context.json +37 -23
  233. package/ftm-state/blackboard/experiences/doom-statusline-fix.json +26 -0
  234. package/ftm-state/blackboard/experiences/hackathon-pages-site.json +26 -0
  235. package/ftm-state/blackboard/experiences/hindsight-sso-kickoff.json +42 -0
  236. package/ftm-state/blackboard/experiences/index.json +58 -9
  237. package/ftm-state/blackboard/experiences/learning-ragnarok-api-access.json +23 -0
  238. package/ftm-state/blackboard/experiences/nordlayer-members-auto-assign.json +26 -0
  239. package/ftm-state/blackboard/experiences/saml2aws-stale-session-fix.json +41 -0
  240. package/ftm-state/blackboard/patterns.json +6 -6
  241. package/ftm-state/schemas/context.schema.json +130 -130
  242. package/ftm-state/schemas/experience-index.schema.json +77 -77
  243. package/ftm-state/schemas/experience.schema.json +78 -78
  244. package/ftm-state/schemas/patterns.schema.json +44 -44
  245. package/ftm-upgrade/SKILL.md +194 -194
  246. package/ftm-upgrade/scripts/check-version.sh +76 -76
  247. package/ftm-upgrade/scripts/upgrade.sh +143 -143
  248. package/ftm-upgrade.yml +2 -2
  249. package/ftm-verify.yml +2 -2
  250. package/ftm.yml +2 -2
  251. package/hooks/ftm-auto-log.sh +137 -0
  252. package/hooks/ftm-blackboard-enforcer.sh +93 -93
  253. package/hooks/ftm-discovery-reminder.sh +90 -90
  254. package/hooks/ftm-drafts-gate.sh +61 -61
  255. package/hooks/ftm-event-logger.mjs +107 -107
  256. package/hooks/ftm-install-hooks.sh +240 -0
  257. package/hooks/ftm-learning-capture.sh +117 -0
  258. package/hooks/ftm-map-autodetect.sh +79 -79
  259. package/hooks/ftm-pending-sync-check.sh +22 -22
  260. package/hooks/ftm-plan-gate.sh +92 -92
  261. package/hooks/ftm-post-commit-trigger.sh +57 -57
  262. package/hooks/ftm-post-compaction.sh +138 -0
  263. package/hooks/ftm-pre-compaction.sh +147 -0
  264. package/hooks/ftm-session-end.sh +52 -0
  265. package/hooks/ftm-session-snapshot.sh +213 -0
  266. package/hooks/settings-template.json +81 -81
  267. package/install.sh +363 -363
  268. package/package.json +84 -84
  269. package/uninstall.sh +25 -25
@@ -1,275 +1,275 @@
1
- ---
2
- name: ftm-researcher
3
- description: Deep parallel research engine with 7 domain-specialized finder agents, adversarial review via ftm-council, adaptive wave-based search, structured reconciliation with disagreement maps, credibility scoring, and conversational iteration. Use when the user wants thorough research on any topic — "research X", "find out about Y", "what's the state of the art on Z", "compare approaches to W", "deep dive into X", "look into Y". Also invoked by ftm-brainstorm for its research sprints. Triggers on "research", "investigate", "deep dive", "state of the art", "compare", "find examples of", "what's out there for", "how do others handle", "find me evidence", "look into". For idea exploration and brainstorming, use ftm-brainstorm instead (which calls ftm-researcher internally for research).
4
- ---
5
-
6
- # ftm-researcher
7
-
8
- Deep parallel research engine with 7 domain-specialized finder agents, adversarial review via ftm-council, adaptive wave-based search, structured reconciliation with disagreement maps, credibility scoring, and conversational iteration.
9
-
10
- ## Events
11
-
12
- ### Emits
13
- - `research_complete` — when synthesis pipeline finishes and structured output is ready
14
- - Payload: `{ query, mode, findings_count, consensus_count, contested_count, unique_count, sources_count, council_used, duration_ms }`
15
- - `task_completed` — when the full research session finishes (including any conversational iteration)
16
- - Payload: `{ task_title, duration_ms }`
17
-
18
- ### Listens To
19
- - `task_received` — begin research when ftm-mind or ftm-brainstorm routes a research request
20
- - Expected payload: `{ task_description, plan_path, wave_number, task_number }`
21
- - Note: `depth_mode` and `context_register` are derived internally from request context, not from event payload
22
-
23
- ## Config Read
24
-
25
- Read `~/.claude/ftm-config.yml`:
26
- - Use `planning` model from the active profile for finder agents
27
- - Use `review` model for fallback challenger agents
28
- - Read `execution.per_skill_overrides.ftm-researcher` for agent cap (default 10 if override absent, fall back to `execution.max_parallel_agents` if neither is set)
29
-
30
- ## Blackboard Read
31
-
32
- On startup, load context from the FTM blackboard:
33
- 1. Load `~/.claude/ftm-blackboard/context.json`
34
- 2. Filter experiences by `task_type: "research"`
35
- 3. Load matching experience files to inform agent dispatch and subtopic decomposition
36
- 4. Load `~/.claude/ftm-blackboard/patterns.json` for recurring research patterns
37
-
38
- ## Mode System
39
-
40
- Three depth modes calibrate agent count, synthesis pipeline, and council invocation:
41
-
42
- ```
43
- Quick: 3 finders (Web Surveyor, GitHub Miner, Codebase Analyst), no council, no reconciler.
44
- Single-pass synthesis by orchestrator. ~1-2 min.
45
-
46
- Standard: 7 finders + reconciler, no council. Normalize → rank → reconcile. ~3-5 min.
47
-
48
- Deep: 7 finders → adaptive wave 2 → ftm-council → reconciler. Full pipeline. ~5-10 min.
49
- ```
50
-
51
- Mode is detected from request context:
52
- - "quick look" / "briefly" / "just a quick" → quick mode
53
- - "deep dive" / "thorough" / "comprehensive" / "exhaustive" → deep mode
54
- - Default (no explicit signal) → standard mode
55
-
56
- ## The Main Loop
57
-
58
- ```
59
- PHASE 0: REPO SCAN
60
- Silent background Explore agent scans the local codebase (same as ftm-brainstorm).
61
- Produces: project_context { tech_stack, key_files, existing_patterns, integration_points }
62
- Used by: Codebase Analyst finder + orchestrator subtopic decomposition
63
-
64
- PHASE 1: INTAKE
65
- - Parse the research question
66
- - Detect depth mode
67
- - Decompose into 7 subtopics (one per finder domain)
68
- - Load blackboard context and filter relevant prior research
69
-
70
- PHASE 2: WAVE 1
71
- - Dispatch 7 finders in parallel, each with:
72
- - Their unique domain constraint
73
- - Their assigned subtopic
74
- - Project context from Phase 0
75
- - Context register (accumulated findings from prior waves/turns)
76
- - Summary of previous findings to build on (do NOT re-search)
77
- - Collect all findings (3-8 per agent = 21-56 total)
78
-
79
- PHASE 3: ADAPTIVE REFINEMENT (deep mode only)
80
- - Analyze wave 1 findings across 5 dimensions:
81
- SATURATED: subtopic has 3+ diverse findings — reassign agent to a gap
82
- THIN: subtopic has 1-2 findings — same agent, more specific query
83
- GAP: subtopic has 0 findings — agent gets broader query + alternative terms
84
- CONTESTED: 2+ agents directly contradict — assign 2 agents (one per side) to resolve
85
- SURPRISE: findings outside original subtopics — assign most relevant agent to explore
86
- - Dispatch wave 2 agents with reshaped queries
87
- - Merge wave 2 findings with wave 1 before synthesis
88
-
89
- PHASE 4: SYNTHESIS PIPELINE
90
- See ftm-researcher/references/synthesis-pipeline.md for full pipeline.
91
- Summary:
92
- 1. Normalize & deduplicate (group by semantic similarity, track agent_count, source diversity)
93
- 2. Adversarial review: ftm-council (deep mode) or fallback challengers (standard mode)
94
- 3. Pairwise rank contested claims (LLM-as-judge tournament)
95
- 4. Reconcile into disagreement map (consensus / contested / unique / refuted tiers)
96
-
97
- PHASE 5: PRESENT
98
- - Render disagreement map as structured markdown
99
- - Show consensus findings, contested pairs, unique insights (flagged), refuted claims
100
- - Include source summary table (type | count | avg credibility)
101
- - Emit `research_complete` event
102
-
103
- PHASE 6: ITERATE
104
- - Enter conversational iteration mode
105
- - Wait for user response
106
- - Route based on intent (see Conversational Iteration Protocol below)
107
- ```
108
-
109
- ## Conversational Iteration Protocol
110
-
111
- After presenting results, the skill enters iteration mode. Route user responses:
112
-
113
- - "dig deeper on finding #N" / "more on #N" → spawn 3 targeted agents on that specific finding's topic
114
- - "I disagree with X" / "I think X is wrong because Y" → spawn counter-evidence agents, update findings
115
- - "focus on [angle]" / "what about the security angle" → reshape subtopics with new weighting, re-dispatch
116
- - "council finding #N" / "get more opinions on #N" → route specific claim to ftm-council
117
- - "more on [agent]'s findings" → re-dispatch that agent with broader query
118
- - "compare A vs B" → spawn comparison agent with both findings as context
119
- - "done" / "thanks" / "that's enough" / "looks good" → finalize, write blackboard, emit events
120
-
121
- Each iteration:
122
- 1. Updates the structured JSON artifact
123
- 2. Re-renders the markdown output
124
- 3. Updates the context register for subsequent turns
125
-
126
- ## Agent Roster
127
-
128
- See `ftm-researcher/references/agent-prompts.md` for full prompts.
129
-
130
- | Agent | Domain | Source Types |
131
- |---|---|---|
132
- | Web Surveyor | Blog posts, case studies, tutorials, technical write-ups | blog, news |
133
- | Academic Scout | Papers (arxiv, ACM, IEEE), official docs, RFCs, specs | peer_reviewed, primary, official_docs |
134
- | GitHub Miner | GitHub repos, OSS implementations, code patterns | code_repo |
135
- | Competitive Analyst | Products, user reviews (Reddit/HN/Twitter), market analysis | forum, news |
136
- | Stack Overflow Digger | Stack Overflow, community Q&A, pitfalls, solved problems | qa_site |
137
- | Codebase Analyst | Local repo only — Grep, Read, Glob tools, git log | codebase |
138
- | Historical Investigator | Solutions from 5-10+ years ago, evolution, failed approaches | primary, blog |
139
-
140
- ## Synthesis Pipeline
141
-
142
- See `ftm-researcher/references/synthesis-pipeline.md` for full specification.
143
-
144
- 5 phases: Normalize → Adversarial Review → Pairwise Rank → Reconcile → Render
145
-
146
- Output tiers:
147
- 1. **Consensus** — 3+ agents agree, council agreed, multiple source types. Highest confidence.
148
- 2. **Contested** — Council disagreed or pairwise ranking was close. Present both sides.
149
- 3. **Unique Insights** — 1 agent only, not contradicted. High value OR hallucination — flag for user.
150
- 4. **Refuted** — Council rejected or pairwise loser with weak evidence. Still present briefly.
151
-
152
- ## Adaptive Search
153
-
154
- See `ftm-researcher/references/adaptive-search.md` for full protocol.
155
-
156
- Deep mode only. Reshapes wave 2 queries based on wave 1 coverage analysis across 5 dimensions: SATURATED, THIN, GAP, CONTESTED, SURPRISE.
157
-
158
- ## Output Format
159
-
160
- See `ftm-researcher/references/output-format.md` for JSON schema and markdown template.
161
-
162
- Primary output: structured JSON artifact for skill-to-skill consumption (ftm-brainstorm, ftm-executor).
163
- Secondary output: rendered markdown for human display.
164
-
165
- ## Council Integration
166
-
167
- See `ftm-researcher/references/council-integration.md` for full protocol.
168
-
169
- Deep mode only. Routes top claims through ftm-council (Claude + Codex + Gemini independent review).
170
-
171
- Fallback (council unavailable): 2 standalone agents on the `review` model:
172
- - Devil's Advocate — finds reasons each claim is WRONG
173
- - Edge Case Hunter — finds where each claim BREAKS
174
-
175
- ## Credibility Scoring
176
-
177
- See `ftm-researcher/scripts/score_credibility.py` for implementation.
178
-
179
- 4 dimensions (weighted):
180
- - Source type weight (35%): primary > peer_reviewed > official_docs > news > blog > forum
181
- - Recency (20%): decay based on age, faster for fast-moving topics
182
- - Domain authority (25%): HIGH_AUTHORITY domains (arxiv, MDN, AWS docs) score 0.9
183
- - Bias detection (20%): sensationalism penalties, balanced language bonuses
184
-
185
- Bonuses and penalties:
186
- - Corroboration bonus: +0.15 if independently found by 2+ agents from different source types
187
- - Circular sourcing: -0.20 flag if multiple sources trace to same original
188
-
189
- Trust levels: high (>=0.75) | moderate (>=0.55) | low (>=0.35) | verify (<0.35)
190
-
191
- ## Blackboard Write
192
-
193
- After `research_complete` or session end:
194
- 1. Update `~/.claude/ftm-blackboard/context.json` with research session summary
195
- 2. Write experience file: `~/.claude/ftm-blackboard/experiences/research-[timestamp].json`
196
- - Fields: query, mode, findings_count, top_consensus_claims, source_diversity, duration_ms
197
- 3. Update `~/.claude/ftm-blackboard/index.json` with new experience entry
198
- 4. Emit `task_completed` event
199
-
200
- ## Session State (for ftm-pause/resume)
201
-
202
- The following state is persisted for pause/resume support:
203
- - Current phase (0-6)
204
- - Depth mode
205
- - All wave 1 and wave 2 findings (raw)
206
- - Synthesis state (normalized claims, council verdicts, ranked pairs)
207
- - Disagreement map (current version)
208
- - Conversation history (iteration turns)
209
- - Context register (accumulated findings across turns)
210
- - Project context from Phase 0 repo scan
211
-
212
- ## References
213
-
214
- - `ftm-researcher/references/agent-prompts.md` — 7 finder agent prompts + orchestrator decomposition protocol
215
- - `ftm-researcher/references/synthesis-pipeline.md` — 5-phase synthesis pipeline + reconciler prompt
216
- - `ftm-researcher/references/adaptive-search.md` — Wave 1 → wave 2 refinement protocol
217
- - `ftm-researcher/references/output-format.md` — JSON schema + markdown template + iteration protocol
218
- - `ftm-researcher/references/council-integration.md` — ftm-council interface + fallback challenger prompts
219
- - `ftm-researcher/scripts/score_credibility.py` — Source credibility scoring
220
- - `ftm-researcher/scripts/validate_research.py` — Research output validation
221
-
222
- ## Requirements
223
-
224
- - config: `~/.claude/ftm-config.yml` | optional | planning and review model profiles, per_skill_overrides.ftm-researcher agent cap
225
- - reference: `ftm-researcher/references/agent-prompts.md` | required | 7 finder agent prompts and orchestrator decomposition protocol
226
- - reference: `ftm-researcher/references/synthesis-pipeline.md` | required | 5-phase synthesis pipeline
227
- - reference: `ftm-researcher/references/adaptive-search.md` | optional | wave 2 adaptive refinement (deep mode only)
228
- - reference: `ftm-researcher/references/output-format.md` | required | JSON schema and markdown template
229
- - reference: `ftm-researcher/references/council-integration.md` | optional | ftm-council interface (deep mode only)
230
- - reference: `~/.claude/ftm-blackboard/context.json` | optional | session state
231
- - reference: `~/.claude/ftm-blackboard/patterns.json` | optional | recurring research patterns
232
-
233
- ## Risk
234
-
235
- - level: read_only
236
- - scope: reads web sources and local codebase via agents; writes blackboard experience entry; writes structured JSON artifact; does not modify project source files
237
- - rollback: no project mutations; blackboard write can be reverted by editing JSON files
238
-
239
- ## Approval Gates
240
-
241
- - trigger: research complete and user says "done" / "thanks" | action: finalize, write blackboard, emit events
242
- - trigger: deep mode and ftm-council invoked | action: council runs automatically on top claims (no user gate needed for this step)
243
- - complexity_routing: micro → auto | small → auto | medium → auto | large → auto | xl → auto
244
-
245
- ## Fallbacks
246
-
247
- - condition: ftm-council not available (deep mode) | action: use 2 fallback challenger agents (Devil's Advocate + Edge Case Hunter) instead
248
- - condition: agent cap exceeded | action: queue excess agents and dispatch after current wave completes
249
- - condition: research agent returns no findings | action: broaden query and retry; if still empty, report "No prior art found — this may be novel"
250
- - condition: blackboard missing | action: proceed without experience-informed shortcuts
251
-
252
- ## Capabilities
253
-
254
- - mcp: `WebSearch` | optional | finder agents for web, GitHub, and competitive research
255
- - mcp: `WebFetch` | optional | fetching specific URLs found during research
256
- - mcp: `sequential-thinking` | optional | complex synthesis and reconciliation
257
-
258
- ## Event Payloads
259
-
260
- ### research_complete
261
- - skill: string — "ftm-researcher"
262
- - query: string — original research question
263
- - mode: string — "quick" | "standard" | "deep"
264
- - findings_count: number — total normalized findings
265
- - consensus_count: number — findings with 3+ agent agreement
266
- - contested_count: number — findings with council disagreement
267
- - unique_count: number — single-agent findings
268
- - sources_count: number — total sources cited
269
- - council_used: boolean — whether ftm-council was invoked
270
- - duration_ms: number — total research duration
271
-
272
- ### task_completed
273
- - skill: string — "ftm-researcher"
274
- - task_title: string — research topic title
275
- - duration_ms: number — total session duration including iterations
1
+ ---
2
+ name: ftm-researcher
3
+ description: Deep parallel research engine with 7 domain-specialized finder agents, adversarial review via ftm-council, adaptive wave-based search, structured reconciliation with disagreement maps, credibility scoring, and conversational iteration. Use when the user wants thorough research on any topic — "research X", "find out about Y", "what's the state of the art on Z", "compare approaches to W", "deep dive into X", "look into Y". Also invoked by ftm-brainstorm for its research sprints. Triggers on "research", "investigate", "deep dive", "state of the art", "compare", "find examples of", "what's out there for", "how do others handle", "find me evidence", "look into". For idea exploration and brainstorming, use ftm-brainstorm instead (which calls ftm-researcher internally for research).
4
+ ---
5
+
6
+ # ftm-researcher
7
+
8
+ Deep parallel research engine with 7 domain-specialized finder agents, adversarial review via ftm-council, adaptive wave-based search, structured reconciliation with disagreement maps, credibility scoring, and conversational iteration.
9
+
10
+ ## Events
11
+
12
+ ### Emits
13
+ - `research_complete` — when synthesis pipeline finishes and structured output is ready
14
+ - Payload: `{ skill, query, mode, findings_count, consensus_count, contested_count, unique_count, sources_count, council_used, duration_ms }`
15
+ - `task_completed` — when the full research session finishes (including any conversational iteration)
16
+ - Payload: `{ skill, task_title, duration_ms }`
17
+
18
+ ### Listens To
19
+ - `task_received` — begin research when ftm-mind or ftm-brainstorm routes a research request
20
+ - Expected payload: `{ task_description, plan_path, wave_number, task_number }`
21
+ - Note: `depth_mode` and `context_register` are derived internally from request context, not from event payload
22
+
23
+ ## Config Read
24
+
25
+ Read `~/.claude/ftm-config.yml`:
26
+ - Use `planning` model from the active profile for finder agents
27
+ - Use `review` model for fallback challenger agents
28
+ - Read `execution.per_skill_overrides.ftm-researcher` for the agent cap; if the override is absent, fall back to `execution.max_parallel_agents`; if neither is set, default to 10
29
+
30
+ ## Blackboard Read
31
+
32
+ On startup, load context from the FTM blackboard:
33
+ 1. Load `~/.claude/ftm-blackboard/context.json`
34
+ 2. Filter experiences by `task_type: "research"`
35
+ 3. Load matching experience files to inform agent dispatch and subtopic decomposition
36
+ 4. Load `~/.claude/ftm-blackboard/patterns.json` for recurring research patterns
37
+
38
+ ## Mode System
39
+
40
+ Three depth modes calibrate agent count, synthesis pipeline, and council invocation:
41
+
42
+ ```
43
+ Quick: 3 finders (Web Surveyor, GitHub Miner, Codebase Analyst), no council, no reconciler.
44
+ Single-pass synthesis by orchestrator. ~1-2 min.
45
+
46
+ Standard: 7 finders + reconciler, no council. Normalize → rank → reconcile. ~3-5 min.
47
+
48
+ Deep: 7 finders → adaptive wave 2 → ftm-council → reconciler. Full pipeline. ~5-10 min.
49
+ ```
50
+
51
+ Mode is detected from request context:
52
+ - "quick look" / "briefly" / "just a quick" → quick mode
53
+ - "deep dive" / "thorough" / "comprehensive" / "exhaustive" → deep mode
54
+ - Default (no explicit signal) → standard mode
55
+
56
+ ## The Main Loop
57
+
58
+ ```
59
+ PHASE 0: REPO SCAN
60
+ Silent background Explore agent scans the local codebase (same as ftm-brainstorm).
61
+ Produces: project_context { tech_stack, key_files, existing_patterns, integration_points }
62
+ Used by: Codebase Analyst finder + orchestrator subtopic decomposition
63
+
64
+ PHASE 1: INTAKE
65
+ - Parse the research question
66
+ - Detect depth mode
67
+ - Decompose into 7 subtopics (one per finder domain)
68
+ - Load blackboard context and filter relevant prior research
69
+
70
+ PHASE 2: WAVE 1
71
+ - Dispatch finders in parallel (7 in standard/deep mode, 3 in quick mode), each with:
72
+ - Their unique domain constraint
73
+ - Their assigned subtopic
74
+ - Project context from Phase 0
75
+ - Context register (accumulated findings from prior waves/turns)
76
+ - Summary of previous findings to build on (do NOT re-search)
77
+ - Collect all findings (3-8 per agent = 21-56 total)
78
+
79
+ PHASE 3: ADAPTIVE REFINEMENT (deep mode only)
80
+ - Analyze wave 1 findings across 5 dimensions:
81
+ SATURATED: subtopic has 3+ diverse findings — reassign agent to a gap
82
+ THIN: subtopic has 1-2 findings — same agent, more specific query
83
+ GAP: subtopic has 0 findings — agent gets broader query + alternative terms
84
+ CONTESTED: 2+ agents directly contradict — assign 2 agents (one per side) to resolve
85
+ SURPRISE: findings outside original subtopics — assign most relevant agent to explore
86
+ - Dispatch wave 2 agents with reshaped queries
87
+ - Merge wave 2 findings with wave 1 before synthesis
88
+
89
+ PHASE 4: SYNTHESIS PIPELINE
90
+ See ftm-researcher/references/synthesis-pipeline.md for full pipeline.
91
+ Summary:
92
+ 1. Normalize & deduplicate (group by semantic similarity, track agent_count, source diversity)
93
+ 2. Adversarial review: ftm-council (deep mode) or fallback challengers (standard mode)
94
+ 3. Pairwise rank contested claims (LLM-as-judge tournament)
95
+ 4. Reconcile into disagreement map (consensus / contested / unique / refuted tiers)
96
+
97
+ PHASE 5: PRESENT
98
+ - Render disagreement map as structured markdown
99
+ - Show consensus findings, contested pairs, unique insights (flagged), refuted claims
100
+ - Include source summary table (type | count | avg credibility)
101
+ - Emit `research_complete` event
102
+
103
+ PHASE 6: ITERATE
104
+ - Enter conversational iteration mode
105
+ - Wait for user response
106
+ - Route based on intent (see Conversational Iteration Protocol below)
107
+ ```
108
+
109
+ ## Conversational Iteration Protocol
110
+
111
+ After presenting results, the skill enters iteration mode. Route user responses:
112
+
113
+ - "dig deeper on finding #N" / "more on #N" → spawn 3 targeted agents on that specific finding's topic
114
+ - "I disagree with X" / "I think X is wrong because Y" → spawn counter-evidence agents, update findings
115
+ - "focus on [angle]" / "what about the security angle" → reshape subtopics with new weighting, re-dispatch
116
+ - "council finding #N" / "get more opinions on #N" → route specific claim to ftm-council
117
+ - "more on [agent]'s findings" → re-dispatch that agent with broader query
118
+ - "compare A vs B" → spawn comparison agent with both findings as context
119
+ - "done" / "thanks" / "that's enough" / "looks good" → finalize, write blackboard, emit events
120
+
121
+ Each iteration:
122
+ 1. Updates the structured JSON artifact
123
+ 2. Re-renders the markdown output
124
+ 3. Updates the context register for subsequent turns
125
+
126
+ ## Agent Roster
127
+
128
+ See `ftm-researcher/references/agent-prompts.md` for full prompts.
129
+
130
+ | Agent | Domain | Source Types |
131
+ |---|---|---|
132
+ | Web Surveyor | Blog posts, case studies, tutorials, technical write-ups | blog, news |
133
+ | Academic Scout | Papers (arxiv, ACM, IEEE), official docs, RFCs, specs | peer_reviewed, primary, official_docs |
134
+ | GitHub Miner | GitHub repos, OSS implementations, code patterns | code_repo |
135
+ | Competitive Analyst | Products, user reviews (Reddit/HN/Twitter), market analysis | forum, news |
136
+ | Stack Overflow Digger | Stack Overflow, community Q&A, pitfalls, solved problems | qa_site |
137
+ | Codebase Analyst | Local repo only — Grep, Read, Glob tools, git log | codebase |
138
+ | Historical Investigator | Solutions from 5-10+ years ago, evolution, failed approaches | primary, blog |
139
+
140
+ ## Synthesis Pipeline
141
+
142
+ See `ftm-researcher/references/synthesis-pipeline.md` for full specification.
143
+
144
+ 5 phases: Normalize → Adversarial Review → Pairwise Rank → Reconcile → Render
145
+
146
+ Output tiers:
147
+ 1. **Consensus** — 3+ agents agree, council agreed, multiple source types. Highest confidence.
148
+ 2. **Contested** — Council disagreed or pairwise ranking was close. Present both sides.
149
+ 3. **Unique Insights** — 1 agent only, not contradicted. High value OR hallucination — flag for user.
150
+ 4. **Refuted** — Council rejected or pairwise loser with weak evidence. Still present briefly.
151
+
152
+ ## Adaptive Search
153
+
154
+ See `ftm-researcher/references/adaptive-search.md` for full protocol.
155
+
156
+ Deep mode only. Reshapes wave 2 queries based on wave 1 coverage analysis across 5 dimensions: SATURATED, THIN, GAP, CONTESTED, SURPRISE.
157
+
158
+ ## Output Format
159
+
160
+ See `ftm-researcher/references/output-format.md` for JSON schema and markdown template.
161
+
162
+ Primary output: structured JSON artifact for skill-to-skill consumption (ftm-brainstorm, ftm-executor).
163
+ Secondary output: rendered markdown for human display.
164
+
165
+ ## Council Integration
166
+
167
+ See `ftm-researcher/references/council-integration.md` for full protocol.
168
+
169
+ Deep mode only. Routes top claims through ftm-council (Claude + Codex + Gemini independent review).
170
+
171
+ Fallback (council unavailable): 2 standalone agents on the `review` model:
172
+ - Devil's Advocate — finds reasons each claim is WRONG
173
+ - Edge Case Hunter — finds where each claim BREAKS
174
+
175
+ ## Credibility Scoring
176
+
177
+ See `ftm-researcher/scripts/score_credibility.py` for implementation.
178
+
179
+ 4 dimensions (weighted):
180
+ - Source type weight (35%): primary > peer_reviewed > official_docs > news > blog > forum
181
+ - Recency (20%): decay based on age, faster for fast-moving topics
182
+ - Domain authority (25%): HIGH_AUTHORITY domains (arxiv, MDN, AWS docs) score 0.9
183
+ - Bias detection (20%): sensationalism penalties, balanced language bonuses
184
+
185
+ Bonuses and penalties:
186
+ - Corroboration bonus: +0.15 if independently found by 2+ agents from different source types
187
+ - Circular sourcing: -0.20 flag if multiple sources trace to same original
188
+
189
+ Trust levels: high (>=0.75) | moderate (>=0.55) | low (>=0.35) | verify (<0.35)
190
+
191
+ ## Blackboard Write
192
+
193
+ After `research_complete` or session end:
194
+ 1. Update `~/.claude/ftm-blackboard/context.json` with research session summary
195
+ 2. Write experience file: `~/.claude/ftm-blackboard/experiences/research-[timestamp].json`
196
+ - Fields: query, mode, findings_count, top_consensus_claims, source_diversity, duration_ms
197
+ 3. Update `~/.claude/ftm-blackboard/index.json` with new experience entry
198
+ 4. Emit `task_completed` event
199
+
200
+ ## Session State (for ftm-pause/resume)
201
+
202
+ The following state is persisted for pause/resume support:
203
+ - Current phase (0-6)
204
+ - Depth mode
205
+ - All wave 1 and wave 2 findings (raw)
206
+ - Synthesis state (normalized claims, council verdicts, ranked pairs)
207
+ - Disagreement map (current version)
208
+ - Conversation history (iteration turns)
209
+ - Context register (accumulated findings across turns)
210
+ - Project context from Phase 0 repo scan
211
+
212
+ ## References
213
+
214
+ - `ftm-researcher/references/agent-prompts.md` — 7 finder agent prompts + orchestrator decomposition protocol
215
+ - `ftm-researcher/references/synthesis-pipeline.md` — 5-phase synthesis pipeline + reconciler prompt
216
+ - `ftm-researcher/references/adaptive-search.md` — Wave 1 → wave 2 refinement protocol
217
+ - `ftm-researcher/references/output-format.md` — JSON schema + markdown template + iteration protocol
218
+ - `ftm-researcher/references/council-integration.md` — ftm-council interface + fallback challenger prompts
219
+ - `ftm-researcher/scripts/score_credibility.py` — Source credibility scoring
220
+ - `ftm-researcher/scripts/validate_research.py` — Research output validation
221
+
222
+ ## Requirements
223
+
224
+ - config: `~/.claude/ftm-config.yml` | optional | planning and review model profiles, per_skill_overrides.ftm-researcher agent cap
225
+ - reference: `ftm-researcher/references/agent-prompts.md` | required | 7 finder agent prompts and orchestrator decomposition protocol
226
+ - reference: `ftm-researcher/references/synthesis-pipeline.md` | required | 5-phase synthesis pipeline
227
+ - reference: `ftm-researcher/references/adaptive-search.md` | optional | wave 2 adaptive refinement (deep mode only)
228
+ - reference: `ftm-researcher/references/output-format.md` | required | JSON schema and markdown template
229
+ - reference: `ftm-researcher/references/council-integration.md` | optional | ftm-council interface (deep mode only)
230
+ - reference: `~/.claude/ftm-blackboard/context.json` | optional | session state
231
+ - reference: `~/.claude/ftm-blackboard/patterns.json` | optional | recurring research patterns
232
+
233
+ ## Risk
234
+
235
+ - level: read_only
236
+ - scope: reads web sources and local codebase via agents; writes blackboard experience entry; writes structured JSON artifact; does not modify project source files
237
+ - rollback: no project mutations; blackboard write can be reverted by editing JSON files
238
+
239
+ ## Approval Gates
240
+
241
+ - trigger: research complete and user says "done" / "thanks" | action: finalize, write blackboard, emit events
242
+ - trigger: deep mode and ftm-council invoked | action: council runs automatically on top claims (no user gate needed for this step)
243
+ - complexity_routing: micro → auto | small → auto | medium → auto | large → auto | xl → auto
244
+
245
+ ## Fallbacks
246
+
247
+ - condition: ftm-council not available (deep mode) | action: use 2 fallback challenger agents (Devil's Advocate + Edge Case Hunter) instead
248
+ - condition: agent cap exceeded | action: queue excess agents and dispatch after current wave completes
249
+ - condition: research agent returns no findings | action: broaden query and retry; if still empty, report "No prior art found — this may be novel"
250
+ - condition: blackboard missing | action: proceed without experience-informed shortcuts
251
+
252
+ ## Capabilities
253
+
254
+ - mcp: `WebSearch` | optional | finder agents for web, GitHub, and competitive research
255
+ - mcp: `WebFetch` | optional | fetching specific URLs found during research
256
+ - mcp: `sequential-thinking` | optional | complex synthesis and reconciliation
257
+
258
+ ## Event Payloads
259
+
260
+ ### research_complete
261
+ - skill: string — "ftm-researcher"
262
+ - query: string — original research question
263
+ - mode: string — "quick" | "standard" | "deep"
264
+ - findings_count: number — total normalized findings
265
+ - consensus_count: number — findings with 3+ agent agreement
266
+ - contested_count: number — findings with council disagreement
267
+ - unique_count: number — single-agent findings
268
+ - sources_count: number — total sources cited
269
+ - council_used: boolean — whether ftm-council was invoked
270
+ - duration_ms: number — total research duration
271
+
272
+ ### task_completed
273
+ - skill: string — "ftm-researcher"
274
+ - task_title: string — research topic title
275
+ - duration_ms: number — total session duration including iterations
@@ -1,17 +1,17 @@
1
- # ftm-researcher/evals/agent-diversity.yaml
2
- description: Verify 7 finder agents produce non-overlapping results from different domains
3
- prompts:
4
- - vars:
5
- input: "Research how to implement WebSocket connections in a Node.js application"
6
- assert:
7
- - type: contains
8
- value: "web_surveyor"
9
- - type: contains
10
- value: "github_miner"
11
- - type: contains
12
- value: "codebase_analyst"
13
- - type: javascript
14
- value: |
15
- // Verify at least 5 different agent_roles appear in findings
16
- const roles = new Set(output.findings?.map(f => f.agent_role) || []);
17
- return roles.size >= 5;
1
+ # ftm-researcher/evals/agent-diversity.yaml
2
+ description: Verify 7 finder agents produce non-overlapping results from different domains
3
+ prompts:
4
+ - vars:
5
+ input: "Research how to implement WebSocket connections in a Node.js application"
6
+ assert:
7
+ - type: contains
8
+ value: "web_surveyor"
9
+ - type: contains
10
+ value: "github_miner"
11
+ - type: contains
12
+ value: "codebase_analyst"
13
+ - type: javascript
14
+ value: |
15
+ // Verify at least 5 different agent_roles appear in findings
16
+ const roles = new Set(output.findings?.map(f => f.agent_role) || []);
17
+ return roles.size >= 5;
@@ -1,12 +1,12 @@
1
- # ftm-researcher/evals/synthesis-quality.yaml
2
- description: Verify synthesis pipeline produces valid disagreement maps
3
- prompts:
4
- - vars:
5
- input: "Given these 10 findings from different agents, produce a disagreement map"
6
- assert:
7
- - type: contains
8
- value: "consensus"
9
- - type: contains
10
- value: "contested"
11
- - type: contains
12
- value: "unique_insights"
1
+ # ftm-researcher/evals/synthesis-quality.yaml
2
+ description: Verify synthesis pipeline produces valid disagreement maps
3
+ prompts:
4
+ - vars:
5
+ input: "Given these 10 findings from different agents, produce a disagreement map"
6
+ assert:
7
+ - type: contains
8
+ value: "consensus"
9
+ - type: contains
10
+ value: "contested"
11
+ - type: contains
12
+ value: "unique_insights"