feed-the-machine 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. package/bin/generate-manifest.mjs +253 -0
  2. package/bin/install.mjs +134 -4
  3. package/docs/HOOKS.md +243 -0
  4. package/docs/INBOX.md +233 -0
  5. package/ftm/SKILL.md +34 -0
  6. package/ftm-audit/SKILL.md +69 -0
  7. package/ftm-brainstorm/SKILL.md +51 -0
  8. package/ftm-browse/SKILL.md +39 -0
  9. package/ftm-capture/SKILL.md +370 -0
  10. package/ftm-capture.yml +4 -0
  11. package/ftm-codex-gate/SKILL.md +59 -0
  12. package/ftm-config/SKILL.md +35 -0
  13. package/ftm-council/SKILL.md +56 -0
  14. package/ftm-dashboard/SKILL.md +163 -0
  15. package/ftm-debug/SKILL.md +84 -0
  16. package/ftm-diagram/SKILL.md +44 -0
  17. package/ftm-executor/SKILL.md +97 -0
  18. package/ftm-git/SKILL.md +60 -0
  19. package/ftm-inbox/backend/__init__.py +0 -0
  20. package/ftm-inbox/backend/__pycache__/main.cpython-314.pyc +0 -0
  21. package/ftm-inbox/backend/adapters/__init__.py +0 -0
  22. package/ftm-inbox/backend/adapters/_retry.py +64 -0
  23. package/ftm-inbox/backend/adapters/base.py +230 -0
  24. package/ftm-inbox/backend/adapters/freshservice.py +104 -0
  25. package/ftm-inbox/backend/adapters/gmail.py +125 -0
  26. package/ftm-inbox/backend/adapters/jira.py +136 -0
  27. package/ftm-inbox/backend/adapters/registry.py +192 -0
  28. package/ftm-inbox/backend/adapters/slack.py +110 -0
  29. package/ftm-inbox/backend/db/__init__.py +0 -0
  30. package/ftm-inbox/backend/db/connection.py +54 -0
  31. package/ftm-inbox/backend/db/schema.py +78 -0
  32. package/ftm-inbox/backend/executor/__init__.py +7 -0
  33. package/ftm-inbox/backend/executor/engine.py +149 -0
  34. package/ftm-inbox/backend/executor/step_runner.py +98 -0
  35. package/ftm-inbox/backend/main.py +103 -0
  36. package/ftm-inbox/backend/models/__init__.py +1 -0
  37. package/ftm-inbox/backend/models/unified_task.py +36 -0
  38. package/ftm-inbox/backend/planner/__init__.py +6 -0
  39. package/ftm-inbox/backend/planner/__pycache__/__init__.cpython-314.pyc +0 -0
  40. package/ftm-inbox/backend/planner/__pycache__/generator.cpython-314.pyc +0 -0
  41. package/ftm-inbox/backend/planner/__pycache__/schema.cpython-314.pyc +0 -0
  42. package/ftm-inbox/backend/planner/generator.py +127 -0
  43. package/ftm-inbox/backend/planner/schema.py +34 -0
  44. package/ftm-inbox/backend/requirements.txt +5 -0
  45. package/ftm-inbox/backend/routes/__init__.py +0 -0
  46. package/ftm-inbox/backend/routes/__pycache__/plan.cpython-314.pyc +0 -0
  47. package/ftm-inbox/backend/routes/execute.py +186 -0
  48. package/ftm-inbox/backend/routes/health.py +52 -0
  49. package/ftm-inbox/backend/routes/inbox.py +68 -0
  50. package/ftm-inbox/backend/routes/plan.py +271 -0
  51. package/ftm-inbox/bin/launchagent.mjs +91 -0
  52. package/ftm-inbox/bin/setup.mjs +188 -0
  53. package/ftm-inbox/bin/start.sh +10 -0
  54. package/ftm-inbox/bin/status.sh +17 -0
  55. package/ftm-inbox/bin/stop.sh +8 -0
  56. package/ftm-inbox/config.example.yml +55 -0
  57. package/ftm-inbox/package-lock.json +2898 -0
  58. package/ftm-inbox/package.json +26 -0
  59. package/ftm-inbox/postcss.config.js +6 -0
  60. package/ftm-inbox/src/app.css +199 -0
  61. package/ftm-inbox/src/app.html +18 -0
  62. package/ftm-inbox/src/lib/api.ts +166 -0
  63. package/ftm-inbox/src/lib/components/ExecutionLog.svelte +81 -0
  64. package/ftm-inbox/src/lib/components/InboxFeed.svelte +143 -0
  65. package/ftm-inbox/src/lib/components/PlanStep.svelte +271 -0
  66. package/ftm-inbox/src/lib/components/PlanView.svelte +206 -0
  67. package/ftm-inbox/src/lib/components/StreamPanel.svelte +99 -0
  68. package/ftm-inbox/src/lib/components/TaskCard.svelte +190 -0
  69. package/ftm-inbox/src/lib/components/ui/EmptyState.svelte +63 -0
  70. package/ftm-inbox/src/lib/components/ui/KawaiiCard.svelte +86 -0
  71. package/ftm-inbox/src/lib/components/ui/PillButton.svelte +106 -0
  72. package/ftm-inbox/src/lib/components/ui/StatusBadge.svelte +67 -0
  73. package/ftm-inbox/src/lib/components/ui/StreamDrawer.svelte +149 -0
  74. package/ftm-inbox/src/lib/components/ui/ThemeToggle.svelte +80 -0
  75. package/ftm-inbox/src/lib/theme.ts +47 -0
  76. package/ftm-inbox/src/routes/+layout.svelte +76 -0
  77. package/ftm-inbox/src/routes/+page.svelte +401 -0
  78. package/ftm-inbox/static/favicon.png +0 -0
  79. package/ftm-inbox/svelte.config.js +12 -0
  80. package/ftm-inbox/tailwind.config.ts +63 -0
  81. package/ftm-inbox/tsconfig.json +13 -0
  82. package/ftm-inbox/vite.config.ts +6 -0
  83. package/ftm-intent/SKILL.md +44 -0
  84. package/ftm-manifest.json +3794 -0
  85. package/ftm-map/SKILL.md +259 -0
  86. package/ftm-map/scripts/db.py +391 -0
  87. package/ftm-map/scripts/index.py +341 -0
  88. package/ftm-map/scripts/parser.py +455 -0
  89. package/ftm-map/scripts/queries/.gitkeep +0 -0
  90. package/ftm-map/scripts/queries/javascript-tags.scm +23 -0
  91. package/ftm-map/scripts/queries/python-tags.scm +17 -0
  92. package/ftm-map/scripts/queries/typescript-tags.scm +29 -0
  93. package/ftm-map/scripts/query.py +149 -0
  94. package/ftm-map/scripts/requirements.txt +2 -0
  95. package/ftm-map/scripts/setup-hooks.sh +27 -0
  96. package/ftm-map/scripts/setup.sh +45 -0
  97. package/ftm-map/scripts/test_db.py +124 -0
  98. package/ftm-map/scripts/test_parser.py +106 -0
  99. package/ftm-map/scripts/test_query.py +66 -0
  100. package/ftm-map/scripts/tests/fixtures/__init__.py +0 -0
  101. package/ftm-map/scripts/tests/fixtures/sample_project/api.ts +16 -0
  102. package/ftm-map/scripts/tests/fixtures/sample_project/auth.py +15 -0
  103. package/ftm-map/scripts/tests/fixtures/sample_project/utils.js +16 -0
  104. package/ftm-map/scripts/views.py +545 -0
  105. package/ftm-mind/SKILL.md +173 -66
  106. package/ftm-pause/SKILL.md +43 -0
  107. package/ftm-researcher/SKILL.md +275 -0
  108. package/ftm-researcher/evals/agent-diversity.yaml +17 -0
  109. package/ftm-researcher/evals/synthesis-quality.yaml +12 -0
  110. package/ftm-researcher/evals/trigger-accuracy.yaml +39 -0
  111. package/ftm-researcher/references/adaptive-search.md +116 -0
  112. package/ftm-researcher/references/agent-prompts.md +193 -0
  113. package/ftm-researcher/references/council-integration.md +193 -0
  114. package/ftm-researcher/references/output-format.md +203 -0
  115. package/ftm-researcher/references/synthesis-pipeline.md +165 -0
  116. package/ftm-researcher/scripts/score_credibility.py +234 -0
  117. package/ftm-researcher/scripts/validate_research.py +92 -0
  118. package/ftm-resume/SKILL.md +47 -0
  119. package/ftm-retro/SKILL.md +54 -0
  120. package/ftm-routine/SKILL.md +170 -0
  121. package/ftm-state/blackboard/capabilities.json +5 -0
  122. package/ftm-state/blackboard/capabilities.schema.json +27 -0
  123. package/ftm-upgrade/SKILL.md +41 -0
  124. package/ftm-upgrade/scripts/check-version.sh +1 -1
  125. package/ftm-upgrade/scripts/upgrade.sh +1 -1
  126. package/hooks/ftm-blackboard-enforcer.sh +94 -0
  127. package/hooks/ftm-discovery-reminder.sh +90 -0
  128. package/hooks/ftm-drafts-gate.sh +61 -0
  129. package/hooks/ftm-event-logger.mjs +107 -0
  130. package/hooks/ftm-map-autodetect.sh +79 -0
  131. package/hooks/ftm-pending-sync-check.sh +22 -0
  132. package/hooks/ftm-plan-gate.sh +96 -0
  133. package/hooks/ftm-post-commit-trigger.sh +57 -0
  134. package/hooks/settings-template.json +81 -0
  135. package/install.sh +140 -11
  136. package/package.json +12 -2
@@ -0,0 +1,17 @@
1
+ # ftm-researcher/evals/agent-diversity.yaml
2
+ description: Verify 7 finder agents produce non-overlapping results from different domains
3
+ prompts:
4
+ - vars:
5
+ input: "Research how to implement WebSocket connections in a Node.js application"
6
+ assert:
7
+ - type: contains
8
+ value: "web_surveyor"
9
+ - type: contains
10
+ value: "github_miner"
11
+ - type: contains
12
+ value: "codebase_analyst"
13
+ - type: javascript
14
+ value: |
15
+ // Verify at least 5 different agent_roles appear in findings
16
+ const roles = new Set(output.findings?.map(f => f.agent_role) || []);
17
+ return roles.size >= 5;
@@ -0,0 +1,12 @@
1
+ # ftm-researcher/evals/synthesis-quality.yaml
2
+ description: Verify synthesis pipeline produces valid disagreement maps
3
+ prompts:
4
+ - vars:
5
+ input: "Given these 10 findings from different agents, produce a disagreement map"
6
+ assert:
7
+ - type: contains
8
+ value: "consensus"
9
+ - type: contains
10
+ value: "contested"
11
+ - type: contains
12
+ value: "unique_insights"
@@ -0,0 +1,39 @@
1
+ # ftm-researcher/evals/trigger-accuracy.yaml
2
+ description: Verify ftm-researcher triggers on research requests and not on brainstorm/debug/other
3
+ prompts:
4
+ - vars:
5
+ input: "research parallel agent architectures"
6
+ assert:
7
+ - type: contains
8
+ value: "ftm-researcher"
9
+ - vars:
10
+ input: "what's the state of the art on LLM fine-tuning"
11
+ assert:
12
+ - type: contains
13
+ value: "ftm-researcher"
14
+ - vars:
15
+ input: "find me examples of rate limiting in Go"
16
+ assert:
17
+ - type: contains
18
+ value: "ftm-researcher"
19
+ - vars:
20
+ input: "compare Redis vs Memcached"
21
+ assert:
22
+ - type: contains
23
+ value: "ftm-researcher"
24
+ # Should NOT trigger
25
+ - vars:
26
+ input: "I have an idea for a dashboard"
27
+ assert:
28
+ - type: not-contains
29
+ value: "ftm-researcher"
30
+ - vars:
31
+ input: "debug this flaky test"
32
+ assert:
33
+ - type: not-contains
34
+ value: "ftm-researcher"
35
+ - vars:
36
+ input: "help me brainstorm auth design"
37
+ assert:
38
+ - type: not-contains
39
+ value: "ftm-researcher"
@@ -0,0 +1,116 @@
1
+ # Adaptive Search Protocol
2
+
3
+ Wave 1 → Wave 2 refinement for Deep mode research.
4
+
5
+ ---
6
+
7
+ ## When It Runs
8
+
9
+ Only in Deep mode. After wave 1 findings are normalized (Phase 1 of synthesis).
10
+
11
+ ---
12
+
13
+ ## How It Works
14
+
15
+ The orchestrator analyzes wave 1 findings across 4 dimensions:
16
+
17
+ ### 1. Coverage Analysis
18
+
19
+ For each original subtopic:
20
+ - **SATURATED** (3+ findings with diverse sources): Well-covered. Agent can be reassigned.
21
+ - **THIN** (1-2 findings): Partially covered. Same agent gets a refined query.
22
+ - **GAP** (0 findings): Not covered. Agent gets a broader query + alternative search terms.
23
+
24
+ ### 2. Contradiction Detection
25
+
26
+ - Identify claims where 2+ agents directly contradict each other
27
+ - Mark these subtopics as CONTESTED — wave 2 agents prioritize resolution
28
+ - For each contradiction, note: which agents, which claims, what the disagreement is
29
+
30
+ ### 3. Depth Opportunities
31
+
32
+ - Identify findings that mention specific tools, libraries, or approaches worth deeper investigation
33
+ - Generate drill-down queries for wave 2
34
+ - Prioritize depth opportunities that the user's response highlighted as important
35
+
36
+ ### 4. Surprise Detection
37
+
38
+ - Identify findings that don't fit any original subtopic — unexpected angles
39
+ - Generate new subtopics to explore these surprises
40
+ - Surprises are high-value: they represent information the user didn't know to ask about
41
+
42
+ ---
43
+
44
+ ## Wave 2 Dispatch
45
+
46
+ Reassign agents based on analysis:
47
+
48
+ | Wave 1 Status | Action |
49
+ |---|---|
50
+ | SATURATED | Reassign agent to a GAP or CONTESTED area |
51
+ | THIN | Same agent, refined query with more specific terms |
52
+ | GAP | Agent gets broader query + alternative search terms |
53
+ | CONTESTED | Assign 2 agents (one per side) to find resolution evidence |
54
+ | SURPRISE | Assign the most relevant agent to explore the unexpected angle |
55
+
56
+ ### Agent Reassignment Rules
57
+
58
+ 1. Prefer reassigning agents whose original domain is closest to the gap
59
+ 2. If a GAP exists in the academic domain, reassign Academic Scout even if it was SATURATED
60
+ 3. Codebase Analyst is never reassigned — it always re-searches with refined local queries
61
+ 4. If all subtopics are SATURATED, focus wave 2 on depth opportunities and surprises
62
+
63
+ ### Context Injection for Wave 2
64
+
65
+ All wave 2 agents receive:
66
+ - Full wave 1 findings summary (so they don't re-search)
67
+ - Their specific wave 2 mission (gap-fill, deepen, resolve, or explore)
68
+ - Explicit instruction: "Build on wave 1, do not repeat it"
69
+ - The contradiction details if they're resolving a CONTESTED subtopic
70
+
71
+ ---
72
+
73
+ ## Merge Protocol
74
+
75
+ Wave 2 findings merge with wave 1 before entering the synthesis pipeline:
76
+
77
+ 1. Wave 2 findings are added to the findings pool with `wave: 2` marker
78
+ 2. The normalize phase (Phase 1) runs again across ALL findings (wave 1 + wave 2)
79
+ 3. Deduplication groups wave 1 and wave 2 findings together — if wave 2 confirms a wave 1 finding, the agent_count increases
80
+ 4. New wave 2 findings that weren't in wave 1 are added as new unique claims
81
+ 5. The wave marker is preserved through synthesis for traceability
82
+
83
+ ### Contradiction Resolution
84
+
85
+ When wave 2 agents were dispatched to resolve a CONTESTED subtopic:
86
+ - If wave 2 finds evidence strongly supporting one side, the contest is resolved
87
+ - If wave 2 finds evidence supporting both sides, the contest remains but with richer context
88
+ - The pairwise ranking (Phase 3) benefits from the additional evidence
89
+
90
+ ---
91
+
92
+ ## Orchestrator Analysis Template
93
+
94
+ After wave 1 normalization, the orchestrator produces this analysis:
95
+
96
+ ```
97
+ COVERAGE ANALYSIS:
98
+ 1. [subtopic]: SATURATED | THIN | GAP — [N findings, M source types]
99
+ 2. [subtopic]: SATURATED | THIN | GAP — [N findings, M source types]
100
+ ...
101
+
102
+ CONTRADICTIONS DETECTED:
103
+ - [Agent A] claims [X] vs [Agent B] claims [Y] — on subtopic [Z]
104
+
105
+ DEPTH OPPORTUNITIES:
106
+ - Finding [N] mentions [specific tool/approach] worth investigating
107
+ - Finding [M] suggests [unexpected constraint] that needs validation
108
+
109
+ SURPRISES:
110
+ - [Agent] found [unexpected finding] not covered by any original subtopic
111
+
112
+ WAVE 2 PLAN:
113
+ - [Agent]: [mission] — [refined query]
114
+ - [Agent]: [mission] — [refined query]
115
+ ...
116
+ ```
@@ -0,0 +1,193 @@
1
+ # Agent Prompts: 7 Finder Agents + Orchestrator
2
+
3
+ ## Orchestrator Protocol: Subtopic Decomposition
4
+
5
+ Given research question Q, decompose into 7 facets:
6
+
7
+ 1. GENERAL LANDSCAPE (→ Web Surveyor): What's the current state? Blog posts, case studies, tutorials.
8
+ 2. THEORETICAL FOUNDATIONS (→ Academic Scout): What does the research say? Papers, official docs, specs.
9
+ 3. IMPLEMENTATION PATTERNS (→ GitHub Miner): How have others built this? Repos, code, OSS.
10
+ 4. MARKET REALITY (→ Competitive Analyst): What products exist? User reviews, complaints, gaps.
11
+ 5. PRACTITIONER WISDOM (→ Stack Overflow Digger): What pitfalls exist? Common mistakes, solved problems.
12
+ 6. LOCAL CONTEXT (→ Codebase Analyst): How does our project relate? Existing patterns, conventions, integration points.
13
+ 7. HISTORICAL EVOLUTION (→ Historical Investigator): How was this solved before? What failed? What evolved?
14
+
15
+ For each facet, generate a specific search query tailored to the information domain.
16
+
17
+ ### Decomposition Rules
18
+
19
+ - Each subtopic maps to exactly one finder's domain
20
+ - No overlap between subtopics
21
+ - Coverage of the full research question
22
+ - Adaptation to question type (technical, market, conceptual, comparative)
23
+
24
+ ### Quick Mode Subset
25
+
26
+ In Quick mode, only dispatch 3 finders: Web Surveyor, GitHub Miner, Codebase Analyst.
27
+ The orchestrator generates subtopics for only these 3 domains.
28
+
29
+ ---
30
+
31
+ ## Finder Agent Prompt Template
32
+
33
+ Each agent prompt follows this structure. The orchestrator fills in the template variables at dispatch time.
34
+
35
+ ```
36
+ RESEARCH QUESTION: {Q}
37
+ YOUR SUBTOPIC: {specific facet assigned by orchestrator}
38
+ PROJECT CONTEXT: {from Phase 0 repo scan}
39
+ CONTEXT REGISTER: {accumulated findings from prior waves/turns}
40
+ PREVIOUS FINDINGS TO BUILD ON: {summary — do NOT re-search these}
41
+ DEPTH LEVEL: {broad | focused | implementation}
42
+ ```
43
+
44
+ ### Return Format (all agents)
45
+
46
+ For each finding, return:
47
+ - claim: [one-sentence factual claim]
48
+ - evidence: [2-3 sentence supporting detail]
49
+ - source_url: [URL]
50
+ - source_type: [primary | peer_reviewed | official_docs | news | blog | forum | code_repo | qa_site | codebase]
51
+ - confidence: [0.0-1.0, self-assessed]
52
+ - agent_role: [your role name]
53
+
54
+ Return 3-8 findings. Quality over quantity. If your domain has nothing relevant, return 0 findings with a note explaining why.
55
+
56
+ ---
57
+
58
+ ## Agent 1: Web Surveyor
59
+
60
+ You are the Web Surveyor — your domain is the general web landscape: blog posts, case studies, tutorials, and technical write-ups.
61
+
62
+ DOMAIN CONSTRAINT: Blog posts, case studies, tutorials, technical write-ups. Use WebSearch tool.
63
+ ANTI-REDUNDANCY: Do NOT search GitHub repos, academic papers, or Stack Overflow.
64
+
65
+ ### Depth-Specific Instructions
66
+
67
+ **BROAD:** Map the territory. What are the 3-5 major approaches? What's typically harder than expected? Search: "[core concept] architecture", "[concept] case study", "how [company] built [feature]".
68
+
69
+ **FOCUSED:** Drill into the user's chosen approach. Find gotchas, failure modes, scaling limits. Compare 2-3 real implementations. Search: "[specific approach] [stack] production", "[approach] lessons learned".
70
+
71
+ **IMPLEMENTATION:** Find concrete patterns, library recommendations, config examples. Search: "[specific library] [framework] tutorial", "[exact pattern] implementation".
72
+
73
+ ---
74
+
75
+ ## Agent 2: Academic Scout
76
+
77
+ You are the Academic Scout — your domain is research papers, specifications, and official documentation.
78
+
79
+ DOMAIN CONSTRAINT: Papers (arxiv, ACM, IEEE), official documentation, RFCs, specifications. WebSearch filtered to academic domains.
80
+ ANTI-REDUNDANCY: Do NOT search blogs, forums, or product sites.
81
+
82
+ ### Depth-Specific Instructions
83
+
84
+ **BROAD:** What does the research community say about this? What theoretical foundations exist? Search: "[concept] survey paper", "site:arxiv.org [concept]", "[concept] RFC".
85
+
86
+ **FOCUSED:** Find papers that address the specific approach. What are the proven theoretical limits? Search: "[specific approach] analysis", "[approach] formal verification", "[approach] benchmark".
87
+
88
+ **IMPLEMENTATION:** Find reference implementations from papers, official specs with code examples. Search: "[algorithm] reference implementation", "[spec] code example".
89
+
90
+ ---
91
+
92
+ ## Agent 3: GitHub Miner
93
+
94
+ You are the GitHub Miner — your domain is open-source code, repositories, and implementation patterns.
95
+
96
+ DOMAIN CONSTRAINT: GitHub repos, code patterns, OSS implementations. WebSearch filtered to github.com.
97
+ ANTI-REDUNDANCY: Do NOT search blogs or Q&A sites. Report: repo URL, stars, last commit, architecture notes.
98
+
99
+ ### Depth-Specific Instructions
100
+
101
+ **BROAD:** Find the most-starred repos. What patterns emerge across repos? Search: "[concept] [language]", "awesome-[concept]".
102
+
103
+ **FOCUSED:** Find repos using the SAME stack. Dig into architecture decisions, open issues. Search: "[approach] [exact framework]", "[approach] example [language]".
104
+
105
+ **IMPLEMENTATION:** Find repos that solved the EXACT sub-problem. Look at specific files/functions, test suites. Search: "[specific library] [pattern] example", "[exact integration] starter".
106
+
107
+ ---
108
+
109
+ ## Agent 4: Competitive Analyst
110
+
111
+ You are the Competitive Analyst — your domain is the market landscape: products, tools, user reviews, and gaps.
112
+
113
+ DOMAIN CONSTRAINT: Products, tools, user reviews on Reddit/HN/Twitter, market analysis. WebSearch filtered to reddit.com, news.ycombinator.com, product sites.
114
+ ANTI-REDUNDANCY: Do NOT search GitHub repos or academic papers. Focus on what users love/hate.
115
+
116
+ ### Depth-Specific Instructions
117
+
118
+ **BROAD:** What products/tools exist? What do users love/hate? Where are the gaps? Search: "site:reddit.com [problem] recommendation", "site:news.ycombinator.com [concept]".
119
+
120
+ **FOCUSED:** Deep-dive 2-3 most relevant competitors. How do they handle the specific challenge? Search: "[product] review", "[product] vs [product]", "[product] limitations".
121
+
122
+ **IMPLEMENTATION:** How do competitors implement the specific feature? Public APIs, SDKs? Search: "[product] API", "[product] architecture", "[product] integration guide".
123
+
124
+ ---
125
+
126
+ ## Agent 5: Stack Overflow Digger
127
+
128
+ You are the Stack Overflow Digger — your domain is practitioner wisdom: common pitfalls, solved problems, and battle-tested solutions.
129
+
130
+ DOMAIN CONSTRAINT: Stack Overflow, community Q&A, common pitfalls, solved problems. WebSearch filtered to stackoverflow.com, stackexchange.com.
131
+ ANTI-REDUNDANCY: Do NOT search GitHub or blogs. Focus on battle-tested solutions and known footguns.
132
+
133
+ ### Depth-Specific Instructions
134
+
135
+ **BROAD:** What are the common mistakes people make? What questions come up repeatedly? Search: "site:stackoverflow.com [concept] [common error]".
136
+
137
+ **FOCUSED:** What are the subtle gotchas for this specific approach? Search: "site:stackoverflow.com [approach] gotcha", "[approach] edge case".
138
+
139
+ **IMPLEMENTATION:** Find accepted answers with code for the exact pattern needed. Search: "site:stackoverflow.com [exact problem] [language] [framework]".
140
+
141
+ ---
142
+
143
+ ## Agent 6: Codebase Analyst
144
+
145
+ You are the Codebase Analyst — your domain is the LOCAL repository only. You search the user's codebase for relevant patterns, conventions, and integration points.
146
+
147
+ DOMAIN CONSTRAINT: Local repo ONLY. Uses Grep, Read, Glob tools. Searches code, git log, architecture docs, INTENT.md, ARCHITECTURE.mmd.
148
+ ANTI-REDUNDANCY: Do NOT use WebSearch. No external sources. All findings cite file paths and line numbers.
149
+
150
+ ### Instructions
151
+
152
+ 1. Search the codebase for existing patterns related to the research question
153
+ 2. Check git log for recent changes in relevant areas
154
+ 3. Read INTENT.md and ARCHITECTURE.mmd if they exist
155
+ 4. Identify: existing conventions, integration points, potential conflicts, reusable components
156
+ 5. Report findings with exact file paths and line numbers
157
+
158
+ ### Return Format (extended)
159
+
160
+ In addition to the standard return format, include:
161
+ - file_path: [exact path]
162
+ - line_number: [line or range]
163
+ - pattern_type: [convention | integration_point | reusable_component | potential_conflict]
164
+
165
+ ---
166
+
167
+ ## Agent 7: Historical Investigator
168
+
169
+ You are the Historical Investigator — your domain is the past: how problems were solved before, what failed, what evolved over time.
170
+
171
+ DOMAIN CONSTRAINT: How this was solved 5-10+ years ago. WebSearch with date filters (e.g., before:2020). Archive.org, historical blog posts, deprecated tools.
172
+ ANTI-REDUNDANCY: Do NOT search for current solutions. Focus on evolution, failed approaches, what changed and why.
173
+
174
+ ### Depth-Specific Instructions
175
+
176
+ **BROAD:** What approaches were tried and abandoned? What paradigm shifts happened? Search: "[concept] history", "[concept] before:2020", "[deprecated tool] replaced by".
177
+
178
+ **FOCUSED:** Why did the old approach fail for this specific use case? What lessons were learned? Search: "[old approach] postmortem", "[approach] deprecated because", "[concept] evolution".
179
+
180
+ **IMPLEMENTATION:** What migration patterns exist from old to new? Search: "[old tool] to [new tool] migration", "[old pattern] modernization".
181
+
182
+ ---
183
+
184
+ ## Dispatch Checklist
185
+
186
+ Before spawning agents each turn, verify:
187
+
188
+ 1. Subtopic decomposition is complete (7 facets for standard/deep, 3 for quick)
189
+ 2. Context register is up to date (includes user's latest response)
190
+ 3. Depth level is set correctly for mode and wave
191
+ 4. Previous findings are summarized so agents don't re-search
192
+ 5. Each agent has its unique domain constraint and anti-redundancy rules
193
+ 6. Project context from Phase 0 is included
@@ -0,0 +1,193 @@
1
+ # ftm-council Integration
2
+
3
+ ## When Council Is Invoked
4
+
5
+ - Deep mode only (standard and quick skip council)
6
+ - After normalize & dedup (Phase 1 of synthesis)
7
+ - Input: all claims with agent_count >= 2, plus high-confidence unique claims (confidence > 0.8)
8
+
9
+ ---
10
+
11
+ ## Interface Contract
12
+
13
+ ftm-researcher prepares a structured prompt for ftm-council:
14
+
15
+ ```
16
+ Evaluate these research findings for accuracy, completeness, and potential bias.
17
+ For each claim below, independently assess:
18
+ 1. Is the evidence sufficient to support this claim?
19
+ 2. What would make this claim wrong?
20
+ 3. Are there alternative explanations the research may have missed?
21
+ 4. Rate your confidence in this claim (0-1).
22
+
23
+ [claims formatted as numbered list with evidence and sources]
24
+
25
+ Return your assessment for each claim with: verdict (supported/contested/insufficient),
26
+ confidence, and reasoning.
27
+ ```
28
+
29
+ ### Payload Format
30
+
31
+ ```json
32
+ {
33
+ "context": "Research evaluation for: [query]",
34
+ "claims": [
35
+ {
36
+ "id": "f-001",
37
+ "claim": "...",
38
+ "evidence": "...",
39
+ "sources": ["url1", "url2"],
40
+ "source_types": ["peer_reviewed", "blog"],
41
+ "agent_count": 3,
42
+ "credibility_score": 0.78
43
+ }
44
+ ],
45
+ "evaluation_criteria": "accuracy, completeness, potential bias"
46
+ }
47
+ ```
48
+
49
+ ### Expected Response Format
50
+
51
+ ```json
52
+ {
53
+ "evaluations": [
54
+ {
55
+ "claim_id": "f-001",
56
+ "verdict": "supported | contested | insufficient",
57
+ "confidence": 0.85,
58
+ "reasoning": "...",
59
+ "what_would_make_this_wrong": "...",
60
+ "alternative_explanations": ["..."]
61
+ }
62
+ ],
63
+ "provider_positions": {
64
+ "claude": { "f-001": "supported", ... },
65
+ "codex": { "f-001": "contested", ... },
66
+ "gemini": { "f-001": "supported", ... }
67
+ }
68
+ }
69
+ ```
70
+
71
+ ---
72
+
73
+ ## How Council Results Map Back
74
+
75
+ | Council Verdict | Mapping |
76
+ |---|---|
77
+ | All 3 providers: "supported" | consensus tier |
78
+ | 2 agree "supported", 1 contests | consensus tier with minority note |
79
+ | 2 contest, 1 supports | contested tier |
80
+ | All 3 contest | refuted tier |
81
+ | Any other mix that includes "insufficient" | unique_insights tier (needs more evidence) |
82
+ | 2 "insufficient", 1 "supported" | unique_insights tier |
83
+ | 2 "insufficient", 1 "contested" | refuted tier (no provider found sufficient support, and the only substantive verdict contests the claim) |
84
+
85
+ ### Tie-Breaking Rules
86
+
87
+ When the mapping is ambiguous:
88
+ 1. Prefer the more conservative tier (contested over consensus, refuted over unique_insights)
89
+ 2. If all three providers give different verdicts, place in contested with full position details
90
+ 3. If confidence scores diverge significantly (spread > 0.3), flag as high-uncertainty
91
+
92
+ ---
93
+
94
+ ## Fallback: Standalone Challengers
95
+
96
+ When ftm-council is unavailable (neither the Codex CLI nor the Gemini CLI is installed; see Council Availability Detection):
97
+
98
+ Spawn 2 agents on the `review` model from ftm-config:
99
+
100
+ ### Devil's Advocate Agent
101
+
102
+ ```
103
+ You are the Devil's Advocate in a research pipeline.
104
+
105
+ Your sole purpose is to find reasons each claim is WRONG.
106
+
107
+ For each claim below:
108
+ 1. Search for counter-evidence using WebSearch
109
+ 2. Identify logical gaps in the reasoning
110
+ 3. Flag claims supported by only one source type
111
+ 4. Check if the evidence actually supports the claim or if the claim overstates the evidence
112
+ 5. Look for cherry-picked data or survivorship bias
113
+
114
+ Be adversarial. The goal is to stress-test, not to confirm.
115
+
116
+ CLAIMS TO CHALLENGE:
117
+ [formatted list of claims with evidence]
118
+
119
+ RETURN FORMAT:
120
+ For each claim challenged, return:
121
+ - claim_challenged: [the claim text]
122
+ - challenge_type: counter_evidence | logical_gap | single_source | overstated | bias
123
+ - counter_evidence: [what you found that contradicts or weakens the claim]
124
+ - severity: high | medium | low
125
+ - recommendation: reject | weaken | flag_for_review | accept_with_caveat
126
+ ```
127
+
128
+ ### Edge Case Hunter Agent
129
+
130
+ ```
131
+ You are the Edge Case Hunter in a research pipeline.
132
+
133
+ Your sole purpose is to find where each claim BREAKS.
134
+
135
+ For each claim below:
136
+ 1. What happens at scale? (10x, 100x, 1000x users/data/requests)
137
+ 2. What happens under adversarial conditions? (malicious input, DDoS, data poisoning)
138
+ 3. What about accessibility? (screen readers, keyboard-only, low bandwidth)
139
+ 4. What about the 1% case? (rare but catastrophic failure modes)
140
+ 5. What about 5 years from now? (technology shifts, dependency deprecation, scaling limits)
141
+ 6. What happens when the key assumption changes? (the market shifts, the API breaks, the team grows)
142
+
143
+ CLAIMS TO STRESS-TEST:
144
+ [formatted list of claims with evidence]
145
+
146
+ RETURN FORMAT:
147
+ For each claim stressed, return:
148
+ - claim_challenged: [the claim text]
149
+ - challenge_type: scale | adversarial | accessibility | edge_case | longevity | assumption_shift
150
+ - failure_scenario: [specific scenario where this claim breaks]
151
+ - severity: high | medium | low
152
+ - recommendation: reject | weaken | flag_for_review | accept_with_caveat
153
+ ```
154
+
155
+ ### Fallback Mapping
156
+
157
+ Map challenger results to tiers:
158
+
159
+ | Challenger Result | Mapping |
160
+ |---|---|
161
+ | No challenges from either agent | consensus |
162
+ | Challenges with weak counter-evidence (low severity) | consensus with note |
163
+ | One agent challenges with medium severity | contested |
164
+ | Both agents challenge with medium+ severity | contested (strong) |
165
+ | Multiple high-severity challenges | refuted |
166
+ | Only edge case challenges, no factual counter-evidence | consensus with edge-case notes |
167
+
168
+ ---
169
+
170
+ ## Council Availability Detection
171
+
172
+ Before invoking ftm-council, check availability:
173
+
174
+ 1. Check if `codex` CLI is installed: `which codex`
175
+ 2. Check if `gemini` CLI is installed: `which gemini`
176
+ 3. If both are available: use full council
177
+ 4. If only one is available: use 2-provider council (reduced confidence in verdicts)
178
+ 5. If neither is available: use fallback challenger agents
179
+
180
+ Log the availability status in the research metadata.
181
+
182
+ ---
183
+
184
+ ## Per-Claim Council Invocation
185
+
186
+ The conversational iteration protocol supports council invocation for individual claims:
187
+
188
+ When the user says "council #N":
189
+ 1. Extract finding N from the current research state
190
+ 2. Send ONLY that claim to ftm-council with full evidence
191
+ 3. Update the claim's tier based on council verdict
192
+ 4. Re-render the disagreement map with the updated position
193
+ 5. Report the council's reasoning to the user