feed-the-machine 1.6.1 → 1.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (272) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +262 -170
  3. package/bin/__pycache__/tasks_db.cpython-314.pyc +0 -0
  4. package/bin/brain.py +1340 -0
  5. package/bin/convert_claude_skills_to_codex.py +490 -0
  6. package/bin/generate-manifest.mjs +463 -463
  7. package/bin/harden_codex_skills.py +141 -0
  8. package/bin/install.mjs +491 -491
  9. package/bin/migrate-eng-buddy-data.py +875 -0
  10. package/bin/playbook_engine/__init__.py +1 -0
  11. package/bin/playbook_engine/conftest.py +8 -0
  12. package/bin/playbook_engine/extractor.py +33 -0
  13. package/bin/playbook_engine/manager.py +102 -0
  14. package/bin/playbook_engine/models.py +84 -0
  15. package/bin/playbook_engine/registry.py +35 -0
  16. package/bin/playbook_engine/test_extractor.py +72 -0
  17. package/bin/playbook_engine/test_integration.py +129 -0
  18. package/bin/playbook_engine/test_manager.py +85 -0
  19. package/bin/playbook_engine/test_models.py +166 -0
  20. package/bin/playbook_engine/test_registry.py +67 -0
  21. package/bin/playbook_engine/test_tracer.py +86 -0
  22. package/bin/playbook_engine/tracer.py +93 -0
  23. package/bin/tasks_db.py +456 -0
  24. package/docs/HOOKS.md +243 -243
  25. package/docs/INBOX.md +233 -233
  26. package/ftm/SKILL.md +125 -122
  27. package/ftm-audit/SKILL.md +673 -623
  28. package/ftm-audit/references/protocols/PROJECT-PATTERNS.md +91 -91
  29. package/ftm-audit/references/protocols/RUNTIME-WIRING.md +66 -66
  30. package/ftm-audit/references/protocols/WIRING-CONTRACTS.md +135 -135
  31. package/ftm-audit/references/strategies/AUTO-FIX-STRATEGIES.md +69 -69
  32. package/ftm-audit/references/templates/REPORT-FORMAT.md +96 -96
  33. package/ftm-audit/scripts/run-knip.sh +23 -23
  34. package/ftm-audit.yml +2 -2
  35. package/ftm-brainstorm/SKILL.md +1003 -498
  36. package/ftm-brainstorm/evals/evals.json +180 -100
  37. package/ftm-brainstorm/evals/promptfoo.yaml +109 -109
  38. package/ftm-brainstorm/references/agent-prompts.md +552 -224
  39. package/ftm-brainstorm/references/plan-template.md +209 -121
  40. package/ftm-brainstorm.yml +2 -2
  41. package/ftm-browse/SKILL.md +454 -454
  42. package/ftm-browse/daemon/browser-manager.ts +206 -206
  43. package/ftm-browse/daemon/bun.lock +30 -30
  44. package/ftm-browse/daemon/cli.ts +347 -347
  45. package/ftm-browse/daemon/commands.ts +410 -410
  46. package/ftm-browse/daemon/main.ts +357 -357
  47. package/ftm-browse/daemon/package.json +17 -17
  48. package/ftm-browse/daemon/server.ts +189 -189
  49. package/ftm-browse/daemon/snapshot.ts +519 -519
  50. package/ftm-browse/daemon/tsconfig.json +22 -22
  51. package/ftm-browse.yml +4 -4
  52. package/ftm-capture/SKILL.md +370 -370
  53. package/ftm-capture.yml +4 -4
  54. package/ftm-codex-gate/SKILL.md +361 -361
  55. package/ftm-codex-gate.yml +2 -2
  56. package/ftm-config/SKILL.md +422 -345
  57. package/ftm-config.default.yml +125 -82
  58. package/ftm-config.yml +44 -2
  59. package/ftm-council/SKILL.md +416 -416
  60. package/ftm-council/references/prompts/CLAUDE-INVESTIGATION.md +60 -60
  61. package/ftm-council/references/prompts/CODEX-INVESTIGATION.md +58 -58
  62. package/ftm-council/references/prompts/GEMINI-INVESTIGATION.md +58 -58
  63. package/ftm-council/references/prompts/REBUTTAL-TEMPLATE.md +57 -57
  64. package/ftm-council/references/protocols/PREREQUISITES.md +47 -47
  65. package/ftm-council/references/protocols/STEP-0-FRAMING.md +46 -46
  66. package/ftm-council-chat.yml +2 -0
  67. package/ftm-council.yml +2 -2
  68. package/ftm-dashboard/SKILL.md +163 -163
  69. package/ftm-dashboard.yml +4 -4
  70. package/ftm-debug/SKILL.md +1037 -1037
  71. package/ftm-debug/references/phases/PHASE-0-INTAKE.md +58 -58
  72. package/ftm-debug/references/phases/PHASE-1-TRIAGE.md +46 -46
  73. package/ftm-debug/references/phases/PHASE-2-WAR-ROOM-AGENTS.md +279 -279
  74. package/ftm-debug/references/phases/PHASE-3-TO-6-EXECUTION.md +436 -436
  75. package/ftm-debug/references/protocols/BLACKBOARD.md +86 -86
  76. package/ftm-debug/references/protocols/EDGE-CASES.md +103 -103
  77. package/ftm-debug.yml +2 -2
  78. package/ftm-diagram/SKILL.md +277 -277
  79. package/ftm-diagram.yml +2 -2
  80. package/ftm-executor/SKILL.md +777 -777
  81. package/ftm-executor/references/STYLE-TEMPLATE.md +73 -73
  82. package/ftm-executor/references/phases/PHASE-0-VERIFICATION.md +62 -62
  83. package/ftm-executor/references/phases/PHASE-2-AGENT-ASSEMBLY.md +34 -34
  84. package/ftm-executor/references/phases/PHASE-3-WORKTREES.md +38 -38
  85. package/ftm-executor/references/phases/PHASE-4-5-AUDIT.md +81 -72
  86. package/ftm-executor/references/phases/PHASE-4-DISPATCH.md +66 -66
  87. package/ftm-executor/references/phases/PHASE-5-5-CODEX-GATE.md +73 -73
  88. package/ftm-executor/references/protocols/DOCUMENTATION-BOOTSTRAP.md +36 -36
  89. package/ftm-executor/references/protocols/MODEL-PROFILE.md +59 -59
  90. package/ftm-executor/references/protocols/PROGRESS-TRACKING.md +66 -66
  91. package/ftm-executor/runtime/ftm-runtime.mjs +252 -252
  92. package/ftm-executor/runtime/package.json +8 -8
  93. package/ftm-executor.yml +2 -2
  94. package/ftm-git/SKILL.md +441 -441
  95. package/ftm-git/evals/evals.json +26 -26
  96. package/ftm-git/evals/promptfoo.yaml +75 -75
  97. package/ftm-git/hooks/post-commit-experience.sh +92 -92
  98. package/ftm-git/references/patterns/SECRET-PATTERNS.md +104 -104
  99. package/ftm-git/references/protocols/REMEDIATION.md +139 -139
  100. package/ftm-git/scripts/pre-commit-secrets.sh +110 -110
  101. package/ftm-git.yml +2 -2
  102. package/ftm-inbox/backend/__pycache__/main.cpython-314.pyc +0 -0
  103. package/ftm-inbox/backend/adapters/_retry.py +64 -64
  104. package/ftm-inbox/backend/adapters/base.py +230 -230
  105. package/ftm-inbox/backend/adapters/freshservice.py +104 -104
  106. package/ftm-inbox/backend/adapters/gmail.py +125 -125
  107. package/ftm-inbox/backend/adapters/jira.py +136 -136
  108. package/ftm-inbox/backend/adapters/registry.py +192 -192
  109. package/ftm-inbox/backend/adapters/slack.py +110 -110
  110. package/ftm-inbox/backend/db/connection.py +54 -54
  111. package/ftm-inbox/backend/db/schema.py +78 -78
  112. package/ftm-inbox/backend/executor/__init__.py +7 -7
  113. package/ftm-inbox/backend/executor/engine.py +149 -149
  114. package/ftm-inbox/backend/executor/step_runner.py +98 -98
  115. package/ftm-inbox/backend/main.py +103 -103
  116. package/ftm-inbox/backend/models/__init__.py +1 -1
  117. package/ftm-inbox/backend/models/unified_task.py +36 -36
  118. package/ftm-inbox/backend/planner/__init__.py +6 -6
  119. package/ftm-inbox/backend/planner/__pycache__/__init__.cpython-314.pyc +0 -0
  120. package/ftm-inbox/backend/planner/__pycache__/generator.cpython-314.pyc +0 -0
  121. package/ftm-inbox/backend/planner/__pycache__/schema.cpython-314.pyc +0 -0
  122. package/ftm-inbox/backend/planner/generator.py +127 -127
  123. package/ftm-inbox/backend/planner/schema.py +34 -34
  124. package/ftm-inbox/backend/requirements.txt +5 -5
  125. package/ftm-inbox/backend/routes/__pycache__/plan.cpython-314.pyc +0 -0
  126. package/ftm-inbox/backend/routes/execute.py +186 -186
  127. package/ftm-inbox/backend/routes/health.py +52 -52
  128. package/ftm-inbox/backend/routes/inbox.py +68 -68
  129. package/ftm-inbox/backend/routes/plan.py +271 -271
  130. package/ftm-inbox/bin/launchagent.mjs +91 -91
  131. package/ftm-inbox/bin/setup.mjs +188 -188
  132. package/ftm-inbox/bin/start.sh +10 -10
  133. package/ftm-inbox/bin/status.sh +17 -17
  134. package/ftm-inbox/bin/stop.sh +8 -8
  135. package/ftm-inbox/config.example.yml +55 -55
  136. package/ftm-inbox/package-lock.json +2898 -2898
  137. package/ftm-inbox/package.json +26 -26
  138. package/ftm-inbox/postcss.config.js +6 -6
  139. package/ftm-inbox/src/app.css +199 -199
  140. package/ftm-inbox/src/app.html +18 -18
  141. package/ftm-inbox/src/lib/api.ts +166 -166
  142. package/ftm-inbox/src/lib/components/ExecutionLog.svelte +81 -81
  143. package/ftm-inbox/src/lib/components/InboxFeed.svelte +143 -143
  144. package/ftm-inbox/src/lib/components/PlanStep.svelte +271 -271
  145. package/ftm-inbox/src/lib/components/PlanView.svelte +206 -206
  146. package/ftm-inbox/src/lib/components/StreamPanel.svelte +99 -99
  147. package/ftm-inbox/src/lib/components/TaskCard.svelte +190 -190
  148. package/ftm-inbox/src/lib/components/ui/EmptyState.svelte +63 -63
  149. package/ftm-inbox/src/lib/components/ui/KawaiiCard.svelte +86 -86
  150. package/ftm-inbox/src/lib/components/ui/PillButton.svelte +106 -106
  151. package/ftm-inbox/src/lib/components/ui/StatusBadge.svelte +67 -67
  152. package/ftm-inbox/src/lib/components/ui/StreamDrawer.svelte +149 -149
  153. package/ftm-inbox/src/lib/components/ui/ThemeToggle.svelte +80 -80
  154. package/ftm-inbox/src/lib/theme.ts +47 -47
  155. package/ftm-inbox/src/routes/+layout.svelte +76 -76
  156. package/ftm-inbox/src/routes/+page.svelte +401 -401
  157. package/ftm-inbox/svelte.config.js +12 -12
  158. package/ftm-inbox/tailwind.config.ts +63 -63
  159. package/ftm-inbox/tsconfig.json +13 -13
  160. package/ftm-inbox/vite.config.ts +6 -6
  161. package/ftm-intent/SKILL.md +241 -241
  162. package/ftm-intent.yml +2 -2
  163. package/ftm-manifest.json +3794 -3794
  164. package/ftm-map/SKILL.md +291 -291
  165. package/ftm-map/scripts/db.py +712 -712
  166. package/ftm-map/scripts/index.py +415 -415
  167. package/ftm-map/scripts/parser.py +224 -224
  168. package/ftm-map/scripts/queries/go-tags.scm +20 -20
  169. package/ftm-map/scripts/queries/javascript-tags.scm +35 -35
  170. package/ftm-map/scripts/queries/python-tags.scm +31 -31
  171. package/ftm-map/scripts/queries/ruby-tags.scm +19 -19
  172. package/ftm-map/scripts/queries/rust-tags.scm +37 -37
  173. package/ftm-map/scripts/queries/typescript-tags.scm +41 -41
  174. package/ftm-map/scripts/query.py +301 -301
  175. package/ftm-map/scripts/ranker.py +377 -377
  176. package/ftm-map/scripts/requirements.txt +5 -5
  177. package/ftm-map/scripts/setup-hooks.sh +27 -27
  178. package/ftm-map/scripts/setup.sh +56 -56
  179. package/ftm-map/scripts/test_db.py +364 -364
  180. package/ftm-map/scripts/test_parser.py +174 -174
  181. package/ftm-map/scripts/test_query.py +183 -183
  182. package/ftm-map/scripts/test_ranker.py +199 -199
  183. package/ftm-map/scripts/views.py +591 -591
  184. package/ftm-map.yml +2 -2
  185. package/ftm-mind/SKILL.md +201 -1943
  186. package/ftm-mind/evals/promptfoo.yaml +142 -142
  187. package/ftm-mind/references/blackboard-protocol.md +110 -0
  188. package/ftm-mind/references/blackboard-schema.md +328 -328
  189. package/ftm-mind/references/complexity-guide.md +110 -110
  190. package/ftm-mind/references/complexity-sizing.md +138 -0
  191. package/ftm-mind/references/decide-act-protocol.md +172 -0
  192. package/ftm-mind/references/direct-execution.md +51 -0
  193. package/ftm-mind/references/environment-discovery.md +77 -0
  194. package/ftm-mind/references/event-registry.md +319 -319
  195. package/ftm-mind/references/mcp-inventory.md +300 -296
  196. package/ftm-mind/references/ops-routing.md +47 -0
  197. package/ftm-mind/references/orient-protocol.md +234 -0
  198. package/ftm-mind/references/personality.md +40 -0
  199. package/ftm-mind/references/protocols/COMPLEXITY-SIZING.md +72 -72
  200. package/ftm-mind/references/protocols/MCP-HEURISTICS.md +32 -32
  201. package/ftm-mind/references/protocols/PLAN-APPROVAL.md +80 -80
  202. package/ftm-mind/references/reflexion-protocol.md +249 -249
  203. package/ftm-mind/references/routing/SCENARIOS.md +22 -22
  204. package/ftm-mind/references/routing-scenarios.md +35 -35
  205. package/ftm-mind.yml +2 -2
  206. package/ftm-ops.yml +4 -0
  207. package/ftm-pause/SKILL.md +395 -395
  208. package/ftm-pause/references/protocols/SKILL-RESTORE-PROTOCOLS.md +186 -186
  209. package/ftm-pause/references/protocols/VALIDATION.md +80 -80
  210. package/ftm-pause.yml +2 -2
  211. package/ftm-researcher/SKILL.md +275 -275
  212. package/ftm-researcher/evals/agent-diversity.yaml +17 -17
  213. package/ftm-researcher/evals/synthesis-quality.yaml +12 -12
  214. package/ftm-researcher/evals/trigger-accuracy.yaml +39 -39
  215. package/ftm-researcher/references/adaptive-search.md +116 -116
  216. package/ftm-researcher/references/agent-prompts.md +193 -193
  217. package/ftm-researcher/references/council-integration.md +193 -193
  218. package/ftm-researcher/references/output-format.md +203 -203
  219. package/ftm-researcher/references/synthesis-pipeline.md +165 -165
  220. package/ftm-researcher/scripts/score_credibility.py +234 -234
  221. package/ftm-researcher/scripts/validate_research.py +92 -92
  222. package/ftm-researcher.yml +2 -2
  223. package/ftm-resume/SKILL.md +518 -518
  224. package/ftm-resume/references/protocols/VALIDATION.md +172 -172
  225. package/ftm-resume.yml +2 -2
  226. package/ftm-retro/SKILL.md +380 -380
  227. package/ftm-retro/references/protocols/SCORING-RUBRICS.md +89 -89
  228. package/ftm-retro/references/templates/REPORT-FORMAT.md +109 -109
  229. package/ftm-retro.yml +2 -2
  230. package/ftm-routine/SKILL.md +170 -170
  231. package/ftm-routine.yml +4 -4
  232. package/ftm-state/blackboard/capabilities.json +5 -5
  233. package/ftm-state/blackboard/capabilities.schema.json +27 -27
  234. package/ftm-state/blackboard/context.json +37 -23
  235. package/ftm-state/blackboard/experiences/doom-statusline-fix.json +26 -0
  236. package/ftm-state/blackboard/experiences/hackathon-pages-site.json +26 -0
  237. package/ftm-state/blackboard/experiences/hindsight-sso-kickoff.json +42 -0
  238. package/ftm-state/blackboard/experiences/index.json +58 -9
  239. package/ftm-state/blackboard/experiences/learning-ragnarok-api-access.json +23 -0
  240. package/ftm-state/blackboard/experiences/nordlayer-members-auto-assign.json +26 -0
  241. package/ftm-state/blackboard/experiences/saml2aws-stale-session-fix.json +41 -0
  242. package/ftm-state/blackboard/patterns.json +6 -6
  243. package/ftm-state/schemas/context.schema.json +130 -130
  244. package/ftm-state/schemas/experience-index.schema.json +77 -77
  245. package/ftm-state/schemas/experience.schema.json +78 -78
  246. package/ftm-state/schemas/patterns.schema.json +44 -44
  247. package/ftm-upgrade/SKILL.md +194 -194
  248. package/ftm-upgrade/scripts/check-version.sh +76 -76
  249. package/ftm-upgrade/scripts/upgrade.sh +143 -143
  250. package/ftm-upgrade.yml +2 -2
  251. package/ftm-verify.yml +2 -2
  252. package/ftm.yml +2 -2
  253. package/hooks/ftm-auto-log.sh +137 -0
  254. package/hooks/ftm-blackboard-enforcer.sh +93 -93
  255. package/hooks/ftm-discovery-reminder.sh +90 -90
  256. package/hooks/ftm-drafts-gate.sh +61 -61
  257. package/hooks/ftm-event-logger.mjs +107 -107
  258. package/hooks/ftm-install-hooks.sh +240 -0
  259. package/hooks/ftm-learning-capture.sh +117 -0
  260. package/hooks/ftm-map-autodetect.sh +79 -79
  261. package/hooks/ftm-pending-sync-check.sh +22 -22
  262. package/hooks/ftm-plan-gate.sh +92 -92
  263. package/hooks/ftm-post-commit-trigger.sh +57 -57
  264. package/hooks/ftm-post-compaction.sh +138 -0
  265. package/hooks/ftm-pre-compaction.sh +147 -0
  266. package/hooks/ftm-session-end.sh +52 -0
  267. package/hooks/ftm-session-snapshot.sh +213 -0
  268. package/hooks/ftm-task-loader.sh +100 -0
  269. package/hooks/settings-template.json +91 -81
  270. package/install.sh +363 -363
  271. package/package.json +84 -84
  272. package/uninstall.sh +25 -25
@@ -1,416 +1,416 @@
1
- ---
2
- name: ftm-council
3
- description: Multi-AI deliberation council that sends problems to Claude, Codex, and Gemini as equal peers, then loops through rounds of debate until 2-of-3 agree on a decision. Use when the user wants a second (and third) opinion, says "council this", "get other opinions", "what would other AIs think", "debate this", "multi-model", "ftm-council", or wants to cross-check a decision, architecture choice, debugging approach, or any problem where diverse AI perspectives would reduce blind spots. Especially valuable for debugging hard problems, architecture decisions, code review, and any situation where confirmation bias from a single model is a risk. Even if the user just says "I'm not sure about this approach" or "sanity check this", consider invoking the council.
4
- ---
5
-
6
- ## Events
7
-
8
- ### Emits
9
- - `review_complete` — when the council reaches a majority verdict (2-of-3 agreement) or synthesizes a final recommendation after 5 rounds
10
- - `task_completed` — when the deliberation session concludes and a verdict is returned to the caller
11
-
12
- ### Listens To
13
- (none — council is invoked explicitly by the user or by ftm-executor when an INTENT.md conflict requires arbitration)
14
-
15
- ## Blackboard Read
16
-
17
- Before starting, load context from the blackboard:
18
-
19
- 1. Read `~/.claude/ftm-state/blackboard/context.json` — check current_task, recent_decisions, active_constraints
20
- 2. Read `~/.claude/ftm-state/blackboard/experiences/index.json` — filter entries by tags matching the current decision domain
21
- 3. Load top 3-5 matching experience files for past council verdicts and how well they held up
22
- 4. Read `~/.claude/ftm-state/blackboard/patterns.json` — check execution_patterns for what types of decisions benefited most from multi-model review
23
-
24
- If index.json is empty or no matches found, proceed normally without experience-informed shortcuts.
25
-
26
- # FTM Council
27
-
28
- Three AI peers — Claude, Codex, and Gemini — independently research the codebase and deliberate on a problem through structured rounds of debate. No single model is the authority. Each model explores the code on its own, forms its own conclusions from what it finds, and only then enters deliberation. The council converges through majority vote: when 2 of 3 agree, that's the decision. If 5 rounds pass without majority agreement, Claude synthesizes the best elements from all three positions and presents the user with a clear summary of where the models agreed, where they diverged, and a recommended path forward.
29
-
30
- ## Why Independent Research Matters
31
-
32
- The whole point of a multi-model council is diverse perspectives. If Claude reads the code first and then tells the other models what it found, you get three models reacting to Claude's framing — not three independent investigations. That's a game of telephone, not a council.
33
-
34
- Each model has different attention patterns, different ways of navigating code, and different instincts about what's relevant. Codex might grep for usage patterns Claude wouldn't think to check. Gemini might focus on a config file Claude skimmed past. By letting each model explore independently, you get genuinely different perspectives grounded in what each model actually found in the codebase — not just different opinions about the same Claude-curated snippet.
35
-
36
- ## Prerequisites
37
-
38
- The user needs both CLI tools installed and authenticated:
39
- - **Codex**: `npm install -g @openai/codex` (authenticated via `codex login`)
40
- - **Gemini**: `npm install -g @google/gemini-cli` (authenticated via Google)
41
-
42
- Before the first round, verify both are available:
43
- ```bash
44
- which codex && which gemini
45
- ```
46
- If either is missing, tell the user what to install and stop — don't try to run a 2-model council.
47
-
48
- ## The Protocol
49
-
50
- ### Auto-Invocation Mode
51
-
52
- The council can be invoked in two ways:
53
-
54
- 1. **User-invoked** (default): The user asks for a council. You frame the problem in Step 0 and proceed through the protocol.
55
- 2. **Auto-invoked**: Another skill (typically ftm-executor) invokes the council with a pre-framed conflict payload. Skip Step 0 — the problem is already framed.
56
-
57
- **Detecting auto-invocation:**
58
- If the invocation includes a structured conflict payload with these fields, you're in auto-invocation mode:
59
- - `CONFLICT TYPE`
60
- - `ORIGINAL INTENT`
61
- - `CODEX'S CHANGE`
62
- - `CODEX'S REASONING`
63
- - `THE CODE IN QUESTION`
64
- - `DEBUG.md HISTORY`
65
- - `QUESTION FOR THE COUNCIL`
66
-
67
- **Auto-invocation protocol:**
68
- 1. Skip Step 0 (problem is already framed by the calling skill)
69
- 2. Use the conflict payload directly as the council prompt for all three models
70
- 3. Add this context to each model's prompt: "This is an INTENT.md conflict from an automated build pipeline. Codex (gpt-5.4) made a code fix that contradicts the project's stated intent. You must decide: should the intent documentation be updated to match the fix, or should the fix be reverted to preserve the original intent?"
71
- 4. Include the DEBUG.md history so models don't suggest approaches already tried
72
- 5. Run through Steps 1-5 as normal (independent research → consensus check → rebuttals → verdict)
73
- 6. Return the verdict in a structured format the calling skill can parse:
74
-
75
- ```
76
- COUNCIL VERDICT:
77
- decision: "update_intent" | "revert_fix"
78
- round: [the round in which consensus was reached]
79
- agreed_by: [which 2 models agreed]
80
- dissent: [the third model's position]
81
- reasoning: [2-3 sentence explanation]
82
- debug_log_entry: [formatted entry for DEBUG.md]
83
- ```
84
-
85
- **Key difference from user-invoked:**
86
- - In user-invoked mode, you show the user the framed prompt and wait for confirmation before starting
87
- - In auto-invoked mode, you proceed immediately — the calling skill already validated the conflict
88
- - In auto-invoked mode, you do NOT ask the user if they want to dig deeper into the dissent — you return the verdict directly to the calling skill
89
-
90
- ### Step 0: Frame the Problem
91
-
92
- > **Note:** This step is skipped in auto-invocation mode. If a structured conflict payload was provided, proceed directly to Step 1 using the payload as the council prompt.
93
-
94
- Take the user's request and distill it into a clear **council prompt** — a self-contained problem statement that makes sense without conversation history. The prompt should describe the problem and what a good answer looks like, but it should NOT include pre-read code. The models will read the code themselves.
95
-
96
- Include:
97
- - The specific question or decision to be made
98
- - File paths or areas of the codebase to start investigating (as pointers, not content)
99
- - Error messages or symptoms if it's a debugging problem
100
- - Decision criteria — what a good answer looks like
101
- - Any constraints the user has mentioned
102
-
103
- Do NOT include:
104
- - Pre-read file contents (each model reads files itself)
105
- - Your own analysis or opinion about the problem
106
- - Summaries of what the code does (let each model discover that)
107
-
108
- Show the user the framed prompt before proceeding: "Here's what I'll send to the council — does this capture the problem?" Wait for confirmation or edits.
109
-
110
- ### Step 1: Independent Research (Round 1)
111
-
112
- This is the critical step. All three models explore the codebase independently and in parallel. Each one reads whatever files it thinks are relevant, follows whatever threads it wants, and arrives at its own position based on its own research.
113
-
114
- **You (Claude) are the orchestrator in this step, NOT a peer.** You do not form your own position yet. You spawn three independent investigations and collect the results.
115
-
116
- Launch all three in parallel:
117
-
118
- **Claude investigation** — spawn a subagent (this keeps the investigation isolated from your orchestrator context):
119
-
120
- ```
121
- You are one of three AI peers in a deliberation council. The other two peers are Codex (OpenAI) and Gemini (Google). Your job is to independently investigate the following problem by reading the codebase, then give your honest, well-reasoned position.
122
-
123
- IMPORTANT: Do your own research. Read files, search code, trace through logic. Your position must be grounded in what you actually find in the code, not assumptions. Cite specific files and line numbers.
124
-
125
- PROBLEM:
126
- {council_prompt}
127
-
128
- WORKING DIRECTORY: {cwd}
129
-
130
- Instructions:
131
- 1. Start by exploring the relevant parts of the codebase — read files, search for patterns, trace dependencies
132
- 2. Take notes on what you find as you go
133
- 3. After you've done sufficient research, formulate your position
134
-
135
- Give your response in this format:
136
- 1. RESEARCH SUMMARY: What files you examined, what you found (with file:line references)
137
- 2. POSITION: Your clear stance (1-2 sentences)
138
- 3. REASONING: Why you believe this, grounded in specific code you read
139
- 4. CONCERNS: What could go wrong with your approach
140
- 5. CONFIDENCE: High/Medium/Low and why
141
- ```
142
-
143
- **Codex** — spawn a subagent that runs:
144
- ```bash
145
- codex exec --full-auto "You are one of three AI peers in a deliberation council. The other two peers are Claude (Anthropic) and Gemini (Google). Your job is to independently investigate the following problem by reading the codebase, then give your honest, well-reasoned position.
146
-
147
- IMPORTANT: Do your own research. Read files, search code, trace through logic. Your position must be grounded in what you actually find in the code, not assumptions. Cite specific files and line numbers.
148
-
149
- PROBLEM:
150
- {council_prompt}
151
-
152
- Instructions:
153
- 1. Start by exploring the relevant parts of the codebase — read files, search for patterns, trace dependencies
154
- 2. Take notes on what you find as you go
155
- 3. After you have done sufficient research, formulate your position
156
-
157
- Give your response in this format:
158
- 1. RESEARCH SUMMARY: What files you examined, what you found (with file:line references)
159
- 2. POSITION: Your clear stance (1-2 sentences)
160
- 3. REASONING: Why you believe this, grounded in specific code you read
161
- 4. CONCERNS: What could go wrong with your approach
162
- 5. CONFIDENCE: High/Medium/Low and why"
163
- ```
164
-
165
- The `--full-auto` flag runs Codex inside its workspace sandbox without approval prompts, so it can explore files on its own. Note that this sandbox also permits writes within the workspace, not just reads.
166
-
167
- **Gemini** — spawn a subagent that runs:
168
- ```bash
169
- gemini -p "You are one of three AI peers in a deliberation council. The other two peers are Claude (Anthropic) and Codex (OpenAI). Your job is to independently investigate the following problem by reading the codebase, then give your honest, well-reasoned position.
170
-
171
- IMPORTANT: Do your own research. Read files, search code, trace through logic. Your position must be grounded in what you actually find in the code, not assumptions. Cite specific files and line numbers.
172
-
173
- PROBLEM:
174
- {council_prompt}
175
-
176
- Instructions:
177
- 1. Start by exploring the relevant parts of the codebase — read files, search for patterns, trace dependencies
178
- 2. Take notes on what you find as you go
179
- 3. After you have done sufficient research, formulate your position
180
-
181
- Give your response in this format:
182
- 1. RESEARCH SUMMARY: What files you examined, what you found (with file:line references)
183
- 2. POSITION: Your clear stance (1-2 sentences)
184
- 3. REASONING: Why you believe this, grounded in specific code you read
185
- 4. CONCERNS: What could go wrong with your approach
186
- 5. CONFIDENCE: High/Medium/Low and why" --yolo
187
- ```
188
-
189
- The `--yolo` flag makes Gemini auto-approve all of its tool actions (file reads included) so it can explore without getting stuck on permission prompts.
190
-
191
- Collect all three responses. Present them to the user with a structured comparison that highlights what each model found:
192
-
193
- ```
194
- ## Round 1 — Independent Research
195
-
196
- ### Claude
197
- **Research**: [what files it read, what it focused on]
198
- **Position**: ...
199
- **Key evidence**: ...
200
-
201
- ### Codex
202
- **Research**: [what files it read, what it focused on]
203
- **Position**: ...
204
- **Key evidence**: ...
205
-
206
- ### Gemini
207
- **Research**: [what files it read, what it focused on]
208
- **Position**: ...
209
- **Key evidence**: ...
210
-
211
- ### Alignment Check
212
- - Agreement areas: ...
213
- - Divergence points: ...
214
- - Different research paths: [note if models looked at different files or focused on different aspects — this is valuable signal]
215
- - Majority forming? [Yes — X and Y agree / No — all three differ]
216
- ```
217
-
218
- ### Step 2: Check for Early Consensus
219
-
220
- After each round, check if 2 of 3 positions substantially agree. "Substantially agree" means they recommend the same approach, even if they phrase it differently or differ on minor details. Don't require identical wording — look for the same core recommendation.
221
-
222
- If majority exists → jump to **Step 5: Verdict**.
223
- If not → continue to the next rebuttal round.
224
-
225
- ### Step 3: Rebuttal Rounds (Rounds 2-5)
226
-
227
- For each subsequent round, each model sees the other two models' previous positions (including what they found in the code) and must respond directly. This is where the real deliberation happens — models engage with each other's evidence and arguments, not just opinions.
228
-
229
- Build a rebuttal prompt that includes the previous round's research and positions:
230
-
231
- For Codex and Gemini, the rebuttal prompt should include enough context for them to do targeted follow-up research if they want to verify the other models' claims:
232
-
233
- ```
234
- Round {N} of the deliberation council.
235
-
236
- Here's what happened in the previous round. Each model independently researched the codebase and formed a position:
237
-
238
- CLAUDE's research and position:
239
- {claude_previous_full}
240
-
241
- CODEX's research and position:
242
- {codex_previous_full}
243
-
244
- GEMINI's research and position:
245
- {gemini_previous_full}
246
-
247
- Now respond. You may do additional codebase research if you want to verify claims the other models made or investigate angles they raised. Then:
248
-
249
- 1. Directly address the strongest point from each other model
250
- 2. If another model cited code you haven't looked at, go read it and see if you agree with their interpretation
251
- 3. State whether you've changed your position (and why, or why not)
252
- 4. If you agree with another model, say so explicitly
253
-
254
- UPDATED POSITION: [same/changed] ...
255
- NEW EVIDENCE (if any): [anything new you found by following up on other models' research]
256
- KEY RESPONSE TO {OTHER_MODEL_1}: ...
257
- KEY RESPONSE TO {OTHER_MODEL_2}: ...
258
- REMAINING DISAGREEMENTS: ...
259
- ```
260
-
261
- For rebuttal rounds, use the same CLI flags (`--full-auto` for Codex, `--yolo` for Gemini) so models can do follow-up research — they might want to verify a claim another model made by reading a file they hadn't looked at before.
262
-
263
- The Claude rebuttal should also be done via a subagent so it stays isolated and doesn't anchor on the orchestrator's accumulated context.
264
-
265
- Present the round results to the user with the structured comparison format. Highlight what changed, who moved, and whether consensus is forming. Pay special attention to cases where a model changed its mind after reading code another model pointed to — that's the council working as intended.
266
-
267
- ### Step 4: Repeat or Escalate
268
-
269
- After each rebuttal round, check for majority agreement (Step 2).
270
-
271
- If after 5 rounds there's still no majority:
272
- - This is a genuinely hard problem with legitimate disagreement
273
- - Synthesize the three final positions into a summary
274
- - Highlight the core tension — what's the fundamental tradeoff they can't agree on?
275
- - Note which models examined which parts of the codebase — incomplete research might explain persistent disagreement
276
- - Present the user with 2-3 concrete options (mapped to the council positions) and let them decide
277
-
278
- ### Step 5: Verdict
279
-
280
- When 2 of 3 agree, present the verdict:
281
-
282
- ```
283
- ## Council Verdict — Round {N}
284
-
285
- **Decision**: {the agreed position}
286
- **Agreed by**: {which two models}
287
- **Dissent**: {the third model's remaining objection}
288
-
289
- ### Evidence basis
290
- {What code each model examined that led to this conclusion}
291
-
292
- ### Why the majority position won
293
- {Brief analysis of why the arguments were stronger}
294
-
295
- ### The dissent is worth noting because
296
- {What the dissenting model raised that's still valid — this often contains useful caveats}
297
-
298
- ### Recommended action
299
- {Concrete next steps based on the decision}
300
- ```
301
-
302
- Ask the user if they want to proceed with the verdict or if they want to dig deeper into the dissent.
303
-
304
- **Auto-invocation verdict format:**
305
-
306
- When auto-invoked, also return the verdict in the structured format the calling skill expects:
307
-
308
- ```
309
- COUNCIL VERDICT:
310
- decision: "update_intent" | "revert_fix"
311
- round: [N]
312
- agreed_by: [model1, model2]
313
- dissent: [model3's position summary]
314
- reasoning: [why the majority position won]
315
- debug_log_entry: |
316
- ### Council Verdict — [timestamp]
317
- **Conflict**: [brief description]
318
- **Decision**: [update_intent/revert_fix]
319
- **Agreed by**: [models]
320
- **Reasoning**: [explanation]
321
- **Dissent**: [third model's concern]
322
- ```
323
-
324
- Do not ask the user if they want to proceed — return the verdict directly to the calling skill.
325
-
326
- ## Practical Considerations
327
-
328
- ### Timeouts
329
- Independent research takes longer than simple prompting — each model is reading files, searching code, etc. Set timeouts at 300s (5 minutes) for Round 1 since that's the heavy research round. Rebuttal rounds can use 180s since they're doing less exploration. If one model times out, report it and continue with the other two.
330
-
331
- ### Error Handling
332
- If Codex or Gemini returns an error (auth failure, rate limit, sandbox issue, etc.):
333
- - Report the error to the user
334
- - Continue with the remaining models
335
- - A 2-model debate is better than nothing, though you lose the tiebreaker benefit
336
-
337
- ### Conversation State
338
- Between rounds, you (the orchestrator) hold state. Keep a running record of each model's research findings AND positions so you can construct accurate rebuttal prompts. Codex and Gemini are stateless between rounds, so every round's prompt must be self-contained — include the full history of what each model found and argued.
339
-
340
- ### Working Directory
341
- Make sure Codex and Gemini run from the same working directory as the current session. This ensures they're all looking at the same codebase. Pass `cd {cwd} &&` before the CLI command if needed to ensure correct directory.
342
-
343
- ### When NOT to Council
344
- - Trivial questions with obvious answers (don't waste 3 research sessions on "should I use const or let")
345
- - Questions where the user just needs execution, not deliberation
346
- - Pure opinion questions with no code to investigate
347
- - If the user says "just do it" — they want action, not debate
348
- - When auto-invoked by ftm-executor — always proceed (the executor already determined a council is needed)
349
-
350
- ## Blackboard Write
351
-
352
- After completing, update the blackboard:
353
-
354
- 1. Update `~/.claude/ftm-state/blackboard/context.json`:
355
- - Set current_task status to "complete"
356
- - Append decision summary to recent_decisions including the verdict and which models agreed (cap at 10)
357
- - Update session_metadata.skills_invoked and last_updated
358
- 2. Write an experience file to `~/.claude/ftm-state/blackboard/experiences/YYYY-MM-DD_task-slug.json` capturing decision domain, verdict, round reached, dissent summary, and whether the verdict held up
359
- 3. Update `~/.claude/ftm-state/blackboard/experiences/index.json` with the new entry
360
- 4. Emit `task_completed` event
361
-
362
- ## Requirements
363
-
364
- - tool: `codex` | required | Codex CLI for independent peer investigation
365
- - tool: `gemini` | required | Gemini CLI for independent peer investigation
366
- - reference: `references/protocols/PREREQUISITES.md` | required | availability check, fallback logic, timeout config
367
- - reference: `references/protocols/STEP-0-FRAMING.md` | required | problem framing format
368
- - reference: `references/prompts/CLAUDE-INVESTIGATION.md` | required | Claude investigation prompt template
369
- - reference: `references/prompts/CODEX-INVESTIGATION.md` | required | Codex investigation prompt template
370
- - reference: `references/prompts/GEMINI-INVESTIGATION.md` | required | Gemini investigation prompt template
371
- - reference: `references/prompts/REBUTTAL-TEMPLATE.md` | required | rebuttal round prompt template
372
- - reference: `~/.claude/ftm-state/blackboard/context.json` | optional | session state
373
-
374
- ## Risk
375
-
376
- - level: read_only
377
- - scope: reads codebase for independent investigation; does not modify source files; writes blackboard experience after verdict
378
- - rollback: no source mutations; blackboard write can be reverted by editing JSON files
379
-
380
- ## Approval Gates
381
-
382
- - trigger: council prompt framed in Step 0 | action: show framed prompt to user and wait for confirmation before dispatching to council
383
- - trigger: 2-of-3 majority reached | action: present verdict summary to user and ask if they want to proceed or dig into dissent
384
- - trigger: auto-invocation by ftm-executor (INTENT.md conflict) | action: skip user framing confirmation, run immediately and return structured COUNCIL VERDICT to caller
385
- - complexity_routing: micro → auto | small → auto | medium → auto | large → auto | xl → auto
386
-
387
- ## Fallbacks
388
-
389
- - condition: codex CLI not found | action: report missing dependency with install instructions and stop (do not run degraded council)
390
- - condition: gemini CLI not found | action: report missing dependency with install instructions and stop
391
- - condition: no majority after 5 rounds | action: synthesize final positions, highlight core tension, present 2-3 concrete options for user decision
392
- - condition: model times out during a round | action: note timeout for that model, continue round with remaining models' responses
393
-
394
- ## Capabilities
395
-
396
- - cli: `codex` | required | OpenAI Codex CLI peer reviewer
397
- - cli: `gemini` | required | Google Gemini CLI peer reviewer
398
- - env: `OPENAI_API_KEY` | required | for Codex CLI authentication
399
- - env: `GEMINI_API_KEY` | required | for Gemini CLI authentication
400
-
401
- ## Event Payloads
402
-
403
- ### review_complete
404
- - skill: string — "ftm-council"
405
- - verdict: string — "update_intent" | "revert_fix" | "option_a" | "option_b" | custom decision
406
- - round: number — round in which majority was reached (1-5, or 5+ for synthesis)
407
- - agreed_by: string[] — which models agreed on the verdict
408
- - dissent: string | null — summary of dissenting position
409
- - reasoning: string — why the majority won
410
-
411
- ### task_completed
412
- - skill: string — "ftm-council"
413
- - decision_domain: string — topic the council deliberated on
414
- - verdict: string — final decision
415
- - round: number — rounds taken to reach verdict
416
- - duration_ms: number — total deliberation time
1
+ ---
2
+ name: ftm-council
3
+ description: Multi-AI deliberation council that sends problems to Claude, Codex, and Gemini as equal peers, then loops through rounds of debate until 2-of-3 agree on a decision. Use when the user wants a second (and third) opinion, says "council this", "get other opinions", "what would other AIs think", "debate this", "multi-model", "ftm-council", or wants to cross-check a decision, architecture choice, debugging approach, or any problem where diverse AI perspectives would reduce blind spots. Especially valuable for debugging hard problems, architecture decisions, code review, and any situation where confirmation bias from a single model is a risk. Even if the user just says "I'm not sure about this approach" or "sanity check this", consider invoking the council.
4
+ ---
5
+
6
+ ## Events
7
+
8
+ ### Emits
9
+ - `review_complete` — when the council reaches a majority verdict (2-of-3 agreement) or synthesizes a final recommendation after 5 rounds
10
+ - `task_completed` — when the deliberation session concludes and a verdict is returned to the caller
11
+
12
+ ### Listens To
13
+ (none — council is invoked explicitly by the user or by ftm-executor when an INTENT.md conflict requires arbitration)
14
+
15
+ ## Blackboard Read
16
+
17
+ Before starting, load context from the blackboard:
18
+
19
+ 1. Read `~/.claude/ftm-state/blackboard/context.json` — check current_task, recent_decisions, active_constraints
20
+ 2. Read `~/.claude/ftm-state/blackboard/experiences/index.json` — filter entries by tags matching the current decision domain
21
+ 3. Load top 3-5 matching experience files for past council verdicts and how well they held up
22
+ 4. Read `~/.claude/ftm-state/blackboard/patterns.json` — check execution_patterns for what types of decisions benefited most from multi-model review
23
+
24
+ If index.json is empty or no matches found, proceed normally without experience-informed shortcuts.
25
+
26
+ # FTM Council
27
+
28
+ Three AI peers — Claude, Codex, and Gemini — independently research the codebase and deliberate on a problem through structured rounds of debate. No single model is the authority. Each model explores the code on its own, forms its own conclusions from what it finds, and only then enters deliberation. The council converges through majority vote: when 2 of 3 agree, that's the decision. If 5 rounds pass without majority agreement, Claude synthesizes the best elements from all three positions and presents the user with a clear summary of where the models agreed, where they diverged, and a recommended path forward.
29
+
30
+ ## Why Independent Research Matters
31
+
32
+ The whole point of a multi-model council is diverse perspectives. If Claude reads the code first and then tells the other models what it found, you get three models reacting to Claude's framing — not three independent investigations. That's a game of telephone, not a council.
33
+
34
+ Each model has different attention patterns, different ways of navigating code, and different instincts about what's relevant. Codex might grep for usage patterns Claude wouldn't think to check. Gemini might focus on a config file Claude skimmed past. By letting each model explore independently, you get genuinely different perspectives grounded in what each model actually found in the codebase — not just different opinions about the same Claude-curated snippet.
35
+
36
+ ## Prerequisites
37
+
38
+ The user needs both CLI tools installed and authenticated:
39
+ - **Codex**: `npm install -g @openai/codex` (authenticated via `codex login`)
40
+ - **Gemini**: `npm install -g @google/gemini-cli` (authenticated via Google)
41
+
42
+ Before the first round, verify both are available:
43
+ ```bash
44
+ which codex && which gemini
45
+ ```
46
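+ If either is missing, tell the user what to install and stop — don't start a council with only two models. (This applies to missing installs; if a model errors out or times out mid-session, continue with the remaining models as described under Error Handling.)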
47
+
48
+ ## The Protocol
49
+
50
+ ### Auto-Invocation Mode
51
+
52
+ The council can be invoked in two ways:
53
+
54
+ 1. **User-invoked** (default): The user asks for a council. You frame the problem in Step 0 and proceed through the protocol.
55
+ 2. **Auto-invoked**: Another skill (typically ftm-executor) invokes the council with a pre-framed conflict payload. Skip Step 0 — the problem is already framed.
56
+
57
+ **Detecting auto-invocation:**
58
+ If the invocation includes a structured conflict payload with these fields, you're in auto-invocation mode:
59
+ - `CONFLICT TYPE`
60
+ - `ORIGINAL INTENT`
61
+ - `CODEX'S CHANGE`
62
+ - `CODEX'S REASONING`
63
+ - `THE CODE IN QUESTION`
64
+ - `DEBUG.md HISTORY`
65
+ - `QUESTION FOR THE COUNCIL`
66
+
67
+ **Auto-invocation protocol:**
68
+ 1. Skip Step 0 (problem is already framed by the calling skill)
69
+ 2. Use the conflict payload directly as the council prompt for all three models
70
+ 3. Add this context to each model's prompt: "This is an INTENT.md conflict from an automated build pipeline. Codex (gpt-5.4) made a code fix that contradicts the project's stated intent. You must decide: should the intent documentation be updated to match the fix, or should the fix be reverted to preserve the original intent?"
71
+ 4. Include the DEBUG.md history so models don't suggest approaches already tried
72
+ 5. Run through Steps 1-5 as normal (independent research → consensus check → rebuttals → verdict)
73
+ 6. Return the verdict in a structured format the calling skill can parse:
74
+
75
+ ```
76
+ COUNCIL VERDICT:
77
+ decision: "update_intent" | "revert_fix"
78
+ round: [which round consensus was reached]
79
+ agreed_by: [which 2 models agreed]
80
+ dissent: [the third model's position]
81
+ reasoning: [2-3 sentence explanation]
82
+ debug_log_entry: [formatted entry for DEBUG.md]
83
+ ```
84
+
85
+ **Key difference from user-invoked:**
86
+ - In user-invoked mode, you show the user the framed prompt and wait for confirmation before starting
87
+ - In auto-invoked mode, you proceed immediately — the calling skill already validated the conflict
88
+ - In auto-invoked mode, you do NOT ask the user if they want to dig deeper into the dissent — you return the verdict directly to the calling skill
89
+
90
+ ### Step 0: Frame the Problem
91
+
92
+ > **Note:** This step is skipped in auto-invocation mode. If a structured conflict payload was provided, proceed directly to Step 1 using the payload as the council prompt.
93
+
94
+ Take the user's request and distill it into a clear **council prompt** — a self-contained problem statement that makes sense without conversation history. The prompt should describe the problem and what a good answer looks like, but it should NOT include pre-read code. The models will read the code themselves.
95
+
96
+ Include:
97
+ - The specific question or decision to be made
98
+ - File paths or areas of the codebase to start investigating (as pointers, not content)
99
+ - Error messages or symptoms if it's a debugging problem
100
+ - Decision criteria — what a good answer looks like
101
+ - Any constraints the user has mentioned
102
+
103
+ Do NOT include:
104
+ - Pre-read file contents (each model reads files itself)
105
+ - Your own analysis or opinion about the problem
106
+ - Summaries of what the code does (let each model discover that)
107
+
108
+ Show the user the framed prompt before proceeding: "Here's what I'll send to the council — does this capture the problem?" Wait for confirmation or edits.
109
+
110
+ ### Step 1: Independent Research (Round 1)
111
+
112
+ This is the critical step. All three models explore the codebase independently and in parallel. Each one reads whatever files it thinks are relevant, follows whatever threads it wants, and arrives at its own position based on its own research.
113
+
114
+ **You (Claude) are the orchestrator in this step, NOT a peer.** You do not form your own position yet. You spawn three independent investigations and collect the results.
115
+
116
+ Launch all three in parallel:
117
+
118
+ **Claude investigation** — spawn a subagent (this keeps the investigation isolated from your orchestrator context):
119
+
120
+ ```
121
+ You are one of three AI peers in a deliberation council. The other two peers are Codex (OpenAI) and Gemini (Google). Your job is to independently investigate the following problem by reading the codebase, then give your honest, well-reasoned position.
122
+
123
+ IMPORTANT: Do your own research. Read files, search code, trace through logic. Your position must be grounded in what you actually find in the code, not assumptions. Cite specific files and line numbers.
124
+
125
+ PROBLEM:
126
+ {council_prompt}
127
+
128
+ WORKING DIRECTORY: {cwd}
129
+
130
+ Instructions:
131
+ 1. Start by exploring the relevant parts of the codebase — read files, search for patterns, trace dependencies
132
+ 2. Take notes on what you find as you go
133
+ 3. After you've done sufficient research, formulate your position
134
+
135
+ Give your response in this format:
136
+ 1. RESEARCH SUMMARY: What files you examined, what you found (with file:line references)
137
+ 2. POSITION: Your clear stance (1-2 sentences)
138
+ 3. REASONING: Why you believe this, grounded in specific code you read
139
+ 4. CONCERNS: What could go wrong with your approach
140
+ 5. CONFIDENCE: High/Medium/Low and why
141
+ ```
142
+
143
+ **Codex** — spawn a subagent that runs:
144
+ ```bash
145
+ codex exec --full-auto "You are one of three AI peers in a deliberation council. The other two peers are Claude (Anthropic) and Gemini (Google). Your job is to independently investigate the following problem by reading the codebase, then give your honest, well-reasoned position.
146
+
147
+ IMPORTANT: Do your own research. Read files, search code, trace through logic. Your position must be grounded in what you actually find in the code, not assumptions. Cite specific files and line numbers.
148
+
149
+ PROBLEM:
150
+ {council_prompt}
151
+
152
+ Instructions:
153
+ 1. Start by exploring the relevant parts of the codebase — read files, search for patterns, trace dependencies
154
+ 2. Take notes on what you find as you go
155
+ 3. After you have done sufficient research, formulate your position
156
+
157
+ Give your response in this format:
158
+ 1. RESEARCH SUMMARY: What files you examined, what you found (with file:line references)
159
+ 2. POSITION: Your clear stance (1-2 sentences)
160
+ 3. REASONING: Why you believe this, grounded in specific code you read
161
+ 4. CONCERNS: What could go wrong with your approach
162
+ 5. CONFIDENCE: High/Medium/Low and why"
163
+ ```
164
+
165
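+ The `--full-auto` flag lets Codex run without approval prompts inside its workspace sandbox so it can explore files on its own. Note that this sandbox also permits writes within the workspace, so it is the prompt — not the flag — that keeps the investigation read-only.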
166
+
167
+ **Gemini** — spawn a subagent that runs:
168
+ ```bash
169
+ gemini -p "You are one of three AI peers in a deliberation council. The other two peers are Claude (Anthropic) and Codex (OpenAI). Your job is to independently investigate the following problem by reading the codebase, then give your honest, well-reasoned position.
170
+
171
+ IMPORTANT: Do your own research. Read files, search code, trace through logic. Your position must be grounded in what you actually find in the code, not assumptions. Cite specific files and line numbers.
172
+
173
+ PROBLEM:
174
+ {council_prompt}
175
+
176
+ Instructions:
177
+ 1. Start by exploring the relevant parts of the codebase — read files, search for patterns, trace dependencies
178
+ 2. Take notes on what you find as you go
179
+ 3. After you have done sufficient research, formulate your position
180
+
181
+ Give your response in this format:
182
+ 1. RESEARCH SUMMARY: What files you examined, what you found (with file:line references)
183
+ 2. POSITION: Your clear stance (1-2 sentences)
184
+ 3. REASONING: Why you believe this, grounded in specific code you read
185
+ 4. CONCERNS: What could go wrong with your approach
186
+ 5. CONFIDENCE: High/Medium/Low and why" --yolo
187
+ ```
188
+
189
+ The `--yolo` flag makes Gemini auto-approve all tool actions (not only file reads), so it can explore without getting stuck on permission prompts.
190
+
191
+ Collect all three responses. Present them to the user with a structured comparison that highlights what each model found:
192
+
193
+ ```
194
+ ## Round 1 — Independent Research
195
+
196
+ ### Claude
197
+ **Research**: [what files it read, what it focused on]
198
+ **Position**: ...
199
+ **Key evidence**: ...
200
+
201
+ ### Codex
202
+ **Research**: [what files it read, what it focused on]
203
+ **Position**: ...
204
+ **Key evidence**: ...
205
+
206
+ ### Gemini
207
+ **Research**: [what files it read, what it focused on]
208
+ **Position**: ...
209
+ **Key evidence**: ...
210
+
211
+ ### Alignment Check
212
+ - Agreement areas: ...
213
+ - Divergence points: ...
214
+ - Different research paths: [note if models looked at different files or focused on different aspects — this is valuable signal]
215
+ - Majority forming? [Yes — X and Y agree / No — all three differ]
216
+ ```
217
+
218
+ ### Step 2: Check for Early Consensus
219
+
220
+ After each round, check if 2 of 3 positions substantially agree. "Substantially agree" means they recommend the same approach, even if they phrase it differently or differ on minor details. Don't require identical wording — look for the same core recommendation.
221
+
222
+ If majority exists → jump to **Step 5: Verdict**.
223
+ If not → continue to the next rebuttal round.
224
+
225
+ ### Step 3: Rebuttal Rounds (Rounds 2-5)
226
+
227
+ For each subsequent round, each model sees the other two models' previous positions (including what they found in the code) and must respond directly. This is where the real deliberation happens — models engage with each other's evidence and arguments, not just opinions.
228
+
229
+ Build a rebuttal prompt that includes the previous round's research and positions from all three models.
230
+
231
+ For Codex and Gemini, the rebuttal prompt should include enough context for them to do targeted follow-up research if they want to verify the other models' claims:
232
+
233
+ ```
234
+ Round {N} of the deliberation council.
235
+
236
+ Here's what happened in the previous round. Each model independently researched the codebase and formed a position:
237
+
238
+ CLAUDE's research and position:
239
+ {claude_previous_full}
240
+
241
+ CODEX's research and position:
242
+ {codex_previous_full}
243
+
244
+ GEMINI's research and position:
245
+ {gemini_previous_full}
246
+
247
+ Now respond. You may do additional codebase research if you want to verify claims the other models made or investigate angles they raised. Then:
248
+
249
+ 1. Directly address the strongest point from each other model
250
+ 2. If another model cited code you haven't looked at, go read it and see if you agree with their interpretation
251
+ 3. State whether you've changed your position (and why, or why not)
252
+ 4. If you agree with another model, say so explicitly
253
+
254
+ UPDATED POSITION: [same/changed] ...
255
+ NEW EVIDENCE (if any): [anything new you found by following up on other models' research]
256
+ KEY RESPONSE TO {OTHER_MODEL_1}: ...
257
+ KEY RESPONSE TO {OTHER_MODEL_2}: ...
258
+ REMAINING DISAGREEMENTS: ...
259
+ ```
260
+
261
+ For rebuttal rounds, use the same CLI flags (`--full-auto` for Codex, `--yolo` for Gemini) so models can do follow-up research — they might want to verify a claim another model made by reading a file they hadn't looked at before.
262
+
263
+ The Claude rebuttal should also be done via a subagent so it stays isolated and doesn't anchor on the orchestrator's accumulated context.
264
+
265
+ Present the round results to the user with the structured comparison format. Highlight what changed, who moved, and whether consensus is forming. Pay special attention to cases where a model changed its mind after reading code another model pointed to — that's the council working as intended.
266
+
267
+ ### Step 4: Repeat or Escalate
268
+
269
+ After each rebuttal round, check for majority agreement (Step 2).
270
+
271
+ If after 5 rounds there's still no majority:
272
+ - This is a genuinely hard problem with legitimate disagreement
273
+ - Synthesize the three final positions into a summary
274
+ - Highlight the core tension — what's the fundamental tradeoff they can't agree on?
275
+ - Note which models examined which parts of the codebase — incomplete research might explain persistent disagreement
276
+ - Present the user with 2-3 concrete options (mapped to the council positions) and let them decide
277
+
278
+ ### Step 5: Verdict
279
+
280
+ When 2 of 3 agree, present the verdict:
281
+
282
+ ```
283
+ ## Council Verdict — Round {N}
284
+
285
+ **Decision**: {the agreed position}
286
+ **Agreed by**: {which two models}
287
+ **Dissent**: {the third model's remaining objection}
288
+
289
+ ### Evidence basis
290
+ {What code each model examined that led to this conclusion}
291
+
292
+ ### Why the majority position won
293
+ {Brief analysis of why the arguments were stronger}
294
+
295
+ ### The dissent is worth noting because
296
+ {What the dissenting model raised that's still valid — this often contains useful caveats}
297
+
298
+ ### Recommended action
299
+ {Concrete next steps based on the decision}
300
+ ```
301
+
302
+ Ask the user if they want to proceed with the verdict or if they want to dig deeper into the dissent.
303
+
304
+ **Auto-invocation verdict format:**
305
+
306
+ When auto-invoked, also return the verdict in the structured format the calling skill expects:
307
+
308
+ ```
309
+ COUNCIL VERDICT:
310
+ decision: "update_intent" | "revert_fix"
311
+ round: [N]
312
+ agreed_by: [model1, model2]
313
+ dissent: [model3's position summary]
314
+ reasoning: [why the majority position won]
315
+ debug_log_entry: |
316
+ ### Council Verdict — [timestamp]
317
+ **Conflict**: [brief description]
318
+ **Decision**: [update_intent/revert_fix]
319
+ **Agreed by**: [models]
320
+ **Reasoning**: [explanation]
321
+ **Dissent**: [third model's concern]
322
+ ```
323
+
324
+ Do not ask the user if they want to proceed — return the verdict directly to the calling skill.
325
+
326
+ ## Practical Considerations
327
+
328
+ ### Timeouts
329
+ Independent research takes longer than simple prompting — each model is reading files, searching code, etc. Set timeouts at 300s (5 minutes) for Round 1 since that's the heavy research round. Rebuttal rounds can use 180s since they're doing less exploration. If one model times out, report it and continue with the other two.
330
+
331
+ ### Error Handling
332
+ If Codex or Gemini returns an error (auth failure, rate limit, sandbox issue, etc.):
333
+ - Report the error to the user
334
+ - Continue with the remaining models
335
+ - A 2-model debate is better than nothing, though you lose the tiebreaker benefit
336
+
337
+ ### Conversation State
338
+ Between rounds, you (the orchestrator) hold state. Keep a running record of each model's research findings AND positions so you can construct accurate rebuttal prompts. Codex and Gemini are stateless between rounds, so every round's prompt must be self-contained — include the full history of what each model found and argued.
339
+
340
+ ### Working Directory
341
+ Make sure Codex and Gemini run from the same working directory as the current session, so that all three models are looking at the same codebase. If needed, prefix the CLI command with `cd {cwd} &&` to ensure the correct directory.
342
+
343
+ ### When NOT to Council
344
+ - Trivial questions with obvious answers (don't waste 3 research sessions on "should I use const or let")
345
+ - Questions where the user just needs execution, not deliberation
346
+ - Pure opinion questions with no code to investigate
347
+ - If the user says "just do it" — they want action, not debate
348
+ - Exception: when auto-invoked by ftm-executor, always proceed regardless of the above — the executor has already determined that a council is needed
349
+
350
+ ## Blackboard Write
351
+
352
+ After completing, update the blackboard:
353
+
354
+ 1. Update `~/.claude/ftm-state/blackboard/context.json`:
355
+ - Set current_task status to "complete"
356
+ - Append decision summary to recent_decisions including the verdict and which models agreed (cap at 10)
357
+ - Update session_metadata.skills_invoked and last_updated
358
+ 2. Write an experience file to `~/.claude/ftm-state/blackboard/experiences/YYYY-MM-DD_task-slug.json` capturing decision domain, verdict, round reached, dissent summary, and whether the verdict held up
359
+ 3. Update `~/.claude/ftm-state/blackboard/experiences/index.json` with the new entry
360
+ 4. Emit `task_completed` event
361
+
362
+ ## Requirements
363
+
364
+ - tool: `codex` | required | Codex CLI for independent peer investigation
365
+ - tool: `gemini` | required | Gemini CLI for independent peer investigation
366
+ - reference: `references/protocols/PREREQUISITES.md` | required | availability check, fallback logic, timeout config
367
+ - reference: `references/protocols/STEP-0-FRAMING.md` | required | problem framing format
368
+ - reference: `references/prompts/CLAUDE-INVESTIGATION.md` | required | Claude investigation prompt template
369
+ - reference: `references/prompts/CODEX-INVESTIGATION.md` | required | Codex investigation prompt template
370
+ - reference: `references/prompts/GEMINI-INVESTIGATION.md` | required | Gemini investigation prompt template
371
+ - reference: `references/prompts/REBUTTAL-TEMPLATE.md` | required | rebuttal round prompt template
372
+ - reference: `~/.claude/ftm-state/blackboard/context.json` | optional | session state
373
+
374
+ ## Risk
375
+
376
+ - level: read_only
377
+ - scope: reads codebase for independent investigation; does not modify source files; writes blackboard experience after verdict
378
+ - rollback: no source mutations; blackboard write can be reverted by editing JSON files
379
+
380
+ ## Approval Gates
381
+
382
+ - trigger: council prompt framed in Step 0 | action: show framed prompt to user and wait for confirmation before dispatching to council
383
+ - trigger: 2-of-3 majority reached | action: present verdict summary to user and ask if they want to proceed or dig into dissent
384
+ - trigger: auto-invocation by ftm-executor (INTENT.md conflict) | action: skip user framing confirmation, run immediately and return structured COUNCIL VERDICT to caller
385
+ - complexity_routing: micro → auto | small → auto | medium → auto | large → auto | xl → auto
386
+
387
+ ## Fallbacks
388
+
389
+ - condition: codex CLI not found | action: report missing dependency with install instructions and stop (do not run degraded council)
390
+ - condition: gemini CLI not found | action: report missing dependency with install instructions and stop
391
+ - condition: no majority after 5 rounds | action: synthesize final positions, highlight core tension, present 2-3 concrete options for user decision
392
+ - condition: model times out during a round | action: note timeout for that model, continue round with remaining models' responses
393
+
394
+ ## Capabilities
395
+
396
+ - cli: `codex` | required | OpenAI Codex CLI peer reviewer
397
+ - cli: `gemini` | required | Google Gemini CLI peer reviewer
398
+ - env: `OPENAI_API_KEY` | required | for Codex CLI authentication
399
+ - env: `GEMINI_API_KEY` | required | for Gemini CLI authentication
400
+
401
+ ## Event Payloads
402
+
403
+ ### review_complete
404
+ - skill: string — "ftm-council"
405
+ - verdict: string — "update_intent" | "revert_fix" | "option_a" | "option_b" | custom decision
406
+ - round: number — round in which majority was reached (1-5, or 5+ for synthesis)
407
+ - agreed_by: string[] — which models agreed on the verdict
408
+ - dissent: string | null — summary of dissenting position
409
+ - reasoning: string — why the majority won
410
+
411
+ ### task_completed
412
+ - skill: string — "ftm-council"
413
+ - decision_domain: string — topic the council deliberated on
414
+ - verdict: string — final decision
415
+ - round: number — rounds taken to reach verdict
416
+ - duration_ms: number — total deliberation time