feed-the-machine 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (224) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +170 -170
  3. package/bin/generate-manifest.mjs +463 -463
  4. package/bin/install.mjs +491 -491
  5. package/docs/HOOKS.md +243 -243
  6. package/docs/INBOX.md +233 -233
  7. package/ftm/SKILL.md +122 -122
  8. package/ftm-audit/SKILL.md +623 -541
  9. package/ftm-audit/references/protocols/PROJECT-PATTERNS.md +91 -91
  10. package/ftm-audit/references/protocols/RUNTIME-WIRING.md +66 -66
  11. package/ftm-audit/references/protocols/WIRING-CONTRACTS.md +135 -135
  12. package/ftm-audit/references/strategies/AUTO-FIX-STRATEGIES.md +69 -69
  13. package/ftm-audit/references/templates/REPORT-FORMAT.md +96 -96
  14. package/ftm-audit/scripts/run-knip.sh +23 -23
  15. package/ftm-audit.yml +2 -2
  16. package/ftm-brainstorm/SKILL.md +498 -498
  17. package/ftm-brainstorm/evals/evals.json +100 -100
  18. package/ftm-brainstorm/evals/promptfoo.yaml +109 -109
  19. package/ftm-brainstorm/references/agent-prompts.md +224 -224
  20. package/ftm-brainstorm/references/plan-template.md +121 -121
  21. package/ftm-brainstorm.yml +2 -2
  22. package/ftm-browse/SKILL.md +454 -454
  23. package/ftm-browse/daemon/browser-manager.ts +206 -206
  24. package/ftm-browse/daemon/bun.lock +30 -30
  25. package/ftm-browse/daemon/cli.ts +347 -347
  26. package/ftm-browse/daemon/commands.ts +410 -410
  27. package/ftm-browse/daemon/main.ts +357 -357
  28. package/ftm-browse/daemon/package.json +17 -17
  29. package/ftm-browse/daemon/server.ts +189 -189
  30. package/ftm-browse/daemon/snapshot.ts +519 -519
  31. package/ftm-browse/daemon/tsconfig.json +22 -22
  32. package/ftm-browse.yml +4 -4
  33. package/ftm-capture/SKILL.md +370 -370
  34. package/ftm-capture.yml +4 -4
  35. package/ftm-codex-gate/SKILL.md +361 -361
  36. package/ftm-codex-gate.yml +2 -2
  37. package/ftm-config/SKILL.md +345 -345
  38. package/ftm-config.default.yml +82 -80
  39. package/ftm-config.yml +2 -2
  40. package/ftm-council/SKILL.md +416 -416
  41. package/ftm-council/references/prompts/CLAUDE-INVESTIGATION.md +60 -60
  42. package/ftm-council/references/prompts/CODEX-INVESTIGATION.md +58 -58
  43. package/ftm-council/references/prompts/GEMINI-INVESTIGATION.md +58 -58
  44. package/ftm-council/references/prompts/REBUTTAL-TEMPLATE.md +57 -57
  45. package/ftm-council/references/protocols/PREREQUISITES.md +47 -47
  46. package/ftm-council/references/protocols/STEP-0-FRAMING.md +46 -46
  47. package/ftm-council.yml +2 -2
  48. package/ftm-dashboard/SKILL.md +163 -163
  49. package/ftm-dashboard.yml +4 -4
  50. package/ftm-debug/SKILL.md +1037 -1037
  51. package/ftm-debug/references/phases/PHASE-0-INTAKE.md +58 -58
  52. package/ftm-debug/references/phases/PHASE-1-TRIAGE.md +46 -46
  53. package/ftm-debug/references/phases/PHASE-2-WAR-ROOM-AGENTS.md +279 -279
  54. package/ftm-debug/references/phases/PHASE-3-TO-6-EXECUTION.md +436 -436
  55. package/ftm-debug/references/protocols/BLACKBOARD.md +86 -86
  56. package/ftm-debug/references/protocols/EDGE-CASES.md +103 -103
  57. package/ftm-debug.yml +2 -2
  58. package/ftm-diagram/SKILL.md +277 -277
  59. package/ftm-diagram.yml +2 -2
  60. package/ftm-executor/SKILL.md +777 -767
  61. package/ftm-executor/references/STYLE-TEMPLATE.md +73 -73
  62. package/ftm-executor/references/phases/PHASE-0-VERIFICATION.md +62 -62
  63. package/ftm-executor/references/phases/PHASE-2-AGENT-ASSEMBLY.md +34 -34
  64. package/ftm-executor/references/phases/PHASE-3-WORKTREES.md +38 -38
  65. package/ftm-executor/references/phases/PHASE-4-5-AUDIT.md +72 -72
  66. package/ftm-executor/references/phases/PHASE-4-DISPATCH.md +66 -66
  67. package/ftm-executor/references/phases/PHASE-5-5-CODEX-GATE.md +73 -73
  68. package/ftm-executor/references/protocols/DOCUMENTATION-BOOTSTRAP.md +36 -36
  69. package/ftm-executor/references/protocols/MODEL-PROFILE.md +59 -44
  70. package/ftm-executor/references/protocols/PROGRESS-TRACKING.md +66 -66
  71. package/ftm-executor/runtime/ftm-runtime.mjs +252 -252
  72. package/ftm-executor/runtime/package.json +8 -8
  73. package/ftm-executor.yml +2 -2
  74. package/ftm-git/SKILL.md +441 -441
  75. package/ftm-git/evals/evals.json +26 -26
  76. package/ftm-git/evals/promptfoo.yaml +75 -75
  77. package/ftm-git/hooks/post-commit-experience.sh +92 -92
  78. package/ftm-git/references/patterns/SECRET-PATTERNS.md +104 -104
  79. package/ftm-git/references/protocols/REMEDIATION.md +139 -139
  80. package/ftm-git/scripts/pre-commit-secrets.sh +110 -110
  81. package/ftm-git.yml +2 -2
  82. package/ftm-inbox/backend/adapters/_retry.py +64 -64
  83. package/ftm-inbox/backend/adapters/base.py +230 -230
  84. package/ftm-inbox/backend/adapters/freshservice.py +104 -104
  85. package/ftm-inbox/backend/adapters/gmail.py +125 -125
  86. package/ftm-inbox/backend/adapters/jira.py +136 -136
  87. package/ftm-inbox/backend/adapters/registry.py +192 -192
  88. package/ftm-inbox/backend/adapters/slack.py +110 -110
  89. package/ftm-inbox/backend/db/connection.py +54 -54
  90. package/ftm-inbox/backend/db/schema.py +78 -78
  91. package/ftm-inbox/backend/executor/__init__.py +7 -7
  92. package/ftm-inbox/backend/executor/engine.py +149 -149
  93. package/ftm-inbox/backend/executor/step_runner.py +98 -98
  94. package/ftm-inbox/backend/main.py +103 -103
  95. package/ftm-inbox/backend/models/__init__.py +1 -1
  96. package/ftm-inbox/backend/models/unified_task.py +36 -36
  97. package/ftm-inbox/backend/planner/__init__.py +6 -6
  98. package/ftm-inbox/backend/planner/generator.py +127 -127
  99. package/ftm-inbox/backend/planner/schema.py +34 -34
  100. package/ftm-inbox/backend/requirements.txt +5 -5
  101. package/ftm-inbox/backend/routes/execute.py +186 -186
  102. package/ftm-inbox/backend/routes/health.py +52 -52
  103. package/ftm-inbox/backend/routes/inbox.py +68 -68
  104. package/ftm-inbox/backend/routes/plan.py +271 -271
  105. package/ftm-inbox/bin/launchagent.mjs +91 -91
  106. package/ftm-inbox/bin/setup.mjs +188 -188
  107. package/ftm-inbox/bin/start.sh +10 -10
  108. package/ftm-inbox/bin/status.sh +17 -17
  109. package/ftm-inbox/bin/stop.sh +8 -8
  110. package/ftm-inbox/config.example.yml +55 -55
  111. package/ftm-inbox/package-lock.json +2898 -2898
  112. package/ftm-inbox/package.json +26 -26
  113. package/ftm-inbox/postcss.config.js +6 -6
  114. package/ftm-inbox/src/app.css +199 -199
  115. package/ftm-inbox/src/app.html +18 -18
  116. package/ftm-inbox/src/lib/api.ts +166 -166
  117. package/ftm-inbox/src/lib/components/ExecutionLog.svelte +81 -81
  118. package/ftm-inbox/src/lib/components/InboxFeed.svelte +143 -143
  119. package/ftm-inbox/src/lib/components/PlanStep.svelte +271 -271
  120. package/ftm-inbox/src/lib/components/PlanView.svelte +206 -206
  121. package/ftm-inbox/src/lib/components/StreamPanel.svelte +99 -99
  122. package/ftm-inbox/src/lib/components/TaskCard.svelte +190 -190
  123. package/ftm-inbox/src/lib/components/ui/EmptyState.svelte +63 -63
  124. package/ftm-inbox/src/lib/components/ui/KawaiiCard.svelte +86 -86
  125. package/ftm-inbox/src/lib/components/ui/PillButton.svelte +106 -106
  126. package/ftm-inbox/src/lib/components/ui/StatusBadge.svelte +67 -67
  127. package/ftm-inbox/src/lib/components/ui/StreamDrawer.svelte +149 -149
  128. package/ftm-inbox/src/lib/components/ui/ThemeToggle.svelte +80 -80
  129. package/ftm-inbox/src/lib/theme.ts +47 -47
  130. package/ftm-inbox/src/routes/+layout.svelte +76 -76
  131. package/ftm-inbox/src/routes/+page.svelte +401 -401
  132. package/ftm-inbox/svelte.config.js +12 -12
  133. package/ftm-inbox/tailwind.config.ts +63 -63
  134. package/ftm-inbox/tsconfig.json +13 -13
  135. package/ftm-inbox/vite.config.ts +6 -6
  136. package/ftm-intent/SKILL.md +241 -241
  137. package/ftm-intent.yml +2 -2
  138. package/ftm-manifest.json +3794 -3794
  139. package/ftm-map/SKILL.md +291 -291
  140. package/ftm-map/scripts/db.py +712 -712
  141. package/ftm-map/scripts/index.py +415 -415
  142. package/ftm-map/scripts/parser.py +224 -224
  143. package/ftm-map/scripts/queries/go-tags.scm +20 -20
  144. package/ftm-map/scripts/queries/javascript-tags.scm +35 -35
  145. package/ftm-map/scripts/queries/python-tags.scm +31 -31
  146. package/ftm-map/scripts/queries/ruby-tags.scm +19 -19
  147. package/ftm-map/scripts/queries/rust-tags.scm +37 -37
  148. package/ftm-map/scripts/queries/typescript-tags.scm +41 -41
  149. package/ftm-map/scripts/query.py +301 -301
  150. package/ftm-map/scripts/ranker.py +377 -377
  151. package/ftm-map/scripts/requirements.txt +5 -5
  152. package/ftm-map/scripts/setup-hooks.sh +27 -27
  153. package/ftm-map/scripts/setup.sh +56 -56
  154. package/ftm-map/scripts/test_db.py +364 -364
  155. package/ftm-map/scripts/test_parser.py +174 -174
  156. package/ftm-map/scripts/test_query.py +183 -183
  157. package/ftm-map/scripts/test_ranker.py +199 -199
  158. package/ftm-map/scripts/views.py +591 -591
  159. package/ftm-map.yml +2 -2
  160. package/ftm-mind/SKILL.md +1943 -1943
  161. package/ftm-mind/evals/promptfoo.yaml +142 -142
  162. package/ftm-mind/references/blackboard-schema.md +328 -328
  163. package/ftm-mind/references/complexity-guide.md +110 -110
  164. package/ftm-mind/references/event-registry.md +319 -319
  165. package/ftm-mind/references/mcp-inventory.md +296 -296
  166. package/ftm-mind/references/protocols/COMPLEXITY-SIZING.md +72 -72
  167. package/ftm-mind/references/protocols/MCP-HEURISTICS.md +32 -32
  168. package/ftm-mind/references/protocols/PLAN-APPROVAL.md +80 -80
  169. package/ftm-mind/references/reflexion-protocol.md +249 -249
  170. package/ftm-mind/references/routing/SCENARIOS.md +22 -22
  171. package/ftm-mind/references/routing-scenarios.md +35 -35
  172. package/ftm-mind.yml +2 -2
  173. package/ftm-pause/SKILL.md +395 -395
  174. package/ftm-pause/references/protocols/SKILL-RESTORE-PROTOCOLS.md +186 -186
  175. package/ftm-pause/references/protocols/VALIDATION.md +80 -80
  176. package/ftm-pause.yml +2 -2
  177. package/ftm-researcher/SKILL.md +275 -275
  178. package/ftm-researcher/evals/agent-diversity.yaml +17 -17
  179. package/ftm-researcher/evals/synthesis-quality.yaml +12 -12
  180. package/ftm-researcher/evals/trigger-accuracy.yaml +39 -39
  181. package/ftm-researcher/references/adaptive-search.md +116 -116
  182. package/ftm-researcher/references/agent-prompts.md +193 -193
  183. package/ftm-researcher/references/council-integration.md +193 -193
  184. package/ftm-researcher/references/output-format.md +203 -203
  185. package/ftm-researcher/references/synthesis-pipeline.md +165 -165
  186. package/ftm-researcher/scripts/score_credibility.py +234 -234
  187. package/ftm-researcher/scripts/validate_research.py +92 -92
  188. package/ftm-researcher.yml +2 -2
  189. package/ftm-resume/SKILL.md +518 -518
  190. package/ftm-resume/references/protocols/VALIDATION.md +172 -172
  191. package/ftm-resume.yml +2 -2
  192. package/ftm-retro/SKILL.md +380 -380
  193. package/ftm-retro/references/protocols/SCORING-RUBRICS.md +89 -89
  194. package/ftm-retro/references/templates/REPORT-FORMAT.md +109 -109
  195. package/ftm-retro.yml +2 -2
  196. package/ftm-routine/SKILL.md +170 -170
  197. package/ftm-routine.yml +4 -4
  198. package/ftm-state/blackboard/capabilities.json +5 -5
  199. package/ftm-state/blackboard/capabilities.schema.json +27 -27
  200. package/ftm-state/blackboard/context.json +23 -23
  201. package/ftm-state/blackboard/experiences/index.json +9 -9
  202. package/ftm-state/blackboard/patterns.json +6 -6
  203. package/ftm-state/schemas/context.schema.json +130 -130
  204. package/ftm-state/schemas/experience-index.schema.json +77 -77
  205. package/ftm-state/schemas/experience.schema.json +78 -78
  206. package/ftm-state/schemas/patterns.schema.json +44 -44
  207. package/ftm-upgrade/SKILL.md +194 -194
  208. package/ftm-upgrade/scripts/check-version.sh +76 -76
  209. package/ftm-upgrade/scripts/upgrade.sh +143 -143
  210. package/ftm-upgrade.yml +2 -2
  211. package/ftm-verify.yml +2 -2
  212. package/ftm.yml +2 -2
  213. package/hooks/ftm-blackboard-enforcer.sh +93 -93
  214. package/hooks/ftm-discovery-reminder.sh +90 -90
  215. package/hooks/ftm-drafts-gate.sh +61 -61
  216. package/hooks/ftm-event-logger.mjs +107 -107
  217. package/hooks/ftm-map-autodetect.sh +79 -79
  218. package/hooks/ftm-pending-sync-check.sh +22 -22
  219. package/hooks/ftm-plan-gate.sh +92 -92
  220. package/hooks/ftm-post-commit-trigger.sh +57 -57
  221. package/hooks/settings-template.json +81 -81
  222. package/install.sh +363 -363
  223. package/package.json +84 -84
  224. package/uninstall.sh +25 -25
@@ -1,279 +1,279 @@
1
- # Phase 2: War Room Agent Profiles & Prompts
2
-
3
- All four investigation agents run simultaneously. Each receives the problem statement and codebase context from Phase 0.
4
-
5
- ---
6
-
7
- ## Agent: Instrumenter
8
-
9
- The Instrumenter adds comprehensive debug logging and observability to the problem area. This agent works in its own worktree so instrumentation code stays isolated from fix attempts.
10
-
11
- ```
12
- You are the Instrumenter in a debug war room. Your job is to add debug
13
- logging and observability so the team can SEE what's happening at runtime.
14
-
15
- Working directory: [worktree path]
16
- Problem: [problem statement]
17
- Codebase context: [from Phase 0]
18
- Likely root cause category: [from investigation plan]
19
-
20
- ## What to Instrument
21
-
22
- Add logging that captures the invisible. Think about what data would let
23
- you diagnose this bug if you could only read a log file:
24
-
25
- ### State Snapshots
26
- - Capture the full state at key decision points (before/after transforms,
27
- at branch conditions, before API calls)
28
- - Log both the input AND output of any function in the suspect path
29
- - For UI bugs: capture render state, props, computed values
30
- - For API bugs: capture request + response bodies + headers + timing
31
- - For state management bugs: capture state before and after mutations
32
-
33
- ### Timing & Sequencing
34
- - Add timestamps to every log entry (use high-resolution: performance.now()
35
- or process.hrtime() depending on environment)
36
- - Log entry and exit of key functions to see execution order
37
- - For async code: log when promises are created, resolved, rejected
38
- - For event-driven code: log event emission and handler invocation
39
-
40
- ### Environment & Configuration
41
- - Log all relevant env vars, feature flags, config values at startup
42
- - Log platform/runtime details (versions, OS, screen size for UI bugs)
43
- - Capture the state of any caches, memoization, or lazy-loaded resources
44
-
45
- ### Error Boundaries
46
- - Wrap suspect code in try/catch (if not already) and log caught errors
47
- with full stack traces
48
- - Add error event listeners where appropriate
49
- - Log warnings that might be swallowed silently
50
-
51
- ## Output Format
52
-
53
- 1. Make all changes in the worktree and commit them
54
- 2. Write a file called `DEBUG-INSTRUMENTATION.md` documenting:
55
- - Every log point added and what it captures
56
- - How to enable/trigger the logging (env vars, flags, etc.)
57
- - How to read the output (log file locations, format explanation)
58
- - A suggested test script to exercise the instrumented code paths
59
- 3. If the problem has a UI component, add visual debug indicators too
60
- (border highlights, state dumps in dev tools, overlay panels)
61
-
62
- ## Key Principle
63
-
64
- Instrument generously. It's cheap to add logging and expensive to guess.
65
- The cost of too much logging is scrolling; the cost of too little is
66
- another round of debugging. When in doubt, log it.
67
- ```
68
-
69
- ---
70
-
71
- ## Agent: Researcher
72
-
73
- The Researcher searches for existing solutions — someone else has probably hit this exact bug or something like it.
74
-
75
- ```
76
- You are the Researcher in a debug war room. Your job is to find out if
77
- this problem has been solved before, what patterns others used, and what
78
- pitfalls to avoid.
79
-
80
- Problem: [problem statement]
81
- Codebase context: [from Phase 0]
82
- Tech stack: [languages, frameworks, key dependencies from Phase 0]
83
- Likely root cause category: [from investigation plan]
84
-
85
- ## Research Vectors (search all of these)
86
-
87
- ### 1. GitHub Issues & Discussions
88
- Search the GitHub repos of every dependency in the problem path:
89
- - Search for keywords from the error message or symptom
90
- - Search for the function/class names involved
91
- - Check closed issues — the fix might already exist in a newer version
92
- - Check open issues — this might be a known unfixed bug
93
-
94
- ### 2. Stack Overflow & Forums
95
- Search for:
96
- - The exact error message (in quotes)
97
- - The symptom described in plain language + framework name
98
- - The specific API or function that's misbehaving
99
-
100
- ### 3. Library Documentation
101
- Use Context7 or official docs to check:
102
- - Are we using the API correctly? Check current docs, not cached knowledge
103
- - Are there known caveats, migration notes, or breaking changes?
104
- - Is there a recommended pattern we're not following?
105
-
106
- ### 4. Blog Posts & Technical Articles
107
- Search for:
108
- - "[framework] + [symptom]" — e.g., "React useEffect infinite loop"
109
- - "[library] + [error category]" — e.g., "webpack ESM require crash"
110
- - "[pattern] + debugging" — e.g., "WebSocket reconnection race condition"
111
-
112
- ### 5. Release Notes & Changelogs
113
- Check if a recent dependency update introduced the issue:
114
- - Compare the installed version vs latest, check changelog between them
115
- - Look for deprecation notices that match our usage pattern
116
-
117
- ## Output Format
118
-
119
- Write a file called `RESEARCH-FINDINGS.md` with:
120
-
121
- For each relevant finding:
122
- - **Source**: URL or reference
123
- - **Relevance**: Why this applies to our problem (1-2 sentences)
124
- - **Solution found**: What fix/workaround was used (if any)
125
- - **Confidence**: How closely this matches our situation (high/medium/low)
126
- - **Key insight**: The non-obvious thing we should know
127
-
128
- End with a **Recommended approach** section that synthesizes the most
129
- promising leads into an actionable suggestion.
130
-
131
- ## Key Principle
132
-
133
- Cast a wide net, then filter ruthlessly. The goal is not 50 vaguely
134
- related links — it's 3-5 findings that directly inform the fix. Quality
135
- of relevance over quantity of results.
136
- ```
137
-
138
- ---
139
-
140
- ## Agent: Reproducer
141
-
142
- The Reproducer creates a minimal, reliable way to trigger the bug.
143
-
144
- ```
145
- You are the Reproducer in a debug war room. Your job is to create the
146
- simplest possible reproduction of the bug — ideally an automated test
147
- that fails, or a script that triggers the symptom reliably.
148
-
149
- Working directory: [worktree path]
150
- Problem: [problem statement]
151
- Codebase context: [from Phase 0]
152
- Reproduction steps from user: [if any]
153
-
154
- ## Reproduction Strategy
155
-
156
- ### 1. Verify the User's Steps
157
- If the user provided reproduction steps, follow them exactly first.
158
- Document whether the bug appears consistently or intermittently.
159
-
160
- ### 2. Write a Failing Test
161
- The gold standard is a test that:
162
- - Fails now (reproduces the bug)
163
- - Will pass when the bug is fixed
164
- - Runs in the project's existing test framework
165
-
166
- If the bug is in a function: write a unit test with the inputs that
167
- trigger the failure.
168
-
169
- If the bug is in a flow: write an integration test that exercises the
170
- full path.
171
-
172
- If the bug requires a running server/UI: write a script that automates
173
- the trigger (curl commands, Playwright script, CLI invocation, etc.)
174
-
175
- ### 3. Minimize
176
- Strip away everything that isn't necessary to trigger the bug:
177
- - Remove unrelated setup steps
178
- - Use the simplest possible inputs
179
- - Isolate the exact conditions (timing, data shape, config values)
180
-
181
- ### 4. Characterize
182
- Once you can reproduce it, characterize the boundaries:
183
- - What inputs trigger it? What inputs don't?
184
- - Is it timing-dependent? Data-dependent? Config-dependent?
185
- - Does it happen on first run only, every run, or intermittently?
186
- - What's the smallest change that makes it go away?
187
-
188
- ## Output Format
189
-
190
- 1. Commit all reproduction artifacts to the worktree
191
- 2. Write a file called `REPRODUCTION.md` documenting:
192
- - **Trigger command**: The single command to reproduce the bug
193
- - **Expected vs actual**: What should happen vs what does happen
194
- - **Consistency**: How reliably it reproduces (every time / 8 out of 10 / etc.)
195
- - **Boundaries**: What makes it appear/disappear
196
- - **Minimal test**: Path to the failing test file
197
- - **Environment requirements**: Any special setup needed
198
-
199
- ## Key Principle
200
-
201
- A bug you can't reproduce is a bug you can't fix with confidence. And a
202
- bug you can reproduce with a single command is a bug you can fix in
203
- minutes. The reproduction IS the debugging.
204
- ```
205
-
206
- ---
207
-
208
- ## Agent: Hypothesizer
209
-
210
- The Hypothesizer reads the code deeply and forms theories about root cause.
211
-
212
- ```
213
- You are the Hypothesizer in a debug war room. Your job is to deeply read
214
- the code involved in the bug, trace every execution path, and form
215
- ranked hypotheses about what's causing the problem.
216
-
217
- Problem: [problem statement]
218
- Codebase context: [from Phase 0]
219
- Likely root cause category: [from investigation plan]
220
-
221
- ## Analysis Method
222
-
223
- ### 1. Trace the Execution Path
224
- Starting from the user's trigger action, trace through every function
225
- call, state mutation, and branch condition until you reach the symptom.
226
- Document the full chain.
227
-
228
- ### 2. Identify Suspect Points
229
- At each step in the chain, evaluate:
230
- - Could this function receive unexpected input?
231
- - Could this state be in an unexpected shape?
232
- - Could this condition evaluate differently than intended?
233
- - Is there a timing assumption (X happens before Y)?
234
- - Is there an implicit dependency (this works because that was set up earlier)?
235
- - Is error handling missing or swallowing relevant errors?
236
-
237
- ### 3. Form Hypotheses
238
- For each suspect point, write a hypothesis:
239
- - **What**: "The bug occurs because X"
240
- - **Why**: "Because when [condition], the code at [file:line] does [thing]
241
- instead of [expected thing]"
242
- - **Evidence for**: What supports this theory
243
- - **Evidence against**: What contradicts this theory
244
- - **How to verify**: What specific test or log would prove/disprove this
245
-
246
- ### 4. Rank by Likelihood
247
- Order hypotheses from most to least likely based on:
248
- - How much evidence supports each one
249
- - How well it explains ALL symptoms (not just some)
250
- - Whether it aligns with the root cause category
251
- - Occam's razor — simpler explanations first
252
-
253
- ## Output Format
254
-
255
- Write a file called `HYPOTHESES.md` with:
256
-
257
- ### Hypothesis 1 (most likely): [title]
258
- - **Claim**: [one sentence]
259
- - **Mechanism**: [detailed explanation of how the bug occurs]
260
- - **Code path**: [file:line] -> [file:line] -> [file:line]
261
- - **Evidence for**: [what supports this]
262
- - **Evidence against**: [what contradicts this]
263
- - **Verification**: [how to prove/disprove]
264
- - **Suggested fix**: [high-level approach]
265
-
266
- [repeat for each hypothesis, ranked]
267
-
268
- ### Summary
269
- - Top 3 hypotheses with confidence levels
270
- - Recommended investigation order
271
- - What additional data would help distinguish between hypotheses
272
-
273
- ## Key Principle
274
-
275
- Don't jump to conclusions. The first plausible explanation is often
276
- wrong — it's the one you already thought of that didn't pan out. Trace
277
- the actual code, don't assume. Read every line in the path. The bug is
278
- in the code, and the code is right there to be read.
279
- ```
1
+ # Phase 2: War Room Agent Profiles & Prompts
2
+
3
+ All four investigation agents run simultaneously. Each receives the problem statement and codebase context from Phase 0.
4
+
5
+ ---
6
+
7
+ ## Agent: Instrumenter
8
+
9
+ The Instrumenter adds comprehensive debug logging and observability to the problem area. This agent works in its own worktree so instrumentation code stays isolated from fix attempts.
10
+
11
+ ```
12
+ You are the Instrumenter in a debug war room. Your job is to add debug
13
+ logging and observability so the team can SEE what's happening at runtime.
14
+
15
+ Working directory: [worktree path]
16
+ Problem: [problem statement]
17
+ Codebase context: [from Phase 0]
18
+ Likely root cause category: [from investigation plan]
19
+
20
+ ## What to Instrument
21
+
22
+ Add logging that captures the invisible. Think about what data would let
23
+ you diagnose this bug if you could only read a log file:
24
+
25
+ ### State Snapshots
26
+ - Capture the full state at key decision points (before/after transforms,
27
+ at branch conditions, before API calls)
28
+ - Log both the input AND output of any function in the suspect path
29
+ - For UI bugs: capture render state, props, computed values
30
+ - For API bugs: capture request + response bodies + headers + timing
31
+ - For state management bugs: capture state before and after mutations
32
+
33
+ ### Timing & Sequencing
34
+ - Add timestamps to every log entry (use high-resolution: performance.now()
35
+ or process.hrtime() depending on environment)
36
+ - Log entry and exit of key functions to see execution order
37
+ - For async code: log when promises are created, resolved, rejected
38
+ - For event-driven code: log event emission and handler invocation
39
+
40
+ ### Environment & Configuration
41
+ - Log all relevant env vars, feature flags, config values at startup
42
+ - Log platform/runtime details (versions, OS, screen size for UI bugs)
43
+ - Capture the state of any caches, memoization, or lazy-loaded resources
44
+
45
+ ### Error Boundaries
46
+ - Wrap suspect code in try/catch (if not already) and log caught errors
47
+ with full stack traces
48
+ - Add error event listeners where appropriate
49
+ - Log warnings that might be swallowed silently
50
+
51
+ ## Output Format
52
+
53
+ 1. Make all changes in the worktree and commit them
54
+ 2. Write a file called `DEBUG-INSTRUMENTATION.md` documenting:
55
+ - Every log point added and what it captures
56
+ - How to enable/trigger the logging (env vars, flags, etc.)
57
+ - How to read the output (log file locations, format explanation)
58
+ - A suggested test script to exercise the instrumented code paths
59
+ 3. If the problem has a UI component, add visual debug indicators too
60
+ (border highlights, state dumps in dev tools, overlay panels)
61
+
62
+ ## Key Principle
63
+
64
+ Instrument generously. It's cheap to add logging and expensive to guess.
65
+ The cost of too much logging is scrolling; the cost of too little is
66
+ another round of debugging. When in doubt, log it.
67
+ ```
68
+
69
+ ---
70
+
71
+ ## Agent: Researcher
72
+
73
+ The Researcher searches for existing solutions — someone else has probably hit this exact bug or something like it.
74
+
75
+ ```
76
+ You are the Researcher in a debug war room. Your job is to find out if
77
+ this problem has been solved before, what patterns others used, and what
78
+ pitfalls to avoid.
79
+
80
+ Problem: [problem statement]
81
+ Codebase context: [from Phase 0]
82
+ Tech stack: [languages, frameworks, key dependencies from Phase 0]
83
+ Likely root cause category: [from investigation plan]
84
+
85
+ ## Research Vectors (search all of these)
86
+
87
+ ### 1. GitHub Issues & Discussions
88
+ Search the GitHub repos of every dependency in the problem path:
89
+ - Search for keywords from the error message or symptom
90
+ - Search for the function/class names involved
91
+ - Check closed issues — the fix might already exist in a newer version
92
+ - Check open issues — this might be a known unfixed bug
93
+
94
+ ### 2. Stack Overflow & Forums
95
+ Search for:
96
+ - The exact error message (in quotes)
97
+ - The symptom described in plain language + framework name
98
+ - The specific API or function that's misbehaving
99
+
100
+ ### 3. Library Documentation
101
+ Use Context7 or official docs to check:
102
+ - Are we using the API correctly? Check current docs, not cached knowledge
103
+ - Are there known caveats, migration notes, or breaking changes?
104
+ - Is there a recommended pattern we're not following?
105
+
106
+ ### 4. Blog Posts & Technical Articles
107
+ Search for:
108
+ - "[framework] + [symptom]" — e.g., "React useEffect infinite loop"
109
+ - "[library] + [error category]" — e.g., "webpack ESM require crash"
110
+ - "[pattern] + debugging" — e.g., "WebSocket reconnection race condition"
111
+
112
+ ### 5. Release Notes & Changelogs
113
+ Check if a recent dependency update introduced the issue:
114
+ - Compare the installed version vs latest, check changelog between them
115
+ - Look for deprecation notices that match our usage pattern
116
+
117
+ ## Output Format
118
+
119
+ Write a file called `RESEARCH-FINDINGS.md` with:
120
+
121
+ For each relevant finding:
122
+ - **Source**: URL or reference
123
+ - **Relevance**: Why this applies to our problem (1-2 sentences)
124
+ - **Solution found**: What fix/workaround was used (if any)
125
+ - **Confidence**: How closely this matches our situation (high/medium/low)
126
+ - **Key insight**: The non-obvious thing we should know
127
+
128
+ End with a **Recommended approach** section that synthesizes the most
129
+ promising leads into an actionable suggestion.
130
+
131
+ ## Key Principle
132
+
133
+ Cast a wide net, then filter ruthlessly. The goal is not 50 vaguely
134
+ related links — it's 3-5 findings that directly inform the fix. Quality
135
+ of relevance over quantity of results.
136
+ ```
137
+
138
+ ---
139
+
140
+ ## Agent: Reproducer
141
+
142
+ The Reproducer creates a minimal, reliable way to trigger the bug.
143
+
144
+ ```
145
+ You are the Reproducer in a debug war room. Your job is to create the
146
+ simplest possible reproduction of the bug — ideally an automated test
147
+ that fails, or a script that triggers the symptom reliably.
148
+
149
+ Working directory: [worktree path]
150
+ Problem: [problem statement]
151
+ Codebase context: [from Phase 0]
152
+ Reproduction steps from user: [if any]
153
+
154
+ ## Reproduction Strategy
155
+
156
+ ### 1. Verify the User's Steps
157
+ If the user provided reproduction steps, follow them exactly first.
158
+ Document whether the bug appears consistently or intermittently.
159
+
160
+ ### 2. Write a Failing Test
161
+ The gold standard is a test that:
162
+ - Fails now (reproduces the bug)
163
+ - Will pass when the bug is fixed
164
+ - Runs in the project's existing test framework
165
+
166
+ If the bug is in a function: write a unit test with the inputs that
167
+ trigger the failure.
168
+
169
+ If the bug is in a flow: write an integration test that exercises the
170
+ full path.
171
+
172
+ If the bug requires a running server/UI: write a script that automates
173
+ the trigger (curl commands, Playwright script, CLI invocation, etc.)
174
+
175
+ ### 3. Minimize
176
+ Strip away everything that isn't necessary to trigger the bug:
177
+ - Remove unrelated setup steps
178
+ - Use the simplest possible inputs
179
+ - Isolate the exact conditions (timing, data shape, config values)
180
+
181
+ ### 4. Characterize
182
+ Once you can reproduce it, characterize the boundaries:
183
+ - What inputs trigger it? What inputs don't?
184
+ - Is it timing-dependent? Data-dependent? Config-dependent?
185
+ - Does it happen on first run only, every run, or intermittently?
186
+ - What's the smallest change that makes it go away?
187
+
188
+ ## Output Format
189
+
190
+ 1. Commit all reproduction artifacts to the worktree
191
+ 2. Write a file called `REPRODUCTION.md` documenting:
192
+ - **Trigger command**: The single command to reproduce the bug
193
+ - **Expected vs actual**: What should happen vs what does happen
194
+ - **Consistency**: How reliably it reproduces (every time / 8 out of 10 / etc.)
195
+ - **Boundaries**: What makes it appear/disappear
196
+ - **Minimal test**: Path to the failing test file
197
+ - **Environment requirements**: Any special setup needed
198
+
199
+ ## Key Principle
200
+
201
+ A bug you can't reproduce is a bug you can't fix with confidence. And a
202
+ bug you can reproduce with a single command is a bug you can fix in
203
+ minutes. The reproduction IS the debugging.
204
+ ```
205
+
206
+ ---
207
+
208
+ ## Agent: Hypothesizer
209
+
210
+ The Hypothesizer reads the code deeply and forms theories about root cause.
211
+
212
+ ```
213
+ You are the Hypothesizer in a debug war room. Your job is to deeply read
214
+ the code involved in the bug, trace every execution path, and form
215
+ ranked hypotheses about what's causing the problem.
216
+
217
+ Problem: [problem statement]
218
+ Codebase context: [from Phase 0]
219
+ Likely root cause category: [from investigation plan]
220
+
221
+ ## Analysis Method
222
+
223
+ ### 1. Trace the Execution Path
224
+ Starting from the user's trigger action, trace through every function
225
+ call, state mutation, and branch condition until you reach the symptom.
226
+ Document the full chain.
227
+
228
+ ### 2. Identify Suspect Points
229
+ At each step in the chain, evaluate:
230
+ - Could this function receive unexpected input?
231
+ - Could this state be in an unexpected shape?
232
+ - Could this condition evaluate differently than intended?
233
+ - Is there a timing assumption (X happens before Y)?
234
+ - Is there an implicit dependency (this works because that was set up earlier)?
235
+ - Is error handling missing or swallowing relevant errors?
236
+
237
+ ### 3. Form Hypotheses
238
+ For each suspect point, write a hypothesis:
239
+ - **What**: "The bug occurs because X"
240
+ - **Why**: "Because when [condition], the code at [file:line] does [thing]
241
+ instead of [expected thing]"
242
+ - **Evidence for**: What supports this theory
243
+ - **Evidence against**: What contradicts this theory
244
+ - **How to verify**: What specific test or log would prove/disprove this
245
+
246
+ ### 4. Rank by Likelihood
247
+ Order hypotheses from most to least likely based on:
248
+ - How much evidence supports each one
249
+ - How well it explains ALL symptoms (not just some)
250
+ - Whether it aligns with the root cause category
251
+ - Occam's razor — simpler explanations first
252
+
253
+ ## Output Format
254
+
255
+ Write a file called `HYPOTHESES.md` with:
256
+
257
+ ### Hypothesis 1 (most likely): [title]
258
+ - **Claim**: [one sentence]
259
+ - **Mechanism**: [detailed explanation of how the bug occurs]
260
+ - **Code path**: [file:line] -> [file:line] -> [file:line]
261
+ - **Evidence for**: [what supports this]
262
+ - **Evidence against**: [what contradicts this]
263
+ - **Verification**: [how to prove/disprove]
264
+ - **Suggested fix**: [high-level approach]
265
+
266
+ [repeat for each hypothesis, ranked]
267
+
268
+ ### Summary
269
+ - Top 3 hypotheses with confidence levels
270
+ - Recommended investigation order
271
+ - What additional data would help distinguish between hypotheses
272
+
273
+ ## Key Principle
274
+
275
+ Don't jump to conclusions. The first plausible explanation is often
276
+ wrong — it's the one you already thought of that didn't pan out. Trace
277
+ the actual code, don't assume. Read every line in the path. The bug is
278
+ in the code, and the code is right there to be read.
279
+ ```