discoclaw 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (393) hide show
  1. package/.context/README.md +42 -0
  2. package/.context/architecture.md +58 -0
  3. package/.context/bot-setup.md +24 -0
  4. package/.context/dev.md +230 -0
  5. package/.context/discord.md +144 -0
  6. package/.context/memory.md +257 -0
  7. package/.context/ops.md +59 -0
  8. package/.context/pa-safety.md +47 -0
  9. package/.context/pa.md +118 -0
  10. package/.context/project.md +43 -0
  11. package/.context/runtime.md +253 -0
  12. package/.context/tasks.md +71 -0
  13. package/.context/tools.md +75 -0
  14. package/.env.example +88 -0
  15. package/.env.example.full +378 -0
  16. package/LICENSE +21 -0
  17. package/README.md +220 -0
  18. package/dist/beads/auto-tag.js +2 -0
  19. package/dist/beads/auto-tag.test.js +62 -0
  20. package/dist/beads/bd-cli.js +9 -0
  21. package/dist/beads/bd-cli.test.js +495 -0
  22. package/dist/beads/bead-hooks-cli.js +149 -0
  23. package/dist/beads/bead-sync-cli.js +5 -0
  24. package/dist/beads/bead-sync-cli.test.js +72 -0
  25. package/dist/beads/bead-sync-coordinator.js +4 -0
  26. package/dist/beads/bead-sync-coordinator.test.js +239 -0
  27. package/dist/beads/bead-sync-watcher.js +2 -0
  28. package/dist/beads/bead-sync-watcher.test.js +96 -0
  29. package/dist/beads/bead-sync.js +7 -0
  30. package/dist/beads/bead-sync.test.js +876 -0
  31. package/dist/beads/bead-thread-cache.js +8 -0
  32. package/dist/beads/bead-thread-cache.test.js +91 -0
  33. package/dist/beads/discord-sync.js +18 -0
  34. package/dist/beads/discord-sync.test.js +782 -0
  35. package/dist/beads/find-bead-by-thread.test.js +36 -0
  36. package/dist/beads/forum-guard.js +2 -0
  37. package/dist/beads/forum-guard.test.js +204 -0
  38. package/dist/beads/initialize.js +3 -0
  39. package/dist/beads/initialize.test.js +304 -0
  40. package/dist/beads/types.js +10 -0
  41. package/dist/cli/daemon-installer.js +225 -0
  42. package/dist/cli/daemon-installer.test.js +289 -0
  43. package/dist/cli/index.js +42 -0
  44. package/dist/cli/init-wizard.js +374 -0
  45. package/dist/cli/init-wizard.test.js +191 -0
  46. package/dist/config.js +385 -0
  47. package/dist/config.test.js +589 -0
  48. package/dist/cron/auto-tag.js +100 -0
  49. package/dist/cron/auto-tag.test.js +91 -0
  50. package/dist/cron/cadence.js +74 -0
  51. package/dist/cron/cadence.test.js +53 -0
  52. package/dist/cron/cron-sync-coordinator.js +66 -0
  53. package/dist/cron/cron-sync-coordinator.test.js +118 -0
  54. package/dist/cron/cron-sync.js +165 -0
  55. package/dist/cron/cron-sync.test.js +228 -0
  56. package/dist/cron/cron-tag-map-watcher.js +128 -0
  57. package/dist/cron/cron-tag-map-watcher.test.js +155 -0
  58. package/dist/cron/default-timezone.js +23 -0
  59. package/dist/cron/default-timezone.test.js +30 -0
  60. package/dist/cron/discord-sync.js +205 -0
  61. package/dist/cron/discord-sync.test.js +353 -0
  62. package/dist/cron/executor.js +303 -0
  63. package/dist/cron/executor.test.js +614 -0
  64. package/dist/cron/forum-sync.js +347 -0
  65. package/dist/cron/forum-sync.test.js +539 -0
  66. package/dist/cron/job-lock.js +164 -0
  67. package/dist/cron/job-lock.test.js +178 -0
  68. package/dist/cron/parser.js +68 -0
  69. package/dist/cron/parser.test.js +115 -0
  70. package/dist/cron/run-control.js +24 -0
  71. package/dist/cron/run-control.test.js +27 -0
  72. package/dist/cron/run-stats.js +265 -0
  73. package/dist/cron/run-stats.test.js +160 -0
  74. package/dist/cron/scheduler.js +97 -0
  75. package/dist/cron/scheduler.test.js +112 -0
  76. package/dist/cron/tag-map.js +47 -0
  77. package/dist/cron/tag-map.test.js +64 -0
  78. package/dist/cron/types.js +1 -0
  79. package/dist/discoclaw-plan-format.test.js +137 -0
  80. package/dist/discoclaw-recipe-format.test.js +137 -0
  81. package/dist/discord/abort-registry.js +70 -0
  82. package/dist/discord/action-categories.js +36 -0
  83. package/dist/discord/action-types.js +1 -0
  84. package/dist/discord/action-utils.js +58 -0
  85. package/dist/discord/action-utils.test.js +58 -0
  86. package/dist/discord/actions-beads.js +1 -0
  87. package/dist/discord/actions-beads.test.js +372 -0
  88. package/dist/discord/actions-bot-profile.js +107 -0
  89. package/dist/discord/actions-bot-profile.test.js +138 -0
  90. package/dist/discord/actions-channels.js +427 -0
  91. package/dist/discord/actions-channels.test.js +697 -0
  92. package/dist/discord/actions-config.js +173 -0
  93. package/dist/discord/actions-config.test.js +322 -0
  94. package/dist/discord/actions-crons.js +586 -0
  95. package/dist/discord/actions-crons.test.js +499 -0
  96. package/dist/discord/actions-defer.js +60 -0
  97. package/dist/discord/actions-defer.test.js +134 -0
  98. package/dist/discord/actions-forge.js +134 -0
  99. package/dist/discord/actions-forge.test.js +206 -0
  100. package/dist/discord/actions-guild.js +301 -0
  101. package/dist/discord/actions-guild.test.js +386 -0
  102. package/dist/discord/actions-memory.js +106 -0
  103. package/dist/discord/actions-memory.test.js +248 -0
  104. package/dist/discord/actions-messaging.js +401 -0
  105. package/dist/discord/actions-messaging.test.js +738 -0
  106. package/dist/discord/actions-moderation.js +65 -0
  107. package/dist/discord/actions-moderation.test.js +88 -0
  108. package/dist/discord/actions-plan.js +445 -0
  109. package/dist/discord/actions-plan.test.js +610 -0
  110. package/dist/discord/actions-poll.js +38 -0
  111. package/dist/discord/actions-poll.test.js +93 -0
  112. package/dist/discord/actions-tasks.js +3 -0
  113. package/dist/discord/actions-tasks.test.js +418 -0
  114. package/dist/discord/actions.js +600 -0
  115. package/dist/discord/actions.test.js +522 -0
  116. package/dist/discord/allowed-mentions.js +3 -0
  117. package/dist/discord/allowed-mentions.test.js +17 -0
  118. package/dist/discord/allowlist.js +29 -0
  119. package/dist/discord/allowlist.test.js +24 -0
  120. package/dist/discord/audit-handler.js +191 -0
  121. package/dist/discord/audit-handler.test.js +361 -0
  122. package/dist/discord/bot.js +141 -0
  123. package/dist/discord/channel-context.js +181 -0
  124. package/dist/discord/defer-scheduler.js +45 -0
  125. package/dist/discord/destructive-confirmation.js +128 -0
  126. package/dist/discord/destructive-confirmation.test.js +49 -0
  127. package/dist/discord/discord-plan-auto-implement.test.js +18 -0
  128. package/dist/discord/durable-memory.js +145 -0
  129. package/dist/discord/durable-memory.test.js +281 -0
  130. package/dist/discord/durable-write-queue.js +4 -0
  131. package/dist/discord/file-download.js +308 -0
  132. package/dist/discord/file-download.test.js +303 -0
  133. package/dist/discord/forge-audit-verdict.js +140 -0
  134. package/dist/discord/forge-auto-implement.js +80 -0
  135. package/dist/discord/forge-auto-implement.test.js +110 -0
  136. package/dist/discord/forge-commands.js +698 -0
  137. package/dist/discord/forge-commands.test.js +1606 -0
  138. package/dist/discord/forge-plan-registry.js +68 -0
  139. package/dist/discord/forge-plan-registry.test.js +127 -0
  140. package/dist/discord/forum-count-sync.js +130 -0
  141. package/dist/discord/forum-count-sync.test.js +200 -0
  142. package/dist/discord/health-command.js +98 -0
  143. package/dist/discord/health-command.test.js +195 -0
  144. package/dist/discord/help-command.js +22 -0
  145. package/dist/discord/help-command.test.js +49 -0
  146. package/dist/discord/image-download.js +201 -0
  147. package/dist/discord/image-download.test.js +499 -0
  148. package/dist/discord/inflight-replies.js +228 -0
  149. package/dist/discord/inflight-replies.test.js +295 -0
  150. package/dist/discord/json-extract.js +110 -0
  151. package/dist/discord/keyed-queue.js +22 -0
  152. package/dist/discord/memory-commands.js +85 -0
  153. package/dist/discord/memory-commands.test.js +159 -0
  154. package/dist/discord/memory-timing.integration.test.js +159 -0
  155. package/dist/discord/message-coordinator.js +2347 -0
  156. package/dist/discord/message-coordinator.onboarding.test.js +183 -0
  157. package/dist/discord/message-coordinator.plan-run.test.js +264 -0
  158. package/dist/discord/message-history.js +53 -0
  159. package/dist/discord/message-history.test.js +95 -0
  160. package/dist/discord/models-command.js +59 -0
  161. package/dist/discord/models-command.test.js +150 -0
  162. package/dist/discord/nickname.test.js +76 -0
  163. package/dist/discord/onboarding-completion.js +55 -0
  164. package/dist/discord/onboarding-completion.test.js +176 -0
  165. package/dist/discord/output-common.js +178 -0
  166. package/dist/discord/output-common.test.js +198 -0
  167. package/dist/discord/output-utils.js +156 -0
  168. package/dist/discord/parse-identity-name.test.js +129 -0
  169. package/dist/discord/plan-commands.js +612 -0
  170. package/dist/discord/plan-commands.test.js +1622 -0
  171. package/dist/discord/plan-manager.js +1491 -0
  172. package/dist/discord/plan-manager.test.js +2380 -0
  173. package/dist/discord/plan-parser.js +110 -0
  174. package/dist/discord/plan-parser.test.js +63 -0
  175. package/dist/discord/plan-run-phase-start.js +20 -0
  176. package/dist/discord/plan-run-phase-start.test.js +29 -0
  177. package/dist/discord/platform-message.js +45 -0
  178. package/dist/discord/platform-message.test.js +110 -0
  179. package/dist/discord/prompt-common.js +240 -0
  180. package/dist/discord/prompt-common.test.js +423 -0
  181. package/dist/discord/reaction-handler.js +691 -0
  182. package/dist/discord/reaction-handler.test.js +1574 -0
  183. package/dist/discord/reaction-prompts.js +118 -0
  184. package/dist/discord/reaction-prompts.test.js +253 -0
  185. package/dist/discord/reply-reference.js +66 -0
  186. package/dist/discord/reply-reference.test.js +125 -0
  187. package/dist/discord/restart-command.js +143 -0
  188. package/dist/discord/restart-command.test.js +196 -0
  189. package/dist/discord/runtime-utils.js +43 -0
  190. package/dist/discord/runtime-utils.test.js +112 -0
  191. package/dist/discord/session-key.js +7 -0
  192. package/dist/discord/session-key.test.js +13 -0
  193. package/dist/discord/shortterm-memory.js +166 -0
  194. package/dist/discord/shortterm-memory.test.js +345 -0
  195. package/dist/discord/shutdown-context.js +122 -0
  196. package/dist/discord/shutdown-context.test.js +279 -0
  197. package/dist/discord/startup-profile.test.js +214 -0
  198. package/dist/discord/status-channel.js +190 -0
  199. package/dist/discord/status-channel.test.js +282 -0
  200. package/dist/discord/status-command.js +206 -0
  201. package/dist/discord/status-command.test.js +341 -0
  202. package/dist/discord/streaming-progress.js +107 -0
  203. package/dist/discord/streaming-progress.test.js +93 -0
  204. package/dist/discord/summarizer.js +89 -0
  205. package/dist/discord/summarizer.test.js +245 -0
  206. package/dist/discord/system-bootstrap.js +396 -0
  207. package/dist/discord/system-bootstrap.test.js +724 -0
  208. package/dist/discord/thread-context.js +169 -0
  209. package/dist/discord/thread-context.test.js +386 -0
  210. package/dist/discord/tool-aware-queue.js +116 -0
  211. package/dist/discord/tool-aware-queue.test.js +180 -0
  212. package/dist/discord/update-command.js +127 -0
  213. package/dist/discord/update-command.test.js +275 -0
  214. package/dist/discord/user-errors.js +40 -0
  215. package/dist/discord/user-errors.test.js +31 -0
  216. package/dist/discord/user-turn-to-durable.js +111 -0
  217. package/dist/discord/user-turn-to-durable.test.js +273 -0
  218. package/dist/discord-followup.test.js +677 -0
  219. package/dist/discord.channel-context.test.js +95 -0
  220. package/dist/discord.fail-closed.test.js +199 -0
  221. package/dist/discord.health-command.integration.test.js +140 -0
  222. package/dist/discord.js +190 -0
  223. package/dist/discord.prompt-context.test.js +1431 -0
  224. package/dist/discord.render.test.js +621 -0
  225. package/dist/discord.status-wiring.test.js +187 -0
  226. package/dist/engine/claudeCli.js +137 -0
  227. package/dist/engine/types.js +1 -0
  228. package/dist/group-queue.js +25 -0
  229. package/dist/health/credential-check.js +175 -0
  230. package/dist/health/credential-check.test.js +401 -0
  231. package/dist/health/startup-healing.js +139 -0
  232. package/dist/health/startup-healing.test.js +298 -0
  233. package/dist/identity.js +36 -0
  234. package/dist/index.js +1378 -0
  235. package/dist/logging/logger-like.js +1 -0
  236. package/dist/observability/memory-sampler.js +51 -0
  237. package/dist/observability/memory-sampler.test.js +93 -0
  238. package/dist/observability/metrics.js +88 -0
  239. package/dist/observability/metrics.test.js +42 -0
  240. package/dist/onboarding/onboarding-flow.js +246 -0
  241. package/dist/onboarding/onboarding-flow.test.js +238 -0
  242. package/dist/onboarding/onboarding-writer.js +102 -0
  243. package/dist/onboarding/onboarding-writer.test.js +143 -0
  244. package/dist/pidlock.js +187 -0
  245. package/dist/pidlock.test.js +128 -0
  246. package/dist/pipeline/engine.js +206 -0
  247. package/dist/pipeline/engine.test.js +771 -0
  248. package/dist/root-policy.js +21 -0
  249. package/dist/root-policy.test.js +55 -0
  250. package/dist/runtime/claude-code-cli.js +35 -0
  251. package/dist/runtime/claude-code-cli.test.js +1199 -0
  252. package/dist/runtime/cli-adapter.js +584 -0
  253. package/dist/runtime/cli-output-parsers.js +108 -0
  254. package/dist/runtime/cli-shared.js +96 -0
  255. package/dist/runtime/cli-shared.test.js +104 -0
  256. package/dist/runtime/cli-strategy.js +6 -0
  257. package/dist/runtime/codex-cli.js +16 -0
  258. package/dist/runtime/codex-cli.test.js +862 -0
  259. package/dist/runtime/concurrency-limit.js +80 -0
  260. package/dist/runtime/concurrency-limit.test.js +137 -0
  261. package/dist/runtime/gemini-cli.js +16 -0
  262. package/dist/runtime/gemini-cli.test.js +413 -0
  263. package/dist/runtime/long-running-process.js +415 -0
  264. package/dist/runtime/long-running-process.test.js +318 -0
  265. package/dist/runtime/model-smoke-helpers.js +160 -0
  266. package/dist/runtime/model-smoke.test.js +194 -0
  267. package/dist/runtime/model-tiers.js +33 -0
  268. package/dist/runtime/model-tiers.test.js +65 -0
  269. package/dist/runtime/openai-auth.js +151 -0
  270. package/dist/runtime/openai-auth.test.js +361 -0
  271. package/dist/runtime/openai-compat.js +178 -0
  272. package/dist/runtime/openai-compat.test.js +449 -0
  273. package/dist/runtime/process-pool.js +93 -0
  274. package/dist/runtime/process-pool.test.js +148 -0
  275. package/dist/runtime/registry.js +15 -0
  276. package/dist/runtime/registry.test.js +47 -0
  277. package/dist/runtime/session-scanner.js +186 -0
  278. package/dist/runtime/session-scanner.test.js +257 -0
  279. package/dist/runtime/strategies/claude-strategy.js +193 -0
  280. package/dist/runtime/strategies/codex-strategy.js +161 -0
  281. package/dist/runtime/strategies/gemini-strategy.js +64 -0
  282. package/dist/runtime/strategies/template-strategy.js +85 -0
  283. package/dist/runtime/tool-capabilities.js +27 -0
  284. package/dist/runtime/tool-capabilities.test.js +24 -0
  285. package/dist/runtime/tool-labels.js +48 -0
  286. package/dist/runtime/types.js +2 -0
  287. package/dist/sessionManager.js +47 -0
  288. package/dist/sessions.js +18 -0
  289. package/dist/tasks/architecture-contract.js +33 -0
  290. package/dist/tasks/architecture-contract.test.js +90 -0
  291. package/dist/tasks/auto-tag.js +50 -0
  292. package/dist/tasks/auto-tag.test.js +64 -0
  293. package/dist/tasks/bd-cli.js +164 -0
  294. package/dist/tasks/bd-cli.test.js +359 -0
  295. package/dist/tasks/bead-sync.js +1 -0
  296. package/dist/tasks/context-summary.js +27 -0
  297. package/dist/tasks/discord-sync.js +3 -0
  298. package/dist/tasks/discord-sync.test.js +685 -0
  299. package/dist/tasks/discord-types.js +4 -0
  300. package/dist/tasks/find-task-by-thread.test.js +36 -0
  301. package/dist/tasks/forum-guard.js +81 -0
  302. package/dist/tasks/forum-guard.test.js +192 -0
  303. package/dist/tasks/initialize.js +77 -0
  304. package/dist/tasks/initialize.test.js +263 -0
  305. package/dist/tasks/logger-types.js +1 -0
  306. package/dist/tasks/metrics-types.js +3 -0
  307. package/dist/tasks/migrate.js +33 -0
  308. package/dist/tasks/migrate.test.js +156 -0
  309. package/dist/tasks/path-defaults.js +67 -0
  310. package/dist/tasks/path-defaults.test.js +73 -0
  311. package/dist/tasks/runtime-types.js +1 -0
  312. package/dist/tasks/service.js +33 -0
  313. package/dist/tasks/service.test.js +51 -0
  314. package/dist/tasks/store.js +238 -0
  315. package/dist/tasks/store.test.js +417 -0
  316. package/dist/tasks/sync-context.js +1 -0
  317. package/dist/tasks/sync-contract.js +24 -0
  318. package/dist/tasks/sync-contract.test.js +25 -0
  319. package/dist/tasks/sync-coordinator-metrics.js +41 -0
  320. package/dist/tasks/sync-coordinator-retries.js +71 -0
  321. package/dist/tasks/sync-coordinator.js +96 -0
  322. package/dist/tasks/sync-coordinator.test.js +501 -0
  323. package/dist/tasks/sync-types.js +1 -0
  324. package/dist/tasks/sync-watcher.js +27 -0
  325. package/dist/tasks/sync-watcher.test.js +92 -0
  326. package/dist/tasks/tag-map.js +36 -0
  327. package/dist/tasks/tag-map.test.js +54 -0
  328. package/dist/tasks/task-action-contract.js +16 -0
  329. package/dist/tasks/task-action-contract.test.js +16 -0
  330. package/dist/tasks/task-action-executor.js +18 -0
  331. package/dist/tasks/task-action-executor.test.js +420 -0
  332. package/dist/tasks/task-action-mutation-helpers.js +17 -0
  333. package/dist/tasks/task-action-mutations.js +151 -0
  334. package/dist/tasks/task-action-prompt.js +62 -0
  335. package/dist/tasks/task-action-read-ops.js +73 -0
  336. package/dist/tasks/task-action-runner-types.js +1 -0
  337. package/dist/tasks/task-action-thread-sync.js +82 -0
  338. package/dist/tasks/task-actions.js +3 -0
  339. package/dist/tasks/task-cli.js +227 -0
  340. package/dist/tasks/task-context.js +1 -0
  341. package/dist/tasks/task-lifecycle.js +46 -0
  342. package/dist/tasks/task-lifecycle.test.js +35 -0
  343. package/dist/tasks/task-sync-apply-plan.js +95 -0
  344. package/dist/tasks/task-sync-apply-types.js +12 -0
  345. package/dist/tasks/task-sync-apply.js +319 -0
  346. package/dist/tasks/task-sync-cli.js +89 -0
  347. package/dist/tasks/task-sync-cli.test.js +70 -0
  348. package/dist/tasks/task-sync-engine.js +88 -0
  349. package/dist/tasks/task-sync-engine.test.js +934 -0
  350. package/dist/tasks/task-sync-phase-apply.js +171 -0
  351. package/dist/tasks/task-sync-pipeline.js +2 -0
  352. package/dist/tasks/task-sync-pipeline.test.js +265 -0
  353. package/dist/tasks/task-sync-reconcile-plan.js +182 -0
  354. package/dist/tasks/task-sync-reconcile.js +144 -0
  355. package/dist/tasks/task-sync.js +56 -0
  356. package/dist/tasks/task-sync.test.js +86 -0
  357. package/dist/tasks/thread-cache.js +42 -0
  358. package/dist/tasks/thread-cache.test.js +89 -0
  359. package/dist/tasks/thread-contracts.test.js +711 -0
  360. package/dist/tasks/thread-forum-ops.js +68 -0
  361. package/dist/tasks/thread-helpers.js +86 -0
  362. package/dist/tasks/thread-helpers.test.js +33 -0
  363. package/dist/tasks/thread-lifecycle-ops.js +144 -0
  364. package/dist/tasks/thread-ops-shared.js +21 -0
  365. package/dist/tasks/thread-ops.js +2 -0
  366. package/dist/tasks/types.js +20 -0
  367. package/dist/tasks/types.test.js +60 -0
  368. package/dist/test-setup.js +11 -0
  369. package/dist/test-setup.test.js +42 -0
  370. package/dist/transport/types.js +1 -0
  371. package/dist/validate.js +41 -0
  372. package/dist/validate.test.js +94 -0
  373. package/dist/version.js +15 -0
  374. package/dist/version.test.js +31 -0
  375. package/dist/webhook/server.js +199 -0
  376. package/dist/webhook/server.test.js +460 -0
  377. package/dist/workspace-bootstrap.js +135 -0
  378. package/dist/workspace-bootstrap.test.js +514 -0
  379. package/dist/workspace-permissions.js +134 -0
  380. package/dist/workspace-permissions.test.js +181 -0
  381. package/package.json +74 -0
  382. package/scripts/cron/cron-tag-map.json +9 -0
  383. package/scripts/tasks/tag-map.json +10 -0
  384. package/systemd/discoclaw.service +19 -0
  385. package/templates/recipes/integration.discoclaw-recipe.md +171 -0
  386. package/templates/workspace/AGENTS.md +217 -0
  387. package/templates/workspace/BOOTSTRAP.md +1 -0
  388. package/templates/workspace/HEARTBEAT.md +10 -0
  389. package/templates/workspace/IDENTITY.md +16 -0
  390. package/templates/workspace/MEMORY.md +24 -0
  391. package/templates/workspace/SOUL.md +52 -0
  392. package/templates/workspace/TOOLS.md +304 -0
  393. package/templates/workspace/USER.md +37 -0
@@ -0,0 +1,1606 @@
1
+ import { describe, expect, it, vi } from 'vitest';
2
+ import fs from 'node:fs/promises';
3
+ import os from 'node:os';
4
+ import path from 'node:path';
5
+ import { parseForgeCommand, parseAuditVerdict, buildDrafterPrompt, buildAuditorPrompt, buildRevisionPrompt, buildPlanSummary, appendAuditRound, ForgeOrchestrator, } from './forge-commands.js';
6
+ import { TaskStore } from '../tasks/store.js';
7
/**
 * Create a fresh, uniquely named temporary directory for a single test.
 *
 * The directory is created under the OS temp dir with the prefix
 * `forge-test-`; cleanup is left to the caller.
 *
 * @returns {Promise<string>} Path of the newly created directory.
 */
async function makeTmpDir() {
    const prefix = path.join(os.tmpdir(), 'forge-test-');
    return fs.mkdtemp(prefix);
}
10
/**
 * Build a fake runtime whose `invoke` replays canned responses in order.
 *
 * Each call to `invoke` consumes the next entry of `responses` and returns an
 * async generator that yields a single `text_final` event carrying that text.
 * Once the list is exhausted (or an entry is null/undefined) the placeholder
 * `'(no response)'` is used instead.
 *
 * @param {string[]} responses - Texts to return, one per `invoke` call.
 * @returns {{id: string, capabilities: Set<string>, invoke: Function}} Mock runtime.
 */
function makeMockRuntime(responses) {
    let callIndex = 0;
    return {
        id: 'claude_code',
        capabilities: new Set(['streaming_text']),
        invoke(_params) {
            // Capture the text eagerly so the counter advances per invoke()
            // call, not per generator iteration.
            const text = responses[callIndex] ?? '(no response)';
            callIndex++;
            return (async function* () {
                yield { type: 'text_final', text };
            })();
        },
    };
}
24
/**
 * Build a fake runtime that fails on one specific call and otherwise replays
 * canned responses.
 *
 * The zero-based call whose index equals `errorOnCall` yields a single
 * `error` event with the message `'Runtime crashed'`. Every other call yields
 * a `text_final` event with `responses[callIndex]`, falling back to
 * `'(no response)'` when that entry is null/undefined. The failing call still
 * consumes its index, so `responses` is addressed by absolute call number.
 *
 * @param {number} errorOnCall - Zero-based index of the call that should error.
 * @param {string[]} responses - Texts indexed by call number.
 * @returns {{id: string, capabilities: Set<string>, invoke: Function}} Mock runtime.
 */
function makeMockRuntimeWithError(errorOnCall, responses) {
    let callIndex = 0;
    return {
        id: 'claude_code',
        capabilities: new Set(['streaming_text']),
        invoke(_params) {
            const idx = callIndex++;
            if (idx === errorOnCall) {
                return (async function* () {
                    yield { type: 'error', message: 'Runtime crashed' };
                })();
            }
            const text = responses[idx] ?? '(no response)';
            return (async function* () {
                yield { type: 'text_final', text };
            })();
        },
    };
}
43
/**
 * Prepare an on-disk plans directory and return a baseline options object
 * for the tests in this file.
 *
 * Side effects: creates `<tmpDir>/plans` (recursively) and writes a minimal
 * `.plan-template.md` into it.
 *
 * @param {string} tmpDir - Scratch directory; used as both `cwd` and `workspaceCwd`.
 * @param {object} runtime - Runtime passed through unchanged as `runtime`.
 * @param {object} [overrides={}] - Properties spread last, so they replace any default.
 * @returns {Promise<object>} Options object with defaults plus `overrides`.
 */
async function baseOpts(tmpDir, runtime, overrides = {}) {
    const plansDir = path.join(tmpDir, 'plans');
    await fs.mkdir(plansDir, { recursive: true });
    // Write a minimal template
    await fs.writeFile(path.join(plansDir, '.plan-template.md'), `# Plan: {{TITLE}}\n\n**ID:** {{PLAN_ID}}\n**Task:** {{TASK_ID}}\n**Created:** {{DATE}}\n**Status:** DRAFT\n**Project:** {{PROJECT}}\n\n---\n\n## Objective\n\n## Scope\n\n## Changes\n\n## Risks\n\n## Testing\n\n---\n\n## Audit Log\n\n---\n\n## Implementation Notes\n\n_Filled in during/after implementation._\n`);
    return {
        runtime,
        model: 'test-model',
        cwd: tmpDir,
        workspaceCwd: tmpDir,
        taskStore: new TaskStore({ prefix: 'ws' }),
        plansDir,
        maxAuditRounds: 5,
        // Zero throttle so progress updates are not delayed in tests.
        progressThrottleMs: 0,
        timeoutMs: 30000,
        ...overrides,
    };
}
61
+ // ---------------------------------------------------------------------------
62
+ // parseForgeCommand
63
+ // ---------------------------------------------------------------------------
64
// Covers parseForgeCommand: non-forge input is rejected with null, a bare
// `!forge` maps to help, the reserved words (status, cancel, help, audit) map
// to their own actions, and anything else is treated as a create description.
describe('parseForgeCommand', () => {
    it('returns null for non-forge messages', () => {
        expect(parseForgeCommand('hello world')).toBeNull();
        expect(parseForgeCommand('!plan create something')).toBeNull();
        expect(parseForgeCommand('!memory show')).toBeNull();
        expect(parseForgeCommand('')).toBeNull();
    });
    it('returns null for !forging or !forger (prefix collision)', () => {
        // The command word must match exactly, not merely as a prefix.
        expect(parseForgeCommand('!forging something')).toBeNull();
        expect(parseForgeCommand('!forger')).toBeNull();
    });
    it('!forge with no args returns help', () => {
        expect(parseForgeCommand('!forge')).toEqual({ action: 'help', args: '' });
    });
    it('!forge with extra whitespace returns help', () => {
        expect(parseForgeCommand(' !forge ')).toEqual({ action: 'help', args: '' });
    });
    it('parses create from description text', () => {
        expect(parseForgeCommand('!forge build a webhook retry system')).toEqual({
            action: 'create',
            args: 'build a webhook retry system',
        });
    });
    it('parses status as reserved subcommand', () => {
        expect(parseForgeCommand('!forge status')).toEqual({ action: 'status', args: '' });
    });
    it('parses cancel as reserved subcommand', () => {
        expect(parseForgeCommand('!forge cancel')).toEqual({ action: 'cancel', args: '' });
    });
    it('parses help explicitly', () => {
        expect(parseForgeCommand('!forge help')).toEqual({ action: 'help', args: '' });
    });
    it('parses audit as reserved subcommand with plan-id arg', () => {
        expect(parseForgeCommand('!forge audit plan-027')).toEqual({
            action: 'audit',
            args: 'plan-027',
        });
    });
    it('parses audit with no args', () => {
        expect(parseForgeCommand('!forge audit')).toEqual({ action: 'audit', args: '' });
    });
    it('treats unknown first word as create description', () => {
        // "add" is not a reserved subcommand, so the whole text is the description.
        expect(parseForgeCommand('!forge add rate limiting')).toEqual({
            action: 'create',
            args: 'add rate limiting',
        });
    });
});
112
+ // ---------------------------------------------------------------------------
113
+ // parseAuditVerdict
114
+ // ---------------------------------------------------------------------------
115
+ describe('parseAuditVerdict', () => {
116
+ it('parses json verdict payload from fenced block', () => {
117
+ const text = [
118
+ '```json',
119
+ '{"maxSeverity":"blocking","shouldLoop":true,"summary":"Critical issue","concerns":[{"title":"SQL injection","severity":"blocking"}]}',
120
+ '```',
121
+ '',
122
+ '**Concern 1: SQL injection**',
123
+ '**Severity: blocking**',
124
+ '',
125
+ '**Verdict:** Needs revision.',
126
+ ].join('\n');
127
+ expect(parseAuditVerdict(text)).toEqual({ maxSeverity: 'blocking', shouldLoop: true });
128
+ });
129
+ it('json verdict wins over contradictory prose verdict', () => {
130
+ const text = [
131
+ '```json',
132
+ '{"maxSeverity":"medium","shouldLoop":false,"summary":"Non-blocking concerns"}',
133
+ '```',
134
+ '',
135
+ '**Verdict:** Needs revision.',
136
+ ].join('\n');
137
+ expect(parseAuditVerdict(text)).toEqual({ maxSeverity: 'medium', shouldLoop: false });
138
+ });
139
+ it('falls back to legacy parser when json is malformed', () => {
140
+ const text = [
141
+ '```json',
142
+ '{"maxSeverity":"blocking","shouldLoop":true',
143
+ '```',
144
+ '',
145
+ '**Severity: medium**',
146
+ '**Verdict:** Needs revision.',
147
+ ].join('\n');
148
+ expect(parseAuditVerdict(text)).toEqual({ maxSeverity: 'medium', shouldLoop: false });
149
+ });
150
+ it('ignores unrelated json objects and falls back to severity markers', () => {
151
+ const text = [
152
+ '```json',
153
+ '{"note":"example payload"}',
154
+ '```',
155
+ '',
156
+ '**Severity: blocking**',
157
+ '**Verdict:** Needs revision.',
158
+ ].join('\n');
159
+ expect(parseAuditVerdict(text)).toEqual({ maxSeverity: 'blocking', shouldLoop: true });
160
+ });
161
+ it('supports high/low aliases in json payload', () => {
162
+ const text = [
163
+ '```json',
164
+ '{"maxSeverity":"high","shouldLoop":true}',
165
+ '```',
166
+ ].join('\n');
167
+ expect(parseAuditVerdict(text)).toEqual({ maxSeverity: 'blocking', shouldLoop: true });
168
+ });
169
+ it('text containing "Severity: blocking" -> blocking, shouldLoop', () => {
170
+ const text = '**Concern 1: Missing error handling**\n**Severity: blocking**\n\n**Verdict:** Needs revision.';
171
+ expect(parseAuditVerdict(text)).toEqual({ maxSeverity: 'blocking', shouldLoop: true });
172
+ });
173
+ it('text containing "Severity: medium" -> medium, no loop', () => {
174
+ const text = '**Concern 1: Unclear scope**\n**Severity: medium**\n\n**Verdict:** Needs revision.';
175
+ expect(parseAuditVerdict(text)).toEqual({ maxSeverity: 'medium', shouldLoop: false });
176
+ });
177
+ it('text containing "Severity: minor" -> minor, no loop', () => {
178
+ const text = '**Concern 1: Minor naming**\n**Severity: minor**\n\n**Verdict:** Ready to approve.';
179
+ expect(parseAuditVerdict(text)).toEqual({ maxSeverity: 'minor', shouldLoop: false });
180
+ });
181
+ it('text containing "Severity: suggestion" -> suggestion, no loop', () => {
182
+ const text = '**Concern 1: Future idea**\n**Severity: suggestion**\n\n**Verdict:** Ready to approve.';
183
+ expect(parseAuditVerdict(text)).toEqual({ maxSeverity: 'suggestion', shouldLoop: false });
184
+ });
185
+ it('backward compat: "Severity: high" -> blocking, shouldLoop', () => {
186
+ const text = '**Concern 1: Missing error handling**\n**Severity: high**\n\n**Verdict:** Needs revision.';
187
+ expect(parseAuditVerdict(text)).toEqual({ maxSeverity: 'blocking', shouldLoop: true });
188
+ });
189
+ it('backward compat: "Severity: HIGH" (uppercase) -> blocking, shouldLoop', () => {
190
+ const text = '**Concern 1: Missing error handling**\n**Severity: HIGH**\n\n**Verdict:** Needs revision.';
191
+ expect(parseAuditVerdict(text)).toEqual({ maxSeverity: 'blocking', shouldLoop: true });
192
+ });
193
+ it('backward compat: "Severity: low" -> minor, no loop', () => {
194
+ const text = '**Concern 1: Minor naming**\n**Severity: low**\n\n**Verdict:** Ready to approve.';
195
+ expect(parseAuditVerdict(text)).toEqual({ maxSeverity: 'minor', shouldLoop: false });
196
+ });
197
+ it('"Ready to approve" with no severity markers -> minor, no loop', () => {
198
+ const text = 'No concerns found.\n\n**Verdict:** Ready to approve.';
199
+ expect(parseAuditVerdict(text)).toEqual({ maxSeverity: 'minor', shouldLoop: false });
200
+ });
201
+ it('empty text -> none, no loop', () => {
202
+ expect(parseAuditVerdict('')).toEqual({ maxSeverity: 'none', shouldLoop: false });
203
+ });
204
+ it('whitespace-only text -> none, no loop', () => {
205
+ expect(parseAuditVerdict(' \n ')).toEqual({ maxSeverity: 'none', shouldLoop: false });
206
+ });
207
+ it('malformed text with no markers -> none, no loop', () => {
208
+ expect(parseAuditVerdict('This plan looks interesting.')).toEqual({
209
+ maxSeverity: 'none',
210
+ shouldLoop: false,
211
+ });
212
+ });
213
+ it('blocking takes precedence over medium', () => {
214
+ const text = '**Severity: medium**\n**Severity: blocking**\n**Verdict:** Needs revision.';
215
+ expect(parseAuditVerdict(text)).toEqual({ maxSeverity: 'blocking', shouldLoop: true });
216
+ });
217
+ it('medium takes precedence over minor and suggestion', () => {
218
+ const text = '**Severity: minor**\n**Severity: medium**\n**Severity: suggestion**\n**Verdict:** Ready to approve.';
219
+ expect(parseAuditVerdict(text)).toEqual({ maxSeverity: 'medium', shouldLoop: false });
220
+ });
221
// Severity expressed inside markdown tables, one row per test:
// [title, audit text, expected verdict]. Includes a table whose severity cell
// contradicts the trailing verdict line and the legacy "high" label.
for (const [title, auditText, expectedVerdict] of [
    [
        'detects severity in markdown table rows (without fallback)',
        '| # | Concern | Severity |\n|---|---------|----------|\n| 1 | Missing tests | **medium** |\n| 2 | Minor naming | **minor** |\n\n**Verdict:** Ready to approve.',
        { maxSeverity: 'medium', shouldLoop: false },
    ],
    [
        'detects severity in table cells without bold formatting',
        '| Concern | Rating |\n|---|---|\n| Missing tests | medium |\n\n**Verdict:** Ready to approve.',
        { maxSeverity: 'medium', shouldLoop: false },
    ],
    [
        'detects blocking severity in table cells',
        '| Concern | Rating |\n|---|---|\n| SQL injection | blocking |\n\n**Verdict:** Needs revision.',
        { maxSeverity: 'blocking', shouldLoop: true },
    ],
    [
        'detects severity in table header column',
        '| Concern | Severity |\n|---|---|\n| Missing tests | Severity: medium |\n\n**Verdict:** Needs revision.',
        { maxSeverity: 'medium', shouldLoop: false },
    ],
    [
        'severity markers win over contradictory verdict text',
        '| # | Concern | Severity |\n|---|---------|----------|\n| 1 | SQL injection | **blocking** |\n\n**Verdict:** Ready to approve.',
        { maxSeverity: 'blocking', shouldLoop: true },
    ],
    [
        'backward compat: table with **high** maps to blocking',
        '| # | Concern | Severity |\n|---|---------|----------|\n| 1 | SQL injection | **high** |\n\n**Verdict:** Ready to approve.',
        { maxSeverity: 'blocking', shouldLoop: true },
    ],
]) {
    it(title, () => {
        expect(parseAuditVerdict(auditText)).toEqual(expectedVerdict);
    });
}
245
// Audit texts without any severity marker, one row per test:
// [title, audit text, expected verdict]. The first two rows pin the verdict-line
// fallback; the rest make sure the words "high" / "blocking" appearing in plain
// prose are not mistaken for severity markers.
for (const [title, auditText, expectedVerdict] of [
    [
        'falls back to "Needs revision" verdict when no severity markers present',
        'Some concerns found.\n\n**Verdict:** Needs revision.',
        { maxSeverity: 'blocking', shouldLoop: true },
    ],
    [
        'falls back to "Ready to approve" verdict when no severity markers present',
        'Minor things but overall good.\n\nVerdict: Ready to approve.',
        { maxSeverity: 'minor', shouldLoop: false },
    ],
    [
        'does not false-positive on "high" in prose without formatting',
        'The code quality is high.\n\n**Verdict:** Ready to approve.',
        { maxSeverity: 'minor', shouldLoop: false },
    ],
    [
        'does not false-positive on bold "high" in prose without severity marker',
        '**Concern 1: Throughput concerns**\nExpected load is **high** during peak windows.\n\n**Verdict:** Ready to approve.',
        { maxSeverity: 'minor', shouldLoop: false },
    ],
    [
        'does not false-positive on "blocking" in prose without severity marker',
        '**Concern 1: I/O pattern**\nUses blocking I/O for file reads.\n\n**Verdict:** Ready to approve.',
        { maxSeverity: 'minor', shouldLoop: false },
    ],
]) {
    it(title, () => {
        expect(parseAuditVerdict(auditText)).toEqual(expectedVerdict);
    });
}
265
// --- Legacy "Concern N (severity)" format ---
// Older audits put the severity in parentheses inside the concern heading instead
// of a separate "Severity:" line. One row per test: [title, audit text, expected
// verdict]; the "mixed format" rows combine both notations in one audit.
for (const [title, auditText, expectedVerdict] of [
    [
        'legacy format: "Concern 1 (high)" with no Severity label -> blocking, shouldLoop',
        '**Concern 1 (high): Missing validation**\nDetails here.\n\n**Verdict:** Needs revision.',
        { maxSeverity: 'blocking', shouldLoop: true },
    ],
    [
        'legacy format: "Concern 1 (medium)" with no Severity label -> medium, no loop',
        '**Concern 1 (medium): Edge case missing**\nDetails here.\n\n**Verdict:** Needs revision.',
        { maxSeverity: 'medium', shouldLoop: false },
    ],
    [
        'legacy format: "Concern 1 (low)" with no Severity label -> minor, no loop',
        '**Concern 1 (low): Naming issue**\nDetails here.\n\n**Verdict:** Ready to approve.',
        { maxSeverity: 'minor', shouldLoop: false },
    ],
    [
        'legacy format: "Concern 1 (blocking)" with no Severity label -> blocking, shouldLoop',
        '**Concern 1 (blocking): Security flaw**\nDetails here.\n\n**Verdict:** Needs revision.',
        { maxSeverity: 'blocking', shouldLoop: true },
    ],
    [
        'mixed format: "Severity: medium" + "Concern 2 (high)" -> blocking, shouldLoop',
        '**Concern 1: Issue A**\n**Severity: medium**\n\n**Concern 2 (high): Issue B**\nDetails.\n\n**Verdict:** Needs revision.',
        { maxSeverity: 'blocking', shouldLoop: true },
    ],
    [
        'mixed format: "Severity: medium" + "Concern 2 (minor)" -> medium, no loop',
        '**Concern 1: Issue A**\n**Severity: medium**\n\n**Concern 2 (minor): Issue B**\nDetails.\n\n**Verdict:** Ready to approve.',
        { maxSeverity: 'medium', shouldLoop: false },
    ],
    [
        'legacy format: "**Item count mismatch (medium):**" -> medium, no loop',
        '**Item count mismatch (medium):**\nExpected 5, got 3.\n\n**Verdict:** Needs revision.',
        { maxSeverity: 'medium', shouldLoop: false },
    ],
]) {
    it(title, () => {
        expect(parseAuditVerdict(auditText)).toEqual(expectedVerdict);
    });
}
294
// --- Precedence: explicit severity markers vs. the verdict sentence ---
// Each row is [title, audit text, expected verdict]. In every case the verdict
// line disagrees with (or is stricter/looser than) the severity markers, and the
// expected result follows the markers.
for (const [title, auditText, expectedVerdict] of [
    [
        'precedence: "Severity: medium" + "Needs revision" -> medium, no loop (severity markers win)',
        '**Concern 1: Issue**\n**Severity: medium**\n\n**Verdict:** Needs revision.',
        { maxSeverity: 'medium', shouldLoop: false },
    ],
    [
        'precedence: "Severity: blocking" + "Ready to approve" -> blocking, shouldLoop (severity markers win)',
        '**Concern 1: Issue**\n**Severity: blocking**\n\n**Verdict:** Ready to approve.',
        { maxSeverity: 'blocking', shouldLoop: true },
    ],
    [
        'precedence: "Severity: minor" + "Needs revision" -> minor, no loop (severity markers win)',
        '**Concern 1: Issue**\n**Severity: minor**\n\n**Verdict:** Needs revision.',
        { maxSeverity: 'minor', shouldLoop: false },
    ],
    [
        'precedence: "Severity: medium" + "Severity: minor" + "Needs revision" -> medium, no loop',
        '**Concern 1: Issue A**\n**Severity: medium**\n\n**Concern 2: Issue B**\n**Severity: minor**\n\n**Verdict:** Needs revision.',
        { maxSeverity: 'medium', shouldLoop: false },
    ],
]) {
    it(title, () => {
        expect(parseAuditVerdict(auditText)).toEqual(expectedVerdict);
    });
}
311
+ });
312
+ // ---------------------------------------------------------------------------
313
+ // buildDrafterPrompt / buildAuditorPrompt / buildRevisionPrompt
314
+ // ---------------------------------------------------------------------------
315
describe('buildDrafterPrompt', () => {
    it('includes description, template, and context', () => {
        const drafterPrompt = buildDrafterPrompt('Add rate limiting', '## Template', 'Some context');
        // The three arguments must appear in the prompt, along with the fixed
        // instruction telling the drafter to read the codebase.
        const requiredFragments = ['Add rate limiting', '## Template', 'Some context', 'Read the codebase'];
        for (const fragment of requiredFragments) {
            expect(drafterPrompt).toContain(fragment);
        }
    });
});
324
describe('buildAuditorPrompt', () => {
    // Assert, in order, that every fragment is present in the prompt.
    const expectAllPresent = (prompt, fragments) => {
        for (const fragment of fragments) {
            expect(prompt).toContain(fragment);
        }
    };
    // Assert, in order, that none of the fragments is present in the prompt.
    const expectAllAbsent = (prompt, fragments) => {
        for (const fragment of fragments) {
            expect(prompt).not.toContain(fragment);
        }
    };

    it('includes plan content and structured instructions with new severity vocabulary', () => {
        const auditorPrompt = buildAuditorPrompt('# Plan: Test\n\n## Objective\nDo stuff.', 1);
        expectAllPresent(auditorPrompt, ['# Plan: Test', 'blocking | medium | minor | suggestion']);
        // The old high/medium/low vocabulary must be gone.
        expectAllAbsent(auditorPrompt, ['Severity: high | medium | low']);
        expectAllPresent(auditorPrompt, ['audit round 1']);
    });

    it('requires a json verdict block in output format', () => {
        const auditorPrompt = buildAuditorPrompt('# Plan: Test', 1);
        expectAllPresent(auditorPrompt, [
            'Start with a fenced JSON verdict block',
            '"maxSeverity":"blocking|medium|minor|suggestion|none"',
            '`shouldLoop` must be true only when `maxSeverity` is `blocking`',
        ]);
    });

    it('includes severity level definitions', () => {
        const auditorPrompt = buildAuditorPrompt('# Plan: Test', 1);
        expectAllPresent(auditorPrompt, [
            'Correctness bugs, security issues, architectural flaws',
            'Substantive improvements',
            'Small issues: naming, style',
            'Ideas for future improvement',
        ]);
    });

    it('includes project context when provided', () => {
        const auditorPrompt = buildAuditorPrompt('# Plan: Test', 1, 'Single-user system. No concurrency guards.');
        expectAllPresent(auditorPrompt, [
            '## Project Context',
            'Single-user system. No concurrency guards.',
            'Respect them when auditing',
        ]);
    });

    it('omits project context section when not provided', () => {
        const auditorPrompt = buildAuditorPrompt('# Plan: Test', 1);
        expectAllAbsent(auditorPrompt, ['## Project Context']);
    });

    it('includes prior audit history instructions for round > 1', () => {
        const auditorPrompt = buildAuditorPrompt('# Plan: Test', 3);
        expectAllPresent(auditorPrompt, [
            'Prior Audit History',
            'DO NOT re-raise concerns that were adequately resolved',
            'Focus on genuinely new issues',
        ]);
    });

    it('omits prior audit history instructions for round 1', () => {
        const auditorPrompt = buildAuditorPrompt('# Plan: Test', 1);
        expectAllAbsent(auditorPrompt, ['Prior Audit History', 'DO NOT re-raise']);
    });

    it('includes verification instructions for tool use', () => {
        const auditorPrompt = buildAuditorPrompt('# Plan: Test', 1);
        expectAllPresent(auditorPrompt, [
            '## Verification',
            'Read, Glob, and Grep tools',
            'Use them before raising concerns',
            'concern evaporates after checking the code',
        ]);
    });
});
374
describe('buildRevisionPrompt', () => {
    it('includes plan, audit notes, and description', () => {
        const revisionPrompt = buildRevisionPrompt('# Plan: Test', 'Concern 1: bad thing', 'Add feature');
        // All three inputs must be echoed into the prompt.
        for (const fragment of ['# Plan: Test', 'Concern 1: bad thing', 'Add feature']) {
            expect(revisionPrompt).toContain(fragment);
        }
    });

    it('includes project context when provided', () => {
        const revisionPrompt = buildRevisionPrompt('# Plan: Test', 'Concern 1: bad', 'Add feature', 'Single-user system.');
        for (const fragment of ['## Project Context', 'Single-user system.', 'do not re-introduce complexity']) {
            expect(revisionPrompt).toContain(fragment);
        }
    });

    it('omits project context section when not provided', () => {
        const revisionPrompt = buildRevisionPrompt('# Plan: Test', 'Concern 1: bad', 'Add feature');
        expect(revisionPrompt).not.toContain('## Project Context');
    });

    it('includes instruction to preserve prior resolutions', () => {
        const revisionPrompt = buildRevisionPrompt('# Plan: Test', 'Concern 1: bad', 'Add feature');
        expect(revisionPrompt).toContain('Preserve resolutions from prior audit rounds');
    });

    it('references blocking severity concerns (not high and medium)', () => {
        const revisionPrompt = buildRevisionPrompt('# Plan: Test', 'Concern 1: bad', 'Add feature');
        expect(revisionPrompt).toContain('blocking severity concerns');
        // The wording from the old severity vocabulary must not reappear.
        expect(revisionPrompt).not.toContain('high and medium severity');
    });
});
401
+ // ---------------------------------------------------------------------------
402
+ // buildPlanSummary
403
+ // ---------------------------------------------------------------------------
404
describe('buildPlanSummary', () => {
    it('extracts header, objective, scope, and files from plan content', () => {
        // A fully populated plan: header fields, objective, In/Out scope lists and
        // two per-file headings under "Changes".
        const planLines = [
            '# Plan: Add rate limiting',
            '',
            '**ID:** plan-010',
            '**Task:** ws-abc',
            '**Created:** 2026-02-12',
            '**Status:** REVIEW',
            '**Project:** discoclaw',
            '',
            '---',
            '',
            '## Objective',
            '',
            'Add rate limiting to the webhook handler.',
            '',
            '## Scope',
            '',
            '**In:**',
            '- Add per-IP rate limiter',
            '- Add 429 response handling',
            '',
            '**Out:**',
            '- No changes to auth flow',
            '',
            '## Changes',
            '',
            '### File-by-file breakdown',
            '',
            '#### `src/webhook/handler.ts`',
            '',
            'Add rate limiter middleware.',
            '',
            '#### `src/webhook/rate-limiter.ts`',
            '',
            'New rate limiter module.',
            '',
            '## Risks',
            '',
            '- None.',
        ];
        const digest = buildPlanSummary(planLines.join('\n'));
        for (const fragment of [
            '**plan-010**',
            'Add rate limiting',
            'REVIEW',
            'ws-abc',
            'Add rate limiting to the webhook handler.',
            'per-IP rate limiter',
        ]) {
            expect(digest).toContain(fragment);
        }
        // Out-of-scope items are not part of the summary.
        expect(digest).not.toContain('No changes to auth flow');
        for (const filePath of ['`src/webhook/handler.ts`', '`src/webhook/rate-limiter.ts`']) {
            expect(digest).toContain(filePath);
        }
    });

    it('handles plan with no scope In/Out sections', () => {
        // Scope is free-form prose here rather than **In:** / **Out:** lists.
        const planLines = [
            '# Plan: Simple fix',
            '',
            '**ID:** plan-001',
            '**Task:** ws-001',
            '**Created:** 2026-01-01',
            '**Status:** DRAFT',
            '**Project:** test',
            '',
            '## Objective',
            '',
            'Fix the bug.',
            '',
            '## Scope',
            '',
            'Just fix one file.',
            '',
            '## Changes',
            '',
            'No structured file changes.',
            '',
            '## Risks',
        ];
        const digest = buildPlanSummary(planLines.join('\n'));
        expect(digest).toContain('Fix the bug.');
        expect(digest).toContain('Just fix one file.');
    });

    it('returns (no objective) when objective section is empty', () => {
        // "## Objective" is immediately followed by the next heading.
        const planLines = [
            '# Plan: Empty',
            '',
            '**ID:** plan-002',
            '**Task:** ws-002',
            '**Created:** 2026-01-01',
            '**Status:** DRAFT',
            '**Project:** test',
            '',
            '## Objective',
            '',
            '## Scope',
            '',
            '## Changes',
        ];
        const digest = buildPlanSummary(planLines.join('\n'));
        expect(digest).toContain('(no objective)');
    });
});
505
+ // ---------------------------------------------------------------------------
506
+ // appendAuditRound (standalone)
507
+ // ---------------------------------------------------------------------------
508
describe('appendAuditRound', () => {
    // Minimal plan with an empty audit log followed by an Implementation Notes section.
    const basePlanLines = [
        '# Plan: Test',
        '',
        '## Audit Log',
        '',
        '---',
        '',
        '## Implementation Notes',
        '',
        '_Filled in during/after implementation._',
    ];
    const basePlan = basePlanLines.join('\n');

    it('inserts audit section before Implementation Notes', () => {
        const updated = appendAuditRound(basePlan, 1, 'All good.', { maxSeverity: 'minor', shouldLoop: false });
        for (const fragment of ['### Review 1', 'All good.', '**Status:** COMPLETE']) {
            expect(updated).toContain(fragment);
        }
        // Implementation Notes should still be present and come after the audit
        const reviewOffset = updated.indexOf('### Review 1');
        const notesOffset = updated.indexOf('## Implementation Notes');
        expect(notesOffset).toBeGreaterThan(reviewOffset);
    });

    it('appends at end when no Implementation Notes section exists', () => {
        const planWithoutNotes = '# Plan: Test\n\n## Audit Log\n';
        const updated = appendAuditRound(planWithoutNotes, 2, 'Needs work.', { maxSeverity: 'blocking', shouldLoop: true });
        expect(updated).toContain('### Review 2');
        expect(updated).toContain('Needs work.');
    });
});
539
+ // ---------------------------------------------------------------------------
540
+ // ForgeOrchestrator
541
+ // ---------------------------------------------------------------------------
542
+ describe('ForgeOrchestrator', () => {
543
it('completes in 1 round when audit returns clean', async () => {
    const tmpDir = await makeTmpDir();
    // Scripted runtime output handed to makeMockRuntime: a draft, then an audit
    // whose only concern is low severity.
    const draftText = `# Plan: Test feature\n\n**ID:** (system)\n**Task:** (system)\n**Created:** 2026-01-01\n**Status:** DRAFT\n**Project:** discoclaw\n\n---\n\n## Objective\n\nBuild the thing.\n\n## Scope\n\nIn scope: everything.\n\n## Changes\n\n### File-by-file breakdown\n\n- src/foo.ts — add bar\n\n## Risks\n\n- None.\n\n## Testing\n\n- Unit tests.\n\n---\n\n## Audit Log\n\n---\n\n## Implementation Notes\n\n_Filled in during/after implementation._\n`;
    const lowSeverityAudit = '**Concern 1: Minor naming**\n**Severity: low**\n\n**Verdict:** Ready to approve.';
    const mockRuntime = makeMockRuntime([draftText, lowSeverityAudit]);
    const forgeOpts = await baseOpts(tmpDir, mockRuntime);
    const forge = new ForgeOrchestrator(forgeOpts);
    const messages = [];
    const sawMessage = (needle) => messages.some((message) => message.includes(needle));
    const outcome = await forge.run('Test feature', async (message) => {
        messages.push(message);
    });
    expect(outcome.planId).toMatch(/^plan-001$/);
    expect(outcome.rounds).toBe(1);
    expect(outcome.reachedMaxRounds).toBe(false);
    expect(outcome.error).toBeUndefined();
    expect(sawMessage('Draft complete')).toBe(true);
    expect(sawMessage('Forge complete')).toBe(true);
    expect(outcome.planSummary).toBeDefined();
    expect(outcome.planSummary).toContain('plan-001');
});
563
it('completes in 2 rounds when first audit has blocking concerns', async () => {
    const tmpDir = await makeTmpDir();
    const draftText = `# Plan: Test\n\n**ID:** (system)\n**Task:** (system)\n**Created:** 2026-01-01\n**Status:** DRAFT\n**Project:** discoclaw\n\n---\n\n## Objective\n\nDo something.\n\n## Scope\n\nStuff.\n\n## Changes\n\n## Risks\n\n## Testing\n\n---\n\n## Audit Log\n\n---\n\n## Implementation Notes\n\n_Filled in during/after implementation._\n`;
    const blockingAudit = '**Concern 1: Missing details**\n**Severity: blocking**\n\n**Verdict:** Needs revision.';
    const cleanAudit = '**Verdict:** Ready to approve.';
    // Scripted sequence: Draft -> Audit (blocking) -> Revise -> Audit (clean).
    // The revision reuses the draft text; same structure, orchestrator handles merge.
    const scriptedReplies = [draftText, blockingAudit, draftText, cleanAudit];
    const mockRuntime = makeMockRuntime(scriptedReplies);
    const forgeOpts = await baseOpts(tmpDir, mockRuntime);
    const forge = new ForgeOrchestrator(forgeOpts);
    const messages = [];
    const sawMessage = (needle) => messages.some((message) => message.includes(needle));
    const outcome = await forge.run('Test feature', async (message) => {
        messages.push(message);
    });
    expect(outcome.rounds).toBe(2);
    expect(outcome.reachedMaxRounds).toBe(false);
    expect(sawMessage('blocking concerns')).toBe(true);
    expect(sawMessage('Forge complete')).toBe(true);
});
582
it('medium severity auto-approves without revision', async () => {
    const tmpDir = await makeTmpDir();
    const draftText = `# Plan: Test\n\n**ID:** (system)\n**Task:** (system)\n**Created:** 2026-01-01\n**Status:** DRAFT\n**Project:** discoclaw\n\n---\n\n## Objective\n\nDo something.\n\n## Scope\n\nStuff.\n\n## Changes\n\n## Risks\n\n## Testing\n\n---\n\n## Audit Log\n\n---\n\n## Implementation Notes\n\n_Filled in during/after implementation._\n`;
    // The verdict line asks for revision, but the only severity marker is medium.
    const mediumAudit = '**Concern 1: Missing details**\n**Severity: medium**\n\n**Verdict:** Needs revision.';
    // Only two replies are scripted (draft + audit): no revision pass is expected.
    const mockRuntime = makeMockRuntime([draftText, mediumAudit]);
    const forgeOpts = await baseOpts(tmpDir, mockRuntime);
    const forge = new ForgeOrchestrator(forgeOpts);
    const messages = [];
    const sawMessage = (needle) => messages.some((message) => message.includes(needle));
    const outcome = await forge.run('Test feature', async (message) => {
        messages.push(message);
    });
    expect(outcome.rounds).toBe(1);
    expect(outcome.reachedMaxRounds).toBe(false);
    expect(sawMessage('Forge complete')).toBe(true);
    // No progress message may mention a revision step.
    expect(sawMessage('Revising')).toBe(false);
});
600
it('stops at max rounds when audit always returns blocking concerns', async () => {
    const tmpDir = await makeTmpDir();
    const draftText = `# Plan: Test\n\n**ID:** (system)\n**Task:** (system)\n**Created:** 2026-01-01\n**Status:** DRAFT\n**Project:** discoclaw\n\n---\n\n## Objective\n\nDo something.\n\n## Scope\n\n## Changes\n\n## Risks\n\n## Testing\n\n---\n\n## Audit Log\n\n---\n\n## Implementation Notes\n\n_Filled in during/after implementation._\n`;
    const blockingAudit = '**Concern 1: Fundamental flaw**\n**Severity: blocking**\n\n**Verdict:** Needs revision.';
    // Ten scripted replies alternating plan text (even slots) and a blocking audit
    // (odd slots). With maxAuditRounds = 3 the expected sequence is draft, audit,
    // revise, audit, revise, audit, so ten is more than enough.
    const scriptedReplies = Array.from({ length: 10 }, (_, slot) => (slot % 2 === 0 ? draftText : blockingAudit));
    const mockRuntime = makeMockRuntime(scriptedReplies);
    const forgeOpts = await baseOpts(tmpDir, mockRuntime, { maxAuditRounds: 3 });
    const forge = new ForgeOrchestrator(forgeOpts);
    const messages = [];
    const outcome = await forge.run('Test feature', async (message) => {
        messages.push(message);
    });
    expect(outcome.rounds).toBe(3);
    expect(outcome.reachedMaxRounds).toBe(true);
    const sawStopNotice = messages.some((message) => message.includes('Forge stopped after 3 audit rounds'));
    expect(sawStopNotice).toBe(true);
});
620
it('reports error when draft phase fails', async () => {
    const tmpDir = await makeTmpDir();
    // Error index 0 with no scripted replies: presumably the very first runtime
    // call (the draft) fails — confirm against makeMockRuntimeWithError.
    const failingRuntime = makeMockRuntimeWithError(0, []);
    const forgeOpts = await baseOpts(tmpDir, failingRuntime);
    const forge = new ForgeOrchestrator(forgeOpts);
    const messages = [];
    const outcome = await forge.run('Test feature', async (message) => {
        messages.push(message);
    });
    expect(outcome.error).toBeDefined();
    const sawFailureNotice = messages.some((message) => message.includes('Forge failed'));
    expect(sawFailureNotice).toBe(true);
});
632
it('reports error when audit phase fails but preserves draft', async () => {
    const tmpDir = await makeTmpDir();
    const draftText = `# Plan: Test\n\n**ID:** (system)\n**Task:** (system)\n**Created:** 2026-01-01\n**Status:** DRAFT\n**Project:** discoclaw\n\n---\n\n## Objective\n\nDo something.\n\n## Scope\n\n## Changes\n\n## Risks\n\n## Testing\n\n---\n\n## Audit Log\n\n---\n\n## Implementation Notes\n\n_Filled in during/after implementation._\n`;
    // One scripted reply (the draft) and error index 1: the draft succeeds and
    // the audit call is the one that errors.
    const failingRuntime = makeMockRuntimeWithError(1, [draftText]);
    const forgeOpts = await baseOpts(tmpDir, failingRuntime);
    const forge = new ForgeOrchestrator(forgeOpts);
    const messages = [];
    const outcome = await forge.run('Test feature', async (message) => {
        messages.push(message);
    });
    expect(outcome.error).toBeDefined();
    // The draft must still be identifiable and saved despite the failure.
    expect(outcome.planId).toMatch(/^plan-001$/);
    expect(outcome.filePath).toBeTruthy();
    const sawPartialNotice = messages.some((message) => message.includes('Partial plan saved'));
    expect(sawPartialNotice).toBe(true);
});
648
it('progress callback receives round numbers in format "Audit round N/M"', async () => {
    const tmpDir = await makeTmpDir();
    const draftPlan = `# Plan: Test\n\n**ID:** (system)\n**Task:** (system)\n**Created:** 2026-01-01\n**Status:** DRAFT\n**Project:** discoclaw\n\n---\n\n## Objective\n\nDo something.\n\n## Scope\n\n## Changes\n\n## Risks\n\n## Testing\n\n---\n\n## Audit Log\n\n---\n\n## Implementation Notes\n\n_Filled in during/after implementation._\n`;
    const auditClean = '**Verdict:** Ready to approve.';
    const runtime = makeMockRuntime([draftPlan, auditClean]);
    const opts = await baseOpts(tmpDir, runtime);
    const orchestrator = new ForgeOrchestrator(opts);
    const progress = [];
    // Only the progress messages matter here, so the run result is not captured.
    await orchestrator.run('Test', async (msg) => {
        progress.push(msg);
    });
    // "5" is presumably the default round limit supplied by baseOpts — confirm there.
    expect(progress.some((p) => /Audit round 1\/5/.test(p))).toBe(true);
});
661
it('terminal messages pass force: true', async () => {
    const tmpDir = await makeTmpDir();
    const draftText = `# Plan: Test\n\n**ID:** (system)\n**Task:** (system)\n**Created:** 2026-01-01\n**Status:** DRAFT\n**Project:** discoclaw\n\n---\n\n## Objective\n\nDo something.\n\n## Scope\n\n## Changes\n\n## Risks\n\n## Testing\n\n---\n\n## Audit Log\n\n---\n\n## Implementation Notes\n\n_Filled in during/after implementation._\n`;
    const cleanAudit = '**Verdict:** Ready to approve.';
    const mockRuntime = makeMockRuntime([draftText, cleanAudit]);
    const forgeOpts = await baseOpts(tmpDir, mockRuntime);
    const forge = new ForgeOrchestrator(forgeOpts);
    // Record each progress message together with the `force` flag from the
    // optional second callback argument.
    const recorded = [];
    await forge.run('Test', async (msg, callbackOpts) => {
        recorded.push({ msg, force: callbackOpts?.force });
    });
    const completion = recorded.find((entry) => entry.msg.includes('Forge complete'));
    expect(completion).toBeDefined();
    expect(completion.force).toBe(true);
});
676
it('isRunning reflects orchestrator state', async () => {
    const tmpDir = await makeTmpDir();
    const draftText = `# Plan: Test\n\n**ID:** (system)\n**Task:** (system)\n**Created:** 2026-01-01\n**Status:** DRAFT\n**Project:** discoclaw\n\n---\n\n## Objective\n\nDo something.\n\n## Scope\n\n## Changes\n\n## Risks\n\n## Testing\n\n---\n\n## Audit Log\n\n---\n\n## Implementation Notes\n\n_Filled in during/after implementation._\n`;
    const cleanAudit = '**Verdict:** Ready to approve.';
    const mockRuntime = makeMockRuntime([draftText, cleanAudit]);
    const forgeOpts = await baseOpts(tmpDir, mockRuntime);
    const forge = new ForgeOrchestrator(forgeOpts);
    // Idle before the run starts.
    expect(forge.isRunning).toBe(false);
    const pendingRun = forge.run('Test', async () => { });
    // The flag is checked synchronously after run() is called, before awaiting it.
    expect(forge.isRunning).toBe(true);
    await pendingRun;
    // Idle again once the run has settled.
    expect(forge.isRunning).toBe(false);
});
690
it('cancel stops the forge between phases', async () => {
    const tmpDir = await makeTmpDir();
    const draftText = `# Plan: Test\n\n**ID:** (system)\n**Task:** (system)\n**Created:** 2026-01-01\n**Status:** DRAFT\n**Project:** discoclaw\n\n---\n\n## Objective\n\nDo something.\n\n## Scope\n\n## Changes\n\n## Risks\n\n## Testing\n\n---\n\n## Audit Log\n\n---\n\n## Implementation Notes\n\n_Filled in during/after implementation._\n`;
    const blockingAudit = '**Concern 1: Issue**\n**Severity: blocking**\n**Verdict:** Needs revision.';
    const cleanAudit = '**Verdict:** Ready to approve.';
    // A full two-round script is supplied (the revision reuses the draft text),
    // although the run is expected to be cancelled before it is exhausted.
    const mockRuntime = makeMockRuntime([draftText, blockingAudit, draftText, cleanAudit]);
    const forgeOpts = await baseOpts(tmpDir, mockRuntime);
    const forge = new ForgeOrchestrator(forgeOpts);
    const messages = [];
    const outcome = await forge.run('Test', async (message) => {
        messages.push(message);
        // Request cancellation as soon as the first audit reports blocking concerns.
        if (!message.includes('blocking concerns')) {
            return;
        }
        forge.requestCancel();
    });
    expect(outcome.finalVerdict).toBe('CANCELLED');
    expect(outcome.rounds).toBeLessThanOrEqual(2);
});
710
it('concurrent forge throws error', async () => {
    const tmpDir = await makeTmpDir();
    // Gate that keeps the runtime from producing output until the test opens it.
    let resolveFirst;
    const firstCallDone = new Promise((r) => { resolveFirst = r; });
    const runtime = {
        id: 'claude_code',
        capabilities: new Set(['streaming_text']),
        invoke(_params) {
            return (async function* () {
                // Every invocation waits on the same gate before yielding its reply.
                await firstCallDone;
                yield { type: 'text_final', text: '# Plan: Test\n' };
            })();
        },
    };
    const opts = await baseOpts(tmpDir, runtime);
    const orchestrator = new ForgeOrchestrator(opts);
    // Start first forge (will block on the gate)
    const p1 = orchestrator.run('Test 1', async () => { });
    try {
        // A second run on the same orchestrator must be rejected while the first is active.
        await expect(orchestrator.run('Test 2', async () => { })).rejects.toThrow('already running');
    }
    finally {
        // Always open the gate and drain the first run, even if the assertion above
        // failed; otherwise the first run would stay pending after this test ends.
        // Its own outcome is irrelevant here (it may well error), so it is ignored.
        resolveFirst();
        await p1.catch(() => { });
    }
});
736
it('includes .context/project.md in drafter and auditor prompts', async () => {
    const tmpDir = await makeTmpDir();
    // Seed <tmpDir>/.context/project.md with recognisable content.
    const contextDir = path.join(tmpDir, '.context');
    await fs.mkdir(contextDir, { recursive: true });
    await fs.writeFile(path.join(contextDir, 'project.md'), 'Single-user system. No concurrency guards needed.');
    const draftText = `# Plan: Test\n\n**ID:** (system)\n**Task:** (system)\n**Created:** 2026-01-01\n**Status:** DRAFT\n**Project:** discoclaw\n\n---\n\n## Objective\n\nDo something.\n\n## Scope\n\n## Changes\n\n## Risks\n\n## Testing\n\n---\n\n## Audit Log\n\n---\n\n## Implementation Notes\n\n_Filled in during/after implementation._\n`;
    const cleanAudit = '**Verdict:** Ready to approve.';
    const scriptedReplies = [draftText, cleanAudit];
    // Single-event stream carrying one final text reply.
    async function* replyWith(text) {
        yield { type: 'text_final', text };
    }
    // Hand-rolled runtime that records every prompt and answers from the script.
    const capturedPrompts = [];
    const recordingRuntime = {
        id: 'claude_code',
        capabilities: new Set(['streaming_text']),
        invoke(params) {
            capturedPrompts.push(params.prompt);
            return replyWith(scriptedReplies[capturedPrompts.length - 1] ?? '(no response)');
        },
    };
    const forgeOpts = await baseOpts(tmpDir, recordingRuntime);
    const forge = new ForgeOrchestrator(forgeOpts);
    await forge.run('Test', async () => { });
    const [drafterPrompt, auditorPrompt] = capturedPrompts;
    // First call is the drafter, second the auditor; both must carry the project context.
    expect(drafterPrompt).toContain('Single-user system');
    expect(auditorPrompt).toContain('Single-user system');
    expect(auditorPrompt).toContain('Project Context');
});
767
it('includes .context/tools.md in drafter prompt but not auditor prompt', async () => {
    const tmpDir = await makeTmpDir();
    // Seed <tmpDir>/.context/tools.md with recognisable content.
    const contextDir = path.join(tmpDir, '.context');
    await fs.mkdir(contextDir, { recursive: true });
    await fs.writeFile(path.join(contextDir, 'tools.md'), 'Browser escalation: WebFetch → Playwright → CDP');
    const draftText = `# Plan: Test\n\n**ID:** (system)\n**Task:** (system)\n**Created:** 2026-01-01\n**Status:** DRAFT\n**Project:** discoclaw\n\n---\n\n## Objective\n\nDo something.\n\n## Scope\n\n## Changes\n\n## Risks\n\n## Testing\n\n---\n\n## Audit Log\n\n---\n\n## Implementation Notes\n\n_Filled in during/after implementation._\n`;
    const cleanAudit = '**Verdict:** Ready to approve.';
    const scriptedReplies = [draftText, cleanAudit];
    // Single-event stream carrying one final text reply.
    async function* replyWith(text) {
        yield { type: 'text_final', text };
    }
    // Hand-rolled runtime that records every prompt and answers from the script.
    const capturedPrompts = [];
    const recordingRuntime = {
        id: 'claude_code',
        capabilities: new Set(['streaming_text']),
        invoke(params) {
            capturedPrompts.push(params.prompt);
            return replyWith(scriptedReplies[capturedPrompts.length - 1] ?? '(no response)');
        },
    };
    const forgeOpts = await baseOpts(tmpDir, recordingRuntime);
    const forge = new ForgeOrchestrator(forgeOpts);
    await forge.run('Test', async () => { });
    const [drafterPrompt, auditorPrompt] = capturedPrompts;
    // Drafter prompt (first call) should include tools context
    expect(drafterPrompt).toContain('Browser escalation: WebFetch');
    expect(drafterPrompt).toContain('tools.md (repo)');
    // Auditor prompt (second call) should NOT include tools context
    expect(auditorPrompt).not.toContain('Browser escalation: WebFetch');
    expect(auditorPrompt).not.toContain('tools.md (repo)');
});
799
it('passes read-only tools to auditor invoke call', async () => {
    const tmpDir = await makeTmpDir();
    const draftText = `# Plan: Test\n\n**ID:** (system)\n**Task:** (system)\n**Created:** 2026-01-01\n**Status:** DRAFT\n**Project:** discoclaw\n\n---\n\n## Objective\n\nDo something.\n\n## Scope\n\n## Changes\n\n## Risks\n\n## Testing\n\n---\n\n## Audit Log\n\n---\n\n## Implementation Notes\n\n_Filled in during/after implementation._\n`;
    const cleanAudit = '**Verdict:** Ready to approve.';
    const scriptedReplies = [draftText, cleanAudit];
    // Single-event stream carrying one final text reply.
    async function* replyWith(text) {
        yield { type: 'text_final', text };
    }
    // Hand-rolled runtime (advertising 'tools_fs') that records the `tools` and
    // `addDirs` parameters of every invoke call.
    const recordedCalls = [];
    const recordingRuntime = {
        id: 'claude_code',
        capabilities: new Set(['streaming_text', 'tools_fs']),
        invoke(params) {
            recordedCalls.push({ tools: params.tools, addDirs: params.addDirs });
            return replyWith(scriptedReplies[recordedCalls.length - 1] ?? '(no response)');
        },
    };
    const forgeOpts = await baseOpts(tmpDir, recordingRuntime);
    const forge = new ForgeOrchestrator(forgeOpts);
    await forge.run('Test', async () => { });
    const [drafterCall, auditorCall] = recordedCalls;
    // Drafter (first call) gets read-only tools
    expect(drafterCall.tools).toEqual(['Read', 'Glob', 'Grep']);
    expect(drafterCall.addDirs).toEqual([tmpDir]);
    // Auditor (second call) also gets read-only tools
    expect(auditorCall.tools).toEqual(['Read', 'Glob', 'Grep']);
    expect(auditorCall.addDirs).toEqual([tmpDir]);
});
827
// When the drafted plan's title differs from the raw description passed to
// run(), the task store is expected to receive an update carrying that title.
it('updates bead title when drafter produces a different title than raw description', async () => {
    const tmpDir = await makeTmpDir();
    // Drafter returns a clean title ("Add webhook retry logic") different from raw input
    const draftPlan = `# Plan: Add webhook retry logic\n\n**ID:** (system)\n**Task:** (system)\n**Created:** 2026-01-01\n**Status:** DRAFT\n**Project:** discoclaw\n\n---\n\n## Objective\n\nAdd retry logic.\n\n## Scope\n\n## Changes\n\n## Risks\n\n## Testing\n\n---\n\n## Audit Log\n\n---\n\n## Implementation Notes\n\n_Filled in during/after implementation._\n`;
    const auditClean = '**Verdict:** Ready to approve.';
    const runtime = makeMockRuntime([draftPlan, auditClean]);
    const opts = await baseOpts(tmpDir, runtime);
    const updateSpy = vi.spyOn(opts.taskStore, 'update');
    const orchestrator = new ForgeOrchestrator(opts);
    // Raw description differs from the drafter's clean title
    await orchestrator.run('a]plan to add webhook retry stuff', async () => { });
    expect(updateSpy).toHaveBeenCalledWith(expect.any(String), { title: 'Add webhook retry logic' });
});
840
// When the drafted title equals the description passed to run(), no
// taskStore.update call is expected.
it('skips bead title update when drafter title matches description', async () => {
    const tmpDir = await makeTmpDir();
    const draftPlan = `# Plan: Test feature\n\n**ID:** (system)\n**Task:** (system)\n**Created:** 2026-01-01\n**Status:** DRAFT\n**Project:** discoclaw\n\n---\n\n## Objective\n\nBuild it.\n\n## Scope\n\n## Changes\n\n## Risks\n\n## Testing\n\n---\n\n## Audit Log\n\n---\n\n## Implementation Notes\n\n_Filled in during/after implementation._\n`;
    const auditClean = '**Verdict:** Ready to approve.';
    const runtime = makeMockRuntime([draftPlan, auditClean]);
    const opts = await baseOpts(tmpDir, runtime);
    const updateSpy = vi.spyOn(opts.taskStore, 'update');
    const orchestrator = new ForgeOrchestrator(opts);
    // Description matches the drafter's title exactly
    await orchestrator.run('Test feature', async () => { });
    expect(updateSpy).not.toHaveBeenCalled();
});
852
// A pre-existing bead with the same title and the 'plan' label should be
// reused: no new create() call, and the written plan file names its ID.
it('reuses existing open bead with matching title instead of creating duplicate', async () => {
    const tmpDir = await makeTmpDir();
    const draftPlan = `# Plan: Test feature\n\n**ID:** (system)\n**Task:** (system)\n**Created:** 2026-01-01\n**Status:** DRAFT\n**Project:** discoclaw\n\n---\n\n## Objective\n\nBuild the thing.\n\n## Scope\n\nIn scope: everything.\n\n## Changes\n\n### File-by-file breakdown\n\n- src/foo.ts — add bar\n\n## Risks\n\n- None.\n\n## Testing\n\n- Unit tests.\n\n---\n\n## Audit Log\n\n---\n\n## Implementation Notes\n\n_Filled in during/after implementation._\n`;
    const auditClean = '**Verdict:** Ready to approve.';
    const runtime = makeMockRuntime([draftPlan, auditClean]);
    const opts = await baseOpts(tmpDir, runtime);
    // Pre-create a bead with the matching title and 'plan' label
    const existingBead = opts.taskStore.create({ title: 'Test feature', labels: ['plan'] });
    // Spy is installed after the pre-create so only orchestrator calls are counted.
    const createSpy = vi.spyOn(opts.taskStore, 'create');
    const orchestrator = new ForgeOrchestrator(opts);
    const result = await orchestrator.run('Test feature', async () => { });
    expect(result.error).toBeUndefined();
    // taskStore.create should NOT have been called — reusing existing bead
    expect(createSpy).not.toHaveBeenCalled();
    // The plan file should reference the existing bead ID
    const plansDir = path.join(tmpDir, 'plans');
    const entries = await fs.readdir(plansDir);
    const planFile = entries.find((e) => e.startsWith('plan-001') && e.endsWith('.md') && !e.includes('template'));
    expect(planFile).toBeTruthy();
    const content = await fs.readFile(path.join(plansDir, planFile), 'utf-8');
    expect(content).toContain(`**Task:** ${existingBead.id}`);
});
874
// An existing bead whose title differs from the description only by case and
// surrounding whitespace should still be treated as a match (no create()).
it('dedup is case-insensitive and trims whitespace', async () => {
    const tmpDir = await makeTmpDir();
    const draftPlan = `# Plan: Test feature\n\n**ID:** (system)\n**Task:** (system)\n**Created:** 2026-01-01\n**Status:** DRAFT\n**Project:** discoclaw\n\n---\n\n## Objective\n\nBuild the thing.\n\n## Scope\n\nIn scope: everything.\n\n## Changes\n\n### File-by-file breakdown\n\n- src/foo.ts — add bar\n\n## Risks\n\n- None.\n\n## Testing\n\n- Unit tests.\n\n---\n\n## Audit Log\n\n---\n\n## Implementation Notes\n\n_Filled in during/after implementation._\n`;
    const auditClean = '**Verdict:** Ready to approve.';
    const runtime = makeMockRuntime([draftPlan, auditClean]);
    const opts = await baseOpts(tmpDir, runtime);
    // Title differs in case and has extra whitespace
    opts.taskStore.create({ title: ' TEST FEATURE ', labels: ['plan'] });
    const createSpy = vi.spyOn(opts.taskStore, 'create');
    const orchestrator = new ForgeOrchestrator(opts);
    const result = await orchestrator.run('test feature', async () => { });
    expect(result.error).toBeUndefined();
    expect(createSpy).not.toHaveBeenCalled();
});
888
// A bead with a matching title that has been closed must not be reused; the
// orchestrator is expected to call create() for a fresh one.
it('does not reuse closed beads with matching title', async () => {
    const tmpDir = await makeTmpDir();
    const draftPlan = `# Plan: Test feature\n\n**ID:** (system)\n**Task:** (system)\n**Created:** 2026-01-01\n**Status:** DRAFT\n**Project:** discoclaw\n\n---\n\n## Objective\n\nBuild the thing.\n\n## Scope\n\nIn scope: everything.\n\n## Changes\n\n### File-by-file breakdown\n\n- src/foo.ts — add bar\n\n## Risks\n\n- None.\n\n## Testing\n\n- Unit tests.\n\n---\n\n## Audit Log\n\n---\n\n## Implementation Notes\n\n_Filled in during/after implementation._\n`;
    const auditClean = '**Verdict:** Ready to approve.';
    const runtime = makeMockRuntime([draftPlan, auditClean]);
    const opts = await baseOpts(tmpDir, runtime);
    // Only closed bead matches — should NOT be reused
    const closedBead = opts.taskStore.create({ title: 'Test feature', labels: ['plan'] });
    opts.taskStore.close(closedBead.id);
    // Spy is installed after the setup create() so only orchestrator calls are counted.
    const createSpy = vi.spyOn(opts.taskStore, 'create');
    const orchestrator = new ForgeOrchestrator(opts);
    const result = await orchestrator.run('Test feature', async () => { });
    expect(result.error).toBeUndefined();
    // taskStore.create SHOULD have been called — closed bead not reused
    expect(createSpy).toHaveBeenCalled();
});
904
// With only an unrelated bead in the store, the orchestrator is expected to
// create a new bead for the plan.
it('creates new bead when no title match exists', async () => {
    const tmpDir = await makeTmpDir();
    const draftPlan = `# Plan: Test feature\n\n**ID:** (system)\n**Task:** (system)\n**Created:** 2026-01-01\n**Status:** DRAFT\n**Project:** discoclaw\n\n---\n\n## Objective\n\nBuild the thing.\n\n## Scope\n\nIn scope: everything.\n\n## Changes\n\n### File-by-file breakdown\n\n- src/foo.ts — add bar\n\n## Risks\n\n- None.\n\n## Testing\n\n- Unit tests.\n\n---\n\n## Audit Log\n\n---\n\n## Implementation Notes\n\n_Filled in during/after implementation._\n`;
    const auditClean = '**Verdict:** Ready to approve.';
    const runtime = makeMockRuntime([draftPlan, auditClean]);
    const opts = await baseOpts(tmpDir, runtime);
    // No matching beads — only an unrelated one exists
    opts.taskStore.create({ title: 'Something else entirely', labels: ['plan'] });
    const createSpy = vi.spyOn(opts.taskStore, 'create');
    const orchestrator = new ForgeOrchestrator(opts);
    const result = await orchestrator.run('Test feature', async () => { });
    expect(result.error).toBeUndefined();
    // taskStore.create SHOULD have been called — no matching bead found
    expect(createSpy).toHaveBeenCalled();
});
919
// Cancellation requested from inside the runtime stream, which then still
// yields a normal final response. The run is expected to end as CANCELLED
// without reporting an error.
it('cancel mid-phase (post-return guard): pipeline returns normally but cancel is set', async () => {
    const tmpDir = await makeTmpDir();
    const draftPlan = `# Plan: Test\n\n**ID:** (system)\n**Task:** (system)\n**Created:** 2026-01-01\n**Status:** DRAFT\n**Project:** discoclaw\n\n---\n\n## Objective\n\nDo something.\n\n## Scope\n\n## Changes\n\n## Risks\n\n## Testing\n\n---\n\n## Audit Log\n\n---\n\n## Implementation Notes\n\n_Filled in during/after implementation._\n`;
    // Declared before the runtime so the generator below can close over it;
    // it is assigned once opts (which need the runtime) are built.
    let orchestrator;
    const runtime = {
        id: 'claude_code',
        capabilities: new Set(['streaming_text']),
        invoke(_params) {
            return (async function* () {
                // Cancel while the pipeline is running, then still yield the response.
                // The post-return guard should catch this before the output is processed.
                orchestrator.requestCancel();
                yield { type: 'text_final', text: draftPlan };
            })();
        },
    };
    const opts = await baseOpts(tmpDir, runtime);
    orchestrator = new ForgeOrchestrator(opts);
    const result = await orchestrator.run('Test', async () => { });
    expect(result.finalVerdict).toBe('CANCELLED');
    expect(result.error).toBeUndefined();
});
941
// Cancellation requested from inside the runtime stream, which then emits an
// error event. The run is expected to end as CANCELLED rather than surface
// the error.
it('cancel mid-phase (cancel-aware catch): pipeline throws while cancel is set', async () => {
    const tmpDir = await makeTmpDir();
    // Declared before the runtime so the generator below can close over it;
    // it is assigned once opts (which need the runtime) are built.
    let orchestrator;
    const runtime = {
        id: 'claude_code',
        capabilities: new Set(['streaming_text']),
        invoke(_params) {
            return (async function* () {
                // Cancel, then emit an error event — pipeline will throw.
                // The cancel-aware catch should treat the throw as cancellation.
                orchestrator.requestCancel();
                yield { type: 'error', message: 'Aborted by signal' };
            })();
        },
    };
    const opts = await baseOpts(tmpDir, runtime);
    orchestrator = new ForgeOrchestrator(opts);
    const result = await orchestrator.run('Test', async () => { });
    expect(result.finalVerdict).toBe('CANCELLED');
    expect(result.error).toBeUndefined();
});
962
// With existingTaskId supplied in the options, no new task should be created;
// the 'plan' label is added to the existing task and the plan file names it.
it('passes existingTaskId through to handlePlanCommand (skips create)', async () => {
    const tmpDir = await makeTmpDir();
    const draftPlan = `# Plan: Test feature\n\n**ID:** (system)\n**Task:** (system)\n**Created:** 2026-01-01\n**Status:** DRAFT\n**Project:** discoclaw\n\n---\n\n## Objective\n\nBuild the thing.\n\n## Scope\n\nIn scope: everything.\n\n## Changes\n\n### File-by-file breakdown\n\n- src/foo.ts — add bar\n\n## Risks\n\n- None.\n\n## Testing\n\n- Unit tests.\n\n---\n\n## Audit Log\n\n---\n\n## Implementation Notes\n\n_Filled in during/after implementation._\n`;
    const auditClean = '**Verdict:** Ready to approve.';
    const runtime = makeMockRuntime([draftPlan, auditClean]);
    const opts = await baseOpts(tmpDir, runtime, { existingTaskId: 'existing-task-42' });
    const createSpy = vi.spyOn(opts.taskStore, 'create');
    const addLabelSpy = vi.spyOn(opts.taskStore, 'addLabel');
    const orchestrator = new ForgeOrchestrator(opts);
    const result = await orchestrator.run('Test feature', async () => { });
    expect(result.planId).toBe('plan-001');
    expect(result.error).toBeUndefined();
    // taskStore.create should NOT have been called — reusing existing task
    expect(createSpy).not.toHaveBeenCalled();
    // taskStore.addLabel should have been called to add the 'plan' label
    expect(addLabelSpy).toHaveBeenCalledWith('existing-task-42', 'plan');
    // Verify the plan file contains the existing task ID.
    const plansDir = path.join(tmpDir, 'plans');
    const entries = await fs.readdir(plansDir);
    const planFile = entries.find((e) => e.startsWith('plan-001') && e.endsWith('.md') && !e.includes('template'));
    expect(planFile).toBeTruthy();
    const content = await fs.readFile(path.join(plansDir, planFile), 'utf-8');
    // Accepts either header label ("Task" or "Bead") in front of the ID.
    expect(content).toMatch(/\*\*(Task|Bead):\*\* existing-task-42/);
});
986
+ });
987
// ---------------------------------------------------------------------------
// ForgeOrchestrator.resume()
// ---------------------------------------------------------------------------
990
/**
 * Builds the markdown text of a plan file for use as a test fixture.
 *
 * @param {object} [overrides] - Optional field overrides.
 * @param {string} [overrides.status='REVIEW'] - Value of the `**Status:**` header line.
 * @param {string} [overrides.title='Test Plan'] - Title used in the `# Plan:` heading.
 * @param {string} [overrides.planId='plan-001'] - Value of the `**ID:**` header line.
 * @param {string} [overrides.taskId='ws-test-001'] - Value of the `**Task:**` header line.
 * @param {boolean} [overrides.includeChanges=true] - When false, the Changes section
 *   keeps its heading but has no body.
 * @param {number} [overrides.reviews=0] - Number of `### Review N` entries written
 *   under the Audit Log heading, numbered from 1.
 * @returns {string} The plan markdown, ending with a newline.
 */
function makePlanContent(overrides = {}) {
    const status = overrides.status ?? 'REVIEW';
    const title = overrides.title ?? 'Test Plan';
    const planId = overrides.planId ?? 'plan-001';
    const taskId = overrides.taskId ?? 'ws-test-001';
    const includeChanges = overrides.includeChanges ?? true;
    const reviews = overrides.reviews ?? 0;
    const lines = [
        `# Plan: ${title}`,
        '',
        `**ID:** ${planId}`,
        `**Task:** ${taskId}`,
        `**Created:** 2026-01-01`,
        `**Status:** ${status}`,
        `**Project:** discoclaw`,
        '',
        '---',
        '',
        '## Objective',
        '',
        'Build the test feature with proper error handling.',
        '',
        '## Scope',
        '',
        'In scope: everything related to testing.',
        '',
        '## Changes',
        '',
        // Without changes, a single empty entry keeps a blank line before Risks.
        ...(includeChanges
            ? ['### File-by-file breakdown', '', '#### `src/foo.ts`', '', 'Add bar function.', '']
            : ['']),
        '## Risks',
        '',
        '- Low risk of breaking existing tests.',
        '',
        '## Testing',
        '',
        '- Unit tests for the new feature.',
        '',
        '---',
        '',
        '## Audit Log',
        '',
    ];
    for (let i = 1; i <= reviews; i++) {
        lines.push(
            `### Review ${i} — 2026-01-01`,
            '**Status:** COMPLETE',
            '',
            `Audit round ${i} notes.`,
            '',
        );
    }
    // The trailing '' makes the joined text end with a newline.
    lines.push('---', '', '## Implementation Notes', '', '_Filled in during/after implementation._', '');
    return lines.join('\n');
}
1043
// Tests for ForgeOrchestrator.resume(): each case writes a plan file into
// opts.plansDir and resumes from it with a scripted mock runtime.
describe('ForgeOrchestrator.resume()', () => {
    it('loads existing plan and runs audit loop (skipping draft)', async () => {
        const tmpDir = await makeTmpDir();
        const opts = await baseOpts(tmpDir, makeMockRuntime([
            // Only audit output — no draft call
            '**Verdict:** Ready to approve.',
        ]));
        // Write plan file directly
        const planContent = makePlanContent({ planId: 'plan-001', status: 'REVIEW' });
        const filePath = path.join(opts.plansDir, 'plan-001-test.md');
        await fs.writeFile(filePath, planContent, 'utf-8');
        const orchestrator = new ForgeOrchestrator(opts);
        const progress = [];
        const result = await orchestrator.resume('plan-001', filePath, 'Test Plan', async (msg) => {
            progress.push(msg);
        });
        expect(result.planId).toBe('plan-001');
        expect(result.rounds).toBe(1);
        expect(result.reachedMaxRounds).toBe(false);
        expect(result.error).toBeUndefined();
        expect(progress.some((p) => p.includes('Forge complete'))).toBe(true);
        // Should NOT contain draft-phase progress
        expect(progress.some((p) => p.includes('Drafting'))).toBe(false);
    });
    it('handles audit-then-revise loop', async () => {
        const tmpDir = await makeTmpDir();
        const auditBlocking = '**Concern 1: Issue**\n**Severity: blocking**\n\n**Verdict:** Needs revision.';
        const revisedPlan = makePlanContent({ planId: 'plan-001', status: 'REVIEW' });
        const auditClean = '**Verdict:** Ready to approve.';
        const opts = await baseOpts(tmpDir, makeMockRuntime([
            auditBlocking, // first audit
            revisedPlan, // revision
            auditClean, // second audit
        ]));
        const planContent = makePlanContent({ planId: 'plan-001', status: 'REVIEW' });
        const filePath = path.join(opts.plansDir, 'plan-001-test.md');
        await fs.writeFile(filePath, planContent, 'utf-8');
        const orchestrator = new ForgeOrchestrator(opts);
        const result = await orchestrator.resume('plan-001', filePath, 'Test Plan', async () => { });
        expect(result.rounds).toBe(2);
        expect(result.reachedMaxRounds).toBe(false);
        expect(result.error).toBeUndefined();
    });
    it('respects cancel', async () => {
        const tmpDir = await makeTmpDir();
        const auditBlocking = '**Concern 1: Issue**\n**Severity: blocking**\n\n**Verdict:** Needs revision.';
        const revisedPlan = makePlanContent({ planId: 'plan-001', status: 'REVIEW' });
        const auditClean = '**Verdict:** Ready to approve.';
        const opts = await baseOpts(tmpDir, makeMockRuntime([auditBlocking, revisedPlan, auditClean]));
        const planContent = makePlanContent({ planId: 'plan-001', status: 'REVIEW' });
        const filePath = path.join(opts.plansDir, 'plan-001-test.md');
        await fs.writeFile(filePath, planContent, 'utf-8');
        const orchestrator = new ForgeOrchestrator(opts);
        // Request cancellation from the progress callback as soon as a message
        // mentioning blocking concerns is reported.
        const result = await orchestrator.resume('plan-001', filePath, 'Test Plan', async (msg) => {
            if (msg.includes('blocking concerns')) {
                orchestrator.requestCancel();
            }
        });
        expect(result.finalVerdict).toBe('CANCELLED');
    });
    it('rejects IMPLEMENTING plans', async () => {
        const tmpDir = await makeTmpDir();
        const opts = await baseOpts(tmpDir, makeMockRuntime([]));
        const planContent = makePlanContent({ planId: 'plan-001', status: 'IMPLEMENTING' });
        const filePath = path.join(opts.plansDir, 'plan-001-test.md');
        await fs.writeFile(filePath, planContent, 'utf-8');
        const orchestrator = new ForgeOrchestrator(opts);
        const result = await orchestrator.resume('plan-001', filePath, 'Test Plan', async () => { });
        expect(result.error).toBeDefined();
        expect(result.error).toContain('currently being implemented');
    });
    it('rejects APPROVED plans', async () => {
        const tmpDir = await makeTmpDir();
        const opts = await baseOpts(tmpDir, makeMockRuntime([]));
        const planContent = makePlanContent({ planId: 'plan-001', status: 'APPROVED' });
        const filePath = path.join(opts.plansDir, 'plan-001-test.md');
        await fs.writeFile(filePath, planContent, 'utf-8');
        const orchestrator = new ForgeOrchestrator(opts);
        const result = await orchestrator.resume('plan-001', filePath, 'Test Plan', async () => { });
        expect(result.error).toBeDefined();
        expect(result.error).toContain('approved');
        expect(result.error).toContain('downgrade');
    });
    it('uses correct round numbers when plan has existing reviews', async () => {
        const tmpDir = await makeTmpDir();
        const opts = await baseOpts(tmpDir, makeMockRuntime([
            '**Verdict:** Ready to approve.',
        ]));
        // Plan already has Review 1 and Review 2
        const planContent = makePlanContent({ planId: 'plan-001', status: 'REVIEW', reviews: 2 });
        const filePath = path.join(opts.plansDir, 'plan-001-test.md');
        await fs.writeFile(filePath, planContent, 'utf-8');
        const orchestrator = new ForgeOrchestrator(opts);
        await orchestrator.resume('plan-001', filePath, 'Test Plan', async () => { });
        // Read the plan file and verify the new review is Review 3
        const updatedContent = await fs.readFile(filePath, 'utf-8');
        expect(updatedContent).toContain('### Review 3');
    });
    it('rejects plans with missing required sections', async () => {
        const tmpDir = await makeTmpDir();
        const opts = await baseOpts(tmpDir, makeMockRuntime([]));
        // Plan missing Changes and Testing sections
        const planContent = [
            '# Plan: Incomplete Plan',
            '',
            '**ID:** plan-001',
            '**Task:** ws-test-001',
            '**Created:** 2026-01-01',
            '**Status:** REVIEW',
            '**Project:** discoclaw',
            '',
            '---',
            '',
            '## Objective',
            '',
            'Build the test feature with proper error handling.',
            '',
            '## Scope',
            '',
            'In scope: everything.',
            '',
            '## Risks',
            '',
            '- None.',
            '',
            '---',
            '',
            '## Audit Log',
            '',
            '---',
            '',
            '## Implementation Notes',
            '',
            '_Filled in during/after implementation._',
        ].join('\n');
        const filePath = path.join(opts.plansDir, 'plan-001-test.md');
        await fs.writeFile(filePath, planContent, 'utf-8');
        const orchestrator = new ForgeOrchestrator(opts);
        const result = await orchestrator.resume('plan-001', filePath, 'Incomplete Plan', async () => { });
        expect(result.error).toBeDefined();
        expect(result.error).toContain('structural issues');
        expect(result.error).toContain('Changes');
        expect(result.error).toContain('Testing');
    });
    it('rejects plans with placeholder sections (medium structural)', async () => {
        const tmpDir = await makeTmpDir();
        const opts = await baseOpts(tmpDir, makeMockRuntime([]));
        // Plan has all required sections but Objective is placeholder text
        const planContent = [
            '# Plan: Placeholder Plan',
            '',
            '**ID:** plan-001',
            '**Task:** ws-test-001',
            '**Created:** 2026-01-01',
            '**Status:** REVIEW',
            '**Project:** discoclaw',
            '',
            '---',
            '',
            '## Objective',
            '',
            '_(TODO)_',
            '',
            '## Scope',
            '',
            'In scope: everything related to testing.',
            '',
            '## Changes',
            '',
            '- `src/foo.ts` — Add bar function.',
            '',
            '## Risks',
            '',
            '- Low risk.',
            '',
            '## Testing',
            '',
            '- Unit tests for the new feature.',
            '',
            '---',
            '',
            '## Audit Log',
            '',
            '---',
            '',
            '## Implementation Notes',
            '',
            '_Filled in during/after implementation._',
        ].join('\n');
        const filePath = path.join(opts.plansDir, 'plan-001-test.md');
        await fs.writeFile(filePath, planContent, 'utf-8');
        const orchestrator = new ForgeOrchestrator(opts);
        const result = await orchestrator.resume('plan-001', filePath, 'Placeholder Plan', async () => { });
        expect(result.error).toBeDefined();
        expect(result.error).toContain('structural issues');
        expect(result.error).toContain('Objective');
    });
});
1241
// ---------------------------------------------------------------------------
// Forge session key tests
// ---------------------------------------------------------------------------
1244
/**
 * Creates a stub runtime that records every `invoke` parameter object and
 * replies with scripted text.
 *
 * Each call to `invoke` consumes the next entry of `responses` in order; once
 * the list is exhausted (or an entry is null/undefined) the reply text is
 * '(no response)'. Every reply is a one-event async stream holding a single
 * `text_final` event.
 *
 * @param {string[]} responses - Reply texts, one per expected invocation.
 * @returns {{ runtime: object, invocations: object[] }} The stub runtime and the
 *   live array that collects the params passed to `invoke`, in call order.
 */
function makeCaptureRuntime(responses) {
    let callIndex = 0;
    const invocations = [];
    const runtime = {
        id: 'claude_code',
        capabilities: new Set(['streaming_text', 'sessions']),
        invoke(params) {
            invocations.push(params);
            // Resolve the reply when invoke() is called, not when the stream
            // is consumed, so replies follow invocation order.
            const text = responses[callIndex] ?? '(no response)';
            callIndex++;
            return (async function* () {
                yield { type: 'text_final', text };
            })();
        },
    };
    return { runtime, invocations };
}
1261
// Tests for the sessionKey passed on each runtime invocation; every case
// inspects the params captured by makeCaptureRuntime.
describe('Forge session keys', () => {
    it('passes distinct sessionKey for drafter and auditor calls', async () => {
        const tmpDir = await makeTmpDir();
        const draftPlan = `# Plan: Test feature\n\n**ID:** (system)\n**Task:** (system)\n**Created:** 2026-01-01\n**Status:** DRAFT\n**Project:** discoclaw\n\n---\n\n## Objective\n\nBuild the thing.\n\n## Scope\n\nIn scope: everything.\n\n## Changes\n\n### File-by-file breakdown\n\n- src/foo.ts — add bar\n\n## Risks\n\n- None.\n\n## Testing\n\n- Unit tests.\n\n---\n\n## Audit Log\n\n---\n\n## Implementation Notes\n\n_Filled in during/after implementation._\n`;
        const auditClean = '**Verdict:** Ready to approve.';
        const { runtime, invocations } = makeCaptureRuntime([draftPlan, auditClean]);
        const opts = await baseOpts(tmpDir, runtime);
        const orchestrator = new ForgeOrchestrator(opts);
        await orchestrator.run('Test feature', async () => { });
        // Draft call (index 0) should have drafter session key
        expect(invocations[0].sessionKey).toContain(':drafter');
        // Audit call (index 1) should have auditor session key
        expect(invocations[1].sessionKey).toContain(':auditor');
        // Keys must be different
        expect(invocations[0].sessionKey).not.toBe(invocations[1].sessionKey);
    });
    it('session key includes model to prevent mismatch', async () => {
        const tmpDir = await makeTmpDir();
        const draftPlan = `# Plan: Test feature\n\n**ID:** (system)\n**Task:** (system)\n**Created:** 2026-01-01\n**Status:** DRAFT\n**Project:** discoclaw\n\n---\n\n## Objective\n\nBuild the thing.\n\n## Scope\n\nIn scope: everything.\n\n## Changes\n\n### File-by-file breakdown\n\n- src/foo.ts — add bar\n\n## Risks\n\n- None.\n\n## Testing\n\n- Unit tests.\n\n---\n\n## Audit Log\n\n---\n\n## Implementation Notes\n\n_Filled in during/after implementation._\n`;
        const auditClean = '**Verdict:** Ready to approve.';
        const { runtime, invocations } = makeCaptureRuntime([draftPlan, auditClean]);
        const opts = await baseOpts(tmpDir, runtime, {
            drafterModel: 'opus',
            auditorModel: 'sonnet',
        });
        const orchestrator = new ForgeOrchestrator(opts);
        await orchestrator.run('Test feature', async () => { });
        expect(invocations[0].sessionKey).toContain('opus');
        expect(invocations[0].sessionKey).toContain(':drafter');
        expect(invocations[1].sessionKey).toContain('sonnet');
        expect(invocations[1].sessionKey).toContain(':auditor');
    });
    it('session key includes planId for uniqueness', async () => {
        const tmpDir = await makeTmpDir();
        const draftPlan = `# Plan: Test feature\n\n**ID:** (system)\n**Task:** (system)\n**Created:** 2026-01-01\n**Status:** DRAFT\n**Project:** discoclaw\n\n---\n\n## Objective\n\nBuild the thing.\n\n## Scope\n\nIn scope: everything.\n\n## Changes\n\n### File-by-file breakdown\n\n- src/foo.ts — add bar\n\n## Risks\n\n- None.\n\n## Testing\n\n- Unit tests.\n\n---\n\n## Audit Log\n\n---\n\n## Implementation Notes\n\n_Filled in during/after implementation._\n`;
        const auditClean = '**Verdict:** Ready to approve.';
        const { runtime, invocations } = makeCaptureRuntime([draftPlan, auditClean]);
        const opts = await baseOpts(tmpDir, runtime);
        const orchestrator = new ForgeOrchestrator(opts);
        await orchestrator.run('Test feature', async () => { });
        // planId is plan-001 (auto-generated by handlePlanCommand)
        expect(invocations[0].sessionKey).toContain('plan-001');
        expect(invocations[1].sessionKey).toContain('plan-001');
    });
    it('revision step reuses drafter session key', async () => {
        const tmpDir = await makeTmpDir();
        const draftPlan = `# Plan: Test\n\n**ID:** (system)\n**Task:** (system)\n**Created:** 2026-01-01\n**Status:** DRAFT\n**Project:** discoclaw\n\n---\n\n## Objective\n\nDo something.\n\n## Scope\n\nStuff.\n\n## Changes\n\n## Risks\n\n## Testing\n\n---\n\n## Audit Log\n\n---\n\n## Implementation Notes\n\n_Filled in during/after implementation._\n`;
        const auditBlocking = '**Concern 1: Missing details**\n**Severity: blocking**\n\n**Verdict:** Needs revision.';
        const revisedPlan = draftPlan;
        const auditClean = '**Verdict:** Ready to approve.';
        // Draft -> Audit (blocking) -> Revise -> Audit (clean)
        const { runtime, invocations } = makeCaptureRuntime([draftPlan, auditBlocking, revisedPlan, auditClean]);
        const opts = await baseOpts(tmpDir, runtime);
        const orchestrator = new ForgeOrchestrator(opts);
        await orchestrator.run('Test feature', async () => { });
        // Call 0: draft (drafter key)
        // Call 1: audit round 1 (auditor key)
        // Call 2: revision (drafter key — same as call 0)
        // Call 3: audit round 2 (auditor key — same as call 1)
        expect(invocations[2].sessionKey).toBe(invocations[0].sessionKey);
        expect(invocations[3].sessionKey).toBe(invocations[1].sessionKey);
    });
    it('resume() also gets session keys via auditLoop', async () => {
        const tmpDir = await makeTmpDir();
        const { runtime, invocations } = makeCaptureRuntime([
            '**Verdict:** Ready to approve.',
        ]);
        const opts = await baseOpts(tmpDir, runtime);
        const planContent = makePlanContent({ planId: 'plan-001', status: 'REVIEW' });
        const filePath = path.join(opts.plansDir, 'plan-001-test.md');
        await fs.writeFile(filePath, planContent, 'utf-8');
        const orchestrator = new ForgeOrchestrator(opts);
        await orchestrator.resume('plan-001', filePath, 'Test Plan', async () => { });
        // The audit call should have a session key
        expect(invocations[0].sessionKey).toContain(':auditor');
        expect(invocations[0].sessionKey).toContain('plan-001');
    });
});
1339
// ---------------------------------------------------------------------------
// Auditor runtime tests
// ---------------------------------------------------------------------------
1342
+ describe('auditorRuntime support', () => {
1343
// With a separate auditorRuntime supplied in the options, the audit call is
// expected to be routed to it (exactly one invocation for a clean first audit).
it('auditorRuntime is used for audit calls when set', async () => {
    const tmpDir = await makeTmpDir();
    const draftPlan = `# Plan: Test feature\n\n**ID:** (system)\n**Task:** (system)\n**Created:** 2026-01-01\n**Status:** DRAFT\n**Project:** discoclaw\n\n---\n\n## Objective\n\nBuild the thing.\n\n## Scope\n\nIn scope: everything.\n\n## Changes\n\n### File-by-file breakdown\n\n- src/foo.ts — add bar\n\n## Risks\n\n- None.\n\n## Testing\n\n- Unit tests.\n\n---\n\n## Audit Log\n\n---\n\n## Implementation Notes\n\n_Filled in during/after implementation._\n`;
    const auditClean = '**Verdict:** Ready to approve.';
    // The drafter runtime is scripted with the draft only.
    const drafterRuntime = makeMockRuntime([draftPlan]);
    // Separate auditor runtime
    const auditorInvocations = [];
    const auditorRuntime = {
        id: 'openai',
        capabilities: new Set(['streaming_text']),
        invoke(params) {
            auditorInvocations.push(params);
            return (async function* () {
                yield { type: 'text_final', text: auditClean };
            })();
        },
    };
    const opts = await baseOpts(tmpDir, drafterRuntime, { auditorRuntime });
    const orchestrator = new ForgeOrchestrator(opts);
    const result = await orchestrator.run('Test feature', async () => { });
    expect(result.error).toBeUndefined();
    expect(result.rounds).toBe(1);
    // The auditor runtime should have been called
    expect(auditorInvocations).toHaveLength(1);
});
1368
// With auditorRuntime explicitly undefined, both the draft and the audit call
// are expected to reach the single default runtime.
it('falls back to default runtime when auditorRuntime is undefined', async () => {
    const tmpDir = await makeTmpDir();
    const draftPlan = `# Plan: Test feature\n\n**ID:** (system)\n**Task:** (system)\n**Created:** 2026-01-01\n**Status:** DRAFT\n**Project:** discoclaw\n\n---\n\n## Objective\n\nBuild the thing.\n\n## Scope\n\nIn scope: everything.\n\n## Changes\n\n### File-by-file breakdown\n\n- src/foo.ts — add bar\n\n## Risks\n\n- None.\n\n## Testing\n\n- Unit tests.\n\n---\n\n## Audit Log\n\n---\n\n## Implementation Notes\n\n_Filled in during/after implementation._\n`;
    const auditClean = '**Verdict:** Ready to approve.';
    const { runtime, invocations } = makeCaptureRuntime([draftPlan, auditClean]);
    const opts = await baseOpts(tmpDir, runtime, { auditorRuntime: undefined });
    const orchestrator = new ForgeOrchestrator(opts);
    await orchestrator.run('Test feature', async () => { });
    // Both drafter and auditor calls go to the same runtime
    expect(invocations).toHaveLength(2);
});
1379
// A non-Claude auditor (id 'openai') with no auditorModel configured must be
// invoked with an empty model string. Per the original note, this is meant to
// let the adapter fall back to its own default model.
it('non-Claude auditor runtime receives empty model string when auditorModel not set', async () => {
    const workDir = await makeTmpDir();
    const planText = `# Plan: Test feature\n\n**ID:** (system)\n**Task:** (system)\n**Created:** 2026-01-01\n**Status:** DRAFT\n**Project:** discoclaw\n\n---\n\n## Objective\n\nBuild the thing.\n\n## Scope\n\nIn scope: everything.\n\n## Changes\n\n### File-by-file breakdown\n\n- src/foo.ts — add bar\n\n## Risks\n\n- None.\n\n## Testing\n\n- Unit tests.\n\n---\n\n## Audit Log\n\n---\n\n## Implementation Notes\n\n_Filled in during/after implementation._\n`;
    const cleanVerdict = '**Verdict:** Ready to approve.';
    const drafterRuntime = makeMockRuntime([planText]);
    // Records every params object the auditor stub is invoked with.
    const auditCalls = [];
    async function* emitCleanVerdict() {
        yield { type: 'text_final', text: cleanVerdict };
    }
    const auditorRuntime = {
        id: 'openai',
        capabilities: new Set(['streaming_text']),
        invoke(params) {
            auditCalls.push(params);
            return emitCleanVerdict();
        },
    };
    // No auditorModel override is passed here on purpose.
    const forgeOpts = await baseOpts(workDir, drafterRuntime, { auditorRuntime });
    const forge = new ForgeOrchestrator(forgeOpts);
    await forge.run('Test feature', async () => { });
    expect(auditCalls[0].model).toBe('');
});
1402
// A non-Claude auditor (id 'openai') must be invoked with an empty tool list
// and without addDirs or a sessionKey.
it('non-Claude auditor receives no tools, addDirs, or sessionKey', async () => {
    const workDir = await makeTmpDir();
    const planText = `# Plan: Test feature\n\n**ID:** (system)\n**Task:** (system)\n**Created:** 2026-01-01\n**Status:** DRAFT\n**Project:** discoclaw\n\n---\n\n## Objective\n\nBuild the thing.\n\n## Scope\n\nIn scope: everything.\n\n## Changes\n\n### File-by-file breakdown\n\n- src/foo.ts — add bar\n\n## Risks\n\n- None.\n\n## Testing\n\n- Unit tests.\n\n---\n\n## Audit Log\n\n---\n\n## Implementation Notes\n\n_Filled in during/after implementation._\n`;
    const cleanVerdict = '**Verdict:** Ready to approve.';
    const drafterRuntime = makeMockRuntime([planText]);
    // Records every params object the auditor stub is invoked with.
    const auditCalls = [];
    async function* emitCleanVerdict() {
        yield { type: 'text_final', text: cleanVerdict };
    }
    const auditorRuntime = {
        id: 'openai',
        capabilities: new Set(['streaming_text']),
        invoke(params) {
            auditCalls.push(params);
            return emitCleanVerdict();
        },
    };
    const forgeOpts = await baseOpts(workDir, drafterRuntime, { auditorRuntime });
    const forge = new ForgeOrchestrator(forgeOpts);
    await forge.run('Test feature', async () => { });
    // Inspect the parameters of the first (and only expected) audit call.
    const firstAuditCall = auditCalls[0];
    expect(firstAuditCall.tools).toEqual([]);
    expect(firstAuditCall.addDirs).toBeUndefined();
    expect(firstAuditCall.sessionKey).toBeUndefined();
});
1425
+ });
1426
+ // ---------------------------------------------------------------------------
1427
+ // buildAuditorPrompt hasTools option
1428
+ // ---------------------------------------------------------------------------
1429
// Checks that the `hasTools` option switches the auditor prompt between the
// tool-usage wording and the "no codebase access" wording.
describe('buildAuditorPrompt hasTools option', () => {
    // Phrases that only appear when the auditor is told it has tools.
    const TOOL_PHRASES = ['Read, Glob, and Grep tools', 'Use them before raising concerns'];
    const NO_CODEBASE_PHRASE = 'You do not have access to the codebase';

    it('hasTools=false omits tool instructions', () => {
        const promptText = buildAuditorPrompt('# Plan: Test', 1, undefined, { hasTools: false });
        for (const phrase of TOOL_PHRASES) {
            expect(promptText).not.toContain(phrase);
        }
        expect(promptText).toContain(NO_CODEBASE_PHRASE);
        expect(promptText).toContain('logical consistency');
    });

    it('hasTools=true (default) includes tool instructions', () => {
        // No options argument: the default is expected to behave like hasTools=true.
        const promptText = buildAuditorPrompt('# Plan: Test', 1);
        for (const phrase of TOOL_PHRASES) {
            expect(promptText).toContain(phrase);
        }
        expect(promptText).not.toContain(NO_CODEBASE_PHRASE);
    });
});
1444
+ // ---------------------------------------------------------------------------
1445
+ // Drafter runtime tests
1446
+ // ---------------------------------------------------------------------------
1447
// Smallest plan document used as the scripted drafter reply in the tests below.
// Written one markdown line per array entry; the trailing '' yields the final newline.
const MINIMAL_DRAFT_PLAN = [
    '# Plan: Test feature',
    '',
    '**ID:** (system)',
    '**Task:** (system)',
    '**Created:** 2026-01-01',
    '**Status:** DRAFT',
    '**Project:** discoclaw',
    '',
    '---',
    '',
    '## Objective',
    '',
    'Build the thing.',
    '',
    '## Scope',
    '',
    'In scope: everything.',
    '',
    '## Changes',
    '',
    '### File-by-file breakdown',
    '',
    '- src/foo.ts — add bar',
    '',
    '## Risks',
    '',
    '- None.',
    '',
    '## Testing',
    '',
    '- Unit tests.',
    '',
    '---',
    '',
    '## Audit Log',
    '',
    '---',
    '',
    '## Implementation Notes',
    '',
    '_Filled in during/after implementation._',
    '',
].join('\n');
1448
// Exercises the optional `drafterRuntime` override: which runtime receives the
// draft/revision calls, and which invoke parameters a non-Claude drafter gets.
describe('drafterRuntime support', () => {
    const AUDIT_CLEAN = '**Verdict:** Ready to approve.';

    /**
     * Builds a non-Claude (id 'openai') drafter stub that records each invoke
     * params object and always answers with MINIMAL_DRAFT_PLAN.
     * @returns {{ drafterRuntime: object, drafterInvocations: object[] }}
     */
    function makeRecordingDrafter() {
        const drafterInvocations = [];
        const drafterRuntime = {
            id: 'openai',
            capabilities: new Set(['streaming_text']),
            invoke(params) {
                drafterInvocations.push(params);
                return (async function* () {
                    yield { type: 'text_final', text: MINIMAL_DRAFT_PLAN };
                })();
            },
        };
        return { drafterRuntime, drafterInvocations };
    }

    it('drafterRuntime is used for draft calls when set', async () => {
        const workDir = await makeTmpDir();
        const { drafterRuntime, drafterInvocations } = makeRecordingDrafter();
        // The default runtime only has to serve the audit here.
        const defaultRuntime = makeMockRuntime([AUDIT_CLEAN]);
        const forgeOpts = await baseOpts(workDir, defaultRuntime, { drafterRuntime });
        const outcome = await new ForgeOrchestrator(forgeOpts).run('Test feature', async () => { });
        expect(outcome.error).toBeUndefined();
        expect(drafterInvocations).toHaveLength(1);
    });

    it('drafterRuntime is used for revision calls when set', async () => {
        const workDir = await makeTmpDir();
        const auditBlocking = '**Concern 1: Missing details**\n**Severity: blocking**\n\n**Verdict:** Needs revision.';
        const { drafterRuntime, drafterInvocations } = makeRecordingDrafter();
        // First audit blocks (forcing a revision), second audit approves.
        const defaultRuntime = makeMockRuntime([auditBlocking, AUDIT_CLEAN]);
        const forgeOpts = await baseOpts(workDir, defaultRuntime, { drafterRuntime });
        const outcome = await new ForgeOrchestrator(forgeOpts).run('Test feature', async () => { });
        expect(outcome.error).toBeUndefined();
        // Drafter call 0 is the initial draft, call 1 is the revision.
        expect(drafterInvocations).toHaveLength(2);
    });

    it('falls back to default runtime when drafterRuntime is undefined', async () => {
        const workDir = await makeTmpDir();
        const { runtime: sharedRuntime, invocations: sharedCalls } = makeCaptureRuntime([MINIMAL_DRAFT_PLAN, AUDIT_CLEAN]);
        const forgeOpts = await baseOpts(workDir, sharedRuntime, { drafterRuntime: undefined });
        await new ForgeOrchestrator(forgeOpts).run('Test feature', async () => { });
        // Draft and audit calls both go through the single default runtime.
        expect(sharedCalls).toHaveLength(2);
    });

    it('non-Claude drafter runtime receives empty model string when drafterModel not set', async () => {
        const workDir = await makeTmpDir();
        const { drafterRuntime, drafterInvocations } = makeRecordingDrafter();
        // No drafterModel override is passed, so the non-Claude drafter should see ''.
        const forgeOpts = await baseOpts(workDir, makeMockRuntime([AUDIT_CLEAN]), { drafterRuntime });
        await new ForgeOrchestrator(forgeOpts).run('Test feature', async () => { });
        expect(drafterInvocations[0].model).toBe('');
    });

    it('non-Claude drafter runtime receives no sessionKey', async () => {
        const workDir = await makeTmpDir();
        const { drafterRuntime, drafterInvocations } = makeRecordingDrafter();
        const forgeOpts = await baseOpts(workDir, makeMockRuntime([AUDIT_CLEAN]), { drafterRuntime });
        await new ForgeOrchestrator(forgeOpts).run('Test feature', async () => { });
        expect(drafterInvocations[0].sessionKey).toBeUndefined();
    });
});
1543
+ // ---------------------------------------------------------------------------
1544
+ // ForgeOrchestrator onEvent threading
1545
+ // ---------------------------------------------------------------------------
1546
/**
 * Creates a scripted mock runtime (id 'claude_code') for event-threading tests.
 *
 * Each call to `invoke` consumes the next entry of the script, replays that
 * entry's `events` in order, and then emits one `text_final` event carrying
 * the entry's `text`.
 *
 * @param {Array<{ text?: string, events?: object[] }>} [responseMap] Ordered
 *   script, one entry per expected `invoke` call. Calls beyond the end of the
 *   script, and entries without `text`, answer with '(no response)'.
 * @returns {{ id: string, capabilities: Set<string>, invoke: Function }}
 *   Runtime stub whose `invoke` returns an async generator of events.
 */
function makeMockRuntimeWithEvents(responseMap = []) {
    const PLACEHOLDER_TEXT = '(no response)';
    let callIndex = 0;
    return {
        id: 'claude_code',
        capabilities: new Set(['streaming_text']),
        invoke(_params) {
            // Resolve the scripted entry eagerly so the call order is fixed at
            // invoke time, not when the generator is first iterated.
            const entry = responseMap[callIndex] ?? {};
            callIndex++;
            const scriptedEvents = entry.events ?? [];
            // An events-only entry must not produce `text: undefined`.
            const finalText = entry.text ?? PLACEHOLDER_TEXT;
            return (async function* () {
                for (const evt of scriptedEvents) {
                    yield evt;
                }
                yield { type: 'text_final', text: finalText };
            })();
        },
    };
}
1562
// Verifies that the optional fourth `run` argument (onEvent) is fed runtime
// events from both phases, and that a throwing callback does not fail the run.
describe('ForgeOrchestrator onEvent threading', () => {
    // Skeleton plan shared by every case in this suite.
    const SKELETON_PLAN = `# Plan: Test feature\n\n**ID:** (system)\n**Task:** (system)\n**Created:** 2026-01-01\n**Status:** DRAFT\n**Project:** discoclaw\n\n---\n\n## Objective\n\nDo something.\n\n## Scope\n\n## Changes\n\n## Risks\n\n## Testing\n\n---\n\n## Audit Log\n\n---\n\n## Implementation Notes\n\n_Filled in during/after implementation._\n`;
    const AUDIT_CLEAN = '**Verdict:** Ready to approve.';

    /** Runs one forge pass in `workDir` against `runtime`, forwarding `onEvent`. */
    async function runForge(workDir, runtime, onEvent) {
        const forgeOpts = await baseOpts(workDir, runtime);
        const forge = new ForgeOrchestrator(forgeOpts);
        return forge.run('Test feature', async () => { }, undefined, onEvent);
    }

    /** True when at least one collected event has the given type. */
    const sawEventOfType = (events, wanted) => events.some((e) => e.type === wanted);

    it('onEvent spy receives events during draft phase', async () => {
        const workDir = await makeTmpDir();
        // Only the draft call carries an extra streaming event.
        const runtime = makeMockRuntimeWithEvents([
            { text: SKELETON_PLAN, events: [{ type: 'text_delta', text: 'drafting...' }] },
            { text: AUDIT_CLEAN },
        ]);
        const seen = [];
        await runForge(workDir, runtime, (evt) => seen.push(evt));
        expect(sawEventOfType(seen, 'text_delta')).toBe(true);
        expect(sawEventOfType(seen, 'text_final')).toBe(true);
    });

    it('onEvent spy receives events during audit phase', async () => {
        const workDir = await makeTmpDir();
        // Only the audit call carries an extra streaming event.
        const runtime = makeMockRuntimeWithEvents([
            { text: SKELETON_PLAN },
            { text: AUDIT_CLEAN, events: [{ type: 'text_delta', text: 'auditing...' }] },
        ]);
        const seen = [];
        await runForge(workDir, runtime, (evt) => seen.push(evt));
        expect(sawEventOfType(seen, 'text_delta')).toBe(true);
    });

    it('throwing onEvent does not abort forge execution', async () => {
        const workDir = await makeTmpDir();
        const runtime = makeMockRuntime([SKELETON_PLAN, AUDIT_CLEAN]);
        const outcome = await runForge(workDir, runtime, () => {
            throw new Error('callback exploded');
        });
        // The run must still finish cleanly in a single round.
        expect(outcome.error).toBeUndefined();
        expect(outcome.rounds).toBe(1);
    });
});