@nathapp/nax 0.18.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (459) hide show
  1. package/.gitlab-ci.yml +96 -0
  2. package/BRIEF.md +140 -0
  3. package/CHANGELOG.md +60 -0
  4. package/CLAUDE.md +159 -0
  5. package/README.md +373 -0
  6. package/US-007-IMPLEMENTATION.md +139 -0
  7. package/bin/nax.ts +930 -0
  8. package/biome.json +14 -0
  9. package/bun.lock +168 -0
  10. package/bunfig.toml +11 -0
  11. package/docs/20260216-fix-plan-context-review.md +56 -0
  12. package/docs/20260216-relentless-vs-ngent-comparison.md +208 -0
  13. package/docs/20260216-v02-plan.md +136 -0
  14. package/docs/20260216-v02-review.md +685 -0
  15. package/docs/20260217-dogfood-findings.md +56 -0
  16. package/docs/20260217-p2-plus-plan.md +117 -0
  17. package/docs/20260217-partial-fixes-plan.md +62 -0
  18. package/docs/20260217-plan-analyze-spec.md +117 -0
  19. package/docs/20260217-post-impl-review.md +1137 -0
  20. package/docs/20260217-quick-wins-plan.md +66 -0
  21. package/docs/20260217-split-runner-plan.md +75 -0
  22. package/docs/20260217-v03-impl-plan.md +80 -0
  23. package/docs/20260217-v03-post-impl-review.md +589 -0
  24. package/docs/20260217-v04-impl-plan.md +86 -0
  25. package/docs/20260217-v05-post-impl-review.md +850 -0
  26. package/docs/20260217-v06-post-impl-review.md +817 -0
  27. package/docs/20260218-adr003-port-plan.md +151 -0
  28. package/docs/20260218-review-adr003-verification.md +175 -0
  29. package/docs/20260219-fix-plan-bug16-19.md +79 -0
  30. package/docs/20260219-fix-plan-bug20-22.md +114 -0
  31. package/docs/20260219-plan-llm-routing.md +116 -0
  32. package/docs/20260219-review-bug20-22-fixes.md +135 -0
  33. package/docs/20260219-routing-baseline-keyword.md +63 -0
  34. package/docs/20260220-plan-structured-logging-p1.md +80 -0
  35. package/docs/20260220-plan-structured-logging-p2.md +37 -0
  36. package/docs/20260220-review-llm-routing.md +180 -0
  37. package/docs/20260220-review-post-fix-llm-routing.md +70 -0
  38. package/docs/20260221-fix-plan-relevantfiles-split.md +101 -0
  39. package/docs/20260221-fix-plan-routing-mode.md +125 -0
  40. package/docs/20260221-review-v0.9-implementation.md +379 -0
  41. package/docs/20260222-fix-plan-v091-routing-isolation.md +197 -0
  42. package/docs/20260223-fix-plan-prompt-audit.md +62 -0
  43. package/docs/20260224-nax-roadmap-phases.md +189 -0
  44. package/docs/20260225-phase2-llm-service-layer.md +401 -0
  45. package/docs/20260225-review-v0.10.1.md +187 -0
  46. package/docs/20260303-v010-implementation-plan.md +165 -0
  47. package/docs/CLAUDE.md.bak +191 -0
  48. package/docs/ROADMAP.md +165 -0
  49. package/docs/SPEC-rectification.md +0 -0
  50. package/docs/SPEC.md +324 -0
  51. package/docs/US-001-plugin-loading-verification.md +152 -0
  52. package/docs/architecture-analysis.md +1076 -0
  53. package/docs/bugs/BUG-21-escalation-null-attempts.md +48 -0
  54. package/docs/bugs-from-dogfood-run-c.md +243 -0
  55. package/docs/code-review-20260228.md +612 -0
  56. package/docs/code-review-v0.15.0.md +629 -0
  57. package/docs/hook-lifecycle-test-plan.md +149 -0
  58. package/docs/releases/v0.11.0-and-earlier.md +20 -0
  59. package/docs/releases/v0.12.0.md +15 -0
  60. package/docs/releases/v0.13.0.md +14 -0
  61. package/docs/releases/v0.14.0.md +20 -0
  62. package/docs/releases/v0.14.1.md +36 -0
  63. package/docs/releases/v0.14.2.md +51 -0
  64. package/docs/releases/v0.14.3.md +174 -0
  65. package/docs/releases/v0.14.4.md +94 -0
  66. package/docs/releases/v0.15.0.md +502 -0
  67. package/docs/releases/v0.15.1.md +170 -0
  68. package/docs/releases/v0.15.3.md +193 -0
  69. package/docs/specs/status-file-v0.10.1.md +812 -0
  70. package/docs/v0.10-global-config.md +206 -0
  71. package/docs/v0.10-plugin-system.md +415 -0
  72. package/docs/v0.10-prompt-optimizer.md +234 -0
  73. package/docs/v0.3-spec.md +244 -0
  74. package/docs/v0.4-spec.md +140 -0
  75. package/docs/v0.5-spec.md +237 -0
  76. package/docs/v0.6-spec.md +371 -0
  77. package/docs/v0.7-spec.md +177 -0
  78. package/docs/v0.8-llm-routing.md +206 -0
  79. package/docs/v0.8-structured-logging.md +132 -0
  80. package/docs/v0.9.3-prompt-audit.md +112 -0
  81. package/examples/plugins/console-reporter/index.test.ts +207 -0
  82. package/examples/plugins/console-reporter/index.ts +110 -0
  83. package/nax/config.json +147 -0
  84. package/nax/features/bugfix-v0171/prd.json +52 -0
  85. package/nax/features/config-management/prd.json +108 -0
  86. package/nax/features/config-management/progress.txt +5 -0
  87. package/nax/features/diagnose/acceptance.test.ts +412 -0
  88. package/nax/features/diagnose/prd.json +41 -0
  89. package/nax/features/orchestration-fixes/prd.json +89 -0
  90. package/nax/features/orchestration-fixes/progress.txt +1 -0
  91. package/nax/features/plugin-integration/US-007-VERIFICATION.md +259 -0
  92. package/nax/features/plugin-integration/prd.json +208 -0
  93. package/nax/features/plugin-integration/progress.txt +5 -0
  94. package/nax/features/precheck/prd.json +205 -0
  95. package/nax/features/precheck/progress.txt +15 -0
  96. package/nax/features/structured-logging/prd.json +199 -0
  97. package/nax/features/unlock/prd.json +36 -0
  98. package/package.json +47 -0
  99. package/src/acceptance/fix-generator.ts +348 -0
  100. package/src/acceptance/generator.ts +282 -0
  101. package/src/acceptance/index.ts +30 -0
  102. package/src/acceptance/types.ts +79 -0
  103. package/src/agents/claude-decompose.ts +169 -0
  104. package/src/agents/claude-plan.ts +139 -0
  105. package/src/agents/claude.ts +324 -0
  106. package/src/agents/cost.ts +268 -0
  107. package/src/agents/index.ts +13 -0
  108. package/src/agents/registry.ts +48 -0
  109. package/src/agents/types-extended.ts +133 -0
  110. package/src/agents/types.ts +113 -0
  111. package/src/agents/validation.ts +69 -0
  112. package/src/analyze/classifier.ts +305 -0
  113. package/src/analyze/index.ts +16 -0
  114. package/src/analyze/scanner.ts +175 -0
  115. package/src/analyze/types.ts +51 -0
  116. package/src/cli/accept.ts +108 -0
  117. package/src/cli/analyze-parser.ts +284 -0
  118. package/src/cli/analyze.ts +207 -0
  119. package/src/cli/config.ts +561 -0
  120. package/src/cli/constitution.ts +109 -0
  121. package/src/cli/diagnose-analysis.ts +159 -0
  122. package/src/cli/diagnose-formatter.ts +87 -0
  123. package/src/cli/diagnose.ts +203 -0
  124. package/src/cli/generate.ts +127 -0
  125. package/src/cli/index.ts +37 -0
  126. package/src/cli/init.ts +188 -0
  127. package/src/cli/interact.ts +295 -0
  128. package/src/cli/plan.ts +198 -0
  129. package/src/cli/plugins.ts +111 -0
  130. package/src/cli/prompts.ts +295 -0
  131. package/src/cli/runs.ts +174 -0
  132. package/src/cli/status-cost.ts +151 -0
  133. package/src/cli/status-features.ts +338 -0
  134. package/src/cli/status.ts +13 -0
  135. package/src/commands/common.ts +171 -0
  136. package/src/commands/diagnose.ts +17 -0
  137. package/src/commands/index.ts +8 -0
  138. package/src/commands/logs.ts +384 -0
  139. package/src/commands/precheck.ts +86 -0
  140. package/src/commands/unlock.ts +96 -0
  141. package/src/config/defaults.ts +160 -0
  142. package/src/config/index.ts +22 -0
  143. package/src/config/loader.ts +121 -0
  144. package/src/config/merger.ts +147 -0
  145. package/src/config/path-security.ts +121 -0
  146. package/src/config/paths.ts +27 -0
  147. package/src/config/schema.ts +56 -0
  148. package/src/config/schemas.ts +286 -0
  149. package/src/config/types.ts +423 -0
  150. package/src/config/validate.ts +103 -0
  151. package/src/constitution/generator.ts +191 -0
  152. package/src/constitution/generators/aider.ts +41 -0
  153. package/src/constitution/generators/claude.ts +35 -0
  154. package/src/constitution/generators/cursor.ts +36 -0
  155. package/src/constitution/generators/opencode.ts +38 -0
  156. package/src/constitution/generators/types.ts +33 -0
  157. package/src/constitution/generators/windsurf.ts +36 -0
  158. package/src/constitution/index.ts +10 -0
  159. package/src/constitution/loader.ts +133 -0
  160. package/src/constitution/types.ts +31 -0
  161. package/src/context/auto-detect.ts +227 -0
  162. package/src/context/builder.ts +246 -0
  163. package/src/context/elements.ts +83 -0
  164. package/src/context/formatter.ts +107 -0
  165. package/src/context/generator.ts +129 -0
  166. package/src/context/generators/aider.ts +34 -0
  167. package/src/context/generators/claude.ts +28 -0
  168. package/src/context/generators/cursor.ts +28 -0
  169. package/src/context/generators/opencode.ts +30 -0
  170. package/src/context/generators/windsurf.ts +28 -0
  171. package/src/context/greenfield.ts +114 -0
  172. package/src/context/index.ts +33 -0
  173. package/src/context/injector.ts +279 -0
  174. package/src/context/test-scanner.ts +370 -0
  175. package/src/context/types.ts +98 -0
  176. package/src/errors.ts +67 -0
  177. package/src/execution/batching.ts +157 -0
  178. package/src/execution/crash-recovery.ts +373 -0
  179. package/src/execution/escalation/escalation.ts +44 -0
  180. package/src/execution/escalation/index.ts +13 -0
  181. package/src/execution/escalation/tier-escalation.ts +295 -0
  182. package/src/execution/escalation/tier-outcome.ts +158 -0
  183. package/src/execution/helpers.ts +38 -0
  184. package/src/execution/index.ts +45 -0
  185. package/src/execution/lifecycle/acceptance-loop.ts +272 -0
  186. package/src/execution/lifecycle/headless-formatter.ts +85 -0
  187. package/src/execution/lifecycle/index.ts +12 -0
  188. package/src/execution/lifecycle/parallel-lifecycle.ts +101 -0
  189. package/src/execution/lifecycle/precheck-runner.ts +140 -0
  190. package/src/execution/lifecycle/run-cleanup.ts +81 -0
  191. package/src/execution/lifecycle/run-completion.ts +129 -0
  192. package/src/execution/lifecycle/run-initialization.ts +141 -0
  193. package/src/execution/lifecycle/run-lifecycle.ts +312 -0
  194. package/src/execution/lifecycle/run-setup.ts +204 -0
  195. package/src/execution/lifecycle/story-hooks.ts +38 -0
  196. package/src/execution/lifecycle/story-size-prompts.ts +123 -0
  197. package/src/execution/lock.ts +115 -0
  198. package/src/execution/parallel-executor.ts +216 -0
  199. package/src/execution/parallel.ts +400 -0
  200. package/src/execution/pid-registry.ts +280 -0
  201. package/src/execution/pipeline-result-handler.ts +388 -0
  202. package/src/execution/post-verify-rectification.ts +188 -0
  203. package/src/execution/post-verify.ts +274 -0
  204. package/src/execution/progress.ts +25 -0
  205. package/src/execution/prompts.ts +127 -0
  206. package/src/execution/queue-handler.ts +109 -0
  207. package/src/execution/rectification.ts +13 -0
  208. package/src/execution/runner.ts +377 -0
  209. package/src/execution/sequential-executor.ts +388 -0
  210. package/src/execution/status-file.ts +264 -0
  211. package/src/execution/status-writer.ts +139 -0
  212. package/src/execution/story-context.ts +229 -0
  213. package/src/execution/test-output-parser.ts +14 -0
  214. package/src/execution/verification.ts +72 -0
  215. package/src/hooks/index.ts +2 -0
  216. package/src/hooks/runner.ts +286 -0
  217. package/src/hooks/types.ts +67 -0
  218. package/src/interaction/chain.ts +154 -0
  219. package/src/interaction/index.ts +60 -0
  220. package/src/interaction/init.ts +83 -0
  221. package/src/interaction/plugins/auto.ts +217 -0
  222. package/src/interaction/plugins/cli.ts +300 -0
  223. package/src/interaction/plugins/telegram.ts +384 -0
  224. package/src/interaction/plugins/webhook.ts +258 -0
  225. package/src/interaction/state.ts +171 -0
  226. package/src/interaction/triggers.ts +229 -0
  227. package/src/interaction/types.ts +163 -0
  228. package/src/logger/formatters.ts +84 -0
  229. package/src/logger/index.ts +16 -0
  230. package/src/logger/logger.ts +298 -0
  231. package/src/logger/types.ts +48 -0
  232. package/src/logging/formatter.ts +355 -0
  233. package/src/logging/index.ts +22 -0
  234. package/src/logging/types.ts +93 -0
  235. package/src/metrics/aggregator.ts +190 -0
  236. package/src/metrics/index.ts +14 -0
  237. package/src/metrics/tracker.ts +200 -0
  238. package/src/metrics/types.ts +109 -0
  239. package/src/optimizer/index.ts +62 -0
  240. package/src/optimizer/noop.optimizer.ts +24 -0
  241. package/src/optimizer/rule-based.optimizer.ts +248 -0
  242. package/src/optimizer/types.ts +53 -0
  243. package/src/pipeline/events.ts +130 -0
  244. package/src/pipeline/index.ts +19 -0
  245. package/src/pipeline/runner.ts +161 -0
  246. package/src/pipeline/stages/acceptance.ts +197 -0
  247. package/src/pipeline/stages/completion.ts +99 -0
  248. package/src/pipeline/stages/constitution.ts +63 -0
  249. package/src/pipeline/stages/context.ts +117 -0
  250. package/src/pipeline/stages/execution.ts +194 -0
  251. package/src/pipeline/stages/index.ts +62 -0
  252. package/src/pipeline/stages/optimizer.ts +74 -0
  253. package/src/pipeline/stages/prompt.ts +57 -0
  254. package/src/pipeline/stages/queue-check.ts +103 -0
  255. package/src/pipeline/stages/review.ts +181 -0
  256. package/src/pipeline/stages/routing.ts +81 -0
  257. package/src/pipeline/stages/verify.ts +100 -0
  258. package/src/pipeline/types.ts +167 -0
  259. package/src/plugins/index.ts +31 -0
  260. package/src/plugins/loader.ts +287 -0
  261. package/src/plugins/registry.ts +168 -0
  262. package/src/plugins/types.ts +327 -0
  263. package/src/plugins/validator.ts +352 -0
  264. package/src/prd/index.ts +172 -0
  265. package/src/prd/types.ts +202 -0
  266. package/src/precheck/checks-blockers.ts +391 -0
  267. package/src/precheck/checks-warnings.ts +142 -0
  268. package/src/precheck/checks.ts +30 -0
  269. package/src/precheck/index.ts +247 -0
  270. package/src/precheck/story-size-gate.ts +144 -0
  271. package/src/precheck/types.ts +31 -0
  272. package/src/queue/index.ts +2 -0
  273. package/src/queue/manager.ts +254 -0
  274. package/src/queue/types.ts +54 -0
  275. package/src/review/index.ts +8 -0
  276. package/src/review/runner.ts +172 -0
  277. package/src/review/types.ts +66 -0
  278. package/src/routing/builder.ts +81 -0
  279. package/src/routing/chain.ts +74 -0
  280. package/src/routing/index.ts +16 -0
  281. package/src/routing/loader.ts +58 -0
  282. package/src/routing/router.ts +303 -0
  283. package/src/routing/strategies/adaptive.ts +215 -0
  284. package/src/routing/strategies/index.ts +8 -0
  285. package/src/routing/strategies/keyword.ts +163 -0
  286. package/src/routing/strategies/llm-prompts.ts +209 -0
  287. package/src/routing/strategies/llm.ts +235 -0
  288. package/src/routing/strategies/manual.ts +50 -0
  289. package/src/routing/strategy.ts +99 -0
  290. package/src/tdd/cleanup.ts +111 -0
  291. package/src/tdd/index.ts +23 -0
  292. package/src/tdd/isolation.ts +123 -0
  293. package/src/tdd/orchestrator.ts +383 -0
  294. package/src/tdd/prompts.ts +270 -0
  295. package/src/tdd/rectification-gate.ts +183 -0
  296. package/src/tdd/session-runner.ts +179 -0
  297. package/src/tdd/types.ts +81 -0
  298. package/src/tdd/verdict.ts +271 -0
  299. package/src/tui/App.tsx +265 -0
  300. package/src/tui/components/AgentPanel.tsx +75 -0
  301. package/src/tui/components/CostOverlay.tsx +118 -0
  302. package/src/tui/components/HelpOverlay.tsx +107 -0
  303. package/src/tui/components/StatusBar.tsx +63 -0
  304. package/src/tui/components/StoriesPanel.tsx +177 -0
  305. package/src/tui/hooks/useKeyboard.ts +142 -0
  306. package/src/tui/hooks/useLayout.ts +137 -0
  307. package/src/tui/hooks/usePipelineEvents.ts +183 -0
  308. package/src/tui/hooks/usePty.ts +194 -0
  309. package/src/tui/index.tsx +38 -0
  310. package/src/tui/types.ts +76 -0
  311. package/src/utils/git.ts +83 -0
  312. package/src/utils/queue-writer.ts +54 -0
  313. package/src/verification/executor.ts +235 -0
  314. package/src/verification/gate.ts +207 -0
  315. package/src/verification/index.ts +12 -0
  316. package/src/verification/parser.ts +230 -0
  317. package/src/verification/rectification.ts +108 -0
  318. package/src/verification/types.ts +113 -0
  319. package/src/worktree/dispatcher.ts +65 -0
  320. package/src/worktree/index.ts +2 -0
  321. package/src/worktree/manager.ts +187 -0
  322. package/src/worktree/merge.ts +301 -0
  323. package/src/worktree/types.ts +4 -0
  324. package/test/TEST_COVERAGE_US001.md +217 -0
  325. package/test/TEST_COVERAGE_US003.md +84 -0
  326. package/test/TEST_COVERAGE_US005.md +86 -0
  327. package/test/US-002-orchestrator.test.ts +246 -0
  328. package/test/acceptance/cm-003-default-view.test.ts +194 -0
  329. package/test/execution/pid-registry.test.ts +240 -0
  330. package/test/execution/post-verify.test.ts +224 -0
  331. package/test/helpers/timeout.ts +42 -0
  332. package/test/integration/US-002-TEST-SUMMARY.md +107 -0
  333. package/test/integration/US-003-TEST-SUMMARY.md +149 -0
  334. package/test/integration/US-004-TEST-SUMMARY.md +106 -0
  335. package/test/integration/US-005-TEST-SUMMARY.md +138 -0
  336. package/test/integration/US-007-TEST-SUMMARY.md +100 -0
  337. package/test/integration/agent-validation.test.ts +439 -0
  338. package/test/integration/analyze-integration.test.ts +261 -0
  339. package/test/integration/analyze-scanner.test.ts +131 -0
  340. package/test/integration/cli-config-default-edge-cases.test.ts +222 -0
  341. package/test/integration/cli-config-default-view.test.ts +229 -0
  342. package/test/integration/cli-config-diff.test.ts +460 -0
  343. package/test/integration/cli-config.test.ts +736 -0
  344. package/test/integration/cli-diagnose.test.ts +592 -0
  345. package/test/integration/cli-logs.test.ts +314 -0
  346. package/test/integration/cli-plugins.test.ts +678 -0
  347. package/test/integration/cli-precheck.test.ts +371 -0
  348. package/test/integration/cli-run-headless.test.ts +173 -0
  349. package/test/integration/cli.test.ts +75 -0
  350. package/test/integration/config/merger.test.ts +465 -0
  351. package/test/integration/config/paths.test.ts +51 -0
  352. package/test/integration/config-loader.test.ts +265 -0
  353. package/test/integration/config.test.ts +444 -0
  354. package/test/integration/context-integration.test.ts +702 -0
  355. package/test/integration/context-provider-injection.test.ts +506 -0
  356. package/test/integration/context-verification-integration.test.ts +295 -0
  357. package/test/integration/e2e.test.ts +896 -0
  358. package/test/integration/execution.test.ts +625 -0
  359. package/test/integration/helpers.test.ts +295 -0
  360. package/test/integration/hooks.test.ts +361 -0
  361. package/test/integration/interaction-chain-pipeline.test.ts +464 -0
  362. package/test/integration/isolation.test.ts +143 -0
  363. package/test/integration/logger.test.ts +461 -0
  364. package/test/integration/parallel.test.ts +250 -0
  365. package/test/integration/path-security.test.ts +173 -0
  366. package/test/integration/pipeline-acceptance.test.ts +302 -0
  367. package/test/integration/pipeline-events.test.ts +475 -0
  368. package/test/integration/pipeline.test.ts +658 -0
  369. package/test/integration/plan.test.ts +157 -0
  370. package/test/integration/plugin-routing.test.ts +921 -0
  371. package/test/integration/plugins/config-integration.test.ts +172 -0
  372. package/test/integration/plugins/config-resolution.test.ts +522 -0
  373. package/test/integration/plugins/loader.test.ts +641 -0
  374. package/test/integration/plugins/registry.test.ts +746 -0
  375. package/test/integration/plugins/validator.test.ts +563 -0
  376. package/test/integration/prd-pause.test.ts +205 -0
  377. package/test/integration/prd-resolvers.test.ts +185 -0
  378. package/test/integration/precheck-integration.test.ts +468 -0
  379. package/test/integration/precheck.test.ts +805 -0
  380. package/test/integration/progress.test.ts +34 -0
  381. package/test/integration/rectification-flow.test.ts +512 -0
  382. package/test/integration/reporter-lifecycle.test.ts +860 -0
  383. package/test/integration/review-config-commands.test.ts +319 -0
  384. package/test/integration/review-config-schema.test.ts +116 -0
  385. package/test/integration/review-plugin-integration.test.ts +722 -0
  386. package/test/integration/review.test.ts +149 -0
  387. package/test/integration/routing-stage-bug-021.test.ts +274 -0
  388. package/test/integration/routing-stage-greenfield.test.ts +286 -0
  389. package/test/integration/runner-config-plugins.test.ts +461 -0
  390. package/test/integration/runner-fixes.test.ts +399 -0
  391. package/test/integration/runner-plugin-integration.test.ts +543 -0
  392. package/test/integration/runner.test.ts +1679 -0
  393. package/test/integration/s5-greenfield-fallback.test.ts +297 -0
  394. package/test/integration/status-file-integration.test.ts +325 -0
  395. package/test/integration/status-file.test.ts +379 -0
  396. package/test/integration/status-writer.test.ts +345 -0
  397. package/test/integration/story-id-in-events.test.ts +273 -0
  398. package/test/integration/tdd-cleanup.test.ts +246 -0
  399. package/test/integration/tdd-orchestrator.test.ts +1762 -0
  400. package/test/integration/test-scanner.test.ts +403 -0
  401. package/test/integration/verification-asset-check.test.ts +142 -0
  402. package/test/integration/verify-stage.test.ts +275 -0
  403. package/test/integration/worktree/manager.test.ts +218 -0
  404. package/test/integration/worktree/merge.test.ts +341 -0
  405. package/test/manual/logging-formatter-demo.ts +158 -0
  406. package/test/ui/tui-agent-panel.test.tsx +99 -0
  407. package/test/ui/tui-controls.test.ts +334 -0
  408. package/test/ui/tui-cost-and-pty.test.ts +189 -0
  409. package/test/ui/tui-layout.test.ts +378 -0
  410. package/test/ui/tui-pty-integration.test.tsx +159 -0
  411. package/test/ui/tui-stories.test.ts +332 -0
  412. package/test/unit/acceptance.test.ts +186 -0
  413. package/test/unit/agent-stderr-capture.test.ts +146 -0
  414. package/test/unit/analyze-classifier.test.ts +215 -0
  415. package/test/unit/analyze.test.ts +224 -0
  416. package/test/unit/auto-detect.test.ts +249 -0
  417. package/test/unit/cli-status.test.ts +417 -0
  418. package/test/unit/commands/common.test.ts +320 -0
  419. package/test/unit/commands/logs.test.ts +416 -0
  420. package/test/unit/commands/unlock.test.ts +319 -0
  421. package/test/unit/constitution-generators.test.ts +160 -0
  422. package/test/unit/constitution.test.ts +209 -0
  423. package/test/unit/context.test.ts +1722 -0
  424. package/test/unit/cost.test.ts +231 -0
  425. package/test/unit/crash-recovery.test.ts +308 -0
  426. package/test/unit/escalation.test.ts +126 -0
  427. package/test/unit/execution-logging-stderr.test.ts +156 -0
  428. package/test/unit/execution-stage.test.ts +122 -0
  429. package/test/unit/fix-generator.test.ts +275 -0
  430. package/test/unit/formatters.test.ts +469 -0
  431. package/test/unit/greenfield.test.ts +179 -0
  432. package/test/unit/helpers.test.ts +317 -0
  433. package/test/unit/interaction/human-review-trigger.test.ts +164 -0
  434. package/test/unit/interaction-network-failures.test.ts +389 -0
  435. package/test/unit/interaction-plugins.test.ts +164 -0
  436. package/test/unit/isolation.test.ts +134 -0
  437. package/test/unit/logging/formatter.test.ts +455 -0
  438. package/test/unit/merge.test.ts +268 -0
  439. package/test/unit/metrics.test.ts +276 -0
  440. package/test/unit/optimizer/noop.optimizer.test.ts +125 -0
  441. package/test/unit/optimizer/rule-based.optimizer.test.ts +358 -0
  442. package/test/unit/prd-auto-default.test.ts +290 -0
  443. package/test/unit/prd-failure-category.test.ts +176 -0
  444. package/test/unit/prd-get-next-story.test.ts +186 -0
  445. package/test/unit/precheck-checks.test.ts +840 -0
  446. package/test/unit/precheck-story-size-gate.test.ts +287 -0
  447. package/test/unit/precheck-types.test.ts +142 -0
  448. package/test/unit/prompts.test.ts +475 -0
  449. package/test/unit/queue.test.ts +237 -0
  450. package/test/unit/rectification.test.ts +284 -0
  451. package/test/unit/registry.test.ts +287 -0
  452. package/test/unit/routing.test.ts +937 -0
  453. package/test/unit/run-lifecycle.test.ts +140 -0
  454. package/test/unit/storyid-events.test.ts +224 -0
  455. package/test/unit/tdd-verdict.test.ts +492 -0
  456. package/test/unit/test-output-parser.test.ts +377 -0
  457. package/test/unit/verdict.test.ts +324 -0
  458. package/test/unit/worktree-manager.test.ts +158 -0
  459. package/tsconfig.json +27 -0
@@ -0,0 +1,1762 @@
1
+ import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
2
+ import { existsSync } from "node:fs";
3
+ import { mkdir, rm, writeFile } from "node:fs/promises";
4
+ import path from "node:path";
5
+ import type { AgentAdapter, AgentResult } from "../../src/agents";
6
+ import { DEFAULT_CONFIG } from "../../src/config";
7
+ import type { UserStory } from "../../src/prd";
8
+ import { runThreeSessionTdd } from "../../src/tdd/orchestrator";
9
+ import { VERDICT_FILE } from "../../src/tdd/verdict";
10
+
11
+ let originalSpawn: typeof Bun.spawn; // Holds the real Bun.spawn so a mocked replacement can be undone
12
+
13
+ beforeEach(() => { // Save whatever Bun.spawn is before the test runs
14
+ originalSpawn = Bun.spawn;
15
+ });
16
+
17
+ afterEach(() => { // Put the saved Bun.spawn back after the test
18
+ Bun.spawn = originalSpawn;
19
+ });
20
+
21
+ /** Create a mock AgentAdapter whose run() returns results[i] on its i-th call, with unset fields filled by success defaults; calls past the end of results return all defaults. */
22
+ function createMockAgent(results: Partial<AgentResult>[]): AgentAdapter {
23
+ let callCount = 0; // index of the next entry in results to serve
24
+ return {
25
+ name: "mock",
26
+ displayName: "Mock Agent",
27
+ binary: "mock",
28
+ isInstalled: async () => true,
29
+ buildCommand: () => ["mock"],
30
+ run: mock(async () => {
31
+ const r = results[callCount] || {}; // no scripted entry for this call → every field uses its default below
32
+ callCount++;
33
+ return {
34
+ success: r.success ?? true,
35
+ exitCode: r.exitCode ?? 0,
36
+ output: r.output ?? "",
37
+ rateLimited: r.rateLimited ?? false,
38
+ durationMs: r.durationMs ?? 100,
39
+ estimatedCost: r.estimatedCost ?? 0.01,
40
+ };
41
+ }),
42
+ };
43
+ }
44
+
45
+ /** Replace Bun.spawn with a mock that fakes shell-wrapped test commands and git rev-parse / checkout / diff; any other command is forwarded to the real Bun.spawn saved in originalSpawn. */
46
+ function mockGitSpawn(opts: {
47
+ /** File lists returned by successive git diff calls: entry N answers the N-th call; calls beyond the end get an empty list */
48
+ diffFiles: string[][];
49
+ /** Optional: whether intercepted shell test commands exit 0 (true, the default) or 1 (false) */
50
+ testCommandSuccess?: boolean;
51
+ }) {
52
+ let revParseCount = 0;
53
+ let diffCount = 0;
54
+ const testSuccess = opts.testCommandSuccess ?? true;
55
+
56
+ // @ts-ignore — mocking global
57
+ Bun.spawn = mock((cmd: string[], spawnOpts?: any) => {
58
+ // Intercept test commands (bun test, npm test, etc.), recognized here as any "/bin/sh|bash|zsh -c" invocation
59
+ if ((cmd[0] === "/bin/sh" || cmd[0] === "/bin/bash" || cmd[0] === "/bin/zsh") && cmd[1] === "-c") {
60
+ return {
61
+ pid: 9999,
62
+ exited: Promise.resolve(testSuccess ? 0 : 1),
63
+ stdout: new Response(testSuccess ? "tests pass\n" : "tests fail\n").body,
64
+ stderr: new Response("").body,
65
+ };
66
+ }
67
+ if (cmd[0] === "git" && cmd[1] === "rev-parse") {
68
+ revParseCount++; // each call yields a distinct fake ref: ref-1, ref-2, ...
69
+ return {
70
+ exited: Promise.resolve(0),
71
+ stdout: new Response(`ref-${revParseCount}\n`).body,
72
+ stderr: new Response("").body,
73
+ };
74
+ }
75
+ if (cmd[0] === "git" && cmd[1] === "checkout") {
76
+ // Intercept git checkout (used in zero-file fallback) — silently succeed
77
+ return {
78
+ exited: Promise.resolve(0),
79
+ stdout: new Response("").body,
80
+ stderr: new Response("").body,
81
+ };
82
+ }
83
+ if (cmd[0] === "git" && cmd[1] === "diff") {
84
+ const files = opts.diffFiles[diffCount] || []; // call number beyond diffFiles → report no changed files
85
+ diffCount++;
86
+ return {
87
+ exited: Promise.resolve(0),
88
+ stdout: new Response(files.join("\n") + "\n").body,
89
+ stderr: new Response("").body,
90
+ };
91
+ }
92
+ return originalSpawn(cmd, spawnOpts); // not one of the faked commands: delegate to the saved Bun.spawn
93
+ });
94
+ }
95
+
96
+ const story: UserStory = { // Pending, never-attempted story used as the input fixture by the tests below
97
+ id: "US-001",
98
+ title: "Add user validation",
99
+ description: "Add validation to user input",
100
+ acceptanceCriteria: ["Validation works", "Errors are clear"],
101
+ dependencies: [],
102
+ tags: [],
103
+ status: "pending",
104
+ passes: false,
105
+ escalations: [],
106
+ attempts: 0,
107
+ };
108
+
109
+ describe("runThreeSessionTdd", () => {
110
+ test("happy path: all 3 sessions succeed", async () => {
111
+ // Git call accounting for this test (see runTddSession in orchestrator.ts):
112
+ // every session triggers captureGitRef, which issues one `git rev-parse`.
113
+ // A session with an isolation check then issues two `git diff` calls:
114
+ // 1. the isolation check (verifyTestWriterIsolation / verifyImplementerIsolation),
115
+ // which calls getChangedFiles internally → 1 diff call
116
+ // 2. a separate getChangedFiles call for the session result → 1 diff call
117
+ // Session 1 (test-writer): isolation + getChangedFiles = 2 diff calls
118
+ // Session 2 (implementer): isolation + getChangedFiles = 2 diff calls
119
+ // Session 3 (verifier): no isolation check, getChangedFiles only = 1 diff call
120
+ // mockGitSpawn answers diff calls in order, one diffFiles entry per call.
121
+ // Total: 2 + 2 + 1 = 5 diff calls
122
+ mockGitSpawn({
123
+ diffFiles: [
124
+ // Session 1 isolation check: test files only (OK)
125
+ ["test/user.test.ts"],
126
+ // Session 1 getChangedFiles
127
+ ["test/user.test.ts"],
128
+ // Session 2 isolation check: source files only (OK)
129
+ ["src/user.ts"],
130
+ // Session 2 getChangedFiles
131
+ ["src/user.ts"],
132
+ // Session 3 getChangedFiles (no isolation check for verifier)
133
+ ["src/user.ts"],
134
+ ],
135
+ });
136
+
137
+ const agent = createMockAgent([
138
+ { success: true, estimatedCost: 0.01 },
139
+ { success: true, estimatedCost: 0.02 },
140
+ { success: true, estimatedCost: 0.01 },
141
+ ]);
142
+
143
+ const result = await runThreeSessionTdd({
144
+ agent,
145
+ story,
146
+ config: DEFAULT_CONFIG,
147
+ workdir: "/tmp/test",
148
+ modelTier: "balanced",
149
+ });
150
+
151
+ expect(result.success).toBe(true);
152
+ expect(result.sessions).toHaveLength(3);
153
+ expect(result.sessions[0].role).toBe("test-writer");
154
+ expect(result.sessions[1].role).toBe("implementer");
155
+ expect(result.sessions[2].role).toBe("verifier");
156
+ expect(result.needsHumanReview).toBe(false);
157
+ expect(result.totalCost).toBe(0.04);
158
+ });
159
+
160
+ test("failure when test-writer session fails", async () => {
161
+ mockGitSpawn({
162
+ diffFiles: [["test/user.test.ts"], ["test/user.test.ts"]], // session 1 isolation check, then session 1 getChangedFiles
163
+ });
164
+
165
+ const agent = createMockAgent([{ success: false, exitCode: 1, estimatedCost: 0.01 }]); // only the first (test-writer) run is scripted, and it fails
166
+
167
+ const result = await runThreeSessionTdd({
168
+ agent,
169
+ story,
170
+ config: DEFAULT_CONFIG,
171
+ workdir: "/tmp/test",
172
+ modelTier: "balanced",
173
+ });
174
+
175
+ expect(result.success).toBe(false);
176
+ expect(result.sessions).toHaveLength(1); // no further sessions are expected after the failed test-writer
177
+ expect(result.needsHumanReview).toBe(true);
178
+ });
179
+
180
+ test("failure when test-writer violates isolation", async () => {
181
+ mockGitSpawn({
182
+ diffFiles: [
183
+ // Isolation check: test-writer touched source files!
184
+ ["src/user.ts", "test/user.test.ts"],
185
+ // getChangedFiles
186
+ ["src/user.ts", "test/user.test.ts"],
187
+ ],
188
+ });
189
+
190
+ const agent = createMockAgent([{ success: true, estimatedCost: 0.01 }]); // the agent itself reports success
191
+
192
+ const result = await runThreeSessionTdd({
193
+ agent,
194
+ story,
195
+ config: DEFAULT_CONFIG,
196
+ workdir: "/tmp/test",
197
+ modelTier: "balanced",
198
+ });
199
+
200
+ expect(result.success).toBe(false);
201
+ expect(result.sessions).toHaveLength(1);
202
+ expect(result.sessions[0].success).toBe(false); // session is expected to be marked failed despite the agent's success
203
+ expect(result.needsHumanReview).toBe(true);
204
+ });
205
+
206
+ test("failure when implementer session fails", async () => {
207
+ mockGitSpawn({
208
+ diffFiles: [
209
+ // Session 1 isolation: OK
210
+ ["test/user.test.ts"],
211
+ // Session 1 getChangedFiles
212
+ ["test/user.test.ts"],
213
+ // Session 2 isolation: OK
214
+ ["src/user.ts"],
215
+ // Session 2 getChangedFiles
216
+ ["src/user.ts"],
217
+ ],
218
+ });
219
+
220
+ const agent = createMockAgent([
221
+ { success: true, estimatedCost: 0.01 },
222
+ { success: false, exitCode: 1, estimatedCost: 0.02 }, // second run (implementer) fails
223
+ ]);
224
+
225
+ const result = await runThreeSessionTdd({
226
+ agent,
227
+ story,
228
+ config: DEFAULT_CONFIG,
229
+ workdir: "/tmp/test",
230
+ modelTier: "balanced",
231
+ });
232
+
233
+ expect(result.success).toBe(false);
234
+ expect(result.sessions).toHaveLength(2); // test-writer and implementer only; no verifier session expected
235
+ expect(result.needsHumanReview).toBe(true);
236
+ });
237
+
238
// v0.9.2 behaviour: an implementer that edits a test file yields an isolation
// warning on session 2, while the overall run still succeeds.
test("implementer touching test files is a warning (soft-pass), not failure", async () => {
  const writerFiles = ["test/user.test.ts"]; // session 1: isolation OK
  const implementerFiles = ["test/user.test.ts", "src/user.ts"]; // session 2: also touches a test
  const verifierFiles: string[] = []; // session 3: nothing changed

  mockGitSpawn({
    // Per session: isolation check first, then getChangedFiles.
    diffFiles: [
      writerFiles,
      writerFiles,
      implementerFiles,
      implementerFiles,
      verifierFiles,
      verifierFiles,
    ].map((files) => [...files]),
  });

  const agent = createMockAgent([
    { success: true, estimatedCost: 0.01 },
    { success: true, estimatedCost: 0.02 },
    { success: true, estimatedCost: 0.01 },
  ]);

  const outcome = await runThreeSessionTdd({
    agent,
    story,
    config: DEFAULT_CONFIG,
    workdir: "/tmp/test",
    modelTier: "balanced",
  });

  expect(outcome.sessions).toHaveLength(3);
  const implementerSession = outcome.sessions[1];
  expect(implementerSession.success).toBe(true);
  expect(implementerSession.isolation?.warnings).toContain("test/user.test.ts");
  expect(outcome.success).toBe(true);
});
276
+
277
// With dryRun set, the call reports success with no sessions, zero cost,
// and the agent's run method is never invoked.
test("dry-run mode logs sessions without executing", async () => {
  const agent = createMockAgent([]);

  const { success, sessions, needsHumanReview, totalCost } = await runThreeSessionTdd({
    agent,
    story,
    config: DEFAULT_CONFIG,
    workdir: "/tmp/test",
    modelTier: "balanced",
    dryRun: true,
  });

  expect(success).toBe(true);
  expect(sessions).toHaveLength(0);
  expect(needsHumanReview).toBe(false);
  expect(totalCost).toBe(0);
  // No session may reach the agent during a dry run.
  expect(agent.run).not.toHaveBeenCalled();
});
296
+
297
// Dry run with the "powerful" tier and a context markdown string supplied:
// still no sessions, no cost, and no agent invocation.
test("dry-run mode works with context markdown", async () => {
  const contextMarkdown = "## Dependencies\n- US-000: Setup database\n";
  const agent = createMockAgent([]);

  const dryRunOutcome = await runThreeSessionTdd({
    agent,
    story,
    config: DEFAULT_CONFIG,
    workdir: "/tmp/test",
    modelTier: "powerful",
    contextMarkdown,
    dryRun: true,
  });

  expect(dryRunOutcome.success).toBe(true);
  expect(dryRunOutcome.sessions).toHaveLength(0);
  expect(dryRunOutcome.totalCost).toBe(0);
  // No session may reach the agent during a dry run.
  expect(agent.run).not.toHaveBeenCalled();
});
317
+
318
// BUG-22 regression: the verifier session exits non-zero, yet the independent
// post-TDD test command passes, so the overall result is expected to be success.
test("BUG-22: post-TDD verification overrides session failures when tests pass", async () => {
  // File lists handed out, in order, to successive mocked `git diff` calls.
  const diffFiles = [
    ["test/user.test.ts"], // session 1 isolation
    ["test/user.test.ts"], // session 1 getChangedFiles
    ["src/user.ts"], // session 2 isolation
    ["src/user.ts"], // session 2 getChangedFiles
    ["src/user.ts"], // session 3 getChangedFiles
  ];

  let sawTestCommand = false;
  let revParseCalls = 0;
  let diffCalls = 0;

  // Shape shared by every faked subprocess: exit promise plus stdout/stderr streams.
  const finished = (exitCode: number, out: string) => ({
    exited: Promise.resolve(exitCode),
    stdout: new Response(out).body,
    stderr: new Response("").body,
  });

  // @ts-ignore — mocking global
  Bun.spawn = mock((cmd: string[], spawnOpts?: any) => {
    // The post-TDD verification command (`/bin/sh -c "... bun test ..."`): report passing tests.
    if (cmd[0] === "/bin/sh" && cmd[2]?.includes("bun test")) {
      sawTestCommand = true;
      return { pid: 9999, ...finished(0, "5 pass, 0 fail\n") };
    }

    if (cmd[0] === "git") {
      switch (cmd[1]) {
        case "rev-parse":
          // Each call yields a distinct ref: ref-1, ref-2, ...
          return finished(0, `ref-${++revParseCalls}\n`);
        case "diff": {
          const files = diffFiles[diffCalls++] ?? [];
          return finished(0, `${files.join("\n")}\n`);
        }
      }
    }

    // Anything else goes to the real spawn.
    return originalSpawn(cmd, spawnOpts);
  });

  const agent = createMockAgent([
    { success: true, estimatedCost: 0.01 }, // test-writer
    { success: true, estimatedCost: 0.02 }, // implementer
    { success: false, exitCode: 1, estimatedCost: 0.01 }, // verifier exits non-zero
  ]);

  const outcome = await runThreeSessionTdd({
    agent,
    story,
    config: DEFAULT_CONFIG,
    workdir: "/tmp/test",
    modelTier: "balanced",
  });

  expect(sawTestCommand).toBe(true); // the independent test run happened
  expect(outcome.sessions).toHaveLength(3);
  expect(outcome.sessions[2].success).toBe(false); // verifier session stays failed
  expect(outcome.success).toBe(true); // overall result is overridden
  expect(outcome.needsHumanReview).toBe(false);
  expect(outcome.reviewReason).toBeUndefined();
});
394
+
395
// BUG-20 regression: the test-writer succeeds but only produces non-test files
// (e.g. markdown). The run must stop after session 1 with a specific review reason.
test("BUG-20: failure when test-writer creates no test files", async () => {
  const nonTestFiles = ["requirements.md", "docs/plan.md"];
  mockGitSpawn({
    // Served for the isolation check, then for getChangedFiles.
    diffFiles: [[...nonTestFiles], [...nonTestFiles]],
  });

  // The agent reports success even though it wrote the wrong kind of file.
  const agent = createMockAgent([{ success: true, estimatedCost: 0.01 }]);

  const { success, sessions, needsHumanReview, reviewReason } = await runThreeSessionTdd({
    agent,
    story,
    config: DEFAULT_CONFIG,
    workdir: "/tmp/test",
    modelTier: "balanced",
  });

  expect(success).toBe(false);
  expect(sessions).toHaveLength(1); // nothing runs after session 1
  expect(needsHumanReview).toBe(true);
  expect(reviewReason).toBe("Test writer session created no test files (greenfield project)");
});
425
+
426
// BUG-20 regression: the test-writer succeeds but changes no files at all.
// Expected outcome matches the "no test files" case.
test("BUG-20: failure when test-writer creates zero files", async () => {
  // Empty diff for both the isolation check and getChangedFiles.
  const emptyDiffs: string[][] = [[], []];
  mockGitSpawn({ diffFiles: emptyDiffs });

  // The agent reports success even though it wrote nothing.
  const agent = createMockAgent([{ success: true, estimatedCost: 0.01 }]);

  const outcome = await runThreeSessionTdd({
    agent,
    story,
    config: DEFAULT_CONFIG,
    workdir: "/tmp/test",
    modelTier: "balanced",
  });

  expect(outcome.success).toBe(false);
  expect(outcome.sessions).toHaveLength(1);
  expect(outcome.needsHumanReview).toBe(true);
  expect(outcome.reviewReason).toBe("Test writer session created no test files (greenfield project)");
});
455
+
456
// BUG-20 regression: .test.ts, .spec.js and .test.tsx files all count as test
// files, so the run proceeds through all three sessions.
test("BUG-20: success when test-writer creates test files with various extensions", async () => {
  const writerFiles = ["test/user.test.ts", "test/auth.spec.js", "test/api.test.tsx"];
  const implementerFiles = ["src/user.ts", "src/auth.js"];

  mockGitSpawn({
    diffFiles: [
      [...writerFiles], // session 1 isolation
      [...writerFiles], // session 1 getChangedFiles
      [...implementerFiles], // session 2 isolation
      [...implementerFiles], // session 2 getChangedFiles
      ["src/user.ts"], // session 3 getChangedFiles
    ],
  });

  const agent = createMockAgent([
    { success: true, estimatedCost: 0.01 },
    { success: true, estimatedCost: 0.02 },
    { success: true, estimatedCost: 0.01 },
  ]);

  const { success, sessions, needsHumanReview } = await runThreeSessionTdd({
    agent,
    story,
    config: DEFAULT_CONFIG,
    workdir: "/tmp/test",
    modelTier: "balanced",
  });

  expect(success).toBe(true);
  expect(sessions).toHaveLength(3); // every session ran
  expect(needsHumanReview).toBe(false);
});
492
+
493
// BUG-22 counterpart: the verifier session fails AND the independent post-TDD
// test command also exits non-zero, so the failure must stand.
test("BUG-22: post-TDD verification does not override when tests actually fail", async () => {
  // File lists handed out, in order, to successive mocked `git diff` calls.
  const diffFiles = [["test/user.test.ts"], ["test/user.test.ts"], ["src/user.ts"], ["src/user.ts"], ["src/user.ts"]];

  let sawTestCommand = false;
  let revParseCalls = 0;
  let diffCalls = 0;

  // Shape shared by every faked subprocess: exit promise plus stdout/stderr streams.
  const finished = (exitCode: number, out: string, err: string) => ({
    exited: Promise.resolve(exitCode),
    stdout: new Response(out).body,
    stderr: new Response(err).body,
  });

  // @ts-ignore — mocking global
  Bun.spawn = mock((cmd: string[], spawnOpts?: any) => {
    // The post-TDD verification command: report failing tests.
    if (cmd[0] === "/bin/sh" && cmd[2]?.includes("bun test")) {
      sawTestCommand = true;
      return { pid: 9999, ...finished(1, "3 pass, 2 fail\n", "Test errors...\n") };
    }

    if (cmd[0] === "git") {
      switch (cmd[1]) {
        case "rev-parse":
          return finished(0, `ref-${++revParseCalls}\n`, "");
        case "diff": {
          const files = diffFiles[diffCalls++] ?? [];
          return finished(0, `${files.join("\n")}\n`, "");
        }
      }
    }

    // Anything else goes to the real spawn.
    return originalSpawn(cmd, spawnOpts);
  });

  const agent = createMockAgent([
    { success: true, estimatedCost: 0.01 },
    { success: true, estimatedCost: 0.02 },
    { success: false, exitCode: 1, estimatedCost: 0.01 }, // verifier exits non-zero
  ]);

  const outcome = await runThreeSessionTdd({
    agent,
    story,
    config: DEFAULT_CONFIG,
    workdir: "/tmp/test",
    modelTier: "balanced",
  });

  expect(sawTestCommand).toBe(true);
  expect(outcome.success).toBe(false); // failure is not overridden
  expect(outcome.needsHumanReview).toBe(true);
  expect(outcome.reviewReason).toBeDefined();
});
553
+ });
554
+
555
+ // ─── Lite-mode prompt tests ───────────────────────────────────────────────────
556
+
557
+ import {
558
+ buildImplementerLitePrompt,
559
+ buildImplementerPrompt,
560
+ buildTestWriterLitePrompt,
561
+ buildTestWriterPrompt,
562
+ buildVerifierPrompt,
563
+ } from "../../src/tdd/prompts";
564
+
565
// Substring-level checks on the lite test-writer prompt: which permissions it
// grants, which strict-mode sentences it must not carry, and that story/context
// content is embedded.
describe("buildTestWriterLitePrompt", () => {
  const CREATE_TESTS_ONLY = /[Oo]nly\s+[Cc][Rr][Ee][Aa][Tt][Ee]\s+test files|CREATE test files/;
  const TRAILING_SEPARATOR = /---\s*$/;

  test("tells agent it CAN read source files", () => {
    expect(buildTestWriterLitePrompt(story)).toContain("MAY read source files");
  });

  test("tells agent it CAN import from source files", () => {
    expect(buildTestWriterLitePrompt(story)).toContain("MAY import from source files");
  });

  test("still instructs agent to only CREATE test files", () => {
    expect(buildTestWriterLitePrompt(story)).toMatch(CREATE_TESTS_ONLY);
  });

  test("does NOT say DO NOT create or modify any source files (strict isolation rule)", () => {
    expect(buildTestWriterLitePrompt(story)).not.toContain("DO NOT create or modify any source files");
  });

  test("does NOT say ONLY create/modify test files (strict isolation rule)", () => {
    expect(buildTestWriterLitePrompt(story)).not.toContain("ONLY create/modify test files");
  });

  test("includes story title and acceptance criteria", () => {
    const litePrompt = buildTestWriterLitePrompt(story);
    for (const fragment of [story.title, "Validation works", "Errors are clear"]) {
      expect(litePrompt).toContain(fragment);
    }
  });

  test("includes context markdown when provided", () => {
    const litePrompt = buildTestWriterLitePrompt(story, "## Relevant Files\n- src/user.ts");
    expect(litePrompt).toContain("## Relevant Files");
    expect(litePrompt).toContain("src/user.ts");
  });

  test("does not include context separator when no context provided", () => {
    // Without context the prompt must not end in a dangling `---` separator.
    expect(buildTestWriterLitePrompt(story)).not.toMatch(TRAILING_SEPARATOR);
  });

  test("uses lite mode label in heading", () => {
    const lowered = buildTestWriterLitePrompt(story).toLowerCase();
    expect(lowered).toContain("lite");
  });
});
616
+
617
// Substring-level checks on the lite implementer prompt: strict-mode file
// restrictions are absent, combined test+implementation work is allowed, and
// story/context content is embedded.
describe("buildImplementerLitePrompt", () => {
  test("has no file restriction rules (does not say Only create or modify files in the test/ directory)", () => {
    expect(buildImplementerLitePrompt(story)).not.toContain("Only create or modify files in the test/ directory");
  });

  test("has no file restriction rules (does not say Implement source code in src/ to make tests pass)", () => {
    expect(buildImplementerLitePrompt(story)).not.toContain("Implement source code in src/ to make tests pass");
  });

  test("allows writing tests and implementing", () => {
    expect(buildImplementerLitePrompt(story)).toContain("Write tests AND implement");
  });

  test("includes story title and acceptance criteria", () => {
    const litePrompt = buildImplementerLitePrompt(story);
    for (const fragment of [story.title, "Validation works", "Errors are clear"]) {
      expect(litePrompt).toContain(fragment);
    }
  });

  test("includes context markdown when provided", () => {
    const litePrompt = buildImplementerLitePrompt(story, "## Context\n- Use existing patterns");
    expect(litePrompt).toContain("## Context");
    expect(litePrompt).toContain("Use existing patterns");
  });

  test("uses lite mode label in heading", () => {
    const lowered = buildImplementerLitePrompt(story).toLowerCase();
    expect(lowered).toContain("lite");
  });

  test("still instructs to make tests pass", () => {
    const lowered = buildImplementerLitePrompt(story).toLowerCase();
    expect(lowered).toContain("all tests must pass");
  });
});
657
+
658
// The verifier prompt is expected to keep its session-3 wording, with no
// reference to lite mode.
describe("buildVerifierPrompt (unchanged)", () => {
  test("is unchanged — still has isolation-focused verification rules", () => {
    const verifierPrompt = buildVerifierPrompt(story);
    const requiredFragments = [
      "Session 3: Verify",
      "Check if test files were modified by the implementer",
      story.title,
    ];
    for (const fragment of requiredFragments) {
      expect(verifierPrompt).toContain(fragment);
    }
  });

  test("does NOT mention lite mode", () => {
    const lowered = buildVerifierPrompt(story).toLowerCase();
    expect(lowered).not.toContain("lite");
  });

  test("still verifies acceptance criteria", () => {
    const verifierPrompt = buildVerifierPrompt(story);
    expect(verifierPrompt).toContain("Validation works");
    expect(verifierPrompt).toContain("Errors are clear");
  });
});
677
+
678
// Side-by-side checks: each marker sentence must appear in exactly one of the
// strict / lite variants of a prompt.
describe("strict vs lite prompt comparison", () => {
  test("strict test-writer has harder isolation rules than lite", () => {
    const strict = buildTestWriterPrompt(story);
    const lite = buildTestWriterLitePrompt(story);

    // Only the strict prompt confines the agent to the test/ directory.
    const strictOnly = "Only create or modify files in the test/ directory";
    expect(strict).toContain(strictOnly);
    expect(lite).not.toContain(strictOnly);

    // Only the lite prompt permits minimal stubs under src/.
    const liteOnly = "You may create minimal stubs in src/";
    expect(lite).toContain(liteOnly);
    expect(strict).not.toContain(liteOnly);
  });

  test("strict implementer has harder isolation rules than lite", () => {
    const strict = buildImplementerPrompt(story);
    const lite = buildImplementerLitePrompt(story);

    // Only the strict prompt forbids modifying test files.
    const strictOnly = "Do NOT modify test files";
    expect(strict).toContain(strictOnly);
    expect(lite).not.toContain(strictOnly);

    // Only the lite prompt asks for tests and implementation together.
    const liteOnly = "Write tests AND implement";
    expect(lite).toContain(liteOnly);
    expect(strict).not.toContain(liteOnly);
  });
});
705
+
706
+ // ─── T4: Lite mode orchestration tests ───────────────────────────────────────
707
+
708
+ describe("runThreeSessionTdd — lite mode", () => {
709
// All three sessions succeed with `lite: true`; the result must echo the flag.
test("lite mode: result includes lite=true flag", async () => {
  // Four diff calls are expected in lite mode: sessions 1 and 2 skip the
  // isolation check (one getChangedFiles call each), while the verifier
  // performs both an isolation check and getChangedFiles.
  const liteDiffs = [
    ["test/user.test.ts"], // s1 getChangedFiles
    ["src/user.ts"], // s2 getChangedFiles
    [], // s3 isolation check
    ["src/user.ts"], // s3 getChangedFiles
  ];
  mockGitSpawn({ diffFiles: liteDiffs });

  const agent = createMockAgent([
    { success: true, estimatedCost: 0.01 },
    { success: true, estimatedCost: 0.02 },
    { success: true, estimatedCost: 0.01 },
  ]);

  const { lite, success } = await runThreeSessionTdd({
    agent,
    story,
    config: DEFAULT_CONFIG,
    workdir: "/tmp/test",
    modelTier: "balanced",
    lite: true,
  });

  expect(lite).toBe(true);
  expect(success).toBe(true);
});
741
+
742
// All three sessions succeed with `lite: false`; the result must echo the flag.
test("strict mode: result includes lite=false flag", async () => {
  const strictDiffs = [
    ["test/user.test.ts"],
    ["test/user.test.ts"],
    ["src/user.ts"],
    ["src/user.ts"],
    [], // s3 isolation
    ["src/user.ts"], // s3 getChangedFiles
  ];
  mockGitSpawn({ diffFiles: strictDiffs });

  const agent = createMockAgent([
    { success: true, estimatedCost: 0.01 },
    { success: true, estimatedCost: 0.02 },
    { success: true, estimatedCost: 0.01 },
  ]);

  const { lite, success } = await runThreeSessionTdd({
    agent,
    story,
    config: DEFAULT_CONFIG,
    workdir: "/tmp/test",
    modelTier: "balanced",
    lite: false,
  });

  expect(lite).toBe(false);
  expect(success).toBe(true);
});
772
+
773
// In lite mode the first two sessions carry no isolation record; the verifier's
// session still does.
test("lite mode: test-writer session has no isolation check (isolation is undefined)", async () => {
  const liteDiffs = [
    ["test/user.test.ts"], // s1 getChangedFiles only
    ["src/user.ts"], // s2 getChangedFiles only
    [], // s3 isolation
    ["src/user.ts"], // s3 getChangedFiles
  ];
  mockGitSpawn({ diffFiles: liteDiffs });

  const agent = createMockAgent([
    { success: true, estimatedCost: 0.01 },
    { success: true, estimatedCost: 0.02 },
    { success: true, estimatedCost: 0.01 },
  ]);

  const { sessions } = await runThreeSessionTdd({
    agent,
    story,
    config: DEFAULT_CONFIG,
    workdir: "/tmp/test",
    modelTier: "balanced",
    lite: true,
  });

  expect(sessions).toHaveLength(3);
  // Test-writer and implementer: isolation skipped.
  expect(sessions[0].isolation).toBeUndefined();
  expect(sessions[1].isolation).toBeUndefined();
  // Verifier: isolation recorded.
  expect(sessions[2].isolation).toBeDefined();
});
805
+
806
// Contrast with strict mode, where an implementer touching test files yields
// isolation warnings: in lite mode session 2 has no isolation record at all.
test("lite mode: implementer modifying test files does NOT appear in isolation warnings (no isolation check)", async () => {
  const liteDiffs = [
    ["test/user.test.ts"], // s1 getChangedFiles
    ["test/user.test.ts", "src/user.ts"], // s2 getChangedFiles (includes a test file)
    [], // s3 isolation
    [], // s3 getChangedFiles
  ];
  mockGitSpawn({ diffFiles: liteDiffs });

  const agent = createMockAgent([
    { success: true, estimatedCost: 0.01 },
    { success: true, estimatedCost: 0.02 },
    { success: true, estimatedCost: 0.01 },
  ]);

  const outcome = await runThreeSessionTdd({
    agent,
    story,
    config: DEFAULT_CONFIG,
    workdir: "/tmp/test",
    modelTier: "balanced",
    lite: true,
  });

  const implementerSession = outcome.sessions[1];
  expect(implementerSession.isolation).toBeUndefined(); // isolation skipped in lite
  expect(implementerSession.success).toBe(true); // agent succeeded
  expect(outcome.success).toBe(true);
  expect(outcome.lite).toBe(true);
});
838
+
839
// Even with `lite: true`, the verifier session carries a passing isolation record.
test("lite mode: verifier always runs isolation check (even in lite mode)", async () => {
  const liteDiffs = [
    ["test/user.test.ts"], // s1 getChangedFiles
    ["src/user.ts"], // s2 getChangedFiles
    [], // s3 isolation
    [], // s3 getChangedFiles
  ];
  mockGitSpawn({ diffFiles: liteDiffs });

  const agent = createMockAgent([
    { success: true, estimatedCost: 0.01 },
    { success: true, estimatedCost: 0.02 },
    { success: true, estimatedCost: 0.01 },
  ]);

  const outcome = await runThreeSessionTdd({
    agent,
    story,
    config: DEFAULT_CONFIG,
    workdir: "/tmp/test",
    modelTier: "balanced",
    lite: true,
  });

  const verifierSession = outcome.sessions[2];
  expect(verifierSession.isolation).toBeDefined();
  expect(verifierSession.isolation?.passed).toBe(true);
  expect(outcome.lite).toBe(true);
});
868
+
869
// Dry run combined with lite mode: no sessions, success, and the lite flag is echoed.
test("lite mode: dry-run returns lite=true", async () => {
  const agent = createMockAgent([]);

  const { lite, success, sessions } = await runThreeSessionTdd({
    agent,
    story,
    config: DEFAULT_CONFIG,
    workdir: "/tmp/test",
    modelTier: "balanced",
    dryRun: true,
    lite: true,
  });

  expect(lite).toBe(true);
  expect(success).toBe(true);
  expect(sessions).toHaveLength(0);
});
884
+ });
885
+
886
+ // ─── T4: Zero-file fallback tests ────────────────────────────────────────────
887
+
888
+ describe("runThreeSessionTdd — zero-file fallback", () => {
889
/**
 * Replaces the global `Bun.spawn` with a mock that fakes shell test commands and
 * the git subcommands `rev-parse`, `checkout` and `diff`. Every other command is
 * forwarded to `originalSpawn`.
 *
 * @param opts.diffFiles          File lists returned by successive `git diff` calls, in order;
 *                                calls beyond the end of the list get an empty diff.
 * @param opts.onCheckout         Optional callback invoked whenever `git checkout` is spawned.
 * @param opts.testCommandSuccess Whether `<shell> -c ...` commands exit 0 (default: true).
 */
function mockGitSpawnWithCheckout(opts: {
  diffFiles: string[][];
  onCheckout?: () => void;
  testCommandSuccess?: boolean;
}) {
  const SHELLS = ["/bin/sh", "/bin/bash", "/bin/zsh"];
  const testsPass = opts.testCommandSuccess ?? true;
  let revParseCalls = 0;
  let diffCalls = 0;

  // Shape shared by every faked subprocess: exit promise plus stdout and an empty stderr.
  const finished = (exitCode: number, out: string) => ({
    exited: Promise.resolve(exitCode),
    stdout: new Response(out).body,
    stderr: new Response("").body,
  });

  // @ts-ignore — mocking global
  Bun.spawn = mock((cmd: string[], spawnOpts?: any) => {
    // Any `<shell> -c ...` invocation is treated as the test command.
    if (SHELLS.includes(cmd[0]) && cmd[1] === "-c") {
      return testsPass
        ? { pid: 9999, ...finished(0, "tests pass\n") }
        : { pid: 9999, ...finished(1, "tests fail\n") };
    }

    if (cmd[0] === "git") {
      switch (cmd[1]) {
        case "rev-parse":
          // Each call yields a distinct ref: ref-1, ref-2, ...
          return finished(0, `ref-${++revParseCalls}\n`);
        case "checkout":
          opts.onCheckout?.();
          return finished(0, "");
        case "diff": {
          const files = opts.diffFiles[diffCalls++] ?? [];
          return finished(0, `${files.join("\n")}\n`);
        }
      }
    }

    // Anything else goes to the real spawn.
    return originalSpawn(cmd, spawnOpts);
  });
}
938
+
939
// BUG-010: with strategy "auto" and no test files from session 1, the run fails
// immediately with "greenfield-no-tests" — no `git checkout`, no switch to lite.
test("fallback NO LONGER triggers when strategy='auto' and 0 test files (BUG-010 removed auto-fallback)", async () => {
  let sawCheckout = false;

  mockGitSpawnWithCheckout({
    diffFiles: [
      ["requirements.md"], // s1 isolation (strict): no source violations
      ["requirements.md"], // s1 getChangedFiles (strict): zero test files
    ],
    onCheckout: () => {
      sawCheckout = true;
    },
  });

  // Single strict test-writer session.
  const agent = createMockAgent([{ success: true, estimatedCost: 0.01 }]);

  const autoStrategyConfig = {
    ...DEFAULT_CONFIG,
    tdd: { ...DEFAULT_CONFIG.tdd, strategy: "auto" as const },
  };

  const outcome = await runThreeSessionTdd({
    agent,
    story,
    config: autoStrategyConfig,
    workdir: "/tmp/test",
    modelTier: "balanced",
  });

  expect(sawCheckout).toBe(false); // no fallback means no git checkout
  expect(outcome.lite).toBe(false); // still strict
  expect(outcome.success).toBe(false);
  expect(outcome.failureCategory).toBe("greenfield-no-tests");
});
976
+
977
// BUG-010: default config, session 1 changes only a docs file — the run fails
// as "greenfield-no-tests" and stays in strict mode.
test("zero-file scenario returns greenfield-no-tests (BUG-010 removed lite fallback)", async () => {
  const docsOnly = ["docs/plan.md"];
  mockGitSpawn({
    // s1 isolation (strict), then s1 getChangedFiles (strict): zero test files.
    diffFiles: [[...docsOnly], [...docsOnly]],
  });

  const agent = createMockAgent([{ success: true, estimatedCost: 0.01 }]);

  const { lite, success, failureCategory } = await runThreeSessionTdd({
    agent,
    story,
    config: DEFAULT_CONFIG,
    workdir: "/tmp/test",
    modelTier: "balanced",
  });

  expect(lite).toBe(false);
  expect(success).toBe(false);
  expect(failureCategory).toBe("greenfield-no-tests");
});
1000
+
1001
// Explicit strategy "strict": zero test files from session 1 is a plain failure
// with the greenfield review reason.
test("fallback does NOT trigger when strategy='strict' (explicit strict mode)", async () => {
  const nonTestOnly = ["requirements.md"];
  mockGitSpawn({
    // s1 isolation (no source violations), then s1 getChangedFiles (zero test files).
    diffFiles: [[...nonTestOnly], [...nonTestOnly]],
  });

  const agent = createMockAgent([{ success: true, estimatedCost: 0.01 }]);

  const strictStrategyConfig = {
    ...DEFAULT_CONFIG,
    tdd: { ...DEFAULT_CONFIG.tdd, strategy: "strict" as const },
  };

  const outcome = await runThreeSessionTdd({
    agent,
    story,
    config: strictStrategyConfig,
    workdir: "/tmp/test",
    modelTier: "balanced",
  });

  expect(outcome.success).toBe(false);
  expect(outcome.needsHumanReview).toBe(true);
  expect(outcome.reviewReason).toBe("Test writer session created no test files (greenfield project)");
  expect(outcome.lite).toBe(false); // invoked strict, stayed strict
});
1031
+
1032
// Invoked with `lite: true` and session 1 yields no test files: the run fails
// with the greenfield review reason and remains in lite mode.
test("fallback does NOT trigger when already in lite mode", async () => {
  mockGitSpawn({
    // Single entry: s1 getChangedFiles (lite skips isolation) — zero test files.
    diffFiles: [["requirements.md"]],
  });

  const agent = createMockAgent([{ success: true, estimatedCost: 0.01 }]);

  const outcome = await runThreeSessionTdd({
    agent,
    story,
    config: DEFAULT_CONFIG,
    workdir: "/tmp/test",
    modelTier: "balanced",
    lite: true,
  });

  expect(outcome.success).toBe(false);
  expect(outcome.needsHumanReview).toBe(true);
  expect(outcome.reviewReason).toBe("Test writer session created no test files (greenfield project)");
  expect(outcome.lite).toBe(true);
});
1057
+
1058
// Strategy "lite" in config together with `lite: true` on the call (the flag the
// caller is expected to set for that strategy): zero changed files is a failure.
test("fallback does NOT trigger when strategy='lite' config", async () => {
  // Single empty entry: s1 getChangedFiles (lite skips isolation) — zero test files.
  const emptyDiff: string[][] = [[]];
  mockGitSpawn({ diffFiles: emptyDiff });

  const agent = createMockAgent([{ success: true, estimatedCost: 0.01 }]);

  const liteStrategyConfig = {
    ...DEFAULT_CONFIG,
    tdd: { ...DEFAULT_CONFIG.tdd, strategy: "lite" as const },
  };

  const { success, lite } = await runThreeSessionTdd({
    agent,
    story,
    config: liteStrategyConfig,
    workdir: "/tmp/test",
    modelTier: "balanced",
    lite: true, // set by the caller for the lite strategy
  });

  expect(success).toBe(false);
  expect(lite).toBe(true);
});
1086
+ });
1087
+
1088
+ // ─── T4: failureCategory tests ────────────────────────────────────────────────
1089
+
1090
+ describe("runThreeSessionTdd — failureCategory", () => {
1091
// A test-writer whose diff includes a src/ file must be categorised as an
// isolation violation.
test("test-writer isolation failure sets failureCategory='isolation-violation'", async () => {
  const session1Files = ["src/user.ts", "test/user.test.ts"];
  mockGitSpawn({
    // Served for the isolation check, then for getChangedFiles.
    diffFiles: [[...session1Files], [...session1Files]],
  });

  const agent = createMockAgent([{ success: true, estimatedCost: 0.01 }]);

  const { success, failureCategory } = await runThreeSessionTdd({
    agent,
    story,
    config: DEFAULT_CONFIG,
    workdir: "/tmp/test",
    modelTier: "balanced",
  });

  expect(success).toBe(false);
  expect(failureCategory).toBe("isolation-violation");
});
1115
+
1116
// Strict strategy, test-writer changes only a non-test file: the failure is
// categorised as "greenfield-no-tests" (BUG-010 behaviour).
// The title previously said 'isolation-violation', contradicting the assertion below.
test("test-writer zero files (non-auto strategy) sets failureCategory='greenfield-no-tests'", async () => {
  mockGitSpawn({
    diffFiles: [
      ["requirements.md"], // s1 isolation — no source violations
      ["requirements.md"], // s1 getChangedFiles — 0 test files
    ],
  });

  const agent = createMockAgent([{ success: true, estimatedCost: 0.01 }]);

  const configWithStrictStrategy = {
    ...DEFAULT_CONFIG,
    tdd: { ...DEFAULT_CONFIG.tdd, strategy: "strict" as const },
  };

  const result = await runThreeSessionTdd({
    agent,
    story,
    config: configWithStrictStrategy,
    workdir: "/tmp/test",
    modelTier: "balanced",
  });

  expect(result.success).toBe(false);
  expect(result.failureCategory).toBe("greenfield-no-tests");
});
1143
+
1144
+ test("test-writer crash/timeout (non-isolation failure) sets failureCategory='session-failure'", async () => {
1145
+ // Test-writer agent crashes/times out but isolation is clean
1146
+ mockGitSpawn({
1147
+ diffFiles: [
1148
+ // Isolation check: only test files (passes)
1149
+ ["test/user.test.ts"],
1150
+ // getChangedFiles
1151
+ ["test/user.test.ts"],
1152
+ ],
1153
+ });
1154
+
1155
+ const agent = createMockAgent([
1156
+ { success: false, exitCode: 1, estimatedCost: 0.01 }, // Agent crash
1157
+ ]);
1158
+
1159
+ const result = await runThreeSessionTdd({
1160
+ agent,
1161
+ story,
1162
+ config: DEFAULT_CONFIG,
1163
+ workdir: "/tmp/test",
1164
+ modelTier: "balanced",
1165
+ });
1166
+
1167
+ expect(result.success).toBe(false);
1168
+ // isolation.passed=true but agent failed → session-failure
1169
+ expect(result.failureCategory).toBe("session-failure");
1170
+ });
1171
+
1172
+ test("implementer failure sets failureCategory='session-failure'", async () => {
1173
+ mockGitSpawn({
1174
+ diffFiles: [
1175
+ // Session 1 isolation: OK
1176
+ ["test/user.test.ts"],
1177
+ // Session 1 getChangedFiles
1178
+ ["test/user.test.ts"],
1179
+ // Session 2 isolation: OK
1180
+ ["src/user.ts"],
1181
+ // Session 2 getChangedFiles
1182
+ ["src/user.ts"],
1183
+ ],
1184
+ });
1185
+
1186
+ const agent = createMockAgent([
1187
+ { success: true, estimatedCost: 0.01 }, // test-writer OK
1188
+ { success: false, exitCode: 1, estimatedCost: 0.02 }, // implementer fails
1189
+ ]);
1190
+
1191
+ const result = await runThreeSessionTdd({
1192
+ agent,
1193
+ story,
1194
+ config: DEFAULT_CONFIG,
1195
+ workdir: "/tmp/test",
1196
+ modelTier: "balanced",
1197
+ });
1198
+
1199
+ expect(result.success).toBe(false);
1200
+ expect(result.failureCategory).toBe("session-failure");
1201
+ });
1202
+
1203
+ test("post-TDD test failure sets failureCategory='tests-failing'", async () => {
1204
+ // Verifier session fails AND independent test run also fails
1205
+ let revParseCount = 0;
1206
+ let diffCount = 0;
1207
+
1208
+ const diffFiles = [["test/user.test.ts"], ["test/user.test.ts"], ["src/user.ts"], ["src/user.ts"], ["src/user.ts"]];
1209
+
1210
+ // @ts-ignore — mocking global
1211
+ Bun.spawn = mock((cmd: string[], spawnOpts?: any) => {
1212
+ if (cmd[0] === "/bin/sh" && cmd[2]?.includes("bun test")) {
1213
+ return {
1214
+ pid: 9999,
1215
+ exited: Promise.resolve(1), // Tests FAIL
1216
+ stdout: new Response("3 pass, 2 fail\n").body,
1217
+ stderr: new Response("Test errors...\n").body,
1218
+ };
1219
+ }
1220
+ if (cmd[0] === "git" && cmd[1] === "rev-parse") {
1221
+ revParseCount++;
1222
+ return {
1223
+ exited: Promise.resolve(0),
1224
+ stdout: new Response(`ref-${revParseCount}\n`).body,
1225
+ stderr: new Response("").body,
1226
+ };
1227
+ }
1228
+ if (cmd[0] === "git" && cmd[1] === "diff") {
1229
+ const files = diffFiles[diffCount] || [];
1230
+ diffCount++;
1231
+ return {
1232
+ exited: Promise.resolve(0),
1233
+ stdout: new Response(files.join("\n") + "\n").body,
1234
+ stderr: new Response("").body,
1235
+ };
1236
+ }
1237
+ return originalSpawn(cmd, spawnOpts);
1238
+ });
1239
+
1240
+ const agent = createMockAgent([
1241
+ { success: true, estimatedCost: 0.01 },
1242
+ { success: true, estimatedCost: 0.02 },
1243
+ { success: false, exitCode: 1, estimatedCost: 0.01 }, // verifier fails
1244
+ ]);
1245
+
1246
+ const result = await runThreeSessionTdd({
1247
+ agent,
1248
+ story,
1249
+ config: DEFAULT_CONFIG,
1250
+ workdir: "/tmp/test",
1251
+ modelTier: "balanced",
1252
+ });
1253
+
1254
+ expect(result.success).toBe(false);
1255
+ expect(result.failureCategory).toBe("tests-failing");
1256
+ });
1257
+
1258
+ test("success path has no failureCategory", async () => {
1259
+ mockGitSpawn({
1260
+ diffFiles: [["test/user.test.ts"], ["test/user.test.ts"], ["src/user.ts"], ["src/user.ts"], ["src/user.ts"]],
1261
+ });
1262
+
1263
+ const agent = createMockAgent([
1264
+ { success: true, estimatedCost: 0.01 },
1265
+ { success: true, estimatedCost: 0.02 },
1266
+ { success: true, estimatedCost: 0.01 },
1267
+ ]);
1268
+
1269
+ const result = await runThreeSessionTdd({
1270
+ agent,
1271
+ story,
1272
+ config: DEFAULT_CONFIG,
1273
+ workdir: "/tmp/test",
1274
+ modelTier: "balanced",
1275
+ });
1276
+
1277
+ expect(result.success).toBe(true);
1278
+ expect(result.failureCategory).toBeUndefined();
1279
+ });
1280
+
1281
+ test("zero-file scenario (auto strategy) returns greenfield-no-tests (BUG-010 removed auto-fallback)", async () => {
1282
+ // BUG-010: In auto strategy, zero test files → return greenfield-no-tests (no more fallback)
1283
+ let diffCount = 0;
1284
+
1285
+ const diffFiles = [
1286
+ ["requirements.md"], // s1 isolation (strict) — no source violations
1287
+ ["requirements.md"], // s1 getChangedFiles (strict) — 0 test files → return greenfield-no-tests
1288
+ ];
1289
+
1290
+ // @ts-ignore — mocking global
1291
+ Bun.spawn = mock((cmd: string[], spawnOpts?: any) => {
1292
+ if (cmd[0] === "git" && cmd[1] === "rev-parse") {
1293
+ return {
1294
+ exited: Promise.resolve(0),
1295
+ stdout: new Response("ref-1\n").body,
1296
+ stderr: new Response("").body,
1297
+ };
1298
+ }
1299
+ if (cmd[0] === "git" && cmd[1] === "diff") {
1300
+ const files = diffFiles[diffCount] || [];
1301
+ diffCount++;
1302
+ return {
1303
+ exited: Promise.resolve(0),
1304
+ stdout: new Response(files.join("\n") + "\n").body,
1305
+ stderr: new Response("").body,
1306
+ };
1307
+ }
1308
+ return originalSpawn(cmd, spawnOpts);
1309
+ });
1310
+
1311
+ const agent = createMockAgent([
1312
+ { success: true, estimatedCost: 0.01 }, // s1 strict test-writer
1313
+ ]);
1314
+
1315
+ const configWithAutoStrategy = {
1316
+ ...DEFAULT_CONFIG,
1317
+ tdd: { ...DEFAULT_CONFIG.tdd, strategy: "auto" as const },
1318
+ };
1319
+
1320
+ const result = await runThreeSessionTdd({
1321
+ agent,
1322
+ story,
1323
+ config: configWithAutoStrategy,
1324
+ workdir: "/tmp/test",
1325
+ modelTier: "balanced",
1326
+ });
1327
+
1328
+ expect(result.success).toBe(false);
1329
+ expect(result.lite).toBe(false);
1330
+ expect(result.failureCategory).toBe("greenfield-no-tests");
1331
+ });
1332
+ });
1333
+
1334
+ // ─── T9: Verdict integration tests ───────────────────────────────────────────
1335
+
1336
+ describe("runThreeSessionTdd — T9: verdict integration", () => {
1337
+ let tmpDir: string;
1338
+
1339
+ beforeEach(async () => {
1340
+ tmpDir = `/tmp/nax-t9-test-${Date.now()}-${Math.random().toString(36).slice(2)}`;
1341
+ await mkdir(tmpDir, { recursive: true });
1342
+ });
1343
+
1344
+ afterEach(async () => {
1345
+ await rm(tmpDir, { recursive: true, force: true });
1346
+ Bun.spawn = originalSpawn;
1347
+ });
1348
+
1349
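+ /** Write a well-formed verdict file into tmpDir; `failReason` selects which verdict section (tests, test modifications, acceptance criteria, quality) reports the failure. */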
1350
+ async function writeVerdictToDir(opts: {
1351
+ approved: boolean;
1352
+ failReason?: "tests-failing" | "illegitimate-mods" | "criteria-not-met" | "poor-quality";
1353
+ }) {
1354
+ const verdict = {
1355
+ version: 1,
1356
+ approved: opts.approved,
1357
+ tests: {
1358
+ allPassing: opts.failReason !== "tests-failing",
1359
+ passCount: opts.failReason === "tests-failing" ? 5 : 10,
1360
+ failCount: opts.failReason === "tests-failing" ? 3 : 0,
1361
+ },
1362
+ testModifications: {
1363
+ detected: opts.failReason === "illegitimate-mods",
1364
+ files: opts.failReason === "illegitimate-mods" ? ["test/foo.test.ts"] : [],
1365
+ legitimate: opts.failReason !== "illegitimate-mods",
1366
+ reasoning: opts.failReason === "illegitimate-mods" ? "Implementer cheated" : "No mods",
1367
+ },
1368
+ acceptanceCriteria: {
1369
+ allMet: opts.failReason !== "criteria-not-met",
1370
+ criteria:
1371
+ opts.failReason === "criteria-not-met"
1372
+ ? [{ criterion: "Must work", met: false }]
1373
+ : [{ criterion: "Works", met: true }],
1374
+ },
1375
+ quality: {
1376
+ rating: opts.failReason === "poor-quality" ? "poor" : "good",
1377
+ issues: opts.failReason === "poor-quality" ? ["Security issue"] : [],
1378
+ },
1379
+ fixes: [],
1380
+ reasoning: opts.approved ? "All good." : "Implementation rejected.",
1381
+ };
1382
+ await writeFile(path.join(tmpDir, VERDICT_FILE), JSON.stringify(verdict, null, 2));
1383
+ }
1384
+
1385
+ /**
1386
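+ * Mock Bun.spawn for a full 3-session T9 run.
1387
+ * Serves the `diffFiles` entries to successive git diff calls (default: 6 — isolation + getChangedFiles per session), then empty lists.
1388
+ * Always intercepts the post-TDD shell command (bun test); `onTestCmd` overrides its default passing result. Other commands go to originalSpawn.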
1389
+ */
1390
+ function mockGitAndTestForT9(opts: {
1391
+ diffFiles?: string[][];
1392
+ onTestCmd?: () => { exitCode: number; stdout: string };
1393
+ }) {
1394
+ const files = opts.diffFiles ?? [
1395
+ ["test/user.test.ts"], // s1 isolation
1396
+ ["test/user.test.ts"], // s1 getChangedFiles
1397
+ ["src/user.ts"], // s2 isolation
1398
+ ["src/user.ts"], // s2 getChangedFiles
1399
+ [], // s3 isolation
1400
+ ["src/user.ts"], // s3 getChangedFiles
1401
+ ];
1402
+ let revParseCount = 0;
1403
+ let diffCount = 0;
1404
+
1405
+ // @ts-ignore — mocking global
1406
+ Bun.spawn = mock((cmd: string[], spawnOpts?: any) => {
1407
+ if (cmd[0] === "/bin/sh" && cmd[2]?.includes("bun test")) {
1408
+ const r = opts.onTestCmd?.() ?? { exitCode: 0, stdout: "5 pass, 0 fail\n" };
1409
+ return {
1410
+ pid: 9999,
1411
+ exited: Promise.resolve(r.exitCode),
1412
+ stdout: new Response(r.stdout).body,
1413
+ stderr: new Response("").body,
1414
+ };
1415
+ }
1416
+ if (cmd[0] === "git" && cmd[1] === "rev-parse") {
1417
+ revParseCount++;
1418
+ return {
1419
+ exited: Promise.resolve(0),
1420
+ stdout: new Response(`ref-${revParseCount}\n`).body,
1421
+ stderr: new Response("").body,
1422
+ };
1423
+ }
1424
+ if (cmd[0] === "git" && cmd[1] === "diff") {
1425
+ const f = files[diffCount] || [];
1426
+ diffCount++;
1427
+ return {
1428
+ exited: Promise.resolve(0),
1429
+ stdout: new Response(f.join("\n") + "\n").body,
1430
+ stderr: new Response("").body,
1431
+ };
1432
+ }
1433
+ return originalSpawn(cmd, spawnOpts);
1434
+ });
1435
+ }
1436
+
1437
+ test("verdict approved=true: overall success even when verifier session failed", async () => {
1438
+ await writeVerdictToDir({ approved: true });
1439
+ mockGitAndTestForT9({});
1440
+
1441
+ const agent = createMockAgent([
1442
+ { success: true, estimatedCost: 0.01 },
1443
+ { success: true, estimatedCost: 0.02 },
1444
+ { success: false, exitCode: 1, estimatedCost: 0.01 }, // verifier exits non-zero
1445
+ ]);
1446
+
1447
+ const result = await runThreeSessionTdd({
1448
+ agent,
1449
+ story,
1450
+ config: DEFAULT_CONFIG,
1451
+ workdir: tmpDir,
1452
+ modelTier: "balanced",
1453
+ });
1454
+
1455
+ expect(result.success).toBe(true);
1456
+ expect(result.needsHumanReview).toBe(false);
1457
+ expect(result.failureCategory).toBeUndefined();
1458
+ expect(result.reviewReason).toBeUndefined();
1459
+ });
1460
+
1461
+ test("verdict approved=true: skips the post-TDD independent test check", async () => {
1462
+ await writeVerdictToDir({ approved: true });
1463
+ let testCommandCalled = false;
1464
+ mockGitAndTestForT9({
1465
+ onTestCmd: () => {
1466
+ testCommandCalled = true;
1467
+ return { exitCode: 0, stdout: "" };
1468
+ },
1469
+ });
1470
+
1471
+ const agent = createMockAgent([
1472
+ { success: true, estimatedCost: 0.01 },
1473
+ { success: true, estimatedCost: 0.02 },
1474
+ { success: false, exitCode: 1, estimatedCost: 0.01 }, // verifier fails
1475
+ ]);
1476
+
1477
+ // Disable rectification to avoid test command being called for full-suite gate
1478
+ const configNoRectification = {
1479
+ ...DEFAULT_CONFIG,
1480
+ execution: {
1481
+ ...DEFAULT_CONFIG.execution,
1482
+ rectification: { ...DEFAULT_CONFIG.execution.rectification, enabled: false },
1483
+ },
1484
+ };
1485
+
1486
+ await runThreeSessionTdd({
1487
+ agent,
1488
+ story,
1489
+ config: configNoRectification,
1490
+ workdir: tmpDir,
1491
+ modelTier: "balanced",
1492
+ });
1493
+ expect(testCommandCalled).toBe(false); // Test was NOT run when verdict present
1494
+ });
1495
+
1496
+ test("verdict approved=false + tests-failing → failureCategory='tests-failing'", async () => {
1497
+ await writeVerdictToDir({ approved: false, failReason: "tests-failing" });
1498
+ mockGitAndTestForT9({});
1499
+
1500
+ const agent = createMockAgent([
1501
+ { success: true, estimatedCost: 0.01 },
1502
+ { success: true, estimatedCost: 0.02 },
1503
+ { success: true, estimatedCost: 0.01 }, // sessions succeed but verdict says rejected
1504
+ ]);
1505
+
1506
+ const result = await runThreeSessionTdd({
1507
+ agent,
1508
+ story,
1509
+ config: DEFAULT_CONFIG,
1510
+ workdir: tmpDir,
1511
+ modelTier: "balanced",
1512
+ });
1513
+
1514
+ expect(result.success).toBe(false);
1515
+ expect(result.needsHumanReview).toBe(true);
1516
+ expect(result.failureCategory).toBe("tests-failing");
1517
+ expect(result.reviewReason).toContain("failure(s)");
1518
+ });
1519
+
1520
+ test("verdict approved=false + illegitimate test mods → failureCategory='verifier-rejected'", async () => {
1521
+ await writeVerdictToDir({ approved: false, failReason: "illegitimate-mods" });
1522
+ mockGitAndTestForT9({});
1523
+
1524
+ const agent = createMockAgent([
1525
+ { success: true, estimatedCost: 0.01 },
1526
+ { success: true, estimatedCost: 0.02 },
1527
+ { success: true, estimatedCost: 0.01 },
1528
+ ]);
1529
+
1530
+ const result = await runThreeSessionTdd({
1531
+ agent,
1532
+ story,
1533
+ config: DEFAULT_CONFIG,
1534
+ workdir: tmpDir,
1535
+ modelTier: "balanced",
1536
+ });
1537
+
1538
+ expect(result.success).toBe(false);
1539
+ expect(result.failureCategory).toBe("verifier-rejected");
1540
+ expect(result.reviewReason).toContain("illegitimate test modifications");
1541
+ });
1542
+
1543
+ test("verdict approved=false + criteria not met → failureCategory='verifier-rejected'", async () => {
1544
+ await writeVerdictToDir({ approved: false, failReason: "criteria-not-met" });
1545
+ mockGitAndTestForT9({});
1546
+
1547
+ const agent = createMockAgent([
1548
+ { success: true, estimatedCost: 0.01 },
1549
+ { success: true, estimatedCost: 0.02 },
1550
+ { success: true, estimatedCost: 0.01 },
1551
+ ]);
1552
+
1553
+ const result = await runThreeSessionTdd({
1554
+ agent,
1555
+ story,
1556
+ config: DEFAULT_CONFIG,
1557
+ workdir: tmpDir,
1558
+ modelTier: "balanced",
1559
+ });
1560
+
1561
+ expect(result.success).toBe(false);
1562
+ expect(result.failureCategory).toBe("verifier-rejected");
1563
+ expect(result.reviewReason).toContain("Must work");
1564
+ });
1565
+
1566
+ test("no verdict file → fallback: post-TDD test check is run on session failures", async () => {
1567
+ // No verdict file — when verifier fails, falls back to running tests independently
1568
+ let testCommandCalled = false;
1569
+ mockGitAndTestForT9({
1570
+ onTestCmd: () => {
1571
+ testCommandCalled = true;
1572
+ return { exitCode: 0, stdout: "5 pass, 0 fail\n" }; // Tests pass in fallback
1573
+ },
1574
+ });
1575
+
1576
+ const agent = createMockAgent([
1577
+ { success: true, estimatedCost: 0.01 },
1578
+ { success: true, estimatedCost: 0.02 },
1579
+ { success: false, exitCode: 1, estimatedCost: 0.01 }, // verifier fails
1580
+ ]);
1581
+
1582
+ const result = await runThreeSessionTdd({
1583
+ agent,
1584
+ story,
1585
+ config: DEFAULT_CONFIG,
1586
+ workdir: tmpDir,
1587
+ modelTier: "balanced",
1588
+ });
1589
+
1590
+ expect(testCommandCalled).toBe(true); // Fallback test run was executed
1591
+ expect(result.success).toBe(true); // Tests pass in fallback → success
1592
+ expect(result.verdict).toBeNull(); // No verdict available
1593
+ });
1594
+
1595
+ test("malformed verdict → fallback: post-TDD test check is run", async () => {
1596
+ // Write invalid JSON — should trigger fallback
1597
+ await writeFile(path.join(tmpDir, VERDICT_FILE), "{ this is not valid json }");
1598
+ let testCommandCalled = false;
1599
+ mockGitAndTestForT9({
1600
+ onTestCmd: () => {
1601
+ testCommandCalled = true;
1602
+ return { exitCode: 0, stdout: "5 pass\n" };
1603
+ },
1604
+ });
1605
+
1606
+ const agent = createMockAgent([
1607
+ { success: true, estimatedCost: 0.01 },
1608
+ { success: true, estimatedCost: 0.02 },
1609
+ { success: false, exitCode: 1, estimatedCost: 0.01 },
1610
+ ]);
1611
+
1612
+ const result = await runThreeSessionTdd({
1613
+ agent,
1614
+ story,
1615
+ config: DEFAULT_CONFIG,
1616
+ workdir: tmpDir,
1617
+ modelTier: "balanced",
1618
+ });
1619
+
1620
+ expect(testCommandCalled).toBe(true); // Fallback used when verdict is malformed
1621
+ expect(result.verdict).toBeNull(); // Malformed = null
1622
+ });
1623
+
1624
+ test("verdict stored in result.verdict for logging/debugging (approved=true)", async () => {
1625
+ await writeVerdictToDir({ approved: true });
1626
+ mockGitAndTestForT9({});
1627
+
1628
+ const agent = createMockAgent([
1629
+ { success: true, estimatedCost: 0.01 },
1630
+ { success: true, estimatedCost: 0.02 },
1631
+ { success: true, estimatedCost: 0.01 },
1632
+ ]);
1633
+
1634
+ const result = await runThreeSessionTdd({
1635
+ agent,
1636
+ story,
1637
+ config: DEFAULT_CONFIG,
1638
+ workdir: tmpDir,
1639
+ modelTier: "balanced",
1640
+ });
1641
+
1642
+ expect(result.verdict).toBeDefined();
1643
+ expect(result.verdict).not.toBeNull();
1644
+ expect(result.verdict!.version).toBe(1);
1645
+ expect(result.verdict!.approved).toBe(true);
1646
+ expect(result.verdict!.tests.allPassing).toBe(true);
1647
+ expect(result.verdict!.tests.passCount).toBe(10);
1648
+ expect(result.verdict!.reasoning).toBe("All good.");
1649
+ });
1650
+
1651
+ test("verdict stored in result.verdict for logging/debugging (approved=false)", async () => {
1652
+ await writeVerdictToDir({ approved: false, failReason: "tests-failing" });
1653
+ mockGitAndTestForT9({});
1654
+
1655
+ const agent = createMockAgent([
1656
+ { success: true, estimatedCost: 0.01 },
1657
+ { success: true, estimatedCost: 0.02 },
1658
+ { success: true, estimatedCost: 0.01 },
1659
+ ]);
1660
+
1661
+ const result = await runThreeSessionTdd({
1662
+ agent,
1663
+ story,
1664
+ config: DEFAULT_CONFIG,
1665
+ workdir: tmpDir,
1666
+ modelTier: "balanced",
1667
+ });
1668
+
1669
+ expect(result.verdict).not.toBeNull();
1670
+ expect(result.verdict!.approved).toBe(false);
1671
+ expect(result.verdict!.tests.failCount).toBe(3);
1672
+ });
1673
+
1674
+ test("verdict file is deleted after reading (cleanup enforced)", async () => {
1675
+ await writeVerdictToDir({ approved: true });
1676
+ mockGitAndTestForT9({});
1677
+
1678
+ const verdictPath = path.join(tmpDir, VERDICT_FILE);
1679
+ expect(existsSync(verdictPath)).toBe(true); // File exists before run
1680
+
1681
+ const agent = createMockAgent([
1682
+ { success: true, estimatedCost: 0.01 },
1683
+ { success: true, estimatedCost: 0.02 },
1684
+ { success: true, estimatedCost: 0.01 },
1685
+ ]);
1686
+ await runThreeSessionTdd({
1687
+ agent,
1688
+ story,
1689
+ config: DEFAULT_CONFIG,
1690
+ workdir: tmpDir,
1691
+ modelTier: "balanced",
1692
+ });
1693
+
1694
+ expect(existsSync(verdictPath)).toBe(false); // File cleaned up after run
1695
+ });
1696
+
1697
+ test("no verdict + all sessions succeed → success without running test check", async () => {
1698
+ // All sessions succeed, no verdict → should succeed and NOT run the test command
1699
+ let testCommandCalled = false;
1700
+ mockGitAndTestForT9({
1701
+ onTestCmd: () => {
1702
+ testCommandCalled = true;
1703
+ return { exitCode: 0, stdout: "" };
1704
+ },
1705
+ });
1706
+
1707
+ const agent = createMockAgent([
1708
+ { success: true, estimatedCost: 0.01 },
1709
+ { success: true, estimatedCost: 0.02 },
1710
+ { success: true, estimatedCost: 0.01 },
1711
+ ]);
1712
+
1713
+ // Disable rectification to avoid test command being called for full-suite gate
1714
+ const configNoRectification = {
1715
+ ...DEFAULT_CONFIG,
1716
+ execution: {
1717
+ ...DEFAULT_CONFIG.execution,
1718
+ rectification: { ...DEFAULT_CONFIG.execution.rectification, enabled: false },
1719
+ },
1720
+ };
1721
+
1722
+ const result = await runThreeSessionTdd({
1723
+ agent,
1724
+ story,
1725
+ config: configNoRectification,
1726
+ workdir: tmpDir,
1727
+ modelTier: "balanced",
1728
+ });
1729
+
1730
+ expect(result.success).toBe(true);
1731
+ expect(testCommandCalled).toBe(false); // Not needed when sessions all succeed
1732
+ expect(result.verdict).toBeNull(); // No verdict
1733
+ expect(result.failureCategory).toBeUndefined();
1734
+ });
1735
+
1736
+ test("early-exit before session 3 (session 1 fails) → verdict is undefined (not attempted)", async () => {
1737
+ // If we exit before session 3, verdict reading is never attempted
1738
+ mockGitAndTestForT9({
1739
+ diffFiles: [
1740
+ ["test/user.test.ts"], // s1 isolation
1741
+ ["test/user.test.ts"], // s1 getChangedFiles
1742
+ ],
1743
+ });
1744
+
1745
+ const agent = createMockAgent([
1746
+ { success: false, exitCode: 1, estimatedCost: 0.01 }, // session 1 fails
1747
+ ]);
1748
+
1749
+ const result = await runThreeSessionTdd({
1750
+ agent,
1751
+ story,
1752
+ config: DEFAULT_CONFIG,
1753
+ workdir: tmpDir,
1754
+ modelTier: "balanced",
1755
+ });
1756
+
1757
+ expect(result.success).toBe(false);
1758
+ expect(result.sessions).toHaveLength(1);
1759
+ // verdict is undefined (field not set) because we never got to session 3
1760
+ expect(result.verdict).toBeUndefined();
1761
+ });
1762
+ });