@nathapp/nax 0.18.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (459) hide show
  1. package/.gitlab-ci.yml +96 -0
  2. package/BRIEF.md +140 -0
  3. package/CHANGELOG.md +60 -0
  4. package/CLAUDE.md +159 -0
  5. package/README.md +373 -0
  6. package/US-007-IMPLEMENTATION.md +139 -0
  7. package/bin/nax.ts +930 -0
  8. package/biome.json +14 -0
  9. package/bun.lock +168 -0
  10. package/bunfig.toml +11 -0
  11. package/docs/20260216-fix-plan-context-review.md +56 -0
  12. package/docs/20260216-relentless-vs-ngent-comparison.md +208 -0
  13. package/docs/20260216-v02-plan.md +136 -0
  14. package/docs/20260216-v02-review.md +685 -0
  15. package/docs/20260217-dogfood-findings.md +56 -0
  16. package/docs/20260217-p2-plus-plan.md +117 -0
  17. package/docs/20260217-partial-fixes-plan.md +62 -0
  18. package/docs/20260217-plan-analyze-spec.md +117 -0
  19. package/docs/20260217-post-impl-review.md +1137 -0
  20. package/docs/20260217-quick-wins-plan.md +66 -0
  21. package/docs/20260217-split-runner-plan.md +75 -0
  22. package/docs/20260217-v03-impl-plan.md +80 -0
  23. package/docs/20260217-v03-post-impl-review.md +589 -0
  24. package/docs/20260217-v04-impl-plan.md +86 -0
  25. package/docs/20260217-v05-post-impl-review.md +850 -0
  26. package/docs/20260217-v06-post-impl-review.md +817 -0
  27. package/docs/20260218-adr003-port-plan.md +151 -0
  28. package/docs/20260218-review-adr003-verification.md +175 -0
  29. package/docs/20260219-fix-plan-bug16-19.md +79 -0
  30. package/docs/20260219-fix-plan-bug20-22.md +114 -0
  31. package/docs/20260219-plan-llm-routing.md +116 -0
  32. package/docs/20260219-review-bug20-22-fixes.md +135 -0
  33. package/docs/20260219-routing-baseline-keyword.md +63 -0
  34. package/docs/20260220-plan-structured-logging-p1.md +80 -0
  35. package/docs/20260220-plan-structured-logging-p2.md +37 -0
  36. package/docs/20260220-review-llm-routing.md +180 -0
  37. package/docs/20260220-review-post-fix-llm-routing.md +70 -0
  38. package/docs/20260221-fix-plan-relevantfiles-split.md +101 -0
  39. package/docs/20260221-fix-plan-routing-mode.md +125 -0
  40. package/docs/20260221-review-v0.9-implementation.md +379 -0
  41. package/docs/20260222-fix-plan-v091-routing-isolation.md +197 -0
  42. package/docs/20260223-fix-plan-prompt-audit.md +62 -0
  43. package/docs/20260224-nax-roadmap-phases.md +189 -0
  44. package/docs/20260225-phase2-llm-service-layer.md +401 -0
  45. package/docs/20260225-review-v0.10.1.md +187 -0
  46. package/docs/20260303-v010-implementation-plan.md +165 -0
  47. package/docs/CLAUDE.md.bak +191 -0
  48. package/docs/ROADMAP.md +165 -0
  49. package/docs/SPEC-rectification.md +0 -0
  50. package/docs/SPEC.md +324 -0
  51. package/docs/US-001-plugin-loading-verification.md +152 -0
  52. package/docs/architecture-analysis.md +1076 -0
  53. package/docs/bugs/BUG-21-escalation-null-attempts.md +48 -0
  54. package/docs/bugs-from-dogfood-run-c.md +243 -0
  55. package/docs/code-review-20260228.md +612 -0
  56. package/docs/code-review-v0.15.0.md +629 -0
  57. package/docs/hook-lifecycle-test-plan.md +149 -0
  58. package/docs/releases/v0.11.0-and-earlier.md +20 -0
  59. package/docs/releases/v0.12.0.md +15 -0
  60. package/docs/releases/v0.13.0.md +14 -0
  61. package/docs/releases/v0.14.0.md +20 -0
  62. package/docs/releases/v0.14.1.md +36 -0
  63. package/docs/releases/v0.14.2.md +51 -0
  64. package/docs/releases/v0.14.3.md +174 -0
  65. package/docs/releases/v0.14.4.md +94 -0
  66. package/docs/releases/v0.15.0.md +502 -0
  67. package/docs/releases/v0.15.1.md +170 -0
  68. package/docs/releases/v0.15.3.md +193 -0
  69. package/docs/specs/status-file-v0.10.1.md +812 -0
  70. package/docs/v0.10-global-config.md +206 -0
  71. package/docs/v0.10-plugin-system.md +415 -0
  72. package/docs/v0.10-prompt-optimizer.md +234 -0
  73. package/docs/v0.3-spec.md +244 -0
  74. package/docs/v0.4-spec.md +140 -0
  75. package/docs/v0.5-spec.md +237 -0
  76. package/docs/v0.6-spec.md +371 -0
  77. package/docs/v0.7-spec.md +177 -0
  78. package/docs/v0.8-llm-routing.md +206 -0
  79. package/docs/v0.8-structured-logging.md +132 -0
  80. package/docs/v0.9.3-prompt-audit.md +112 -0
  81. package/examples/plugins/console-reporter/index.test.ts +207 -0
  82. package/examples/plugins/console-reporter/index.ts +110 -0
  83. package/nax/config.json +147 -0
  84. package/nax/features/bugfix-v0171/prd.json +52 -0
  85. package/nax/features/config-management/prd.json +108 -0
  86. package/nax/features/config-management/progress.txt +5 -0
  87. package/nax/features/diagnose/acceptance.test.ts +412 -0
  88. package/nax/features/diagnose/prd.json +41 -0
  89. package/nax/features/orchestration-fixes/prd.json +89 -0
  90. package/nax/features/orchestration-fixes/progress.txt +1 -0
  91. package/nax/features/plugin-integration/US-007-VERIFICATION.md +259 -0
  92. package/nax/features/plugin-integration/prd.json +208 -0
  93. package/nax/features/plugin-integration/progress.txt +5 -0
  94. package/nax/features/precheck/prd.json +205 -0
  95. package/nax/features/precheck/progress.txt +15 -0
  96. package/nax/features/structured-logging/prd.json +199 -0
  97. package/nax/features/unlock/prd.json +36 -0
  98. package/package.json +47 -0
  99. package/src/acceptance/fix-generator.ts +348 -0
  100. package/src/acceptance/generator.ts +282 -0
  101. package/src/acceptance/index.ts +30 -0
  102. package/src/acceptance/types.ts +79 -0
  103. package/src/agents/claude-decompose.ts +169 -0
  104. package/src/agents/claude-plan.ts +139 -0
  105. package/src/agents/claude.ts +324 -0
  106. package/src/agents/cost.ts +268 -0
  107. package/src/agents/index.ts +13 -0
  108. package/src/agents/registry.ts +48 -0
  109. package/src/agents/types-extended.ts +133 -0
  110. package/src/agents/types.ts +113 -0
  111. package/src/agents/validation.ts +69 -0
  112. package/src/analyze/classifier.ts +305 -0
  113. package/src/analyze/index.ts +16 -0
  114. package/src/analyze/scanner.ts +175 -0
  115. package/src/analyze/types.ts +51 -0
  116. package/src/cli/accept.ts +108 -0
  117. package/src/cli/analyze-parser.ts +284 -0
  118. package/src/cli/analyze.ts +207 -0
  119. package/src/cli/config.ts +561 -0
  120. package/src/cli/constitution.ts +109 -0
  121. package/src/cli/diagnose-analysis.ts +159 -0
  122. package/src/cli/diagnose-formatter.ts +87 -0
  123. package/src/cli/diagnose.ts +203 -0
  124. package/src/cli/generate.ts +127 -0
  125. package/src/cli/index.ts +37 -0
  126. package/src/cli/init.ts +188 -0
  127. package/src/cli/interact.ts +295 -0
  128. package/src/cli/plan.ts +198 -0
  129. package/src/cli/plugins.ts +111 -0
  130. package/src/cli/prompts.ts +295 -0
  131. package/src/cli/runs.ts +174 -0
  132. package/src/cli/status-cost.ts +151 -0
  133. package/src/cli/status-features.ts +338 -0
  134. package/src/cli/status.ts +13 -0
  135. package/src/commands/common.ts +171 -0
  136. package/src/commands/diagnose.ts +17 -0
  137. package/src/commands/index.ts +8 -0
  138. package/src/commands/logs.ts +384 -0
  139. package/src/commands/precheck.ts +86 -0
  140. package/src/commands/unlock.ts +96 -0
  141. package/src/config/defaults.ts +160 -0
  142. package/src/config/index.ts +22 -0
  143. package/src/config/loader.ts +121 -0
  144. package/src/config/merger.ts +147 -0
  145. package/src/config/path-security.ts +121 -0
  146. package/src/config/paths.ts +27 -0
  147. package/src/config/schema.ts +56 -0
  148. package/src/config/schemas.ts +286 -0
  149. package/src/config/types.ts +423 -0
  150. package/src/config/validate.ts +103 -0
  151. package/src/constitution/generator.ts +191 -0
  152. package/src/constitution/generators/aider.ts +41 -0
  153. package/src/constitution/generators/claude.ts +35 -0
  154. package/src/constitution/generators/cursor.ts +36 -0
  155. package/src/constitution/generators/opencode.ts +38 -0
  156. package/src/constitution/generators/types.ts +33 -0
  157. package/src/constitution/generators/windsurf.ts +36 -0
  158. package/src/constitution/index.ts +10 -0
  159. package/src/constitution/loader.ts +133 -0
  160. package/src/constitution/types.ts +31 -0
  161. package/src/context/auto-detect.ts +227 -0
  162. package/src/context/builder.ts +246 -0
  163. package/src/context/elements.ts +83 -0
  164. package/src/context/formatter.ts +107 -0
  165. package/src/context/generator.ts +129 -0
  166. package/src/context/generators/aider.ts +34 -0
  167. package/src/context/generators/claude.ts +28 -0
  168. package/src/context/generators/cursor.ts +28 -0
  169. package/src/context/generators/opencode.ts +30 -0
  170. package/src/context/generators/windsurf.ts +28 -0
  171. package/src/context/greenfield.ts +114 -0
  172. package/src/context/index.ts +33 -0
  173. package/src/context/injector.ts +279 -0
  174. package/src/context/test-scanner.ts +370 -0
  175. package/src/context/types.ts +98 -0
  176. package/src/errors.ts +67 -0
  177. package/src/execution/batching.ts +157 -0
  178. package/src/execution/crash-recovery.ts +373 -0
  179. package/src/execution/escalation/escalation.ts +44 -0
  180. package/src/execution/escalation/index.ts +13 -0
  181. package/src/execution/escalation/tier-escalation.ts +295 -0
  182. package/src/execution/escalation/tier-outcome.ts +158 -0
  183. package/src/execution/helpers.ts +38 -0
  184. package/src/execution/index.ts +45 -0
  185. package/src/execution/lifecycle/acceptance-loop.ts +272 -0
  186. package/src/execution/lifecycle/headless-formatter.ts +85 -0
  187. package/src/execution/lifecycle/index.ts +12 -0
  188. package/src/execution/lifecycle/parallel-lifecycle.ts +101 -0
  189. package/src/execution/lifecycle/precheck-runner.ts +140 -0
  190. package/src/execution/lifecycle/run-cleanup.ts +81 -0
  191. package/src/execution/lifecycle/run-completion.ts +129 -0
  192. package/src/execution/lifecycle/run-initialization.ts +141 -0
  193. package/src/execution/lifecycle/run-lifecycle.ts +312 -0
  194. package/src/execution/lifecycle/run-setup.ts +204 -0
  195. package/src/execution/lifecycle/story-hooks.ts +38 -0
  196. package/src/execution/lifecycle/story-size-prompts.ts +123 -0
  197. package/src/execution/lock.ts +115 -0
  198. package/src/execution/parallel-executor.ts +216 -0
  199. package/src/execution/parallel.ts +400 -0
  200. package/src/execution/pid-registry.ts +280 -0
  201. package/src/execution/pipeline-result-handler.ts +388 -0
  202. package/src/execution/post-verify-rectification.ts +188 -0
  203. package/src/execution/post-verify.ts +274 -0
  204. package/src/execution/progress.ts +25 -0
  205. package/src/execution/prompts.ts +127 -0
  206. package/src/execution/queue-handler.ts +109 -0
  207. package/src/execution/rectification.ts +13 -0
  208. package/src/execution/runner.ts +377 -0
  209. package/src/execution/sequential-executor.ts +388 -0
  210. package/src/execution/status-file.ts +264 -0
  211. package/src/execution/status-writer.ts +139 -0
  212. package/src/execution/story-context.ts +229 -0
  213. package/src/execution/test-output-parser.ts +14 -0
  214. package/src/execution/verification.ts +72 -0
  215. package/src/hooks/index.ts +2 -0
  216. package/src/hooks/runner.ts +286 -0
  217. package/src/hooks/types.ts +67 -0
  218. package/src/interaction/chain.ts +154 -0
  219. package/src/interaction/index.ts +60 -0
  220. package/src/interaction/init.ts +83 -0
  221. package/src/interaction/plugins/auto.ts +217 -0
  222. package/src/interaction/plugins/cli.ts +300 -0
  223. package/src/interaction/plugins/telegram.ts +384 -0
  224. package/src/interaction/plugins/webhook.ts +258 -0
  225. package/src/interaction/state.ts +171 -0
  226. package/src/interaction/triggers.ts +229 -0
  227. package/src/interaction/types.ts +163 -0
  228. package/src/logger/formatters.ts +84 -0
  229. package/src/logger/index.ts +16 -0
  230. package/src/logger/logger.ts +298 -0
  231. package/src/logger/types.ts +48 -0
  232. package/src/logging/formatter.ts +355 -0
  233. package/src/logging/index.ts +22 -0
  234. package/src/logging/types.ts +93 -0
  235. package/src/metrics/aggregator.ts +190 -0
  236. package/src/metrics/index.ts +14 -0
  237. package/src/metrics/tracker.ts +200 -0
  238. package/src/metrics/types.ts +109 -0
  239. package/src/optimizer/index.ts +62 -0
  240. package/src/optimizer/noop.optimizer.ts +24 -0
  241. package/src/optimizer/rule-based.optimizer.ts +248 -0
  242. package/src/optimizer/types.ts +53 -0
  243. package/src/pipeline/events.ts +130 -0
  244. package/src/pipeline/index.ts +19 -0
  245. package/src/pipeline/runner.ts +161 -0
  246. package/src/pipeline/stages/acceptance.ts +197 -0
  247. package/src/pipeline/stages/completion.ts +99 -0
  248. package/src/pipeline/stages/constitution.ts +63 -0
  249. package/src/pipeline/stages/context.ts +117 -0
  250. package/src/pipeline/stages/execution.ts +194 -0
  251. package/src/pipeline/stages/index.ts +62 -0
  252. package/src/pipeline/stages/optimizer.ts +74 -0
  253. package/src/pipeline/stages/prompt.ts +57 -0
  254. package/src/pipeline/stages/queue-check.ts +103 -0
  255. package/src/pipeline/stages/review.ts +181 -0
  256. package/src/pipeline/stages/routing.ts +81 -0
  257. package/src/pipeline/stages/verify.ts +100 -0
  258. package/src/pipeline/types.ts +167 -0
  259. package/src/plugins/index.ts +31 -0
  260. package/src/plugins/loader.ts +287 -0
  261. package/src/plugins/registry.ts +168 -0
  262. package/src/plugins/types.ts +327 -0
  263. package/src/plugins/validator.ts +352 -0
  264. package/src/prd/index.ts +172 -0
  265. package/src/prd/types.ts +202 -0
  266. package/src/precheck/checks-blockers.ts +391 -0
  267. package/src/precheck/checks-warnings.ts +142 -0
  268. package/src/precheck/checks.ts +30 -0
  269. package/src/precheck/index.ts +247 -0
  270. package/src/precheck/story-size-gate.ts +144 -0
  271. package/src/precheck/types.ts +31 -0
  272. package/src/queue/index.ts +2 -0
  273. package/src/queue/manager.ts +254 -0
  274. package/src/queue/types.ts +54 -0
  275. package/src/review/index.ts +8 -0
  276. package/src/review/runner.ts +172 -0
  277. package/src/review/types.ts +66 -0
  278. package/src/routing/builder.ts +81 -0
  279. package/src/routing/chain.ts +74 -0
  280. package/src/routing/index.ts +16 -0
  281. package/src/routing/loader.ts +58 -0
  282. package/src/routing/router.ts +303 -0
  283. package/src/routing/strategies/adaptive.ts +215 -0
  284. package/src/routing/strategies/index.ts +8 -0
  285. package/src/routing/strategies/keyword.ts +163 -0
  286. package/src/routing/strategies/llm-prompts.ts +209 -0
  287. package/src/routing/strategies/llm.ts +235 -0
  288. package/src/routing/strategies/manual.ts +50 -0
  289. package/src/routing/strategy.ts +99 -0
  290. package/src/tdd/cleanup.ts +111 -0
  291. package/src/tdd/index.ts +23 -0
  292. package/src/tdd/isolation.ts +123 -0
  293. package/src/tdd/orchestrator.ts +383 -0
  294. package/src/tdd/prompts.ts +270 -0
  295. package/src/tdd/rectification-gate.ts +183 -0
  296. package/src/tdd/session-runner.ts +179 -0
  297. package/src/tdd/types.ts +81 -0
  298. package/src/tdd/verdict.ts +271 -0
  299. package/src/tui/App.tsx +265 -0
  300. package/src/tui/components/AgentPanel.tsx +75 -0
  301. package/src/tui/components/CostOverlay.tsx +118 -0
  302. package/src/tui/components/HelpOverlay.tsx +107 -0
  303. package/src/tui/components/StatusBar.tsx +63 -0
  304. package/src/tui/components/StoriesPanel.tsx +177 -0
  305. package/src/tui/hooks/useKeyboard.ts +142 -0
  306. package/src/tui/hooks/useLayout.ts +137 -0
  307. package/src/tui/hooks/usePipelineEvents.ts +183 -0
  308. package/src/tui/hooks/usePty.ts +194 -0
  309. package/src/tui/index.tsx +38 -0
  310. package/src/tui/types.ts +76 -0
  311. package/src/utils/git.ts +83 -0
  312. package/src/utils/queue-writer.ts +54 -0
  313. package/src/verification/executor.ts +235 -0
  314. package/src/verification/gate.ts +207 -0
  315. package/src/verification/index.ts +12 -0
  316. package/src/verification/parser.ts +230 -0
  317. package/src/verification/rectification.ts +108 -0
  318. package/src/verification/types.ts +113 -0
  319. package/src/worktree/dispatcher.ts +65 -0
  320. package/src/worktree/index.ts +2 -0
  321. package/src/worktree/manager.ts +187 -0
  322. package/src/worktree/merge.ts +301 -0
  323. package/src/worktree/types.ts +4 -0
  324. package/test/TEST_COVERAGE_US001.md +217 -0
  325. package/test/TEST_COVERAGE_US003.md +84 -0
  326. package/test/TEST_COVERAGE_US005.md +86 -0
  327. package/test/US-002-orchestrator.test.ts +246 -0
  328. package/test/acceptance/cm-003-default-view.test.ts +194 -0
  329. package/test/execution/pid-registry.test.ts +240 -0
  330. package/test/execution/post-verify.test.ts +224 -0
  331. package/test/helpers/timeout.ts +42 -0
  332. package/test/integration/US-002-TEST-SUMMARY.md +107 -0
  333. package/test/integration/US-003-TEST-SUMMARY.md +149 -0
  334. package/test/integration/US-004-TEST-SUMMARY.md +106 -0
  335. package/test/integration/US-005-TEST-SUMMARY.md +138 -0
  336. package/test/integration/US-007-TEST-SUMMARY.md +100 -0
  337. package/test/integration/agent-validation.test.ts +439 -0
  338. package/test/integration/analyze-integration.test.ts +261 -0
  339. package/test/integration/analyze-scanner.test.ts +131 -0
  340. package/test/integration/cli-config-default-edge-cases.test.ts +222 -0
  341. package/test/integration/cli-config-default-view.test.ts +229 -0
  342. package/test/integration/cli-config-diff.test.ts +460 -0
  343. package/test/integration/cli-config.test.ts +736 -0
  344. package/test/integration/cli-diagnose.test.ts +592 -0
  345. package/test/integration/cli-logs.test.ts +314 -0
  346. package/test/integration/cli-plugins.test.ts +678 -0
  347. package/test/integration/cli-precheck.test.ts +371 -0
  348. package/test/integration/cli-run-headless.test.ts +173 -0
  349. package/test/integration/cli.test.ts +75 -0
  350. package/test/integration/config/merger.test.ts +465 -0
  351. package/test/integration/config/paths.test.ts +51 -0
  352. package/test/integration/config-loader.test.ts +265 -0
  353. package/test/integration/config.test.ts +444 -0
  354. package/test/integration/context-integration.test.ts +702 -0
  355. package/test/integration/context-provider-injection.test.ts +506 -0
  356. package/test/integration/context-verification-integration.test.ts +295 -0
  357. package/test/integration/e2e.test.ts +896 -0
  358. package/test/integration/execution.test.ts +625 -0
  359. package/test/integration/helpers.test.ts +295 -0
  360. package/test/integration/hooks.test.ts +361 -0
  361. package/test/integration/interaction-chain-pipeline.test.ts +464 -0
  362. package/test/integration/isolation.test.ts +143 -0
  363. package/test/integration/logger.test.ts +461 -0
  364. package/test/integration/parallel.test.ts +250 -0
  365. package/test/integration/path-security.test.ts +173 -0
  366. package/test/integration/pipeline-acceptance.test.ts +302 -0
  367. package/test/integration/pipeline-events.test.ts +475 -0
  368. package/test/integration/pipeline.test.ts +658 -0
  369. package/test/integration/plan.test.ts +157 -0
  370. package/test/integration/plugin-routing.test.ts +921 -0
  371. package/test/integration/plugins/config-integration.test.ts +172 -0
  372. package/test/integration/plugins/config-resolution.test.ts +522 -0
  373. package/test/integration/plugins/loader.test.ts +641 -0
  374. package/test/integration/plugins/registry.test.ts +746 -0
  375. package/test/integration/plugins/validator.test.ts +563 -0
  376. package/test/integration/prd-pause.test.ts +205 -0
  377. package/test/integration/prd-resolvers.test.ts +185 -0
  378. package/test/integration/precheck-integration.test.ts +468 -0
  379. package/test/integration/precheck.test.ts +805 -0
  380. package/test/integration/progress.test.ts +34 -0
  381. package/test/integration/rectification-flow.test.ts +512 -0
  382. package/test/integration/reporter-lifecycle.test.ts +860 -0
  383. package/test/integration/review-config-commands.test.ts +319 -0
  384. package/test/integration/review-config-schema.test.ts +116 -0
  385. package/test/integration/review-plugin-integration.test.ts +722 -0
  386. package/test/integration/review.test.ts +149 -0
  387. package/test/integration/routing-stage-bug-021.test.ts +274 -0
  388. package/test/integration/routing-stage-greenfield.test.ts +286 -0
  389. package/test/integration/runner-config-plugins.test.ts +461 -0
  390. package/test/integration/runner-fixes.test.ts +399 -0
  391. package/test/integration/runner-plugin-integration.test.ts +543 -0
  392. package/test/integration/runner.test.ts +1679 -0
  393. package/test/integration/s5-greenfield-fallback.test.ts +297 -0
  394. package/test/integration/status-file-integration.test.ts +325 -0
  395. package/test/integration/status-file.test.ts +379 -0
  396. package/test/integration/status-writer.test.ts +345 -0
  397. package/test/integration/story-id-in-events.test.ts +273 -0
  398. package/test/integration/tdd-cleanup.test.ts +246 -0
  399. package/test/integration/tdd-orchestrator.test.ts +1762 -0
  400. package/test/integration/test-scanner.test.ts +403 -0
  401. package/test/integration/verification-asset-check.test.ts +142 -0
  402. package/test/integration/verify-stage.test.ts +275 -0
  403. package/test/integration/worktree/manager.test.ts +218 -0
  404. package/test/integration/worktree/merge.test.ts +341 -0
  405. package/test/manual/logging-formatter-demo.ts +158 -0
  406. package/test/ui/tui-agent-panel.test.tsx +99 -0
  407. package/test/ui/tui-controls.test.ts +334 -0
  408. package/test/ui/tui-cost-and-pty.test.ts +189 -0
  409. package/test/ui/tui-layout.test.ts +378 -0
  410. package/test/ui/tui-pty-integration.test.tsx +159 -0
  411. package/test/ui/tui-stories.test.ts +332 -0
  412. package/test/unit/acceptance.test.ts +186 -0
  413. package/test/unit/agent-stderr-capture.test.ts +146 -0
  414. package/test/unit/analyze-classifier.test.ts +215 -0
  415. package/test/unit/analyze.test.ts +224 -0
  416. package/test/unit/auto-detect.test.ts +249 -0
  417. package/test/unit/cli-status.test.ts +417 -0
  418. package/test/unit/commands/common.test.ts +320 -0
  419. package/test/unit/commands/logs.test.ts +416 -0
  420. package/test/unit/commands/unlock.test.ts +319 -0
  421. package/test/unit/constitution-generators.test.ts +160 -0
  422. package/test/unit/constitution.test.ts +209 -0
  423. package/test/unit/context.test.ts +1722 -0
  424. package/test/unit/cost.test.ts +231 -0
  425. package/test/unit/crash-recovery.test.ts +308 -0
  426. package/test/unit/escalation.test.ts +126 -0
  427. package/test/unit/execution-logging-stderr.test.ts +156 -0
  428. package/test/unit/execution-stage.test.ts +122 -0
  429. package/test/unit/fix-generator.test.ts +275 -0
  430. package/test/unit/formatters.test.ts +469 -0
  431. package/test/unit/greenfield.test.ts +179 -0
  432. package/test/unit/helpers.test.ts +317 -0
  433. package/test/unit/interaction/human-review-trigger.test.ts +164 -0
  434. package/test/unit/interaction-network-failures.test.ts +389 -0
  435. package/test/unit/interaction-plugins.test.ts +164 -0
  436. package/test/unit/isolation.test.ts +134 -0
  437. package/test/unit/logging/formatter.test.ts +455 -0
  438. package/test/unit/merge.test.ts +268 -0
  439. package/test/unit/metrics.test.ts +276 -0
  440. package/test/unit/optimizer/noop.optimizer.test.ts +125 -0
  441. package/test/unit/optimizer/rule-based.optimizer.test.ts +358 -0
  442. package/test/unit/prd-auto-default.test.ts +290 -0
  443. package/test/unit/prd-failure-category.test.ts +176 -0
  444. package/test/unit/prd-get-next-story.test.ts +186 -0
  445. package/test/unit/precheck-checks.test.ts +840 -0
  446. package/test/unit/precheck-story-size-gate.test.ts +287 -0
  447. package/test/unit/precheck-types.test.ts +142 -0
  448. package/test/unit/prompts.test.ts +475 -0
  449. package/test/unit/queue.test.ts +237 -0
  450. package/test/unit/rectification.test.ts +284 -0
  451. package/test/unit/registry.test.ts +287 -0
  452. package/test/unit/routing.test.ts +937 -0
  453. package/test/unit/run-lifecycle.test.ts +140 -0
  454. package/test/unit/storyid-events.test.ts +224 -0
  455. package/test/unit/tdd-verdict.test.ts +492 -0
  456. package/test/unit/test-output-parser.test.ts +377 -0
  457. package/test/unit/verdict.test.ts +324 -0
  458. package/test/unit/worktree-manager.test.ts +158 -0
  459. package/tsconfig.json +27 -0
@@ -0,0 +1,151 @@
1
+ # Fix Plan: ADR-003 Robust Orchestration Feedback Loop — Port to @nathapp/nax
2
+ **Date:** 2026-02-18
3
+ **Branch:** feat/adr-003-verification
4
+
5
+ ## Background
6
+ ADR-003 was prototyped in @arvorco/relentless but the real product is @nathapp/nax v0.5.0.
7
+ This plan ports the safety infrastructure, adapting to nax's existing architecture.
8
+
9
+ ## Phase 1: Config Schema Updates
10
+ ### Fix 1: Per-tier attempt config (replaces flat maxAttempts)
11
+ **File:** `src/config/schema.ts`
12
+ **Impact:** Root cause of 20-iteration stall — same model retried endlessly
13
+ **Change:**
14
+ - Replace `tierOrder: z.array(ModelTierSchema)` (string[]) with `tierOrder: z.array(z.object({ tier: z.string().min(1), attempts: z.number().int().min(1).max(20) }))`
15
+ - Remove `maxAttempts` from escalation config (calculated from sum of tier attempts)
16
+ - Keep `maxIterations` as a hard safety cap, default to sum of tier attempts
17
+ - Add `verificationTimeoutSeconds` to execution config (default 300, min 1, max 3600)
18
+ - Add `sessionTimeoutSeconds` to execution config (rename from any existing timeout if present)
19
+
20
+ ### Fix 2: Quality config extensions
21
+ **File:** `src/config/schema.ts`
22
+ **Impact:** Enables all verification features
23
+ **Change:**
24
+ - Extend existing `QualityConfig` with:
25
+ - `forceExit: z.boolean().default(false)`
26
+ - `detectOpenHandles: z.boolean().default(true)`
27
+ - `detectOpenHandlesRetries: z.number().int().min(0).max(5).default(1)`
28
+ - `gracePeriodMs: z.number().int().min(500).max(30000).default(5000)`
29
+ - `drainTimeoutMs: z.number().int().min(0).max(10000).default(2000)`
30
+ - `shell: z.string().default("/bin/sh")`
31
+ - `stripEnvVars: z.array(z.string()).default(["CLAUDECODE", "REPL_ID", "AGENT"])`
32
+ - `environmentalEscalationDivisor: z.number().min(1).max(10).default(2)`
33
+ - Update `DEFAULT_CONFIG` with new defaults
34
+ - Update `NaxConfig` interface
35
+
36
+ **Commit:** `feat(config): add ADR-003 tier-aware escalation and quality verification config`
37
+
38
+ ## Phase 2: Verification Module (new file)
39
+ ### Fix 3: Create verification.ts
40
+ **File:** `src/execution/verification.ts` (NEW)
41
+ **Impact:** Core safety infrastructure — prevents zombie processes and stall loops
42
+ **Change:**
43
+ - Port from Relentless `src/execution/verification.ts`, adapting imports to nax:
44
+ - `verifyAssets()` — pre-flight file check
45
+ - `executeWithTimeout()` — hard timeout with process group kill (SIGTERM→grace→SIGKILL)
46
+ - `drainWithDeadline()` — Bun stream workaround
47
+ - `parseTestOutput()` — detect pass/fail counts from output
48
+ - `getEnvironmentalEscalationThreshold()` — early escalation for env failures
49
+ - `normalizeEnvironment()` — strip AI env vars
50
+ - `buildTestCommand()` — append --detectOpenHandles/--forceExit based on retry state
51
+ - `runVerification()` — integrated flow combining all above
52
+ - Export from `src/execution/index.ts`
53
+
54
+ **Commit:** `feat(execution): add ADR-003 verification module with timeout, process group kill, and smart exit-code analysis`
55
+
56
+ ## Phase 3: PRD Types Update
57
+ ### Fix 4: Add blocked status
58
+ **File:** `src/prd/types.ts` (or wherever story status is defined)
59
+ **Impact:** Enables skip-and-continue for blocked stories
60
+ **Change:**
61
+ - Add `"blocked"` to story status union type
62
+ - Add `isStalled()` helper — returns true when every remaining story is either blocked or depends on a blocked story
63
+ - Add `markStoryAsBlocked()` helper
64
+
65
+ **Commit:** `feat(prd): add blocked status and stall detection helpers`
66
+
67
+ ## Phase 4: Runner Integration
68
+ ### Fix 5: Wire tier-aware escalation into runner loop
69
+ **File:** `src/execution/runner.ts`
70
+ **Impact:** Replaces flat iteration loop with tier-aware escalation
71
+ **Change:**
72
+ - Track `currentTierIndex` and `iterationWithinTier` per story
73
+ - Escalate when `iterationWithinTier >= currentTier.attempts`
74
+ - Use config `tierOrder` (new object format) instead of string array
75
+ - Calculate the iteration limit from the sum of all tier attempts instead of reading a fixed value from `config.execution.maxIterations` (which remains only as the hard safety cap described in Fix 1)
76
+ - Update `escalateTier()` call to use new tier config format
77
+
78
+ ### Fix 6: Wire runVerification() post-agent
79
+ **File:** `src/execution/runner.ts`
80
+ **Impact:** Adds test verification after each agent iteration
81
+ **Change:**
82
+ - After agent completes, call `runVerification()` if `config.quality.commands.test` is set
83
+ - Pass quality config (forceExit, detectOpenHandles, etc.) through
84
+ - Track `timeoutRetryCount` per story for diagnostic retry escalation
85
+ - On ENVIRONMENTAL_FAILURE: append diagnostic context to story progress
86
+ - On TIMEOUT: do not count the timed-out iteration toward tier escalation
87
+ - On ASSET_CHECK_FAILED: append missing file list to progress
88
+
89
+ ### Fix 7: Wire stall detection
90
+ **File:** `src/execution/runner.ts`
91
+ **Impact:** Prevents infinite loops when all stories are blocked
92
+ **Change:**
93
+ - After each iteration, check `isStalled()`
94
+ - If stalled, generate human halt summary and break loop
95
+ - Mark dependent stories as blocked when their dependencies fail
96
+
97
+ **Commit:** `feat(execution): wire ADR-003 tier escalation, verification, and stall detection into runner`
98
+
99
+ ## Phase 5: Escalation Module Update
100
+ ### Fix 8: Update escalation for new tier format
101
+ **File:** `src/execution/escalation.ts`
102
+ **Impact:** Existing escalation logic works with string[], needs to handle object[]
103
+ **Change:**
104
+ - Update `escalateTier()` to accept `{tier, attempts}[]` format
105
+ - Remove hardcoded fallback switch statement (no backward compat needed)
106
+
107
+ **Commit:** `refactor(execution): update escalation for ADR-003 tier config format`
108
+
109
+ ## Phase 6: Tests
110
+ ### Fix 9: Add verification tests
111
+ **File:** `test/execution/verification.test.ts` (NEW)
112
+ **Tests:**
113
+ - `verifyAssets()` — missing files, empty list, all present
114
+ - `executeWithTimeout()` — success, timeout, SIGTERM/SIGKILL, process cleanup
115
+ - `parseTestOutput()` — bun/jest formats, environmental failure detection
116
+ - `normalizeEnvironment()` — strips AI vars, preserves others
117
+ - `buildTestCommand()` — forceExit, detectOpenHandles, retry cap, pipe insertion
118
+ - `runVerification()` — integrated flow, US-001 scenario
119
+
120
+ ### Fix 10: Update config tests
121
+ **File:** `test/config/schema.test.ts` (existing)
122
+ **Tests:**
123
+ - New tierOrder format validation (object[], reject string[])
124
+ - Quality config extensions
125
+ - verificationTimeoutSeconds bounds
126
+
127
+ ### Fix 11: Update escalation tests
128
+ **File:** `test/execution/escalation.test.ts` (existing or new)
129
+ **Tests:**
130
+ - New tier format escalation
131
+ - Edge cases (single tier, unknown tier)
132
+
133
+ **Commit:** `test: add ADR-003 verification, config, and escalation tests`
134
+
135
+ ## Test Strategy
136
+ - Mode: test-after
137
+ - Test-after targets: verification.ts, schema.ts, escalation.ts, runner.ts integration
138
+ - Run: `bun test`
139
+
140
+ ## Breaking Changes
141
+ - `escalation.tierOrder` changes from `string[]` to `{tier, attempts}[]` — [BREAKING but no published consumers]
142
+ - `escalation.maxAttempts` removed — [BREAKING but no published consumers]
143
+ - William confirmed: "Remove backward compatibility — no published version yet"
144
+
145
+ ## Commits Summary
146
+ 1. `feat(config): add ADR-003 tier-aware escalation and quality verification config`
147
+ 2. `feat(execution): add ADR-003 verification module with timeout, process group kill, and smart exit-code analysis`
148
+ 3. `feat(prd): add blocked status and stall detection helpers`
149
+ 4. `feat(execution): wire ADR-003 tier escalation, verification, and stall detection into runner`
150
+ 5. `refactor(execution): update escalation for ADR-003 tier config format`
151
+ 6. `test: add ADR-003 verification, config, and escalation tests`
@@ -0,0 +1,175 @@
1
+ # Deep Code Review: @nathapp/nax — ADR-003 Verification Port
2
+
3
+ **Date:** 2026-02-18
4
+ **Reviewer:** Subrina (AI)
5
+ **Branch:** `feat/adr-003-verification` vs `master`
6
+ **Scope:** +1,352 / -5,529 lines across 47 files (net -4,177 — major TUI removal + verification addition)
7
+ **Tests:** 103 pass, 0 fail (7 test files)
8
+
9
+ ---
10
+
11
+ ## Overall Grade: B+ (82/100)
12
+
13
+ Solid implementation of ADR-003 with well-structured verification module, good config extensibility, and proper process management (zombie prevention, Bun stream workaround). The TUI removal is clean. Main concerns: runner.ts growing too large, a few logic gaps in the verification→runner wiring, and some missing edge case handling.
14
+
15
+ | Dimension | Score | Notes |
16
+ |:---|:---|:---|
17
+ | **Security** | 17/20 | Good env normalization, process group kill. Minor: shell injection surface |
18
+ | **Reliability** | 16/20 | Timeout/zombie prevention solid. Some edge cases in verification wiring |
19
+ | **API Design** | 17/20 | Clean interfaces, extensible tiers, good separation of concerns |
20
+ | **Code Quality** | 15/20 | verification.ts excellent. runner.ts too large (350+ lines). Some duplication |
21
+ | **Best Practices** | 17/20 | Config-driven, well-documented JSDoc, Zod validation |
22
+
23
+ ---
24
+
25
+ ## Findings
26
+
27
+ ### 🔴 CRITICAL
28
+
29
+ _None._
30
+
31
+ ### 🟠 HIGH
32
+
33
+ #### BUG-1: Verification passes story through pipeline, then reverts on verification failure — double state mutation
34
+ **Severity:** HIGH | **Category:** Bug
35
+ ```typescript
36
+ // runner.ts ~L213: Pipeline marks story as "passed" in completionStage
37
+ if (pipelineResult.success) {
38
+ // ...
39
+ // L225: Then verification runs AFTER and may revert:
40
+ prd.userStories = prd.userStories.map(s =>
41
+ s.id === story.id
42
+ ? { ...s, status: "pending" as const, passes: false }
43
+ : s
44
+ );
45
+ ```
46
+ **Risk:** The pipeline's `completionStage` already marks the story as `passed` and saves the PRD. Then verification fails and reverts it. This creates a race condition if anything reads the PRD between those two saves. More importantly, `storiesCompleted` is incremented based on `verificationPassed` but the pipeline already fired `on-story-pass` hooks in the completion stage — so hooks see a story as passed that later fails verification.
47
+ **Fix:** Either (a) skip the completion stage when `quality.commands.test` is configured (verify first, then complete), or (b) run verification BEFORE the completion stage by adding it as a pipeline stage.
48
+
49
+ #### BUG-2: `parseTestOutput` regex may misparse multi-test-suite output
50
+ **Severity:** HIGH | **Category:** Bug
51
+ ```typescript
52
+ // verification.ts: Only matches FIRST occurrence
53
+ const patterns = [
54
+ /(\d+)\s+pass(?:ed)?(?:,\s+|\s+)(\d+)\s+fail/i,
55
+ // ...
56
+ ];
57
+ // If output has "5 pass, 0 fail" from one suite followed by "3 pass, 2 fail" from another,
58
+ // only the first match (5 pass, 0 fail) is captured — misses the actual failures.
59
+ ```
60
+ **Risk:** False ENVIRONMENTAL_FAILURE classification when only the first suite output matches. The orchestrator would treat it as "all tests pass but exit != 0" when in reality some tests failed.
61
+ **Fix:** Match ALL occurrences and sum pass/fail counts, or match only the final summary line (most frameworks print a total summary last).
62
+
63
+ ### 🟡 MEDIUM
64
+
65
+ #### ENH-1: runner.ts is 350+ lines and growing — verification wiring should be extracted
66
+ **Severity:** MEDIUM | **Category:** Enhancement
67
+ **Risk:** Maintenance burden. The runner now handles pipeline orchestration, verification, stall detection, acceptance retries, and metrics. Single Responsibility violated.
68
+ **Fix:** Extract verification wiring into a `postAgentVerification()` function in a separate module (e.g., `src/execution/verify-runner.ts`).
69
+
70
+ #### BUG-3: `executeWithTimeout` — `proc.exited` resolved value discarded, re-awaited
71
+ **Severity:** MEDIUM | **Category:** Bug
72
+ ```typescript
73
+ // verification.ts L148-150
74
+ await Promise.race([processPromise, timeoutPromise]);
75
+ // ...
76
+ const exitCode = await proc.exited; // ← awaits again (works but wasteful)
77
+ ```
78
+ **Risk:** No functional bug — `proc.exited` returns the same promise. But the first `await` result is discarded, and the second `await` is redundant. Minor clarity issue.
79
+ **Fix:** `const exitCode = await processPromise;` after the race confirms no timeout.
80
+
81
+ #### SEC-1: Shell command passed as string to `Bun.spawn([shell, "-c", command])`
82
+ **Severity:** MEDIUM | **Category:** Security
83
+ ```typescript
84
+ const proc = Bun.spawn([shell, "-c", command], { ... });
85
+ ```
86
+ **Risk:** If `command` is ever constructed from user/PRD input (e.g., a story-specific test command), this is a shell injection vector. Currently `command` comes from `config.quality.commands.test` which is operator-controlled, so low practical risk.
87
+ **Fix:** Document this as `@design` — the command is config-driven, not user-driven. Add a note in schema.ts JSDoc.
88
+
89
+ #### TYPE-1: Batch story filter in runner.ts doesn't exclude `blocked`/`failed` stories (unlike `getNextStory`)
90
+ **Severity:** MEDIUM | **Category:** Type Safety
91
+ ```typescript
92
+ // prd/index.ts: getNextStory filters blocked correctly
93
+ s.status !== "blocked" && s.status !== "failed"
94
+ // But runner.ts batch plan filters differently:
95
+ storiesToExecute = batch.stories.filter(s => !s.passes && s.status !== "skipped");
96
+ // Missing: && s.status !== "blocked" && s.status !== "failed"
97
+ ```
98
+ **Risk:** Batch execution could pick up blocked/failed stories from a stale batch plan.
99
+ **Fix:** Add `s.status !== "blocked" && s.status !== "failed"` to the batch story filter.
100
+
101
+ #### PERF-1: `dynamic import()` inside hot loop
102
+ **Severity:** MEDIUM | **Category:** Performance
103
+ ```typescript
104
+ // runner.ts ~L253
105
+ const analysis = await import("./verification").then(m =>
106
+ m.parseTestOutput(verificationResult.output!, 0)
107
+ );
108
+ ```
109
+ **Risk:** Dynamic import on every successful verification. The module is already imported at the top of the file (`import { runVerification } from "./verification"`).
110
+ **Fix:** Use the already-imported module: `import { runVerification, parseTestOutput } from "./verification"` and call directly.
111
+
112
+ ### 🟢 LOW
113
+
114
+ #### STYLE-1: `environmentalEscalationDivisor` defined in config but never used in runner
115
+ **Severity:** LOW | **Category:** Style / Dead Config
116
+ The config field `quality.environmentalEscalationDivisor` is validated and defaulted in schema.ts, and `getEnvironmentalEscalationThreshold()` accepts a divisor parameter, but the runner never calls `getEnvironmentalEscalationThreshold()`. Environmental failures just increment attempts like normal failures.
117
+ **Fix:** Wire up the early escalation logic in the runner, or remove the config field until it's needed.
118
+
119
+ #### STYLE-2: `isComplete` doesn't account for `blocked` status
120
+ **Severity:** LOW | **Category:** Style
121
+ ```typescript
122
+ export function isComplete(prd: PRD): boolean {
123
+ return prd.userStories.every(s => s.passes || s.status === "passed" || s.status === "skipped");
124
+ }
125
+ ```
126
+ A PRD with all stories blocked will never be "complete" (correct) but also won't trigger stall detection until the `isStalled` check runs. This is fine — just document that `isComplete` means "all pass/skip" not "no more work possible".
127
+
128
+ #### ENH-2: Missing test for `runVerification()` integration
129
+ **Severity:** LOW | **Category:** Enhancement
130
+ The verification module has unit tests for individual functions, but no test covers the full `runVerification()` flow (asset check → build command → normalize env → execute → analyze). Consider a focused integration test.
131
+
132
+ #### STYLE-3: TUI deletion is clean but leaves orphaned type in `pipeline/types.ts`
133
+ **Severity:** LOW | **Category:** Style
134
+ ```typescript
135
+ // pipeline/types.ts still references StageAction which had 'cost' removed
136
+ // Check if any StageAction-related types have dangling references
137
+ ```
138
+
139
+ ---
140
+
141
+ ## Priority Fix Order
142
+
143
+ | Priority | ID | Effort | Description |
144
+ |:---|:---|:---|:---|
145
+ | P0 | BUG-1 | M | Verification runs after completion stage — double state mutation + stale hooks |
146
+ | P0 | BUG-2 | S | parseTestOutput only matches first regex occurrence — may miss failures |
147
+ | P1 | TYPE-1 | S | Batch story filter missing blocked/failed status check |
148
+ | P1 | PERF-1 | S | Dynamic import of already-imported module in hot path |
149
+ | P2 | ENH-1 | M | Extract verification wiring from runner.ts |
150
+ | P2 | STYLE-1 | S | Wire up environmentalEscalationDivisor or remove |
151
+ | P3 | BUG-3 | S | Redundant proc.exited await |
152
+ | P3 | SEC-1 | S | Document shell command as @design (config-driven) |
153
+ | P3 | ENH-2 | M | Integration test for runVerification() |
154
+ | P3 | STYLE-2 | S | Document isComplete semantics |
155
+
156
+ ---
157
+
158
+ ## Summary
159
+
160
+ The ADR-003 port is **well-aligned with everything discussed today**. All key decisions are implemented:
161
+
162
+ ✅ Per-tier `TierConfig[]` with configurable attempts
163
+ ✅ Extensible tier names (`z.string()` not enum)
164
+ ✅ Separate `verificationTimeoutSeconds` vs `sessionTimeoutSeconds`
165
+ ✅ TIMEOUT doesn't count toward escalation (`countsTowardEscalation: false`)
166
+ ✅ Process group kill (`process.kill(-pid)`) + SIGTERM→grace→SIGKILL
167
+ ✅ `drainWithDeadline()` Bun stream workaround
168
+ ✅ `buildTestCommand()` with --detectOpenHandles escalation + --forceExit fallback
169
+ ✅ All config values extracted (zero hardcoded)
170
+ ✅ `blocked` status + `isStalled()` + `generateHumanHaltSummary()`
171
+ ✅ Environment normalization (strip AI-optimized vars)
172
+ ✅ Backward compat removed (no `maxAttempts`, no string-array `tierOrder`)
173
+ ✅ TUI cleanly removed
174
+
175
+ The P0 items (BUG-1 double state mutation, BUG-2 regex parsing) should be addressed before dogfooding. Everything else can be iterative.
@@ -0,0 +1,79 @@
1
+ # Fix Plan: BUG-16 to BUG-19 (Dogfood Run C)
2
+ **Date:** 2026-02-19
3
+ **Branch:** fix/bug-16-19-routing-escalation
4
+ **Repo:** nax (on Mac01)
5
+
6
+ ## Phase 1: BUG-19 — Fix complexity→tier routing mismatch
7
+
8
+ ### Fix 1: Re-derive modelTier after overriding cached complexity
9
+ **File:** `src/execution/runner.ts` (lines ~195-204 and ~218-225)
10
+ **Root Cause:** `routeTask()` derives `modelTier` from keyword-classified complexity (e.g. "medium" for 5 ACs), then the cached `story.routing.complexity` ("simple") overwrites `routing.complexity` but NOT `routing.modelTier`, so the displayed complexity and the tier actually used disagree.
11
+ **Change:**
12
+ After `routing.complexity = story.routing.complexity`, add:
13
+ ```typescript
14
+ routing.modelTier = config.autoMode.complexityRouting[routing.complexity] ?? "balanced";
15
+ ```
16
+ Apply in BOTH places where cached routing overrides happen (batch mode ~line 203 and single-story mode ~line 225).
17
+
18
+ ### Fix 2: Export complexityToModelTier or inline the mapping
19
+ **File:** `src/routing/router.ts`
20
+ **Change:** No change to `router.ts` is required — rather than exporting `complexityToModelTier`, just inline `config.autoMode.complexityRouting[complexity]` in runner.ts (simpler, no new export needed).
21
+
22
+ **Commit:** `fix: re-derive modelTier from cached complexity (BUG-19)`
23
+
24
+ ## Phase 2: BUG-17 + BUG-16 — Fix escalation and per-story capping
25
+
26
+ ### Fix 3: Add pre-iteration tier escalation check
27
+ **File:** `src/execution/runner.ts`
28
+ **Root Cause:** Escalation only happens in `case "escalate"` handler, but post-verify failures (ASSET_CHECK) revert story to pending without returning "escalate". Story loops at same tier forever.
29
+ **Change:** At the start of each iteration, BEFORE spawning the agent, check if `story.attempts` has reached the current tier budget (`story.attempts >= tierCfg.attempts`). If so, escalate `story.routing.modelTier` in the PRD and save. After all tiers are exhausted, mark the story as FAILED instead of retrying.
30
+
31
+ Key logic:
32
+ - Get currentTier from story.routing.modelTier (or first tier in tierOrder)
33
+ - Get tierCfg via getTierConfig(currentTier, tierOrder)
34
+ - If story.attempts >= tierCfg.attempts → escalate to next tier
35
+ - If no next tier → markStoryFailed, fireHook on-story-fail, continue to next story
36
+ - This also fixes BUG-16 because stories can no longer loop indefinitely
37
+
38
+ **Commit:** `fix: pre-iteration tier escalation and per-story failure cap (BUG-16, BUG-17)`
39
+
40
+ ## Phase 3: BUG-18 — Surface ASSET_CHECK errors prominently in prompt
41
+
42
+ ### Fix 4: Format ASSET_CHECK errors as mandatory instructions
43
+ **File:** `src/pipeline/stages/prompt.ts` (or wherever the agent prompt is built)
44
+ **Root Cause:** ASSET_CHECK errors in story.priorErrors are shown as generic "Prior Errors" — agent ignores them.
45
+ **Change:** Parse priorErrors for entries starting with `ASSET_CHECK_FAILED:`. Extract missing file names and format as:
46
+ ```
47
+ MANDATORY: You MUST create these files (previous attempts failed because they were missing):
48
+ - src/finder.ts
49
+ - test/finder.test.ts
50
+ Do NOT use alternative filenames. These exact paths are required.
51
+ ```
52
+ Place this BEFORE the story description in the prompt.
53
+
54
+ **Commit:** `fix: surface ASSET_CHECK errors as mandatory instructions in prompt (BUG-18)`
55
+
56
+ ## Phase 4: Tests
57
+
58
+ ### Test 5: Add routing tests
59
+ **File:** `test/routing.test.ts` (new)
60
+ - Test classifyComplexity with various AC counts (4, 5, 9 ACs)
61
+ - Test complexityToModelTier mapping respects config
62
+ - Test that cached routing complexity correctly re-derives modelTier
63
+
64
+ ### Test 6: Add escalation integration tests
65
+ **File:** `test/runner.test.ts` (append)
66
+ - Test pre-iteration escalation: story with attempts >= tier budget escalates before agent spawn
67
+ - Test all-tiers-exhausted: story marked FAILED, not retried
68
+ - Test ASSET_CHECK failure increments attempts and triggers escalation
69
+
70
+ ### Test 7: Add prompt formatting test
71
+ **File:** `test/prompt.test.ts` (new or append)
72
+ - Test ASSET_CHECK priorErrors rendered as mandatory file creation instructions
73
+ - Test non-ASSET_CHECK priorErrors rendered normally
74
+
75
+ **Commit:** `test: add tests for BUG-16 to BUG-19 fixes`
76
+
77
+ ## Test Strategy
78
+ - Mode: test-after
79
+ - Run: `bun test` after each phase
@@ -0,0 +1,114 @@
1
+ # Fix Plan: BUG-20, BUG-21, BUG-22 — TDD Orchestrator Fixes
2
+ **Date:** 2026-02-19
3
+ **Branch:** `fix/bug-20-22-tdd-orchestrator`
4
+ **Base:** `fix/bug-16-19-routing-escalation` (has prior fixes)
5
+
6
+ ## Phase 1: BUG-22 — Post-TDD test verification (false positive fix)
7
+
8
+ ### Fix: Add independent test run after all 3 TDD sessions complete
9
+
10
+ **File:** `src/tdd/orchestrator.ts`
11
+ **Impact:** Without fix, verifier sessions that fix issues are treated as failures, causing false pauses.
12
+
13
+ **Change:**
14
+ After all 3 sessions complete (line ~387), before checking `sessions.every(s => s.success)`:
15
+ 1. Run the project's test command (`config.quality.commands.test`) independently via `executeWithTimeout()`
16
+ 2. If tests pass → override `allSuccessful = true` regardless of individual session exit codes
17
+ 3. If tests fail → keep existing behavior (flag for human review)
18
+ 4. Log: `[tdd] Post-TDD verification: X tests pass` or `[tdd] Post-TDD verification failed, flagging for review`
19
+
20
+ ```typescript
21
+ // After sessions complete, verify final state independently
22
+ const testCmd = config.quality?.commands?.test ?? "bun test";
23
+ const postVerify = await executeWithTimeout(testCmd, config.quality?.verificationTimeoutSeconds ?? 120);
24
+ const testsActuallyPass = postVerify.success && postVerify.exitCode === 0;
25
+
26
+ if (testsActuallyPass && !allSuccessful) {
27
+ console.log(chalk.dim(" ℹ️ Sessions had non-zero exits but tests pass — treating as success"));
28
+ allSuccessful = true;
29
+ needsHumanReview = false;
30
+ reviewReason = undefined;
31
+ }
32
+ ```
33
+
34
+ **Import needed:** `executeWithTimeout` from `../execution/verification`
35
+
36
+ ## Phase 2: BUG-20 — Detect empty test-writer sessions
37
+
38
+ ### Fix: Check that test-writer session actually created test files
39
+
40
+ **File:** `src/tdd/orchestrator.ts`
41
+ **Impact:** Without fix, test-writer sessions that create no test files (e.g., create `requirements.md` instead) pass isolation check vacuously.
42
+
43
+ **Change:**
44
+ After Session 1 (test-writer) isolation check passes, add a file creation check:
45
+ 1. Get `session1.filesChanged` (already tracked)
46
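+ 2. Filter for files matching test-file patterns: `*.test.*` or `*.spec.*` with a `ts`, `js`, `tsx`, or `jsx` extension (note: files under `test/` without a `.test`/`.spec` suffix are NOT matched by the regex below)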
47
+ 3. If zero test files created → fail with reason "Test writer created no test files"
48
+
49
+ ```typescript
50
+ // After session1 isolation passes
51
+ const testFilePatterns = /\.(test|spec)\.(ts|js|tsx|jsx)$/;
52
+ const testFilesCreated = session1.filesChanged.filter(f => testFilePatterns.test(f));
53
+ if (testFilesCreated.length === 0) {
54
+ needsHumanReview = true;
55
+ reviewReason = "Test writer session created no test files";
56
+ console.log(chalk.yellow(`\n⚠️ ${reviewReason}`));
57
+ // Return early — no point running implementer without tests
58
+ return { success: false, sessions: [session1], needsHumanReview, reviewReason, totalCost: session1.estimatedCost };
59
+ }
60
+ ```
61
+
62
+ ## Phase 3: BUG-21 — Process tree cleanup after agent failure
63
+
64
+ ### Fix: Kill process tree when TDD session agent exits non-zero
65
+
66
+ **File:** `src/tdd/orchestrator.ts` (in `runTddSession()`)
67
+ **Impact:** Without fix, Claude Code child processes (like `bun test | tail -5`) are orphaned and keep running at 100% CPU.
68
+
69
+ **Change:**
70
+ In `runTddSession()`, after `agent.runSession()` returns (especially on failure), add process tree cleanup:
71
+
72
+ ```typescript
73
+ // After agent.runSession() returns
74
+ const result = await agent.runSession(/* ... */);
75
+
76
+ // Clean up any orphaned child processes from the agent
77
+ if (!result.success && result.pid) {
78
+ await cleanupProcessTree(result.pid);
79
+ }
80
+ ```
81
+
82
+ Add utility function (can be in orchestrator or a new `src/tdd/cleanup.ts`):
83
+
84
+ ```typescript
85
+ async function cleanupProcessTree(pid: number): Promise<void> {
86
+ try {
87
+ // Find all child processes
88
+ const pgid = await getPgid(pid);
89
+ if (pgid) {
90
+ process.kill(-pgid, "SIGTERM");
91
+ await new Promise(r => setTimeout(r, 3000));
92
+ try { process.kill(-pgid, "SIGKILL"); } catch {}
93
+ }
94
+ } catch {
95
+ // Process already dead — that's fine
96
+ }
97
+ }
98
+ ```
99
+
100
+ **Note:** Check if `AgentAdapter.runSession()` exposes the child PID. If not, need to also modify `src/agents/claude-adapter.ts` to return `pid` in the result.
101
+
102
+ ## Test Strategy
103
+ - Mode: test-after
104
+ - Run existing tests after each phase to verify no regressions
105
+ - Add unit tests for BUG-20 (test file detection) and BUG-22 (post-TDD verify override)
106
+ - BUG-21 is harder to unit test (process management) — verify manually
107
+
108
+ ## Commits
109
+ - Phase 1: `fix: post-TDD test verification to prevent false positive pauses (BUG-22)`
110
+ - Phase 2: `fix: detect empty test-writer sessions in TDD orchestrator (BUG-20)`
111
+ - Phase 3: `fix: cleanup orphaned child processes after TDD session failure (BUG-21)`
112
+
113
+ ---
114
+ *Created 2026-02-19*
@@ -0,0 +1,116 @@
1
+ # Implementation Plan: LLM-Enhanced Routing (v0.8)
2
+
3
+ **Date:** 2026-02-19
4
+ **Branch:** `feat/v0.8-llm-routing`
5
+ **Spec:** `docs/v0.8-llm-routing.md`
6
+
7
+ ## Test Strategy
8
+ - Mode: test-after
9
+ - Test-after targets: `test/routing/llm-strategy.test.ts`, `test/routing/chain.test.ts`
10
+
11
+ ## Phase 1: Config Schema + LLM Routing Types
12
+
13
+ ### Fix 1.1: Add `LlmRoutingConfig` to schema
14
+ **File:** `src/config/schema.ts`
15
+ **Change:** Add interface and extend `RoutingConfig`:
16
+ ```typescript
17
+ export interface LlmRoutingConfig {
18
+ model?: string; // tier for routing call (default: "fast")
19
+ fallbackToKeywords?: boolean; // default: true
20
+ maxInputTokens?: number; // default: 2000
21
+ cacheDecisions?: boolean; // default: true
22
+ batchMode?: boolean; // default: true
23
+ timeoutMs?: number; // default: 15000
24
+ }
25
+ ```
26
+ Add `llm?: LlmRoutingConfig` to `RoutingConfig`.
27
+
28
+ ### Fix 1.2: Add defaults
29
+ **File:** `src/config/defaults.ts`
30
+ **Change:** Add LLM routing defaults in `DEFAULT_CONFIG.routing.llm`.
31
+
32
+ ### Fix 1.3: Add Zod validation
33
+ **File:** `src/config/schema.ts` (wherever Zod schemas are)
34
+ **Change:** Add LlmRoutingConfigSchema, wire into RoutingConfigSchema.
35
+
36
+ **Commit:** `feat(config): add LLM routing config schema and defaults`
37
+
38
+ ## Phase 2: Make Strategy Chain Async
39
+
40
+ ### Fix 2.1: Update `RoutingStrategy` interface
41
+ **File:** `src/routing/strategy.ts`
42
+ **Change:** `route()` return type → `RoutingDecision | null | Promise<RoutingDecision | null>`
43
+
44
+ ### Fix 2.2: Make `StrategyChain.route()` async
45
+ **File:** `src/routing/chain.ts`
46
+ **Change:** `route()` → `async route()`, `await` each strategy result. Return type `Promise<RoutingDecision>`.
47
+
48
+ ### Fix 2.3: Update chain callers
49
+ **File:** `src/routing/router.ts` (and any other callers)
50
+ **Change:** Add `await` where `chain.route()` is called (should already be in async context).
51
+
52
+ ### Fix 2.4: Update existing tests
53
+ **File:** `test/routing/chain.test.ts`
54
+ **Change:** Add `await` to `chain.route()` calls. Mark test callbacks as `async`.
55
+
56
+ **Commit:** `refactor(routing): make strategy chain async for LLM support`
57
+
58
+ ## Phase 3: Implement LLM Strategy
59
+
60
+ ### Fix 3.1: Implement `llmStrategy`
61
+ **File:** `src/routing/strategies/llm.ts`
62
+ **Change:** Full implementation:
63
+ - `buildRoutingPrompt(story, config)` → formats the system prompt from spec
64
+ - `buildBatchPrompt(stories, config)` → formats batch prompt for multiple stories
65
+ - `callLlm(modelTier, prompt, config)` → spawns `claude -p "<prompt>" --model <model>` with timeout
66
+ - `parseRoutingResponse(output)` → JSON.parse + validate fields
67
+ - `cachedDecisions: Map<string, RoutingDecision>` module-level cache
68
+ - `clearCache()` export for testing
69
+ - Main `route()`: check cache → build prompt → call LLM → parse → cache → return
70
+ - Error handling: catch all, log warn, return null (falls through to keyword)
71
+
72
+ ### Fix 3.2: Add batch routing function
73
+ **File:** `src/routing/strategies/llm.ts`
74
+ **Change:** Export `routeBatch(stories, context)` that sends all stories in one LLM call, returns `Map<string, RoutingDecision>`. Called from router before individual routing.
75
+
76
+ ### Fix 3.3: Wire batch routing into router
77
+ **File:** `src/routing/router.ts`
78
+ **Change:** If `config.routing.strategy === "llm" && config.routing.llm?.batchMode`, call `routeBatch()` before the story loop to pre-populate the cache.
79
+
80
+ ### Fix 3.4: Write tests
81
+ **File:** `test/routing/llm-strategy.test.ts` (new)
82
+ **Change:** Tests for:
83
+ - `buildRoutingPrompt` output format
84
+ - `parseRoutingResponse` happy path (valid JSON)
85
+ - `parseRoutingResponse` error paths (invalid JSON, missing fields, unknown values)
86
+ - `route()` with mocked `Bun.spawn` returning valid JSON
87
+ - `route()` with timeout → returns null
88
+ - `route()` with parse error → returns null
89
+ - Cache hit (second call returns cached decision)
90
+ - `clearCache()` resets cache
91
+ - Batch prompt format
92
+
93
+ **Commit:** `feat(routing): implement LLM-enhanced routing with batch support`
94
+
95
+ ## Phase 4: Integration + Logging
96
+
97
+ ### Fix 4.1: Add routing log output
98
+ **File:** `src/routing/strategies/llm.ts`
99
+ **Change:** `console.log` with chalk for each routing decision:
100
+ ```
101
+ [routing] LLM classified US-008 as simple/fast/test-after: "Barrel export file"
102
+ ```
103
+
104
+ ### Fix 4.2: Run full test suite
105
+ **Change:** `bun test` — ensure all existing tests still pass with async chain.
106
+
107
+ **Commit:** `feat(routing): add LLM routing logging and integration tests`
108
+
109
+ ## Breaking Changes
110
+ None — `RoutingStrategy.route()` now allows a sync OR async return (union type), so existing sync strategies continue to work. `StrategyChain.route()` becomes async; its callers already run in async contexts and gain an `await` in Fix 2.3.
111
+
112
+ ## Commits Summary
113
+ 1. `feat(config): add LLM routing config schema and defaults`
114
+ 2. `refactor(routing): make strategy chain async for LLM support`
115
+ 3. `feat(routing): implement LLM-enhanced routing with batch support`
116
+ 4. `feat(routing): add LLM routing logging and integration tests`