@nathapp/nax 0.21.0 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (210) hide show
  1. package/.mcp.json +8 -0
  2. package/docs/ROADMAP.md +20 -5
  3. package/docs/adr/ADR-005-implementation-plan.md +655 -0
  4. package/docs/adr/ADR-005-pipeline-re-architecture.md +464 -0
  5. package/package.json +1 -1
  6. package/src/agents/claude.ts +44 -9
  7. package/src/config/types.ts +11 -0
  8. package/src/execution/dry-run.ts +81 -0
  9. package/src/execution/escalation/tier-outcome.ts +29 -44
  10. package/src/execution/executor-types.ts +65 -0
  11. package/src/execution/index.ts +0 -17
  12. package/src/execution/iteration-runner.ts +132 -0
  13. package/src/execution/lifecycle/index.ts +0 -1
  14. package/src/execution/lifecycle/run-regression.ts +5 -5
  15. package/src/execution/pipeline-result-handler.ts +51 -254
  16. package/src/execution/sequential-executor.ts +72 -316
  17. package/src/execution/story-selector.ts +75 -0
  18. package/src/pipeline/event-bus.ts +276 -0
  19. package/src/pipeline/runner.ts +51 -77
  20. package/src/pipeline/stages/autofix.ts +133 -0
  21. package/src/pipeline/stages/completion.ts +22 -30
  22. package/src/pipeline/stages/index.ts +30 -13
  23. package/src/pipeline/stages/rectify.ts +93 -0
  24. package/src/pipeline/stages/regression.ts +88 -0
  25. package/src/pipeline/stages/review.ts +19 -153
  26. package/src/pipeline/stages/verify.ts +18 -2
  27. package/src/pipeline/subscribers/hooks.ts +133 -0
  28. package/src/pipeline/subscribers/interaction.ts +68 -0
  29. package/src/pipeline/subscribers/reporters.ts +174 -0
  30. package/src/pipeline/types.ts +10 -1
  31. package/src/review/orchestrator.ts +105 -0
  32. package/src/tdd/prompts.ts +1 -1
  33. package/src/verification/index.ts +1 -1
  34. package/src/verification/orchestrator-types.ts +145 -0
  35. package/src/verification/orchestrator.ts +76 -0
  36. package/src/{execution/post-verify-rectification.ts → verification/rectification-loop.ts} +13 -20
  37. package/src/verification/{gate.ts → runners.ts} +17 -105
  38. package/src/verification/strategies/acceptance.ts +133 -0
  39. package/src/verification/strategies/regression.ts +90 -0
  40. package/src/verification/strategies/scoped.ts +123 -0
  41. package/test/COVERAGE-GAPS.md +333 -0
  42. package/test/{acceptance → e2e}/cm-003-default-view.test.ts +1 -0
  43. package/test/{integration/e2e.test.ts → e2e/plan-analyze-run.test.ts} +1 -0
  44. package/test/integration/{agent-validation.test.ts → cli/agent-validation.test.ts} +3 -3
  45. package/test/integration/{cli-config-default-edge-cases.test.ts → cli/cli-config-default-edge-cases.test.ts} +6 -5
  46. package/test/integration/{cli-config-default-view.test.ts → cli/cli-config-default-view.test.ts} +8 -7
  47. package/test/integration/{cli-config-diff.test.ts → cli/cli-config-diff.test.ts} +3 -2
  48. package/test/integration/{cli-config.test.ts → cli/cli-config.test.ts} +3 -2
  49. package/test/integration/{cli-diagnose.test.ts → cli/cli-diagnose.test.ts} +5 -4
  50. package/test/integration/{cli-logs.test.ts → cli/cli-logs.test.ts} +12 -3
  51. package/test/integration/{cli-plugins.test.ts → cli/cli-plugins.test.ts} +4 -3
  52. package/test/integration/{cli-precheck.test.ts → cli/cli-precheck.test.ts} +4 -3
  53. package/test/integration/{cli-run-headless.test.ts → cli/cli-run-headless.test.ts} +3 -2
  54. package/test/integration/{cli.test.ts → cli/cli.test.ts} +2 -1
  55. package/test/integration/{precheck-integration.test.ts → cli/precheck-integration.test.ts} +10 -9
  56. package/test/integration/{precheck-orchestrator.test.ts → cli/precheck-orchestrator.test.ts} +4 -3
  57. package/test/integration/{precheck.test.ts → cli/precheck.test.ts} +5 -4
  58. package/test/integration/{config-loader.test.ts → config/config-loader.test.ts} +2 -1
  59. package/test/integration/{config.test.ts → config/config.test.ts} +2 -2
  60. package/test/integration/config/merger.test.ts +1 -0
  61. package/test/integration/config/paths.test.ts +1 -0
  62. package/test/integration/{security-loader.test.ts → config/security-loader.test.ts} +2 -2
  63. package/test/integration/{context-integration.test.ts → context/context-integration.test.ts} +7 -6
  64. package/test/integration/{path-security.test.ts → context/context-path-security.test.ts} +2 -2
  65. package/test/integration/{context-provider-injection.test.ts → context/context-provider-injection.test.ts} +7 -6
  66. package/test/integration/{context-verification-integration.test.ts → context/context-verification-integration.test.ts} +5 -4
  67. package/test/integration/{s5-greenfield-fallback.test.ts → context/s5-greenfield-fallback.test.ts} +4 -3
  68. package/test/integration/{isolation.test.ts → execution/execution-isolation.test.ts} +1 -1
  69. package/test/integration/{execution.test.ts → execution/execution.test.ts} +8 -8
  70. package/test/integration/{parallel.test.ts → execution/parallel.test.ts} +2 -1
  71. package/test/integration/{prd-pause.test.ts → execution/prd-pause.test.ts} +2 -2
  72. package/test/integration/{prd-resolvers.test.ts → execution/prd-resolvers.test.ts} +3 -2
  73. package/test/integration/{progress.test.ts → execution/progress.test.ts} +1 -1
  74. package/test/integration/execution/runner-batching.test.ts +682 -0
  75. package/test/integration/{runner-config-plugins.test.ts → execution/runner-config-plugins.test.ts} +3 -2
  76. package/test/integration/execution/runner-escalation.test.ts +561 -0
  77. package/test/integration/{runner-fixes.test.ts → execution/runner-fixes.test.ts} +4 -3
  78. package/test/integration/{runner-plugin-integration.test.ts → execution/runner-plugin-integration.test.ts} +6 -5
  79. package/test/integration/execution/runner-queue-and-attempts.test.ts +476 -0
  80. package/test/integration/{status-file-integration.test.ts → execution/status-file-integration.test.ts} +9 -8
  81. package/test/integration/{status-file.test.ts → execution/status-file.test.ts} +3 -2
  82. package/test/integration/{status-writer.test.ts → execution/status-writer.test.ts} +5 -4
  83. package/test/integration/{story-id-in-events.test.ts → execution/story-id-in-events.test.ts} +9 -8
  84. package/test/integration/{interaction-chain-pipeline.test.ts → interaction/interaction-chain-pipeline.test.ts} +26 -14
  85. package/test/integration/{hooks.test.ts → pipeline/hooks.test.ts} +4 -2
  86. package/test/integration/{pipeline-acceptance.test.ts → pipeline/pipeline-acceptance.test.ts} +7 -6
  87. package/test/integration/{pipeline-events.test.ts → pipeline/pipeline-events.test.ts} +7 -6
  88. package/test/integration/{pipeline.test.ts → pipeline/pipeline.test.ts} +9 -7
  89. package/test/integration/{reporter-lifecycle.test.ts → pipeline/reporter-lifecycle.test.ts} +9 -7
  90. package/test/integration/{verify-stage.test.ts → pipeline/verify-stage.test.ts} +7 -5
  91. package/test/integration/{analyze-integration.test.ts → plan/analyze-integration.test.ts} +3 -2
  92. package/test/integration/{analyze-scanner.test.ts → plan/analyze-scanner.test.ts} +8 -7
  93. package/test/integration/{logger.test.ts → plan/logger.test.ts} +1 -1
  94. package/test/integration/{plan.test.ts → plan/plan.test.ts} +3 -3
  95. package/test/integration/plugins/config-integration.test.ts +1 -0
  96. package/test/integration/plugins/config-resolution.test.ts +1 -0
  97. package/test/integration/plugins/loader.test.ts +1 -0
  98. package/test/integration/plugins/{registry.test.ts → plugins-registry.test.ts} +1 -0
  99. package/test/integration/plugins/validator.test.ts +1 -0
  100. package/test/integration/{review-config-commands.test.ts → review/review-config-commands.test.ts} +4 -3
  101. package/test/integration/{review-config-schema.test.ts → review/review-config-schema.test.ts} +3 -2
  102. package/test/integration/{review-plugin-integration.test.ts → review/review-plugin-integration.test.ts} +5 -4
  103. package/test/integration/{review.test.ts → review/review.test.ts} +3 -2
  104. package/test/integration/routing/plugin-routing-advanced.test.ts +461 -0
  105. package/test/integration/{plugin-routing.test.ts → routing/plugin-routing-core.test.ts} +9 -403
  106. package/test/integration/{routing-stage-bug-021.test.ts → routing/routing-stage-bug-021.test.ts} +8 -7
  107. package/test/integration/{routing-stage-greenfield.test.ts → routing/routing-stage-greenfield.test.ts} +7 -6
  108. package/test/integration/{tdd-cleanup.test.ts → tdd/tdd-cleanup.test.ts} +1 -1
  109. package/test/integration/tdd/tdd-orchestrator-core.test.ts +565 -0
  110. package/test/integration/tdd/tdd-orchestrator-failureCategory.test.ts +355 -0
  111. package/test/integration/tdd/tdd-orchestrator-fallback.test.ts +311 -0
  112. package/test/integration/tdd/tdd-orchestrator-lite.test.ts +289 -0
  113. package/test/integration/tdd/tdd-orchestrator-prompts.test.ts +260 -0
  114. package/test/integration/tdd/tdd-orchestrator-verdict.test.ts +536 -0
  115. package/test/integration/tmp/headless-test/test.jsonl +30 -0
  116. package/test/integration/{test-scanner.test.ts → verification/test-scanner.test.ts} +1 -1
  117. package/test/integration/{verification-asset-check.test.ts → verification/verification-asset-check.test.ts} +3 -2
  118. package/test/unit/acceptance.test.ts +1 -0
  119. package/test/unit/agent-stderr-capture.test.ts +1 -0
  120. package/test/unit/agents/claude.test.ts +1 -0
  121. package/test/unit/analyze-classifier.test.ts +1 -0
  122. package/test/unit/auto-detect.test.ts +1 -0
  123. package/test/unit/cli-status.test.ts +1 -0
  124. package/test/unit/commands/common.test.ts +1 -0
  125. package/test/unit/commands/logs.test.ts +1 -0
  126. package/test/unit/commands/unlock.test.ts +1 -0
  127. package/test/unit/config/defaults.test.ts +1 -0
  128. package/test/unit/config/regression-gate-schema.test.ts +1 -0
  129. package/test/unit/config/smart-runner-flag.test.ts +1 -0
  130. package/test/unit/constitution-generators.test.ts +1 -0
  131. package/test/unit/constitution.test.ts +1 -0
  132. package/test/unit/context/context-autodetect.test.ts +297 -0
  133. package/test/unit/context/context-build.test.ts +575 -0
  134. package/test/unit/context/context-coverage.test.ts +236 -0
  135. package/test/unit/context/context-error.test.ts +93 -0
  136. package/test/unit/context/context-estimate-tokens.test.ts +201 -0
  137. package/test/unit/context/context-format.test.ts +302 -0
  138. package/test/unit/context/context-isolation.test.ts +267 -0
  139. package/test/unit/context/context-sort.test.ts +93 -0
  140. package/test/unit/context/context-story.test.ts +108 -0
  141. package/test/{context → unit/context}/prior-failures.test.ts +5 -4
  142. package/test/unit/context.test.ts +1 -0
  143. package/test/unit/crash-recovery.test.ts +1 -0
  144. package/test/unit/escalation.test.ts +1 -0
  145. package/test/unit/execution/lifecycle/run-completion.test.ts +1 -0
  146. package/test/unit/execution/lifecycle/run-regression.test.ts +2 -0
  147. package/test/{execution → unit/execution}/pid-registry.test.ts +2 -1
  148. package/test/{execution → unit/execution}/structured-failure.test.ts +3 -2
  149. package/test/unit/execution-logging-stderr.test.ts +1 -0
  150. package/test/unit/execution-stage.test.ts +1 -0
  151. package/test/unit/fix-generator.test.ts +1 -0
  152. package/test/unit/greenfield.test.ts +1 -0
  153. package/test/unit/interaction/human-review-trigger.test.ts +1 -0
  154. package/test/unit/interaction-network-failures.test.ts +1 -0
  155. package/test/unit/interaction-plugins.test.ts +1 -0
  156. package/test/unit/logging/formatter.test.ts +1 -0
  157. package/test/unit/merge.test.ts +1 -0
  158. package/test/unit/pipeline/event-bus.test.ts +105 -0
  159. package/test/unit/pipeline/routing-partial-override.test.ts +1 -0
  160. package/test/unit/pipeline/runner-retry.test.ts +89 -0
  161. package/test/unit/pipeline/stages/autofix.test.ts +97 -0
  162. package/test/unit/pipeline/stages/rectify.test.ts +101 -0
  163. package/test/unit/pipeline/stages/regression-stage.test.ts +69 -0
  164. package/test/unit/pipeline/stages/verify.test.ts +1 -0
  165. package/test/unit/pipeline/subscribers/hooks.test.ts +45 -0
  166. package/test/unit/pipeline/subscribers/interaction.test.ts +31 -0
  167. package/test/unit/pipeline/subscribers/reporters.test.ts +90 -0
  168. package/test/unit/pipeline/verify-smart-runner.test.ts +1 -0
  169. package/test/unit/prd-auto-default.test.ts +1 -0
  170. package/test/unit/prd-failure-category.test.ts +1 -0
  171. package/test/unit/prd-get-next-story.test.ts +1 -0
  172. package/test/unit/precheck-checks.test.ts +1 -0
  173. package/test/unit/precheck-story-size-gate.test.ts +1 -0
  174. package/test/unit/precheck-types.test.ts +1 -0
  175. package/test/unit/prompts.test.ts +1 -0
  176. package/test/unit/rectification.test.ts +2 -1
  177. package/test/unit/registry.test.ts +1 -0
  178. package/test/unit/routing/routing-stability.test.ts +1 -0
  179. package/test/unit/routing/strategies/llm.test.ts +1 -0
  180. package/test/unit/routing-advanced.test.ts +313 -0
  181. package/test/unit/routing-core.test.ts +341 -0
  182. package/test/unit/routing-strategies.test.ts +442 -0
  183. package/test/unit/storyid-events.test.ts +1 -0
  184. package/test/{ui → unit/ui}/tui-controls.test.ts +8 -7
  185. package/test/{ui → unit/ui}/tui-cost-and-pty.test.ts +4 -3
  186. package/test/{ui → unit/ui}/tui-layout.test.ts +5 -4
  187. package/test/{ui → unit/ui}/tui-stories.test.ts +5 -4
  188. package/test/unit/{isolation.test.ts → unit-isolation.test.ts} +1 -0
  189. package/test/unit/{helpers.test.ts → utils-helpers.test.ts} +1 -0
  190. package/test/unit/verdict.test.ts +1 -0
  191. package/test/unit/verification/orchestrator-types.test.ts +54 -0
  192. package/test/unit/verification/orchestrator.test.ts +66 -0
  193. package/test/unit/verification/smart-runner-config.test.ts +1 -0
  194. package/test/unit/verification/smart-runner-discovery.test.ts +8 -7
  195. package/test/unit/verification/strategies/acceptance.test.ts +33 -0
  196. package/test/unit/verification/strategies/regression.test.ts +87 -0
  197. package/test/unit/verification/strategies/scoped.test.ts +100 -0
  198. package/test/unit/worktree-manager.test.ts +1 -0
  199. package/src/execution/lifecycle/story-hooks.ts +0 -38
  200. package/src/execution/post-verify.ts +0 -193
  201. package/src/execution/rectification.ts +0 -13
  202. package/src/execution/verification.ts +0 -72
  203. package/test/integration/rectification-flow.test.ts +0 -512
  204. package/test/integration/runner.test.ts +0 -1679
  205. package/test/integration/tdd-orchestrator.test.ts +0 -1762
  206. package/test/unit/execution/post-verify-regression.test.ts +0 -362
  207. package/test/unit/execution/post-verify.test.ts +0 -236
  208. package/test/unit/routing.test.ts +0 -1039
  209. /package/test/{integration → helpers}/helpers.test.ts +0 -0
  210. /package/test/integration/worktree/{merge.test.ts → worktree-merge.test.ts} +0 -0
@@ -0,0 +1,355 @@
1
+ import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
2
+ import { existsSync } from "node:fs";
3
+ import { mkdir, rm, writeFile } from "node:fs/promises";
4
+ import path from "node:path";
5
+ import type { AgentAdapter, AgentResult } from "../../../src/agents";
6
+ import { DEFAULT_CONFIG } from "../../../src/config";
7
+ import type { UserStory } from "../../../src/prd";
8
+ import { runThreeSessionTdd } from "../../../src/tdd/orchestrator";
9
+ import { VERDICT_FILE } from "../../../src/tdd/verdict";
10
+
11
+ let originalSpawn: typeof Bun.spawn; // holds the real Bun.spawn while a test has it replaced by a mock
12
+
13
+ beforeEach(() => {
14
+ originalSpawn = Bun.spawn; // snapshot before every test so mocks can delegate to it and afterEach can restore it
15
+ });
16
+
17
+ afterEach(() => {
18
+ Bun.spawn = originalSpawn; // undo whatever Bun.spawn mock the test installed
19
+ });
20
+
21
+ /** Build a stub AgentAdapter whose run() hands back results[0], results[1], … on successive calls; fields missing from an entry (or calls past the end of the array) fall back to a successful default result. */
22
+ function createMockAgent(results: Partial<AgentResult>[]): AgentAdapter {
23
+ let callCount = 0; // index of the next scripted result to return
24
+ return {
25
+ name: "mock",
26
+ displayName: "Mock Agent",
27
+ binary: "mock",
28
+ isInstalled: async () => true,
29
+ buildCommand: () => ["mock"],
30
+ run: mock(async () => {
31
+ const r = results[callCount] || {}; // no scripted entry for this call → every field takes its default below
32
+ callCount++;
33
+ return {
34
+ success: r.success ?? true,
35
+ exitCode: r.exitCode ?? 0,
36
+ output: r.output ?? "",
37
+ rateLimited: r.rateLimited ?? false,
38
+ durationMs: r.durationMs ?? 100,
39
+ estimatedCost: r.estimatedCost ?? 0.01,
40
+ };
41
+ }),
42
+ };
43
+ }
44
+
45
+ /** Replace Bun.spawn with a mock that fakes `<shell> -c …` commands and `git rev-parse` / `git checkout` / `git diff`; every other command is forwarded to the original spawn. */
46
+ function mockGitSpawn(opts: {
47
+ /** File lists returned by successive `git diff` calls: the Nth call gets diffFiles[N], or an empty list once the array is exhausted */
48
+ diffFiles: string[][];
49
+ /** Optional: whether the faked shell command exits 0 (true, the default) or 1 (false) */
50
+ testCommandSuccess?: boolean;
51
+ }) {
52
+ let revParseCount = 0; // number of rev-parse calls seen; used to fabricate distinct refs
53
+ let diffCount = 0; // index of the next diffFiles entry to serve
54
+ const testSuccess = opts.testCommandSuccess ?? true;
55
+
56
+ // @ts-ignore — mocking global
57
+ Bun.spawn = mock((cmd: string[], spawnOpts?: any) => {
58
+ // Any `/bin/sh|bash|zsh -c …` invocation is treated as the test command (bun test, npm test, etc.)
59
+ if ((cmd[0] === "/bin/sh" || cmd[0] === "/bin/bash" || cmd[0] === "/bin/zsh") && cmd[1] === "-c") {
60
+ return {
61
+ pid: 9999,
62
+ exited: Promise.resolve(testSuccess ? 0 : 1),
63
+ stdout: new Response(testSuccess ? "tests pass\n" : "tests fail\n").body,
64
+ stderr: new Response("").body,
65
+ };
66
+ }
67
+ if (cmd[0] === "git" && cmd[1] === "rev-parse") {
68
+ revParseCount++;
69
+ return {
70
+ exited: Promise.resolve(0),
71
+ stdout: new Response(`ref-${revParseCount}\n`).body, // ref-1, ref-2, … so each call yields a different ref
72
+ stderr: new Response("").body,
73
+ };
74
+ }
75
+ if (cmd[0] === "git" && cmd[1] === "checkout") {
76
+ // Intercept git checkout (used in zero-file fallback) — silently succeed
77
+ return {
78
+ exited: Promise.resolve(0),
79
+ stdout: new Response("").body,
80
+ stderr: new Response("").body,
81
+ };
82
+ }
83
+ if (cmd[0] === "git" && cmd[1] === "diff") {
84
+ const files = opts.diffFiles[diffCount] || []; // past the scripted entries → report no changed files
85
+ diffCount++;
86
+ return {
87
+ exited: Promise.resolve(0),
88
+ stdout: new Response(files.join("\n") + "\n").body, // one path per line, newline-terminated
89
+ stderr: new Response("").body,
90
+ };
91
+ }
92
+ return originalSpawn(cmd, spawnOpts); // anything else runs for real
93
+ });
94
+ }
95
+
96
+ const story: UserStory = { // shared story fixture handed to runThreeSessionTdd by the tests in this file
97
+ id: "US-001",
98
+ title: "Add user validation",
99
+ description: "Add validation to user input",
100
+ acceptanceCriteria: ["Validation works", "Errors are clear"],
101
+ dependencies: [],
102
+ tags: [],
103
+ status: "pending",
104
+ passes: false,
105
+ escalations: [],
106
+ attempts: 0,
107
+ };
108
+
109
+
110
+ describe("runThreeSessionTdd — failureCategory", () => {
111
+ test("test-writer isolation failure sets failureCategory='isolation-violation'", async () => {
112
+ // Scenario: the diff reported for the test-writer session contains a source file (src/user.ts) next to the test file → isolation violation
113
+ mockGitSpawn({
114
+ diffFiles: [
115
+ // 1st git diff (isolation check): test-writer touched source files!
116
+ ["src/user.ts", "test/user.test.ts"],
117
+ // 2nd git diff (getChangedFiles)
118
+ ["src/user.ts", "test/user.test.ts"],
119
+ ],
120
+ });
121
+
122
+ const agent = createMockAgent([{ success: true, estimatedCost: 0.01 }]); // the agent run itself succeeds; only the diff is bad
123
+
124
+ const result = await runThreeSessionTdd({
125
+ agent,
126
+ story,
127
+ config: DEFAULT_CONFIG,
128
+ workdir: "/tmp/test",
129
+ modelTier: "balanced",
130
+ });
131
+
132
+ expect(result.success).toBe(false);
133
+ expect(result.failureCategory).toBe("isolation-violation");
134
+ });
135
+
136
+ test("test-writer zero files (non-auto strategy) sets failureCategory='greenfield-no-tests'", async () => {
136
+ // Scenario: strategy is forced to "strict" and the test-writer session changes no test files → greenfield-no-tests category (BUG-010 behavior)
137
+ mockGitSpawn({
138
+ diffFiles: [
139
+ ["requirements.md"], // s1 isolation — no source violations
140
+ ["requirements.md"], // s1 getChangedFiles — 0 test files
141
+ ],
142
+ });
143
+
144
+ const agent = createMockAgent([{ success: true, estimatedCost: 0.01 }]); // the agent run itself succeeds
145
+
146
+ const configWithStrictStrategy = {
147
+ ...DEFAULT_CONFIG,
148
+ tdd: { ...DEFAULT_CONFIG.tdd, strategy: "strict" as const }, // override only the strategy, keep the other tdd defaults
149
+ };
150
+
151
+ const result = await runThreeSessionTdd({
152
+ agent,
153
+ story,
154
+ config: configWithStrictStrategy,
155
+ workdir: "/tmp/test",
156
+ modelTier: "balanced",
157
+ });
158
+
159
+ expect(result.success).toBe(false);
160
+ expect(result.failureCategory).toBe("greenfield-no-tests");
161
+ });
163
+
164
+ test("test-writer crash/timeout (non-isolation failure) sets failureCategory='session-failure'", async () => {
164
+ // Scenario: the test-writer agent run reports failure (exit code 1) while the diff contains only test files
165
+ mockGitSpawn({
166
+ diffFiles: [
167
+ // 1st git diff (isolation check): only test files (passes)
168
+ ["test/user.test.ts"],
169
+ // 2nd git diff (getChangedFiles)
170
+ ["test/user.test.ts"],
171
+ ],
172
+ });
173
+
174
+ const agent = createMockAgent([
175
+ { success: false, exitCode: 1, estimatedCost: 0.01 }, // Agent crash
176
+ ]);
177
+
178
+ const result = await runThreeSessionTdd({
179
+ agent,
180
+ story,
181
+ config: DEFAULT_CONFIG,
182
+ workdir: "/tmp/test",
183
+ modelTier: "balanced",
184
+ });
185
+
186
+ expect(result.success).toBe(false);
187
+ // isolation.passed=true but agent failed → session-failure
188
+ expect(result.failureCategory).toBe("session-failure");
189
+ });
191
+
192
+ test("implementer failure sets failureCategory='session-failure'", async () => { // first agent call succeeds, second one fails
192
+ mockGitSpawn({
193
+ diffFiles: [
194
+ // Session 1 isolation: OK (test file only)
195
+ ["test/user.test.ts"],
196
+ // Session 1 getChangedFiles
197
+ ["test/user.test.ts"],
198
+ // Session 2 isolation: OK (source file only)
199
+ ["src/user.ts"],
200
+ // Session 2 getChangedFiles
201
+ ["src/user.ts"],
202
+ ],
203
+ });
204
+
205
+ const agent = createMockAgent([
206
+ { success: true, estimatedCost: 0.01 }, // test-writer OK
207
+ { success: false, exitCode: 1, estimatedCost: 0.02 }, // implementer fails
208
+ ]);
209
+
210
+ const result = await runThreeSessionTdd({
211
+ agent,
212
+ story,
213
+ config: DEFAULT_CONFIG,
214
+ workdir: "/tmp/test",
215
+ modelTier: "balanced",
216
+ });
217
+
218
+ expect(result.success).toBe(false);
219
+ expect(result.failureCategory).toBe("session-failure");
220
+ });
222
+
223
+ test("post-TDD test failure sets failureCategory='tests-failing'", async () => {
223
+ // Scenario: the third (verifier) agent call fails AND the faked `bun test` shell command exits 1
224
+ let revParseCount = 0; // number of rev-parse calls seen; used to fabricate distinct refs
225
+ let diffCount = 0; // index of the next diffFiles entry to serve
226
+
227
+ const diffFiles = [["test/user.test.ts"], ["test/user.test.ts"], ["src/user.ts"], ["src/user.ts"], ["src/user.ts"]];
228
+
229
+ // @ts-ignore — mocking global
230
+ Bun.spawn = mock((cmd: string[], spawnOpts?: any) => { // inline mock: unlike mockGitSpawn it only matches /bin/sh commands containing "bun test" and does not fake git checkout
231
+ if (cmd[0] === "/bin/sh" && cmd[2]?.includes("bun test")) {
232
+ return {
233
+ pid: 9999,
234
+ exited: Promise.resolve(1), // Tests FAIL
235
+ stdout: new Response("3 pass, 2 fail\n").body,
236
+ stderr: new Response("Test errors...\n").body,
237
+ };
238
+ }
239
+ if (cmd[0] === "git" && cmd[1] === "rev-parse") {
240
+ revParseCount++;
241
+ return {
242
+ exited: Promise.resolve(0),
243
+ stdout: new Response(`ref-${revParseCount}\n`).body,
244
+ stderr: new Response("").body,
245
+ };
246
+ }
247
+ if (cmd[0] === "git" && cmd[1] === "diff") {
248
+ const files = diffFiles[diffCount] || []; // past the scripted entries → report no changed files
249
+ diffCount++;
250
+ return {
251
+ exited: Promise.resolve(0),
252
+ stdout: new Response(files.join("\n") + "\n").body,
253
+ stderr: new Response("").body,
254
+ };
255
+ }
256
+ return originalSpawn(cmd, spawnOpts); // anything else runs for real
257
+ });
258
+
259
+ const agent = createMockAgent([
260
+ { success: true, estimatedCost: 0.01 },
261
+ { success: true, estimatedCost: 0.02 },
262
+ { success: false, exitCode: 1, estimatedCost: 0.01 }, // verifier fails
263
+ ]);
264
+
265
+ const result = await runThreeSessionTdd({
266
+ agent,
267
+ story,
268
+ config: DEFAULT_CONFIG,
269
+ workdir: "/tmp/test",
270
+ modelTier: "balanced",
271
+ });
272
+
273
+ expect(result.success).toBe(false);
274
+ expect(result.failureCategory).toBe("tests-failing");
275
+ });
277
+
278
+ test("success path has no failureCategory", async () => { // all three agent calls succeed and the faked shell command exits 0 (mockGitSpawn default)
278
+ mockGitSpawn({
279
+ diffFiles: [["test/user.test.ts"], ["test/user.test.ts"], ["src/user.ts"], ["src/user.ts"], ["src/user.ts"]],
280
+ });
281
+
282
+ const agent = createMockAgent([
283
+ { success: true, estimatedCost: 0.01 },
284
+ { success: true, estimatedCost: 0.02 },
285
+ { success: true, estimatedCost: 0.01 },
286
+ ]);
287
+
288
+ const result = await runThreeSessionTdd({
289
+ agent,
290
+ story,
291
+ config: DEFAULT_CONFIG,
292
+ workdir: "/tmp/test",
293
+ modelTier: "balanced",
294
+ });
295
+
296
+ expect(result.success).toBe(true);
297
+ expect(result.failureCategory).toBeUndefined();
298
+ });
300
+
301
+ test("zero-file scenario (auto strategy) returns greenfield-no-tests (BUG-010 removed auto-fallback)", async () => {
301
+ // BUG-010: In auto strategy, zero test files → return greenfield-no-tests (no more fallback to lite mode)
302
+ let diffCount = 0; // index of the next diffFiles entry to serve
303
+
304
+ const diffFiles = [
305
+ ["requirements.md"], // s1 isolation (strict) — no source violations
306
+ ["requirements.md"], // s1 getChangedFiles (strict) — 0 test files → return greenfield-no-tests
307
+ ];
308
+
309
+ // @ts-ignore — mocking global
310
+ Bun.spawn = mock((cmd: string[], spawnOpts?: any) => { // inline mock: fakes only git rev-parse and git diff; shell and checkout commands are not intercepted
311
+ if (cmd[0] === "git" && cmd[1] === "rev-parse") {
312
+ return {
313
+ exited: Promise.resolve(0),
314
+ stdout: new Response("ref-1\n").body, // same ref on every call
315
+ stderr: new Response("").body,
316
+ };
317
+ }
318
+ if (cmd[0] === "git" && cmd[1] === "diff") {
319
+ const files = diffFiles[diffCount] || []; // past the scripted entries → report no changed files
320
+ diffCount++;
321
+ return {
322
+ exited: Promise.resolve(0),
323
+ stdout: new Response(files.join("\n") + "\n").body,
324
+ stderr: new Response("").body,
325
+ };
326
+ }
327
+ return originalSpawn(cmd, spawnOpts); // anything else runs for real
328
+ });
329
+
330
+ const agent = createMockAgent([
331
+ { success: true, estimatedCost: 0.01 }, // s1 strict test-writer
332
+ ]);
333
+
334
+ const configWithAutoStrategy = {
335
+ ...DEFAULT_CONFIG,
336
+ tdd: { ...DEFAULT_CONFIG.tdd, strategy: "auto" as const }, // override only the strategy, keep the other tdd defaults
337
+ };
338
+
339
+ const result = await runThreeSessionTdd({
340
+ agent,
341
+ story,
342
+ config: configWithAutoStrategy,
343
+ workdir: "/tmp/test",
344
+ modelTier: "balanced",
345
+ });
346
+
347
+ expect(result.success).toBe(false);
348
+ expect(result.lite).toBe(false);
349
+ expect(result.failureCategory).toBe("greenfield-no-tests");
350
+ });
352
+ });
353
+
354
+ // ─── T9: Verdict integration tests ───────────────────────────────────────────
355
+
@@ -0,0 +1,311 @@
1
+ import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
2
+ import { existsSync } from "node:fs";
3
+ import { mkdir, rm, writeFile } from "node:fs/promises";
4
+ import path from "node:path";
5
+ import type { AgentAdapter, AgentResult } from "../../../src/agents";
6
+ import { DEFAULT_CONFIG } from "../../../src/config";
7
+ import type { UserStory } from "../../../src/prd";
8
+ import { runThreeSessionTdd } from "../../../src/tdd/orchestrator";
9
+ import { VERDICT_FILE } from "../../../src/tdd/verdict";
10
+
11
+ let originalSpawn: typeof Bun.spawn; // holds the real Bun.spawn while a test has it replaced by a mock
12
+
13
+ beforeEach(() => {
14
+ originalSpawn = Bun.spawn; // snapshot before every test so mocks can delegate to it and afterEach can restore it
15
+ });
16
+
17
+ afterEach(() => {
18
+ Bun.spawn = originalSpawn; // undo whatever Bun.spawn mock the test installed
19
+ });
20
+
21
+ /** Build a stub AgentAdapter whose run() hands back results[0], results[1], … on successive calls; fields missing from an entry (or calls past the end of the array) fall back to a successful default result. */
22
+ function createMockAgent(results: Partial<AgentResult>[]): AgentAdapter {
23
+ let callCount = 0; // index of the next scripted result to return
24
+ return {
25
+ name: "mock",
26
+ displayName: "Mock Agent",
27
+ binary: "mock",
28
+ isInstalled: async () => true,
29
+ buildCommand: () => ["mock"],
30
+ run: mock(async () => {
31
+ const r = results[callCount] || {}; // no scripted entry for this call → every field takes its default below
32
+ callCount++;
33
+ return {
34
+ success: r.success ?? true,
35
+ exitCode: r.exitCode ?? 0,
36
+ output: r.output ?? "",
37
+ rateLimited: r.rateLimited ?? false,
38
+ durationMs: r.durationMs ?? 100,
39
+ estimatedCost: r.estimatedCost ?? 0.01,
40
+ };
41
+ }),
42
+ };
43
+ }
44
+
45
+ /** Replace Bun.spawn with a mock that fakes `<shell> -c …` commands and `git rev-parse` / `git checkout` / `git diff`; every other command is forwarded to the original spawn. */
46
+ function mockGitSpawn(opts: {
47
+ /** File lists returned by successive `git diff` calls: the Nth call gets diffFiles[N], or an empty list once the array is exhausted */
48
+ diffFiles: string[][];
49
+ /** Optional: whether the faked shell command exits 0 (true, the default) or 1 (false) */
50
+ testCommandSuccess?: boolean;
51
+ }) {
52
+ let revParseCount = 0; // number of rev-parse calls seen; used to fabricate distinct refs
53
+ let diffCount = 0; // index of the next diffFiles entry to serve
54
+ const testSuccess = opts.testCommandSuccess ?? true;
55
+
56
+ // @ts-ignore — mocking global
57
+ Bun.spawn = mock((cmd: string[], spawnOpts?: any) => {
58
+ // Any `/bin/sh|bash|zsh -c …` invocation is treated as the test command (bun test, npm test, etc.)
59
+ if ((cmd[0] === "/bin/sh" || cmd[0] === "/bin/bash" || cmd[0] === "/bin/zsh") && cmd[1] === "-c") {
60
+ return {
61
+ pid: 9999,
62
+ exited: Promise.resolve(testSuccess ? 0 : 1),
63
+ stdout: new Response(testSuccess ? "tests pass\n" : "tests fail\n").body,
64
+ stderr: new Response("").body,
65
+ };
66
+ }
67
+ if (cmd[0] === "git" && cmd[1] === "rev-parse") {
68
+ revParseCount++;
69
+ return {
70
+ exited: Promise.resolve(0),
71
+ stdout: new Response(`ref-${revParseCount}\n`).body, // ref-1, ref-2, … so each call yields a different ref
72
+ stderr: new Response("").body,
73
+ };
74
+ }
75
+ if (cmd[0] === "git" && cmd[1] === "checkout") {
76
+ // Intercept git checkout (used in zero-file fallback) — silently succeed
77
+ return {
78
+ exited: Promise.resolve(0),
79
+ stdout: new Response("").body,
80
+ stderr: new Response("").body,
81
+ };
82
+ }
83
+ if (cmd[0] === "git" && cmd[1] === "diff") {
84
+ const files = opts.diffFiles[diffCount] || []; // past the scripted entries → report no changed files
85
+ diffCount++;
86
+ return {
87
+ exited: Promise.resolve(0),
88
+ stdout: new Response(files.join("\n") + "\n").body, // one path per line, newline-terminated
89
+ stderr: new Response("").body,
90
+ };
91
+ }
92
+ return originalSpawn(cmd, spawnOpts); // anything else runs for real
93
+ });
94
+ }
95
+
96
+ const story: UserStory = { // shared story fixture handed to runThreeSessionTdd by the tests in this file
97
+ id: "US-001",
98
+ title: "Add user validation",
99
+ description: "Add validation to user input",
100
+ acceptanceCriteria: ["Validation works", "Errors are clear"],
101
+ dependencies: [],
102
+ tags: [],
103
+ status: "pending",
104
+ passes: false,
105
+ escalations: [],
106
+ attempts: 0,
107
+ };
108
+
109
+
110
+ describe("runThreeSessionTdd — zero-file fallback", () => {
111
+ /** Same Bun.spawn mock as mockGitSpawn, except that a `git checkout` call also invokes the optional onCheckout callback so a test can detect it. */
112
+ function mockGitSpawnWithCheckout(opts: {
113
+ diffFiles: string[][]; // file lists served to successive `git diff` calls
114
+ onCheckout?: () => void; // called each time a `git checkout` command is spawned
115
+ testCommandSuccess?: boolean; // faked shell command exits 0 when true (default) and 1 when false
116
+ }) {
117
+ let revParseCount = 0; // number of rev-parse calls seen; used to fabricate distinct refs
118
+ let diffCount = 0; // index of the next diffFiles entry to serve
119
+ const testSuccess = opts.testCommandSuccess ?? true;
120
+
121
+ // @ts-ignore — mocking global
122
+ Bun.spawn = mock((cmd: string[], spawnOpts?: any) => {
123
+ // Any `/bin/sh|bash|zsh -c …` invocation is treated as the test command
124
+ if ((cmd[0] === "/bin/sh" || cmd[0] === "/bin/bash" || cmd[0] === "/bin/zsh") && cmd[1] === "-c") {
125
+ return {
126
+ pid: 9999,
127
+ exited: Promise.resolve(testSuccess ? 0 : 1),
128
+ stdout: new Response(testSuccess ? "tests pass\n" : "tests fail\n").body,
129
+ stderr: new Response("").body,
130
+ };
131
+ }
132
+ if (cmd[0] === "git" && cmd[1] === "rev-parse") {
133
+ revParseCount++;
134
+ return {
135
+ exited: Promise.resolve(0),
136
+ stdout: new Response(`ref-${revParseCount}\n`).body,
137
+ stderr: new Response("").body,
138
+ };
139
+ }
140
+ if (cmd[0] === "git" && cmd[1] === "checkout") {
141
+ opts.onCheckout?.(); // notify the test, then pretend the checkout succeeded
142
+ return {
143
+ exited: Promise.resolve(0),
144
+ stdout: new Response("").body,
145
+ stderr: new Response("").body,
146
+ };
147
+ }
148
+ if (cmd[0] === "git" && cmd[1] === "diff") {
149
+ const files = opts.diffFiles[diffCount] || []; // past the scripted entries → report no changed files
150
+ diffCount++;
151
+ return {
152
+ exited: Promise.resolve(0),
153
+ stdout: new Response(files.join("\n") + "\n").body,
154
+ stderr: new Response("").body,
155
+ };
156
+ }
157
+ return originalSpawn(cmd, spawnOpts); // anything else runs for real
158
+ });
159
+ }
160
+
161
+ test("fallback NO LONGER triggers when strategy='auto' and 0 test files (BUG-010 removed auto-fallback)", async () => {
162
+ let checkoutCalled = false; // flipped by onCheckout if any `git checkout` is spawned
163
+
164
+ // BUG-010: Zero-file scenarios now return greenfield-no-tests immediately
165
+ // No fallback to lite mode occurs, so no `git checkout` should be spawned
166
+ mockGitSpawnWithCheckout({
167
+ diffFiles: [
168
+ ["requirements.md"], // s1 isolation (strict) — no source violations
169
+ ["requirements.md"], // s1 getChangedFiles (strict) — 0 test files → return greenfield-no-tests
170
+ ],
171
+ onCheckout: () => {
172
+ checkoutCalled = true;
173
+ },
174
+ });
175
+
176
+ const agent = createMockAgent([
177
+ { success: true, estimatedCost: 0.01 }, // s1 strict test-writer
178
+ ]);
179
+
180
+ const configWithAutoStrategy = {
181
+ ...DEFAULT_CONFIG,
182
+ tdd: { ...DEFAULT_CONFIG.tdd, strategy: "auto" as const }, // override only the strategy, keep the other tdd defaults
183
+ };
184
+
185
+ const result = await runThreeSessionTdd({
186
+ agent,
187
+ story,
188
+ config: configWithAutoStrategy,
189
+ workdir: "/tmp/test",
190
+ modelTier: "balanced",
191
+ });
192
+
193
+ expect(checkoutCalled).toBe(false); // git checkout NOT called (no fallback)
194
+ expect(result.lite).toBe(false); // not in lite mode
195
+ expect(result.success).toBe(false); // fails with greenfield-no-tests
196
+ expect(result.failureCategory).toBe("greenfield-no-tests");
197
+ });
198
+
199
+ test("zero-file scenario returns greenfield-no-tests (BUG-010 removed lite fallback)", async () => {
200
+ // BUG-010: No more auto-fallback to lite mode
201
+ mockGitSpawn({
202
+ diffFiles: [
203
+ ["docs/plan.md"], // s1 isolation (strict)
204
+ ["docs/plan.md"], // s1 getChangedFiles (strict) → 0 test files
205
+ ],
206
+ });
207
+
208
+ const agent = createMockAgent([{ success: true, estimatedCost: 0.01 }]);
209
+
210
+ const result = await runThreeSessionTdd({
211
+ agent,
212
+ story,
213
+ config: DEFAULT_CONFIG,
214
+ workdir: "/tmp/test",
215
+ modelTier: "balanced",
216
+ });
217
+
218
+ expect(result.lite).toBe(false);
219
+ expect(result.success).toBe(false);
220
+ expect(result.failureCategory).toBe("greenfield-no-tests");
221
+ });
222
+
223
+ test("fallback does NOT trigger when strategy='strict' (explicit strict mode)", async () => {
224
+ // In strategy='strict', no fallback — should return failure
225
+ mockGitSpawn({
226
+ diffFiles: [
227
+ ["requirements.md"], // s1 isolation — no source violations
228
+ ["requirements.md"], // s1 getChangedFiles — 0 test files
229
+ ],
230
+ });
231
+
232
+ const agent = createMockAgent([{ success: true, estimatedCost: 0.01 }]);
233
+
234
+ const configWithStrictStrategy = {
235
+ ...DEFAULT_CONFIG,
236
+ tdd: { ...DEFAULT_CONFIG.tdd, strategy: "strict" as const },
237
+ };
238
+
239
+ const result = await runThreeSessionTdd({
240
+ agent,
241
+ story,
242
+ config: configWithStrictStrategy,
243
+ workdir: "/tmp/test",
244
+ modelTier: "balanced",
245
+ });
246
+
247
+ // Should fail (no fallback in strict mode)
248
+ expect(result.success).toBe(false);
249
+ expect(result.needsHumanReview).toBe(true);
250
+ expect(result.reviewReason).toBe("Test writer session created no test files (greenfield project)");
251
+ expect(result.lite).toBe(false); // Was called in strict mode, no fallback
252
+ });
253
+
254
+ test("fallback does NOT trigger when already in lite mode", async () => {
255
+ // Calling with lite=true — if 0 test files, should return failure (not recurse again)
256
+ mockGitSpawn({
257
+ diffFiles: [
258
+ ["requirements.md"], // s1 getChangedFiles (lite, no isolation) — 0 test files
259
+ ],
260
+ });
261
+
262
+ const agent = createMockAgent([{ success: true, estimatedCost: 0.01 }]);
263
+
264
+ const result = await runThreeSessionTdd({
265
+ agent,
266
+ story,
267
+ config: DEFAULT_CONFIG,
268
+ workdir: "/tmp/test",
269
+ modelTier: "balanced",
270
+ lite: true,
271
+ });
272
+
273
+ // Should fail — no further fallback from lite mode
274
+ expect(result.success).toBe(false);
275
+ expect(result.needsHumanReview).toBe(true);
276
+ expect(result.reviewReason).toBe("Test writer session created no test files (greenfield project)");
277
+ expect(result.lite).toBe(true);
278
+ });
279
+
280
+ test("fallback does NOT trigger when strategy='lite' config", async () => {
281
+ // When strategy='lite', runThreeSessionTdd is called with lite=true (from execution stage)
282
+ // So !lite = false → no fallback
283
+ mockGitSpawn({
284
+ diffFiles: [
285
+ [], // s1 getChangedFiles (lite, no isolation) — 0 test files
286
+ ],
287
+ });
288
+
289
+ const agent = createMockAgent([{ success: true, estimatedCost: 0.01 }]);
290
+
291
+ const configWithLiteStrategy = {
292
+ ...DEFAULT_CONFIG,
293
+ tdd: { ...DEFAULT_CONFIG.tdd, strategy: "lite" as const },
294
+ };
295
+
296
+ const result = await runThreeSessionTdd({
297
+ agent,
298
+ story,
299
+ config: configWithLiteStrategy,
300
+ workdir: "/tmp/test",
301
+ modelTier: "balanced",
302
+ lite: true, // router sets this for lite strategy
303
+ });
304
+
305
+ expect(result.success).toBe(false);
306
+ expect(result.lite).toBe(true);
307
+ });
308
+ });
309
+
310
+ // ─── T4: failureCategory tests ────────────────────────────────────────────────
311
+