@nathapp/nax 0.21.0 → 0.22.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (210)
  1. package/.mcp.json +8 -0
  2. package/docs/ROADMAP.md +20 -5
  3. package/docs/adr/ADR-005-implementation-plan.md +655 -0
  4. package/docs/adr/ADR-005-pipeline-re-architecture.md +464 -0
  5. package/package.json +1 -1
  6. package/src/agents/claude.ts +44 -9
  7. package/src/config/types.ts +11 -0
  8. package/src/execution/dry-run.ts +81 -0
  9. package/src/execution/escalation/tier-outcome.ts +29 -44
  10. package/src/execution/executor-types.ts +65 -0
  11. package/src/execution/index.ts +0 -17
  12. package/src/execution/iteration-runner.ts +132 -0
  13. package/src/execution/lifecycle/index.ts +0 -1
  14. package/src/execution/lifecycle/run-regression.ts +5 -5
  15. package/src/execution/pipeline-result-handler.ts +51 -254
  16. package/src/execution/sequential-executor.ts +72 -316
  17. package/src/execution/story-selector.ts +75 -0
  18. package/src/pipeline/event-bus.ts +276 -0
  19. package/src/pipeline/runner.ts +51 -77
  20. package/src/pipeline/stages/autofix.ts +133 -0
  21. package/src/pipeline/stages/completion.ts +22 -30
  22. package/src/pipeline/stages/index.ts +30 -13
  23. package/src/pipeline/stages/rectify.ts +93 -0
  24. package/src/pipeline/stages/regression.ts +88 -0
  25. package/src/pipeline/stages/review.ts +19 -153
  26. package/src/pipeline/stages/verify.ts +18 -2
  27. package/src/pipeline/subscribers/hooks.ts +133 -0
  28. package/src/pipeline/subscribers/interaction.ts +68 -0
  29. package/src/pipeline/subscribers/reporters.ts +174 -0
  30. package/src/pipeline/types.ts +10 -1
  31. package/src/review/orchestrator.ts +105 -0
  32. package/src/tdd/prompts.ts +1 -1
  33. package/src/verification/index.ts +1 -1
  34. package/src/verification/orchestrator-types.ts +145 -0
  35. package/src/verification/orchestrator.ts +76 -0
  36. package/src/{execution/post-verify-rectification.ts → verification/rectification-loop.ts} +13 -20
  37. package/src/verification/{gate.ts → runners.ts} +17 -105
  38. package/src/verification/strategies/acceptance.ts +133 -0
  39. package/src/verification/strategies/regression.ts +90 -0
  40. package/src/verification/strategies/scoped.ts +123 -0
  41. package/test/COVERAGE-GAPS.md +333 -0
  42. package/test/{acceptance → e2e}/cm-003-default-view.test.ts +1 -0
  43. package/test/{integration/e2e.test.ts → e2e/plan-analyze-run.test.ts} +1 -0
  44. package/test/integration/{agent-validation.test.ts → cli/agent-validation.test.ts} +3 -3
  45. package/test/integration/{cli-config-default-edge-cases.test.ts → cli/cli-config-default-edge-cases.test.ts} +6 -5
  46. package/test/integration/{cli-config-default-view.test.ts → cli/cli-config-default-view.test.ts} +8 -7
  47. package/test/integration/{cli-config-diff.test.ts → cli/cli-config-diff.test.ts} +3 -2
  48. package/test/integration/{cli-config.test.ts → cli/cli-config.test.ts} +3 -2
  49. package/test/integration/{cli-diagnose.test.ts → cli/cli-diagnose.test.ts} +5 -4
  50. package/test/integration/{cli-logs.test.ts → cli/cli-logs.test.ts} +12 -3
  51. package/test/integration/{cli-plugins.test.ts → cli/cli-plugins.test.ts} +4 -3
  52. package/test/integration/{cli-precheck.test.ts → cli/cli-precheck.test.ts} +4 -3
  53. package/test/integration/{cli-run-headless.test.ts → cli/cli-run-headless.test.ts} +3 -2
  54. package/test/integration/{cli.test.ts → cli/cli.test.ts} +2 -1
  55. package/test/integration/{precheck-integration.test.ts → cli/precheck-integration.test.ts} +10 -9
  56. package/test/integration/{precheck-orchestrator.test.ts → cli/precheck-orchestrator.test.ts} +4 -3
  57. package/test/integration/{precheck.test.ts → cli/precheck.test.ts} +5 -4
  58. package/test/integration/{config-loader.test.ts → config/config-loader.test.ts} +2 -1
  59. package/test/integration/{config.test.ts → config/config.test.ts} +2 -2
  60. package/test/integration/config/merger.test.ts +1 -0
  61. package/test/integration/config/paths.test.ts +1 -0
  62. package/test/integration/{security-loader.test.ts → config/security-loader.test.ts} +2 -2
  63. package/test/integration/{context-integration.test.ts → context/context-integration.test.ts} +7 -6
  64. package/test/integration/{path-security.test.ts → context/context-path-security.test.ts} +2 -2
  65. package/test/integration/{context-provider-injection.test.ts → context/context-provider-injection.test.ts} +7 -6
  66. package/test/integration/{context-verification-integration.test.ts → context/context-verification-integration.test.ts} +5 -4
  67. package/test/integration/{s5-greenfield-fallback.test.ts → context/s5-greenfield-fallback.test.ts} +4 -3
  68. package/test/integration/{isolation.test.ts → execution/execution-isolation.test.ts} +1 -1
  69. package/test/integration/{execution.test.ts → execution/execution.test.ts} +8 -8
  70. package/test/integration/{parallel.test.ts → execution/parallel.test.ts} +2 -1
  71. package/test/integration/{prd-pause.test.ts → execution/prd-pause.test.ts} +2 -2
  72. package/test/integration/{prd-resolvers.test.ts → execution/prd-resolvers.test.ts} +3 -2
  73. package/test/integration/{progress.test.ts → execution/progress.test.ts} +1 -1
  74. package/test/integration/execution/runner-batching.test.ts +682 -0
  75. package/test/integration/{runner-config-plugins.test.ts → execution/runner-config-plugins.test.ts} +3 -2
  76. package/test/integration/execution/runner-escalation.test.ts +561 -0
  77. package/test/integration/{runner-fixes.test.ts → execution/runner-fixes.test.ts} +4 -3
  78. package/test/integration/{runner-plugin-integration.test.ts → execution/runner-plugin-integration.test.ts} +6 -5
  79. package/test/integration/execution/runner-queue-and-attempts.test.ts +476 -0
  80. package/test/integration/{status-file-integration.test.ts → execution/status-file-integration.test.ts} +9 -8
  81. package/test/integration/{status-file.test.ts → execution/status-file.test.ts} +3 -2
  82. package/test/integration/{status-writer.test.ts → execution/status-writer.test.ts} +5 -4
  83. package/test/integration/{story-id-in-events.test.ts → execution/story-id-in-events.test.ts} +9 -8
  84. package/test/integration/{interaction-chain-pipeline.test.ts → interaction/interaction-chain-pipeline.test.ts} +26 -14
  85. package/test/integration/{hooks.test.ts → pipeline/hooks.test.ts} +4 -2
  86. package/test/integration/{pipeline-acceptance.test.ts → pipeline/pipeline-acceptance.test.ts} +7 -6
  87. package/test/integration/{pipeline-events.test.ts → pipeline/pipeline-events.test.ts} +7 -6
  88. package/test/integration/{pipeline.test.ts → pipeline/pipeline.test.ts} +9 -7
  89. package/test/integration/{reporter-lifecycle.test.ts → pipeline/reporter-lifecycle.test.ts} +9 -7
  90. package/test/integration/{verify-stage.test.ts → pipeline/verify-stage.test.ts} +7 -5
  91. package/test/integration/{analyze-integration.test.ts → plan/analyze-integration.test.ts} +3 -2
  92. package/test/integration/{analyze-scanner.test.ts → plan/analyze-scanner.test.ts} +8 -7
  93. package/test/integration/{logger.test.ts → plan/logger.test.ts} +1 -1
  94. package/test/integration/{plan.test.ts → plan/plan.test.ts} +3 -3
  95. package/test/integration/plugins/config-integration.test.ts +1 -0
  96. package/test/integration/plugins/config-resolution.test.ts +1 -0
  97. package/test/integration/plugins/loader.test.ts +1 -0
  98. package/test/integration/plugins/{registry.test.ts → plugins-registry.test.ts} +1 -0
  99. package/test/integration/plugins/validator.test.ts +1 -0
  100. package/test/integration/{review-config-commands.test.ts → review/review-config-commands.test.ts} +4 -3
  101. package/test/integration/{review-config-schema.test.ts → review/review-config-schema.test.ts} +3 -2
  102. package/test/integration/{review-plugin-integration.test.ts → review/review-plugin-integration.test.ts} +5 -4
  103. package/test/integration/{review.test.ts → review/review.test.ts} +3 -2
  104. package/test/integration/routing/plugin-routing-advanced.test.ts +461 -0
  105. package/test/integration/{plugin-routing.test.ts → routing/plugin-routing-core.test.ts} +9 -403
  106. package/test/integration/{routing-stage-bug-021.test.ts → routing/routing-stage-bug-021.test.ts} +8 -7
  107. package/test/integration/{routing-stage-greenfield.test.ts → routing/routing-stage-greenfield.test.ts} +7 -6
  108. package/test/integration/{tdd-cleanup.test.ts → tdd/tdd-cleanup.test.ts} +1 -1
  109. package/test/integration/tdd/tdd-orchestrator-core.test.ts +565 -0
  110. package/test/integration/tdd/tdd-orchestrator-failureCategory.test.ts +355 -0
  111. package/test/integration/tdd/tdd-orchestrator-fallback.test.ts +311 -0
  112. package/test/integration/tdd/tdd-orchestrator-lite.test.ts +289 -0
  113. package/test/integration/tdd/tdd-orchestrator-prompts.test.ts +260 -0
  114. package/test/integration/tdd/tdd-orchestrator-verdict.test.ts +536 -0
  115. package/test/integration/tmp/headless-test/test.jsonl +30 -0
  116. package/test/integration/{test-scanner.test.ts → verification/test-scanner.test.ts} +1 -1
  117. package/test/integration/{verification-asset-check.test.ts → verification/verification-asset-check.test.ts} +3 -2
  118. package/test/unit/acceptance.test.ts +1 -0
  119. package/test/unit/agent-stderr-capture.test.ts +1 -0
  120. package/test/unit/agents/claude.test.ts +1 -0
  121. package/test/unit/analyze-classifier.test.ts +1 -0
  122. package/test/unit/auto-detect.test.ts +1 -0
  123. package/test/unit/cli-status.test.ts +1 -0
  124. package/test/unit/commands/common.test.ts +1 -0
  125. package/test/unit/commands/logs.test.ts +1 -0
  126. package/test/unit/commands/unlock.test.ts +1 -0
  127. package/test/unit/config/defaults.test.ts +1 -0
  128. package/test/unit/config/regression-gate-schema.test.ts +1 -0
  129. package/test/unit/config/smart-runner-flag.test.ts +1 -0
  130. package/test/unit/constitution-generators.test.ts +1 -0
  131. package/test/unit/constitution.test.ts +1 -0
  132. package/test/unit/context/context-autodetect.test.ts +297 -0
  133. package/test/unit/context/context-build.test.ts +575 -0
  134. package/test/unit/context/context-coverage.test.ts +236 -0
  135. package/test/unit/context/context-error.test.ts +93 -0
  136. package/test/unit/context/context-estimate-tokens.test.ts +201 -0
  137. package/test/unit/context/context-format.test.ts +302 -0
  138. package/test/unit/context/context-isolation.test.ts +267 -0
  139. package/test/unit/context/context-sort.test.ts +93 -0
  140. package/test/unit/context/context-story.test.ts +108 -0
  141. package/test/{context → unit/context}/prior-failures.test.ts +5 -4
  142. package/test/unit/context.test.ts +1 -0
  143. package/test/unit/crash-recovery.test.ts +1 -0
  144. package/test/unit/escalation.test.ts +1 -0
  145. package/test/unit/execution/lifecycle/run-completion.test.ts +1 -0
  146. package/test/unit/execution/lifecycle/run-regression.test.ts +2 -0
  147. package/test/{execution → unit/execution}/pid-registry.test.ts +2 -1
  148. package/test/{execution → unit/execution}/structured-failure.test.ts +3 -2
  149. package/test/unit/execution-logging-stderr.test.ts +1 -0
  150. package/test/unit/execution-stage.test.ts +1 -0
  151. package/test/unit/fix-generator.test.ts +1 -0
  152. package/test/unit/greenfield.test.ts +1 -0
  153. package/test/unit/interaction/human-review-trigger.test.ts +1 -0
  154. package/test/unit/interaction-network-failures.test.ts +1 -0
  155. package/test/unit/interaction-plugins.test.ts +1 -0
  156. package/test/unit/logging/formatter.test.ts +1 -0
  157. package/test/unit/merge.test.ts +1 -0
  158. package/test/unit/pipeline/event-bus.test.ts +105 -0
  159. package/test/unit/pipeline/routing-partial-override.test.ts +1 -0
  160. package/test/unit/pipeline/runner-retry.test.ts +89 -0
  161. package/test/unit/pipeline/stages/autofix.test.ts +97 -0
  162. package/test/unit/pipeline/stages/rectify.test.ts +101 -0
  163. package/test/unit/pipeline/stages/regression-stage.test.ts +69 -0
  164. package/test/unit/pipeline/stages/verify.test.ts +1 -0
  165. package/test/unit/pipeline/subscribers/hooks.test.ts +45 -0
  166. package/test/unit/pipeline/subscribers/interaction.test.ts +31 -0
  167. package/test/unit/pipeline/subscribers/reporters.test.ts +90 -0
  168. package/test/unit/pipeline/verify-smart-runner.test.ts +1 -0
  169. package/test/unit/prd-auto-default.test.ts +1 -0
  170. package/test/unit/prd-failure-category.test.ts +1 -0
  171. package/test/unit/prd-get-next-story.test.ts +1 -0
  172. package/test/unit/precheck-checks.test.ts +1 -0
  173. package/test/unit/precheck-story-size-gate.test.ts +1 -0
  174. package/test/unit/precheck-types.test.ts +1 -0
  175. package/test/unit/prompts.test.ts +1 -0
  176. package/test/unit/rectification.test.ts +2 -1
  177. package/test/unit/registry.test.ts +1 -0
  178. package/test/unit/routing/routing-stability.test.ts +1 -0
  179. package/test/unit/routing/strategies/llm.test.ts +1 -0
  180. package/test/unit/routing-advanced.test.ts +313 -0
  181. package/test/unit/routing-core.test.ts +341 -0
  182. package/test/unit/routing-strategies.test.ts +442 -0
  183. package/test/unit/storyid-events.test.ts +1 -0
  184. package/test/{ui → unit/ui}/tui-controls.test.ts +8 -7
  185. package/test/{ui → unit/ui}/tui-cost-and-pty.test.ts +4 -3
  186. package/test/{ui → unit/ui}/tui-layout.test.ts +5 -4
  187. package/test/{ui → unit/ui}/tui-stories.test.ts +5 -4
  188. package/test/unit/{isolation.test.ts → unit-isolation.test.ts} +1 -0
  189. package/test/unit/{helpers.test.ts → utils-helpers.test.ts} +1 -0
  190. package/test/unit/verdict.test.ts +1 -0
  191. package/test/unit/verification/orchestrator-types.test.ts +54 -0
  192. package/test/unit/verification/orchestrator.test.ts +66 -0
  193. package/test/unit/verification/smart-runner-config.test.ts +1 -0
  194. package/test/unit/verification/smart-runner-discovery.test.ts +8 -7
  195. package/test/unit/verification/strategies/acceptance.test.ts +33 -0
  196. package/test/unit/verification/strategies/regression.test.ts +87 -0
  197. package/test/unit/verification/strategies/scoped.test.ts +100 -0
  198. package/test/unit/worktree-manager.test.ts +1 -0
  199. package/src/execution/lifecycle/story-hooks.ts +0 -38
  200. package/src/execution/post-verify.ts +0 -193
  201. package/src/execution/rectification.ts +0 -13
  202. package/src/execution/verification.ts +0 -72
  203. package/test/integration/rectification-flow.test.ts +0 -512
  204. package/test/integration/runner.test.ts +0 -1679
  205. package/test/integration/tdd-orchestrator.test.ts +0 -1762
  206. package/test/unit/execution/post-verify-regression.test.ts +0 -362
  207. package/test/unit/execution/post-verify.test.ts +0 -236
  208. package/test/unit/routing.test.ts +0 -1039
  209. /package/test/{integration → helpers}/helpers.test.ts +0 -0
  210. /package/test/integration/worktree/{merge.test.ts → worktree-merge.test.ts} +0 -0
@@ -0,0 +1,565 @@
1
+ import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
2
+ import { existsSync } from "node:fs";
3
+ import { mkdir, rm, writeFile } from "node:fs/promises";
4
+ import path from "node:path";
5
+ import type { AgentAdapter, AgentResult } from "../../../src/agents";
6
+ import { DEFAULT_CONFIG } from "../../../src/config";
7
+ import type { UserStory } from "../../../src/prd";
8
+ import { runThreeSessionTdd } from "../../../src/tdd/orchestrator";
9
+ import { VERDICT_FILE } from "../../../src/tdd/verdict";
10
+
11
+ let originalSpawn: typeof Bun.spawn;
12
+
13
+ beforeEach(() => {
14
+ originalSpawn = Bun.spawn;
15
+ });
16
+
17
+ afterEach(() => {
18
+ Bun.spawn = originalSpawn;
19
+ });
20
+
21
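+ /** Create a mock agent whose run() returns the given results in call order; omitted fields — and calls beyond the end of the list — fall back to a successful default result */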
22
+ function createMockAgent(results: Partial<AgentResult>[]): AgentAdapter {
23
+ let callCount = 0;
24
+ return {
25
+ name: "mock",
26
+ displayName: "Mock Agent",
27
+ binary: "mock",
28
+ isInstalled: async () => true,
29
+ buildCommand: () => ["mock"],
30
+ run: mock(async () => {
31
+ const r = results[callCount] || {};
32
+ callCount++;
33
+ return {
34
+ success: r.success ?? true,
35
+ exitCode: r.exitCode ?? 0,
36
+ output: r.output ?? "",
37
+ rateLimited: r.rateLimited ?? false,
38
+ durationMs: r.durationMs ?? 100,
39
+ estimatedCost: r.estimatedCost ?? 0.01,
40
+ };
41
+ }),
42
+ };
43
+ }
44
+
45
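+ /** Mock Bun.spawn to intercept git commands (rev-parse, checkout, diff) and shell-wrapped commands; anything else falls through to the real Bun.spawn */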
46
+ function mockGitSpawn(opts: {
47
+ /** Files returned by each successive git diff call (indexed by git-diff call number, not by session) */
48
+ diffFiles: string[][];
49
+ /** Optional: mock test command success (default: true) */
50
+ testCommandSuccess?: boolean;
51
+ }) {
52
+ let revParseCount = 0;
53
+ let diffCount = 0;
54
+ const testSuccess = opts.testCommandSuccess ?? true;
55
+
56
+ // @ts-ignore — mocking global
57
+ Bun.spawn = mock((cmd: string[], spawnOpts?: any) => {
58
+ // Intercept anything run via `<shell> -c` — assumed to be the test command (bun test, npm test, etc.)
59
+ if ((cmd[0] === "/bin/sh" || cmd[0] === "/bin/bash" || cmd[0] === "/bin/zsh") && cmd[1] === "-c") {
60
+ return {
61
+ pid: 9999,
62
+ exited: Promise.resolve(testSuccess ? 0 : 1),
63
+ stdout: new Response(testSuccess ? "tests pass\n" : "tests fail\n").body,
64
+ stderr: new Response("").body,
65
+ };
66
+ }
67
+ if (cmd[0] === "git" && cmd[1] === "rev-parse") {
68
+ revParseCount++;
69
+ return {
70
+ exited: Promise.resolve(0),
71
+ stdout: new Response(`ref-${revParseCount}\n`).body,
72
+ stderr: new Response("").body,
73
+ };
74
+ }
75
+ if (cmd[0] === "git" && cmd[1] === "checkout") {
76
+ // Intercept git checkout (used in zero-file fallback) — silently succeed
77
+ return {
78
+ exited: Promise.resolve(0),
79
+ stdout: new Response("").body,
80
+ stderr: new Response("").body,
81
+ };
82
+ }
83
+ if (cmd[0] === "git" && cmd[1] === "diff") {
84
+ const files = opts.diffFiles[diffCount] || [];
85
+ diffCount++;
86
+ return {
87
+ exited: Promise.resolve(0),
88
+ stdout: new Response(files.join("\n") + "\n").body,
89
+ stderr: new Response("").body,
90
+ };
91
+ }
92
+ return originalSpawn(cmd, spawnOpts);
93
+ });
94
+ }
95
+
96
+ const story: UserStory = {
97
+ id: "US-001",
98
+ title: "Add user validation",
99
+ description: "Add validation to user input",
100
+ acceptanceCriteria: ["Validation works", "Errors are clear"],
101
+ dependencies: [],
102
+ tags: [],
103
+ status: "pending",
104
+ passes: false,
105
+ escalations: [],
106
+ attempts: 0,
107
+ };
108
+
109
+
110
+ describe("runThreeSessionTdd", () => {
111
+ test("happy path: all 3 sessions succeed", async () => {
112
+ // Git calls made while running the sessions (all intercepted by mockGitSpawn):
113
+ // - captureGitRef → one `git rev-parse` call per session
114
+ // - isolation check (test-writer and implementer only) → one `git diff` call,
115
+ // because verifyTestWriterIsolation / verifyImplementerIsolation call getChangedFiles internally
116
+ // - runTddSession (orchestrator.ts) then calls getChangedFiles itself → one more `git diff` call
117
+ // Resulting `git diff` call counts:
118
+ // Session 1: test-writer → isolation check (1 diff) + getChangedFiles (1 diff) = 2 diffs
119
+ // Session 2: implementer → isolation check (1 diff) + getChangedFiles (1 diff) = 2 diffs
120
+ // Session 3: verifier → no isolation check + getChangedFiles (1 diff) = 1 diff
121
+ // So per session with isolation: 2 diff calls. Without isolation (verifier): 1 diff call.
122
+ // Total: 2 + 2 + 1 = 5 diff calls, matching the five diffFiles entries below (in call order)
123
+ mockGitSpawn({
124
+ diffFiles: [
125
+ // Session 1 isolation check: test files only (OK)
126
+ ["test/user.test.ts"],
127
+ // Session 1 getChangedFiles
128
+ ["test/user.test.ts"],
129
+ // Session 2 isolation check: source files only (OK)
130
+ ["src/user.ts"],
131
+ // Session 2 getChangedFiles
132
+ ["src/user.ts"],
133
+ // Session 3 getChangedFiles (no isolation check for verifier)
134
+ ["src/user.ts"],
135
+ ],
136
+ });
137
+
138
+ const agent = createMockAgent([
139
+ { success: true, estimatedCost: 0.01 },
140
+ { success: true, estimatedCost: 0.02 },
141
+ { success: true, estimatedCost: 0.01 },
142
+ ]);
143
+
144
+ const result = await runThreeSessionTdd({
145
+ agent,
146
+ story,
147
+ config: DEFAULT_CONFIG,
148
+ workdir: "/tmp/test",
149
+ modelTier: "balanced",
150
+ });
151
+
152
+ expect(result.success).toBe(true);
153
+ expect(result.sessions).toHaveLength(3);
154
+ expect(result.sessions[0].role).toBe("test-writer");
155
+ expect(result.sessions[1].role).toBe("implementer");
156
+ expect(result.sessions[2].role).toBe("verifier");
157
+ expect(result.needsHumanReview).toBe(false);
158
+ expect(result.totalCost).toBe(0.04);
159
+ });
160
+
161
+ test("failure when test-writer session fails", async () => {
162
+ mockGitSpawn({
163
+ diffFiles: [["test/user.test.ts"], ["test/user.test.ts"]],
164
+ });
165
+
166
+ const agent = createMockAgent([{ success: false, exitCode: 1, estimatedCost: 0.01 }]);
167
+
168
+ const result = await runThreeSessionTdd({
169
+ agent,
170
+ story,
171
+ config: DEFAULT_CONFIG,
172
+ workdir: "/tmp/test",
173
+ modelTier: "balanced",
174
+ });
175
+
176
+ expect(result.success).toBe(false);
177
+ expect(result.sessions).toHaveLength(1);
178
+ expect(result.needsHumanReview).toBe(true);
179
+ });
180
+
181
+ test("failure when test-writer violates isolation", async () => {
182
+ mockGitSpawn({
183
+ diffFiles: [
184
+ // Isolation check: test-writer touched source files!
185
+ ["src/user.ts", "test/user.test.ts"],
186
+ // getChangedFiles
187
+ ["src/user.ts", "test/user.test.ts"],
188
+ ],
189
+ });
190
+
191
+ const agent = createMockAgent([{ success: true, estimatedCost: 0.01 }]);
192
+
193
+ const result = await runThreeSessionTdd({
194
+ agent,
195
+ story,
196
+ config: DEFAULT_CONFIG,
197
+ workdir: "/tmp/test",
198
+ modelTier: "balanced",
199
+ });
200
+
201
+ expect(result.success).toBe(false);
202
+ expect(result.sessions).toHaveLength(1);
203
+ expect(result.sessions[0].success).toBe(false);
204
+ expect(result.needsHumanReview).toBe(true);
205
+ });
206
+
207
+ test("failure when implementer session fails", async () => {
208
+ mockGitSpawn({
209
+ diffFiles: [
210
+ // Session 1 isolation: OK
211
+ ["test/user.test.ts"],
212
+ // Session 1 getChangedFiles
213
+ ["test/user.test.ts"],
214
+ // Session 2 isolation: OK
215
+ ["src/user.ts"],
216
+ // Session 2 getChangedFiles
217
+ ["src/user.ts"],
218
+ ],
219
+ });
220
+
221
+ const agent = createMockAgent([
222
+ { success: true, estimatedCost: 0.01 },
223
+ { success: false, exitCode: 1, estimatedCost: 0.02 },
224
+ ]);
225
+
226
+ const result = await runThreeSessionTdd({
227
+ agent,
228
+ story,
229
+ config: DEFAULT_CONFIG,
230
+ workdir: "/tmp/test",
231
+ modelTier: "balanced",
232
+ });
233
+
234
+ expect(result.success).toBe(false);
235
+ expect(result.sessions).toHaveLength(2);
236
+ expect(result.needsHumanReview).toBe(true);
237
+ });
238
+
239
+ test("implementer touching test files is a warning (soft-pass), not failure", async () => {
240
+ mockGitSpawn({
241
+ diffFiles: [
242
+ // Session 1 isolation: OK
243
+ ["test/user.test.ts"],
244
+ // Session 1 getChangedFiles
245
+ ["test/user.test.ts"],
246
+ // Session 2 isolation: implementer touched tests (warning, not violation)
247
+ ["test/user.test.ts", "src/user.ts"],
248
+ // Session 2 getChangedFiles
249
+ ["test/user.test.ts", "src/user.ts"],
250
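+ // Session 3 getChangedFiles (per the happy-path test, the verifier has no isolation check)
251
+ [],
252
+ // Spare entry — presumably unused, since the verifier makes only one diff call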
253
+ [],
254
+ ],
255
+ });
256
+
257
+ const agent = createMockAgent([
258
+ { success: true, estimatedCost: 0.01 },
259
+ { success: true, estimatedCost: 0.02 },
260
+ { success: true, estimatedCost: 0.01 },
261
+ ]);
262
+
263
+ const result = await runThreeSessionTdd({
264
+ agent,
265
+ story,
266
+ config: DEFAULT_CONFIG,
267
+ workdir: "/tmp/test",
268
+ modelTier: "balanced",
269
+ });
270
+
271
+ // v0.9.2: implementer touching test files is a warning, not a failure
272
+ expect(result.sessions).toHaveLength(3);
273
+ expect(result.sessions[1].success).toBe(true);
274
+ expect(result.sessions[1].isolation?.warnings).toContain("test/user.test.ts");
275
+ expect(result.success).toBe(true);
276
+ });
277
+
278
+ test("dry-run mode logs sessions without executing", async () => {
279
+ const agent = createMockAgent([]);
280
+
281
+ const result = await runThreeSessionTdd({
282
+ agent,
283
+ story,
284
+ config: DEFAULT_CONFIG,
285
+ workdir: "/tmp/test",
286
+ modelTier: "balanced",
287
+ dryRun: true,
288
+ });
289
+
290
+ expect(result.success).toBe(true);
291
+ expect(result.sessions).toHaveLength(0);
292
+ expect(result.needsHumanReview).toBe(false);
293
+ expect(result.totalCost).toBe(0);
294
+ // Agent should not have been called
295
+ expect(agent.run).not.toHaveBeenCalled();
296
+ });
297
+
298
+ test("dry-run mode works with context markdown", async () => {
299
+ const agent = createMockAgent([]);
300
+ const contextMarkdown = "## Dependencies\n- US-000: Setup database\n";
301
+
302
+ const result = await runThreeSessionTdd({
303
+ agent,
304
+ story,
305
+ config: DEFAULT_CONFIG,
306
+ workdir: "/tmp/test",
307
+ modelTier: "powerful",
308
+ contextMarkdown,
309
+ dryRun: true,
310
+ });
311
+
312
+ expect(result.success).toBe(true);
313
+ expect(result.sessions).toHaveLength(0);
314
+ expect(result.totalCost).toBe(0);
315
+ // Agent should not have been called
316
+ expect(agent.run).not.toHaveBeenCalled();
317
+ });
318
+
319
+ test("BUG-22: post-TDD verification overrides session failures when tests pass", async () => {
320
+ // Scenario: All 3 sessions complete but verifier has non-zero exit code
321
+ // However, when we run tests independently, they pass
322
+ // Expected: allSuccessful should be overridden to true
323
+
324
+ let testCommandCalled = false;
325
+ let revParseCount = 0;
326
+ let diffCount = 0;
327
+
328
+ const diffFiles = [
329
+ // Session 1 isolation + getChangedFiles
330
+ ["test/user.test.ts"],
331
+ ["test/user.test.ts"],
332
+ // Session 2 isolation + getChangedFiles
333
+ ["src/user.ts"],
334
+ ["src/user.ts"],
335
+ // Session 3 getChangedFiles
336
+ ["src/user.ts"],
337
+ ];
338
+
339
+ // @ts-ignore — mocking global
340
+ Bun.spawn = mock((cmd: string[], spawnOpts?: any) => {
341
+ // Intercept the post-TDD test command (bun test)
342
+ if (cmd[0] === "/bin/sh" && cmd[2]?.includes("bun test")) {
343
+ testCommandCalled = true;
344
+ return {
345
+ pid: 9999,
346
+ exited: Promise.resolve(0), // Tests pass!
347
+ stdout: new Response("5 pass, 0 fail\n").body,
348
+ stderr: new Response("").body,
349
+ };
350
+ }
351
+ // Git rev-parse
352
+ if (cmd[0] === "git" && cmd[1] === "rev-parse") {
353
+ revParseCount++;
354
+ return {
355
+ exited: Promise.resolve(0),
356
+ stdout: new Response(`ref-${revParseCount}\n`).body,
357
+ stderr: new Response("").body,
358
+ };
359
+ }
360
+ // Git diff
361
+ if (cmd[0] === "git" && cmd[1] === "diff") {
362
+ const files = diffFiles[diffCount] || [];
363
+ diffCount++;
364
+ return {
365
+ exited: Promise.resolve(0),
366
+ stdout: new Response(files.join("\n") + "\n").body,
367
+ stderr: new Response("").body,
368
+ };
369
+ }
370
+ return originalSpawn(cmd, spawnOpts);
371
+ });
372
+
373
+ const agent = createMockAgent([
374
+ { success: true, estimatedCost: 0.01 }, // test-writer succeeds
375
+ { success: true, estimatedCost: 0.02 }, // implementer succeeds
376
+ { success: false, exitCode: 1, estimatedCost: 0.01 }, // verifier fails (e.g., fixed issues)
377
+ ]);
378
+
379
+ const result = await runThreeSessionTdd({
380
+ agent,
381
+ story,
382
+ config: DEFAULT_CONFIG,
383
+ workdir: "/tmp/test",
384
+ modelTier: "balanced",
385
+ });
386
+
387
+ // Assertions
388
+ expect(testCommandCalled).toBe(true); // Post-TDD test was executed
389
+ expect(result.sessions).toHaveLength(3);
390
+ expect(result.sessions[2].success).toBe(false); // Verifier session itself failed
391
+ expect(result.success).toBe(true); // But overall result is success (overridden)
392
+ expect(result.needsHumanReview).toBe(false); // No human review needed
393
+ expect(result.reviewReason).toBeUndefined();
394
+ });
395
+
396
+ test("BUG-20: failure when test-writer creates no test files", async () => {
397
+ // Scenario: Test-writer session succeeds and passes isolation but creates no test files
398
+ // (e.g., creates requirements.md instead)
399
+ // Expected: Should fail with needsHumanReview and specific reason
400
+ mockGitSpawn({
401
+ diffFiles: [
402
+ // Isolation check: only non-test files
403
+ ["requirements.md", "docs/plan.md"],
404
+ // getChangedFiles
405
+ ["requirements.md", "docs/plan.md"],
406
+ ],
407
+ });
408
+
409
+ const agent = createMockAgent([
410
+ { success: true, estimatedCost: 0.01 }, // test-writer succeeds but creates wrong files
411
+ ]);
412
+
413
+ const result = await runThreeSessionTdd({
414
+ agent,
415
+ story,
416
+ config: DEFAULT_CONFIG,
417
+ workdir: "/tmp/test",
418
+ modelTier: "balanced",
419
+ });
420
+
421
+ expect(result.success).toBe(false);
422
+ expect(result.sessions).toHaveLength(1); // Should stop after session 1
423
+ expect(result.needsHumanReview).toBe(true);
424
+ expect(result.reviewReason).toBe("Test writer session created no test files (greenfield project)");
425
+ });
426
+
427
+ test("BUG-20: failure when test-writer creates zero files", async () => {
428
+ // Scenario: Test-writer session succeeds but creates no files at all
429
+ // Expected: Should fail with needsHumanReview
430
+ mockGitSpawn({
431
+ diffFiles: [
432
+ // Isolation check: no files
433
+ [],
434
+ // getChangedFiles: no files
435
+ [],
436
+ ],
437
+ });
438
+
439
+ const agent = createMockAgent([
440
+ { success: true, estimatedCost: 0.01 }, // test-writer succeeds but creates nothing
441
+ ]);
442
+
443
+ const result = await runThreeSessionTdd({
444
+ agent,
445
+ story,
446
+ config: DEFAULT_CONFIG,
447
+ workdir: "/tmp/test",
448
+ modelTier: "balanced",
449
+ });
450
+
451
+ expect(result.success).toBe(false);
452
+ expect(result.sessions).toHaveLength(1);
453
+ expect(result.needsHumanReview).toBe(true);
454
+ expect(result.reviewReason).toBe("Test writer session created no test files (greenfield project)");
455
+ });
456
+
457
+ test("BUG-20: success when test-writer creates test files with various extensions", async () => {
458
+ // Scenario: Test-writer creates test files with different valid extensions
459
+ // Expected: Should succeed and continue to session 2
460
+ mockGitSpawn({
461
+ diffFiles: [
462
+ // Isolation check: various test file formats
463
+ ["test/user.test.ts", "test/auth.spec.js", "test/api.test.tsx"],
464
+ // getChangedFiles
465
+ ["test/user.test.ts", "test/auth.spec.js", "test/api.test.tsx"],
466
+ // Session 2 isolation
467
+ ["src/user.ts", "src/auth.js"],
468
+ // Session 2 getChangedFiles
469
+ ["src/user.ts", "src/auth.js"],
470
+ // Session 3 getChangedFiles
471
+ ["src/user.ts"],
472
+ ],
473
+ });
474
+
475
+ const agent = createMockAgent([
476
+ { success: true, estimatedCost: 0.01 },
477
+ { success: true, estimatedCost: 0.02 },
478
+ { success: true, estimatedCost: 0.01 },
479
+ ]);
480
+
481
+ const result = await runThreeSessionTdd({
482
+ agent,
483
+ story,
484
+ config: DEFAULT_CONFIG,
485
+ workdir: "/tmp/test",
486
+ modelTier: "balanced",
487
+ });
488
+
489
+ expect(result.success).toBe(true);
490
+ expect(result.sessions).toHaveLength(3); // All sessions run
491
+ expect(result.needsHumanReview).toBe(false);
492
+ });
493
+
494
+ test("BUG-22: post-TDD verification does not override when tests actually fail", async () => {
495
+ // Scenario: Sessions complete with failures AND independent test run also fails
496
+ // Expected: Result should remain failed
497
+
498
+ let testCommandCalled = false;
499
+ let revParseCount = 0;
500
+ let diffCount = 0;
501
+
502
+ const diffFiles = [["test/user.test.ts"], ["test/user.test.ts"], ["src/user.ts"], ["src/user.ts"], ["src/user.ts"]];
503
+
504
+ // @ts-ignore — mocking global
505
+ Bun.spawn = mock((cmd: string[], spawnOpts?: any) => {
506
+ if (cmd[0] === "/bin/sh" && cmd[2]?.includes("bun test")) {
507
+ testCommandCalled = true;
508
+ return {
509
+ pid: 9999,
510
+ exited: Promise.resolve(1), // Tests FAIL!
511
+ stdout: new Response("3 pass, 2 fail\n").body,
512
+ stderr: new Response("Test errors...\n").body,
513
+ };
514
+ }
515
+ if (cmd[0] === "git" && cmd[1] === "rev-parse") {
516
+ revParseCount++;
517
+ return {
518
+ exited: Promise.resolve(0),
519
+ stdout: new Response(`ref-${revParseCount}\n`).body,
520
+ stderr: new Response("").body,
521
+ };
522
+ }
523
+ if (cmd[0] === "git" && cmd[1] === "diff") {
524
+ const files = diffFiles[diffCount] || [];
525
+ diffCount++;
526
+ return {
527
+ exited: Promise.resolve(0),
528
+ stdout: new Response(files.join("\n") + "\n").body,
529
+ stderr: new Response("").body,
530
+ };
531
+ }
532
+ return originalSpawn(cmd, spawnOpts);
533
+ });
534
+
535
+ const agent = createMockAgent([
536
+ { success: true, estimatedCost: 0.01 },
537
+ { success: true, estimatedCost: 0.02 },
538
+ { success: false, exitCode: 1, estimatedCost: 0.01 }, // verifier fails
539
+ ]);
540
+
541
+ const result = await runThreeSessionTdd({
542
+ agent,
543
+ story,
544
+ config: DEFAULT_CONFIG,
545
+ workdir: "/tmp/test",
546
+ modelTier: "balanced",
547
+ });
548
+
549
+ expect(testCommandCalled).toBe(true);
550
+ expect(result.success).toBe(false); // Should remain failed
551
+ expect(result.needsHumanReview).toBe(true); // Needs review
552
+ expect(result.reviewReason).toBeDefined();
553
+ });
554
+ });
555
+
556
+ // ─── Lite-mode prompt tests ───────────────────────────────────────────────────
557
+
558
+ import {
559
+ buildImplementerLitePrompt,
560
+ buildImplementerPrompt,
561
+ buildTestWriterLitePrompt,
562
+ buildTestWriterPrompt,
563
+ buildVerifierPrompt,
564
+ } from "../../../src/tdd/prompts";
565
+