@nathapp/nax 0.18.2 → 0.18.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/rules/01-project-conventions.md +34 -0
- package/.claude/rules/02-test-architecture.md +39 -0
- package/.claude/rules/03-test-writing.md +58 -0
- package/.claude/rules/04-forbidden-patterns.md +29 -0
- package/.githooks/pre-commit +13 -0
- package/.gitlab-ci.yml +11 -5
- package/CHANGELOG.md +9 -0
- package/CLAUDE.md +45 -122
- package/bun.lock +1 -1
- package/bunfig.toml +2 -1
- package/docker-compose.test.yml +15 -0
- package/docs/ROADMAP.md +83 -14
- package/docs/specs/verification-architecture-v2.md +343 -0
- package/nax/config.json +7 -7
- package/nax/features/v0.18.3-execution-reliability/prd.json +80 -0
- package/nax/features/v0.18.3-execution-reliability/progress.txt +3 -0
- package/package.json +2 -2
- package/src/config/defaults.ts +1 -0
- package/src/config/schema.ts +1 -0
- package/src/config/schemas.ts +26 -1
- package/src/config/types.ts +21 -4
- package/src/context/builder.ts +11 -0
- package/src/context/elements.ts +38 -1
- package/src/execution/escalation/tier-escalation.ts +28 -3
- package/src/execution/post-verify-rectification.ts +4 -2
- package/src/execution/post-verify.ts +102 -20
- package/src/execution/progress.ts +2 -0
- package/src/pipeline/stages/execution.ts +10 -2
- package/src/pipeline/stages/review.ts +5 -3
- package/src/pipeline/stages/routing.ts +28 -9
- package/src/pipeline/stages/verify.ts +49 -8
- package/src/prd/index.ts +16 -1
- package/src/prd/types.ts +33 -0
- package/src/routing/strategies/keyword.ts +7 -4
- package/src/routing/strategies/llm.ts +45 -4
- package/src/verification/gate.ts +2 -1
- package/src/verification/smart-runner.ts +68 -0
- package/src/verification/types.ts +2 -0
- package/test/context/prior-failures.test.ts +462 -0
- package/test/execution/structured-failure.test.ts +414 -0
- package/test/integration/logger.test.ts +1 -1
- package/test/{US-002-orchestrator.test.ts → integration/precheck-orchestrator.test.ts} +3 -3
- package/test/integration/review-plugin-integration.test.ts +2 -1
- package/test/integration/story-id-in-events.test.ts +1 -1
- package/test/unit/config/smart-runner-flag.test.ts +36 -12
- package/test/unit/execution/post-verify-regression.test.ts +415 -0
- package/test/{execution → unit/execution}/post-verify.test.ts +33 -1
- package/test/unit/pipeline/routing-partial-override.test.ts +15 -36
- package/test/unit/pipeline/verify-smart-runner.test.ts +8 -6
- package/test/unit/prd-get-next-story.test.ts +28 -0
- package/test/unit/routing/routing-stability.test.ts +207 -0
- package/test/unit/routing.test.ts +102 -0
- package/test/unit/storyid-events.test.ts +20 -32
- package/test/unit/verification/smart-runner-config.test.ts +162 -0
- package/test/unit/verification/smart-runner-discovery.test.ts +353 -0
- package/test/TEST_COVERAGE_US001.md +0 -217
- package/test/TEST_COVERAGE_US003.md +0 -84
- package/test/TEST_COVERAGE_US005.md +0 -86
|
@@ -0,0 +1,415 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BUG-026: Regression gate timeout accepts scoped pass instead of escalating
|
|
3
|
+
*
|
|
4
|
+
* Tests that runRegressionGate (via runPostAgentVerification):
|
|
5
|
+
* - Returns passed when regression gate TIMES OUT and acceptOnTimeout=true (default)
|
|
6
|
+
* - Returns failed when regression gate TIMES OUT and acceptOnTimeout=false
|
|
7
|
+
* - Returns failed when regression gate returns TEST_FAILURE (existing behavior unchanged)
|
|
8
|
+
* - Defaults acceptOnTimeout to true when not set in config
|
|
9
|
+
*
|
|
10
|
+
* These are behavioral tests that call the actual function with mocked dependencies.
|
|
11
|
+
* They complement the type-level tests already in post-verify.test.ts.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
|
15
|
+
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
|
|
16
|
+
import { join } from "node:path";
|
|
17
|
+
import { tmpdir } from "node:os";
|
|
18
|
+
import type { NaxConfig } from "../../../src/config";
|
|
19
|
+
import type { PRD, UserStory } from "../../../src/prd/types";
|
|
20
|
+
import type { StoryMetrics } from "../../../src/metrics";
|
|
21
|
+
import type { VerificationResult } from "../../../src/verification";
|
|
22
|
+
|
|
23
|
+
// ---------------------------------------------------------------------------
|
|
24
|
+
// Mock runVerification with call-order-based responses
|
|
25
|
+
// ---------------------------------------------------------------------------
|
|
26
|
+
|
|
27
|
+
type VerResult = Pick<VerificationResult, "success" | "status" | "countsTowardEscalation" | "output" | "error">;
|
|
28
|
+
|
|
29
|
+
let _verificationResponses: VerResult[] = [];
|
|
30
|
+
let _verificationCallIndex = 0;
|
|
31
|
+
|
|
32
|
+
/**
 * Stand-in for `runVerification` that replays `_verificationResponses` in call order.
 *
 * Call N resolves to entry N of the queue; once the queue is exhausted the last
 * entry is repeated. `_verificationCallIndex` advances on every successful call.
 *
 * @throws Error when the queue is empty, so a test that forgot to queue a
 *   response fails loudly instead of handing `undefined` to the code under test.
 */
const mockRunVerification = mock(async (): Promise<VerResult> => {
  const queued = _verificationResponses[_verificationCallIndex];
  const lastQueued = _verificationResponses[_verificationResponses.length - 1];
  const resp = queued ?? lastQueued;
  if (resp === undefined) {
    throw new Error("mockRunVerification called with no queued _verificationResponses");
  }
  _verificationCallIndex++;
  return resp;
});
|
|
39
|
+
|
|
40
|
+
// Stub for revertStoriesOnFailure: hands the incoming PRD back untouched.
const mockRevertStoriesOnFailure = mock(async (args: { prd: PRD; [k: string]: unknown }) => {
  return args.prd;
});

// Stub for runRectificationLoop: always resolves to false.
const mockRunRectificationLoop = mock(async () => {
  return false;
});
|
|
42
|
+
|
|
43
|
+
// ---------------------------------------------------------------------------
|
|
44
|
+
// Static imports — uses _postVerifyDeps pattern (no mock.module() needed)
|
|
45
|
+
// ---------------------------------------------------------------------------
|
|
46
|
+
|
|
47
|
+
import { _postVerifyDeps, runPostAgentVerification } from "../../../src/execution/post-verify";
|
|
48
|
+
|
|
49
|
+
// ── Capture originals for afterEach restoration ───────────────────────────────
// Shallow snapshot taken when this module is evaluated, i.e. before beforeEach
// overwrites the entries of _postVerifyDeps with mocks.
const _origPostVerifyDeps = Object.assign({}, _postVerifyDeps);
|
|
51
|
+
|
|
52
|
+
// ---------------------------------------------------------------------------
|
|
53
|
+
// Fixtures
|
|
54
|
+
// ---------------------------------------------------------------------------
|
|
55
|
+
|
|
56
|
+
/**
 * Run a git command synchronously in a directory using Bun-native spawn.
 *
 * @param args - Arguments passed to `git` (e.g. `["commit", "-m", "msg"]`).
 * @param cwd - Directory the command runs in.
 * @throws Error when git exits non-zero. The message names the subcommand and
 *   directory and, when git wrote anything to stderr, appends that text so the
 *   failure can be diagnosed from the test output.
 */
function gitSync(args: string[], cwd: string): void {
  // stderr is captured (not ignored) purely so it can be surfaced on failure.
  const proc = Bun.spawnSync(["git", ...args], { cwd, stdin: "ignore", stdout: "ignore", stderr: "pipe" });
  if (proc.exitCode !== 0) {
    const detail = new TextDecoder().decode(proc.stderr).trim();
    throw new Error(`git ${args[0]} failed in ${cwd}${detail ? `: ${detail}` : ""}`);
  }
}
|
|
63
|
+
|
|
64
|
+
/**
 * Run a git command in a directory and return its stdout, trimmed.
 *
 * @param args - Arguments passed to `git` (e.g. `["rev-parse", "HEAD"]`).
 * @param cwd - Directory the command runs in.
 * @returns The command's stdout decoded as text with surrounding whitespace removed.
 * @throws Error when git exits non-zero, so a failed lookup can never be
 *   mistaken for an empty-string result.
 */
function gitOutput(args: string[], cwd: string): string {
  const proc = Bun.spawnSync(["git", ...args], { cwd, stdin: "ignore", stdout: "pipe", stderr: "pipe" });
  if (proc.exitCode !== 0) {
    const detail = new TextDecoder().decode(proc.stderr).trim();
    throw new Error(`git ${args[0]} failed in ${cwd}${detail ? `: ${detail}` : ""}`);
  }
  return new TextDecoder().decode(proc.stdout).trim();
}
|
|
69
|
+
|
|
70
|
+
/**
 * Create a temp git repo with two commits so that `git diff storyGitRef HEAD`
 * returns at least one test file — needed for the regression gate to activate.
 *
 * The first commit contains only `src.ts`; the second adds
 * `test/example.test.ts`.
 *
 * @returns `dir` — path of the new temp repo (the caller removes it);
 *   `storyGitRef` — `git rev-parse HEAD` output taken after the first commit.
 * @throws Error from `gitSync` / `gitOutput` when a git step fails; the temp
 *   directory is removed before the error is rethrown.
 */
function makeGitRepo(): { dir: string; storyGitRef: string } {
  const dir = mkdtempSync(join(tmpdir(), "nax-bug026-"));

  try {
    gitSync(["init"], dir);
    gitSync(["config", "user.email", "test@example.com"], dir);
    gitSync(["config", "user.name", "test"], dir);
    // Keep `git commit` working on machines whose global config enforces signing.
    gitSync(["config", "commit.gpgsign", "false"], dir);

    // Initial commit → becomes storyGitRef
    writeFileSync(join(dir, "src.ts"), "export const x = 1;");
    gitSync(["add", "."], dir);
    gitSync(["commit", "-m", "initial"], dir);
    const storyGitRef = gitOutput(["rev-parse", "HEAD"], dir);

    // Second commit: adds a test file (changed after storyGitRef)
    mkdirSync(join(dir, "test"), { recursive: true });
    writeFileSync(
      join(dir, "test", "example.test.ts"),
      'import { test, expect } from "bun:test";\ntest("x", () => expect(1).toBe(1));',
    );
    gitSync(["add", "."], dir);
    gitSync(["commit", "-m", "add test"], dir);

    return { dir, storyGitRef };
  } catch (err) {
    // Nothing is returned on failure, so the caller could never clean this up.
    rmSync(dir, { recursive: true, force: true });
    throw err;
  }
}
|
|
98
|
+
|
|
99
|
+
/**
 * Build the config fixture used by these tests.
 *
 * @param regressionGateOverrides - Fields layered on top of the base
 *   `execution.regressionGate` values (`enabled: true`, `timeoutSeconds: 120`).
 * @returns The fixture object, cast to `NaxConfig`.
 */
function makeConfig(
  regressionGateOverrides: Partial<NaxConfig["execution"]["regressionGate"]> = {},
): NaxConfig {
  const models = {
    fast: "claude-sonnet-4-5",
    balanced: "claude-sonnet-4-5",
    powerful: "claude-opus-4-6",
  };

  const autoMode = {
    enabled: true,
    defaultAgent: "nax-agent-claude",
    fallbackOrder: ["nax-agent-claude"],
    complexityRouting: { simple: "fast", medium: "balanced", complex: "powerful", expert: "powerful" },
    escalation: { enabled: true, tierOrder: [] },
  };

  // Overrides are applied last so they win over the base gate settings.
  const regressionGate = Object.assign({ enabled: true, timeoutSeconds: 120 }, regressionGateOverrides);

  const execution = {
    maxIterations: 100,
    iterationDelayMs: 0,
    costLimit: 50,
    sessionTimeoutSeconds: 600,
    verificationTimeoutSeconds: 30,
    maxStoriesPerFeature: 50,
    smartTestRunner: false,
    rectification: {
      enabled: false,
      maxRetries: 2,
      fullSuiteTimeoutSeconds: 120,
      maxFailureSummaryChars: 2000,
      abortOnIncreasingFailures: true,
    },
    regressionGate,
    contextProviderTokenBudget: 2000,
  };

  const quality = {
    requireTypecheck: false,
    requireLint: false,
    requireTests: true,
    commands: { test: "bun test" },
    forceExit: false,
    detectOpenHandles: false,
    detectOpenHandlesRetries: 0,
    gracePeriodMs: 0,
    drainTimeoutMs: 0,
    shell: false,
    stripEnvVars: [],
    environmentalEscalationDivisor: 3,
  };

  const tdd = {
    maxRetries: 2,
    autoVerifyIsolation: false,
    strategy: "off",
    autoApproveVerifier: false,
  };

  const analyze = {
    llmEnhanced: false,
    model: "balanced",
    fallbackToKeywords: true,
    maxCodebaseSummaryTokens: 4000,
  };

  const context = {
    testCoverage: {
      enabled: false,
      detail: "names-only",
      maxTokens: 500,
      testPattern: "**/*.test.ts",
      scopeToStory: false,
    },
    autoDetect: { enabled: false, maxFiles: 10, traceImports: false },
  };

  const fixture = {
    version: 1,
    models,
    autoMode,
    execution,
    quality,
    tdd,
    constitution: { enabled: false, path: "constitution.md", maxTokens: 2000 },
    analyze,
    review: { enabled: false, checks: [], commands: {} },
    plan: { model: "balanced", outputPath: "features" },
    acceptance: { enabled: false, maxRetries: 2, generateTests: false, testPath: "acceptance.test.ts" },
    routing: { strategy: "keyword" },
    context,
  };

  return fixture as unknown as NaxConfig;
}
|
|
189
|
+
|
|
190
|
+
/**
 * Build an in-progress story fixture with zero attempts and empty lists.
 *
 * @param id - Story id; defaults to `"US-001"`.
 * @returns The fixture object, cast to `UserStory`.
 */
function makeStory(id = "US-001"): UserStory {
  const story = {
    id,
    title: "Test story",
    description: "Test",
    acceptanceCriteria: [],
    tags: [],
    dependencies: [],
    status: "in-progress",
    passes: false,
    escalations: [],
    attempts: 0,
    contextFiles: [],
  };
  return story as unknown as UserStory;
}
|
|
205
|
+
|
|
206
|
+
/**
 * Wrap a single story in a PRD fixture.
 *
 * @param story - The only entry placed in `userStories`.
 * @returns The fixture object, cast to `PRD`; `createdAt` and `updatedAt` are
 *   each stamped with the current time as an ISO string.
 */
function makePRD(story: UserStory): PRD {
  const createdAt = new Date().toISOString();
  const updatedAt = new Date().toISOString();
  const prd = {
    id: "prd-001",
    title: "Test PRD",
    userStories: [story],
    version: "1.0",
    createdAt,
    updatedAt,
  };
  return prd as unknown as PRD;
}
|
|
216
|
+
|
|
217
|
+
/**
 * Assemble the options object passed to `runPostAgentVerification` in these tests.
 *
 * @param workdir - Working directory; `prdPath` is `<workdir>/prd.json`.
 * @param storyGitRef - Git ref forwarded as-is.
 * @param config - Config fixture forwarded as-is.
 * @param story - Story under test; also the sole entry of `storiesToExecute`.
 * @param prd - PRD fixture forwarded as-is.
 * @returns The options object, with an empty metrics list and an empty
 *   timeout-retry map.
 */
function makeOpts(
  workdir: string,
  storyGitRef: string,
  config: NaxConfig,
  story: UserStory,
  prd: PRD,
) {
  const prdPath = join(workdir, "prd.json");
  const allStoryMetrics: StoryMetrics[] = [];
  const timeoutRetryCountMap = new Map<string, number>();

  return {
    config,
    prd,
    prdPath,
    workdir,
    story,
    storiesToExecute: [story],
    allStoryMetrics,
    timeoutRetryCountMap,
    storyGitRef,
  };
}
|
|
236
|
+
|
|
237
|
+
// ---------------------------------------------------------------------------
|
|
238
|
+
// Test lifecycle
|
|
239
|
+
// ---------------------------------------------------------------------------
|
|
240
|
+
|
|
241
|
+
let tempDir: string;
|
|
242
|
+
let storyGitRef: string;
|
|
243
|
+
|
|
244
|
+
beforeEach(() => {
  // Start every test with an empty response queue and cleared call records.
  _verificationResponses = [];
  _verificationCallIndex = 0;
  for (const fn of [mockRunVerification, mockRevertStoriesOnFailure, mockRunRectificationLoop]) {
    fn.mockClear();
  }

  // Wire _postVerifyDeps to mocks
  Object.assign(_postVerifyDeps, {
    runVerification: mockRunVerification,
    parseTestOutput: () => ({ passCount: 5, failCount: 0, isEnvironmentalFailure: false }),
    getEnvironmentalEscalationThreshold: () => 3,
    revertStoriesOnFailure: mockRevertStoriesOnFailure,
    runRectificationLoop: mockRunRectificationLoop,
    getExpectedFiles: () => [],
    savePRD: mock(async () => {}),
    appendProgress: mock(async () => {}),
    getTierConfig: () => undefined,
    parseBunTestOutput: () => ({ failed: 0, passed: 5, failures: [] }),
  });

  // Fresh two-commit repo per test; afterEach removes it.
  const { dir, storyGitRef: firstCommit } = makeGitRepo();
  tempDir = dir;
  storyGitRef = firstCommit;
});
|
|
266
|
+
|
|
267
|
+
afterEach(() => {
  // Put the real dependencies back so other test files see an unmocked module.
  Object.assign(_postVerifyDeps, _origPostVerifyDeps);
  mock.restore();
  // tempDir is still unset if makeGitRepo() threw before beforeEach assigned it;
  // skip removal then so cleanup does not raise a second, unrelated error.
  if (tempDir) {
    rmSync(tempDir, { recursive: true, force: true });
  }
});
|
|
272
|
+
|
|
273
|
+
// ---------------------------------------------------------------------------
|
|
274
|
+
// BUG-026 behavioral tests
|
|
275
|
+
// ---------------------------------------------------------------------------
|
|
276
|
+
|
|
277
|
+
describe("BUG-026: regression gate TIMEOUT acceptance", () => {
  type GateOverrides = Parameters<typeof makeConfig>[0];

  // Response builders return a fresh object on every call, so each test queues
  // its own instances exactly as if it had written the literals inline.
  const scopedPass = (): VerResult => ({
    success: true,
    status: "SUCCESS",
    countsTowardEscalation: true,
    output: "pass 5",
  });
  const gateTimeout = (): VerResult => ({ success: false, status: "TIMEOUT", countsTowardEscalation: false });
  const gateTestFailure = (): VerResult => ({
    success: false,
    status: "TEST_FAILURE",
    countsTowardEscalation: true,
    output: "FAIL 1",
  });

  /**
   * Queue `responses` for the runVerification mock (call 1 = scoped verification,
   * call 2 = regression gate), then run post-agent verification against the
   * per-test repo with the given regression-gate overrides.
   */
  async function verify(responses: VerResult[], gateOverrides: GateOverrides) {
    _verificationResponses = responses;
    const config = makeConfig(gateOverrides);
    const story = makeStory();
    const prd = makePRD(story);
    return runPostAgentVerification(makeOpts(tempDir, storyGitRef, config, story, prd));
  }

  test("TIMEOUT + acceptOnTimeout=true → runPostAgentVerification returns passed", async () => {
    const result = await verify([scopedPass(), gateTimeout()], { acceptOnTimeout: true });

    expect(result.passed).toBe(true);
  });

  test("TIMEOUT + acceptOnTimeout=true → revertStoriesOnFailure is NOT called", async () => {
    await verify([scopedPass(), gateTimeout()], { acceptOnTimeout: true });

    expect(mockRevertStoriesOnFailure).not.toHaveBeenCalled();
  });

  test("TIMEOUT + acceptOnTimeout=false → runPostAgentVerification returns failed", async () => {
    const result = await verify([scopedPass(), gateTimeout()], { acceptOnTimeout: false });

    expect(result.passed).toBe(false);
  });

  test("TIMEOUT + acceptOnTimeout=false → revertStoriesOnFailure IS called", async () => {
    await verify([scopedPass(), gateTimeout()], { acceptOnTimeout: false });

    expect(mockRevertStoriesOnFailure).toHaveBeenCalledTimes(1);
  });

  test("TIMEOUT + acceptOnTimeout not set → defaults to true → returns passed", async () => {
    // No acceptOnTimeout — should default to true per BUG-026 spec
    const result = await verify([scopedPass(), gateTimeout()], {});

    expect(result.passed).toBe(true);
  });

  test("TEST_FAILURE in regression gate → returns failed regardless of acceptOnTimeout", async () => {
    const result = await verify([scopedPass(), gateTestFailure()], { acceptOnTimeout: true });

    expect(result.passed).toBe(false);
  });

  test("TEST_FAILURE in regression gate → revertStoriesOnFailure IS called", async () => {
    await verify([scopedPass(), gateTestFailure()], { acceptOnTimeout: true });

    expect(mockRevertStoriesOnFailure).toHaveBeenCalledTimes(1);
  });

  test("regression gate runs second → runVerification called twice (scoped + full suite)", async () => {
    await verify([scopedPass(), gateTimeout()], { acceptOnTimeout: true });

    // Once for scoped verification, once for regression gate
    expect(mockRunVerification).toHaveBeenCalledTimes(2);
  });

  test("regression gate disabled → only scoped test runs (one call to runVerification)", async () => {
    const result = await verify([scopedPass()], { enabled: false, timeoutSeconds: 120 });

    expect(result.passed).toBe(true);
    expect(mockRunVerification).toHaveBeenCalledTimes(1);
  });
});
|
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
*/
|
|
9
9
|
|
|
10
10
|
import { describe, expect, test } from "bun:test";
|
|
11
|
-
import type { RegressionGateConfig } from "
|
|
11
|
+
import type { RegressionGateConfig } from "../../../src/config/schema";
|
|
12
12
|
|
|
13
13
|
describe("RegressionGateConfig", () => {
|
|
14
14
|
test("should have correct default values", () => {
|
|
@@ -146,6 +146,38 @@ describe("Regression Gate Timeout", () => {
|
|
|
146
146
|
expect(regressionGateTimeoutSeconds).not.toBe(verificationTimeoutSeconds);
|
|
147
147
|
expect(regressionGateTimeoutSeconds).toBeLessThan(verificationTimeoutSeconds);
|
|
148
148
|
});
|
|
149
|
+
|
|
150
|
+
test("should have acceptOnTimeout config option (BUG-026)", () => {
  // Type-level check: the literal only compiles if RegressionGateConfig
  // accepts an `acceptOnTimeout` field.
  const gate: RegressionGateConfig = { enabled: true, timeoutSeconds: 120, acceptOnTimeout: true };

  expect(gate.acceptOnTimeout).toBe(true);
});
|
|
159
|
+
|
|
160
|
+
test("should default acceptOnTimeout to true (BUG-026)", () => {
  // acceptOnTimeout is intentionally left out of the literal.
  const gate: RegressionGateConfig = { enabled: true, timeoutSeconds: 120 };

  // NOTE(review): the `?? true` fallback is applied here in the test itself, so
  // this documents the intended default rather than exercising production code.
  expect(gate.acceptOnTimeout ?? true).toBe(true);
});
|
|
171
|
+
|
|
172
|
+
test("should allow disabling acceptOnTimeout (BUG-026)", () => {
  // Type-level check: an explicit `false` is an accepted value for the field.
  const gate: RegressionGateConfig = { enabled: true, timeoutSeconds: 120, acceptOnTimeout: false };

  expect(gate.acceptOnTimeout).toBe(false);
});
|
|
149
181
|
});
|
|
150
182
|
|
|
151
183
|
describe("Story State After Regression Failure", () => {
|
|
@@ -6,13 +6,14 @@
|
|
|
6
6
|
* a fresh classification.
|
|
7
7
|
*/
|
|
8
8
|
|
|
9
|
-
import {
|
|
9
|
+
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
|
10
10
|
import { initLogger, resetLogger } from "../../../src/logger";
|
|
11
|
-
import
|
|
11
|
+
import { _routingDeps, routingStage } from "../../../src/pipeline/stages/routing";
|
|
12
12
|
import type { NaxConfig } from "../../../src/config";
|
|
13
|
+
import type { PipelineContext } from "../../../src/pipeline/types";
|
|
13
14
|
import type { UserStory } from "../../../src/prd/types";
|
|
14
15
|
|
|
15
|
-
// ──
|
|
16
|
+
// ── Mock functions ────────────────────────────────────────────────────────────
|
|
16
17
|
|
|
17
18
|
const mockRouteStory = mock(async () => ({
|
|
18
19
|
complexity: "medium",
|
|
@@ -22,26 +23,11 @@ const mockRouteStory = mock(async () => ({
|
|
|
22
23
|
}));
|
|
23
24
|
|
|
24
25
|
const mockComplexityToModelTier = mock((_complexity: string, _config: unknown) => "balanced" as const);
|
|
26
|
+
const mockIsGreenfieldStory = mock(async () => false);
|
|
25
27
|
|
|
26
|
-
|
|
27
|
-
routeStory: mockRouteStory,
|
|
28
|
-
complexityToModelTier: mockComplexityToModelTier,
|
|
29
|
-
}));
|
|
30
|
-
|
|
31
|
-
// Greenfield check: return false so it never interferes with test strategy
|
|
32
|
-
mock.module("../../../src/context/greenfield", () => ({
|
|
33
|
-
isGreenfieldStory: mock(async () => false),
|
|
34
|
-
}));
|
|
35
|
-
|
|
36
|
-
// LLM batch cache is not relevant here
|
|
37
|
-
mock.module("../../../src/routing/strategies/llm", () => ({
|
|
38
|
-
clearCache: mock(() => {}),
|
|
39
|
-
routeBatch: mock(async () => []),
|
|
40
|
-
}));
|
|
41
|
-
|
|
42
|
-
// ── Dynamic imports after mocks ───────────────────────────────────────────────
|
|
28
|
+
// ── Capture originals for afterEach restoration ───────────────────────────────
|
|
43
29
|
|
|
44
|
-
const {
|
|
30
|
+
const _origDeps = { ..._routingDeps };
|
|
45
31
|
|
|
46
32
|
// ── Fixtures ──────────────────────────────────────────────────────────────────
|
|
47
33
|
|
|
@@ -58,11 +44,9 @@ function makeStory(routingOverride?: Partial<UserStory["routing"]>): UserStory {
|
|
|
58
44
|
tags: [],
|
|
59
45
|
dependencies: [],
|
|
60
46
|
};
|
|
61
|
-
|
|
62
47
|
if (routingOverride !== undefined) {
|
|
63
48
|
story.routing = routingOverride as UserStory["routing"];
|
|
64
49
|
}
|
|
65
|
-
|
|
66
50
|
return story;
|
|
67
51
|
}
|
|
68
52
|
|
|
@@ -82,16 +66,22 @@ function makeCtx(story: UserStory): PipelineContext {
|
|
|
82
66
|
} as PipelineContext;
|
|
83
67
|
}
|
|
84
68
|
|
|
85
|
-
// ──
|
|
69
|
+
// ── Lifecycle ─────────────────────────────────────────────────────────────────
|
|
86
70
|
|
|
87
71
|
beforeEach(() => {
|
|
88
72
|
resetLogger();
|
|
89
73
|
initLogger({ level: "error", useChalk: false });
|
|
74
|
+
_routingDeps.routeStory = mockRouteStory as typeof _routingDeps.routeStory;
|
|
75
|
+
_routingDeps.complexityToModelTier = mockComplexityToModelTier as typeof _routingDeps.complexityToModelTier;
|
|
76
|
+
_routingDeps.isGreenfieldStory = mockIsGreenfieldStory as typeof _routingDeps.isGreenfieldStory;
|
|
90
77
|
mockRouteStory.mockClear();
|
|
91
78
|
mockComplexityToModelTier.mockClear();
|
|
79
|
+
mockIsGreenfieldStory.mockClear();
|
|
92
80
|
});
|
|
93
81
|
|
|
94
82
|
afterEach(() => {
|
|
83
|
+
Object.assign(_routingDeps, _origDeps);
|
|
84
|
+
mock.restore();
|
|
95
85
|
resetLogger();
|
|
96
86
|
});
|
|
97
87
|
|
|
@@ -99,42 +89,31 @@ afterEach(() => {
|
|
|
99
89
|
|
|
100
90
|
describe("routing stage — partial override (FIX-001)", () => {
|
|
101
91
|
test("(1) partial override with only testStrategy preserves LLM complexity", async () => {
|
|
102
|
-
// Story sets only testStrategy — complexity should come from LLM
|
|
103
92
|
const story = makeStory({ testStrategy: "test-after", complexity: undefined as any, reasoning: "manual" });
|
|
104
93
|
const ctx = makeCtx(story);
|
|
105
94
|
|
|
106
95
|
await routingStage.execute(ctx);
|
|
107
96
|
|
|
108
|
-
// testStrategy is overridden by the story field
|
|
109
97
|
expect(ctx.routing.testStrategy).toBe("test-after");
|
|
110
|
-
// complexity should remain from the LLM result ("medium"), not undefined
|
|
111
98
|
expect(ctx.routing.complexity).toBe("medium");
|
|
112
99
|
});
|
|
113
100
|
|
|
114
101
|
test("(2) LLM-classified complexity is preserved when story.routing has no complexity", async () => {
|
|
115
|
-
// story.routing is present but complexity is undefined (falsy)
|
|
116
102
|
const story = makeStory({ testStrategy: "test-after", complexity: undefined as any, reasoning: "" });
|
|
117
103
|
const ctx = makeCtx(story);
|
|
118
104
|
|
|
119
105
|
await routingStage.execute(ctx);
|
|
120
106
|
|
|
121
|
-
// LLM returned "medium" — it must not be overwritten with undefined
|
|
122
107
|
expect(ctx.routing.complexity).toBe("medium");
|
|
123
108
|
expect(ctx.routing.complexity).not.toBeUndefined();
|
|
124
109
|
});
|
|
125
110
|
|
|
126
111
|
test("(3) full override works when both complexity and testStrategy are set", async () => {
|
|
127
|
-
|
|
128
|
-
const story = makeStory({
|
|
129
|
-
complexity: "simple",
|
|
130
|
-
testStrategy: "test-after",
|
|
131
|
-
reasoning: "manual override",
|
|
132
|
-
});
|
|
112
|
+
const story = makeStory({ complexity: "simple", testStrategy: "test-after", reasoning: "manual override" });
|
|
133
113
|
const ctx = makeCtx(story);
|
|
134
114
|
|
|
135
115
|
await routingStage.execute(ctx);
|
|
136
116
|
|
|
137
|
-
// Both fields should be overridden from the story
|
|
138
117
|
expect(ctx.routing.complexity).toBe("simple");
|
|
139
118
|
expect(ctx.routing.testStrategy).toBe("test-after");
|
|
140
119
|
});
|
|
@@ -23,20 +23,18 @@ import type { PRD, UserStory } from "../../../src/prd/types";
|
|
|
23
23
|
|
|
24
24
|
const mockRegression = mock(async () => ({ success: true, status: "SUCCESS" as const }));
|
|
25
25
|
|
|
26
|
-
mock.module(
|
|
27
|
-
|
|
28
|
-
}));
|
|
26
|
+
// ---- Static imports — no mock.module() needed (uses _deps pattern) ----------
|
|
27
|
+
import { _verifyDeps, verifyStage } from "../../../src/pipeline/stages/verify";
|
|
29
28
|
|
|
30
29
|
// ---- Capture originals for afterEach restoration ----------------------------
|
|
31
30
|
const _origDeps = { ..._smartRunnerDeps };
|
|
32
|
-
|
|
33
|
-
// ---- Dynamic import after gate mock -----------------------------------------
|
|
34
|
-
const { verifyStage } = await import("../../../src/pipeline/stages/verify");
|
|
31
|
+
const _origVerifyDeps = { ..._verifyDeps };
|
|
35
32
|
|
|
36
33
|
// ---- Mock functions ---------------------------------------------------------
|
|
37
34
|
|
|
38
35
|
const mockGetChangedSourceFiles = mock(async (_workdir: string) => [] as string[]);
|
|
39
36
|
const mockMapSourceToTests = mock(async (_files: string[], _workdir: string) => [] as string[]);
|
|
37
|
+
const mockImportGrepFallback = mock(async (_files: string[], _workdir: string, _patterns: string[]) => [] as string[]);
|
|
40
38
|
const mockBuildSmartTestCommand = mock((testFiles: string[], baseCommand: string) => {
|
|
41
39
|
if (testFiles.length === 0) return baseCommand;
|
|
42
40
|
return `${baseCommand.split(" ").slice(0, -1).join(" ")} ${testFiles.join(" ")}`;
|
|
@@ -157,16 +155,20 @@ describe("Verify Stage --- Smart Runner Integration", () => {
|
|
|
157
155
|
initLogger({ level: "error", useChalk: false });
|
|
158
156
|
_smartRunnerDeps.getChangedSourceFiles = mockGetChangedSourceFiles;
|
|
159
157
|
_smartRunnerDeps.mapSourceToTests = mockMapSourceToTests;
|
|
158
|
+
_smartRunnerDeps.importGrepFallback = mockImportGrepFallback;
|
|
160
159
|
_smartRunnerDeps.buildSmartTestCommand = mockBuildSmartTestCommand;
|
|
160
|
+
_verifyDeps.regression = mockRegression as typeof _verifyDeps.regression;
|
|
161
161
|
mockRegression.mockClear();
|
|
162
162
|
mockGetChangedSourceFiles.mockClear();
|
|
163
163
|
mockMapSourceToTests.mockClear();
|
|
164
|
+
mockImportGrepFallback.mockClear();
|
|
164
165
|
mockBuildSmartTestCommand.mockClear();
|
|
165
166
|
});
|
|
166
167
|
|
|
167
168
|
afterEach(() => {
|
|
168
169
|
resetLogger();
|
|
169
170
|
Object.assign(_smartRunnerDeps, _origDeps);
|
|
171
|
+
Object.assign(_verifyDeps, _origVerifyDeps);
|
|
170
172
|
});
|
|
171
173
|
|
|
172
174
|
describe("AC1: uses scoped test command when smart runner finds test files", () => {
|
|
@@ -183,4 +183,32 @@ describe("getNextStory() — run order S1-I1 -> S1-I2 (retry) -> S2-I1", () => {
|
|
|
183
183
|
const pick2 = getNextStory(prd, lastId, maxRetries);
|
|
184
184
|
expect(pick2?.id).toBe("US-002");
|
|
185
185
|
});
|
|
186
|
+
|
|
187
|
+
test("BUG-029: prioritizes escalated story (pending + attempts > 0) over other pending stories", () => {
  const maxRetries = 2;
  const prd = makePrd(["US-001", "US-002", "US-003"].map((id) => makeStory(id)));

  // Simulate an escalation of US-001: status back to "pending", one prior
  // attempt recorded, routing already populated.
  const escalated = prd.userStories[0];
  escalated.status = "pending";
  escalated.attempts = 1;
  escalated.routing = { complexity: "simple", modelTier: "balanced", testStrategy: "test-after" };

  // getNextStory should prioritize US-001 (escalated, pending with attempts)
  const next = getNextStory(prd, "US-001", maxRetries);
  expect(next?.id).toBe("US-001");
});
|
|
200
|
+
|
|
201
|
+
test("BUG-029: does not reprioritize story with 0 attempts (fresh pending)", () => {
  const maxRetries = 2;
  const prd = makePrd(["US-001", "US-002"].map((id) => makeStory(id)));

  // US-001 is fresh pending (no prior attempts) — normal ordering applies
  const fresh = prd.userStories[0];
  fresh.status = "pending";
  fresh.attempts = 0;

  // Should still pick US-001 (first pending), but via normal path not escalation path
  const next = getNextStory(prd, "US-002", maxRetries);
  expect(next?.id).toBe("US-001");
});
|
|
213
|
+
|
|
186
214
|
});
|