@nathapp/nax 0.19.0 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.json +15 -0
- package/docs/20260304-review-nax.md +492 -0
- package/docs/ROADMAP.md +52 -18
- package/docs/specs/bug-039-orphan-processes.md +131 -0
- package/docs/specs/bug-040-review-rectification.md +82 -0
- package/docs/specs/bug-041-cross-story-test-isolation.md +88 -0
- package/docs/specs/bug-042-verifier-failure-capture.md +117 -0
- package/docs/specs/feat-010-smart-runner-git-history.md +96 -0
- package/docs/specs/feat-011-file-context-strategy.md +73 -0
- package/docs/specs/feat-012-tdd-writer-tier.md +79 -0
- package/docs/specs/feat-013-test-after-review.md +89 -0
- package/docs/specs/feat-014-heartbeat-observability.md +127 -0
- package/memory/topic/feat-010-baseref.md +28 -0
- package/memory/topic/feat-013-test-after-deprecation.md +22 -0
- package/nax/config.json +7 -4
- package/nax/features/bug-039-medium/prd.json +45 -0
- package/nax/features/verify-v2/prd.json +79 -0
- package/nax/features/verify-v2/progress.txt +3 -0
- package/package.json +2 -2
- package/src/agents/claude.ts +66 -7
- package/src/config/defaults.ts +2 -1
- package/src/config/schemas.ts +2 -0
- package/src/config/types.ts +4 -0
- package/src/context/builder.ts +9 -1
- package/src/execution/lifecycle/index.ts +1 -0
- package/src/execution/lifecycle/run-completion.ts +29 -0
- package/src/execution/lifecycle/run-regression.ts +301 -0
- package/src/execution/pipeline-result-handler.ts +0 -1
- package/src/execution/post-verify.ts +31 -194
- package/src/execution/runner.ts +1 -0
- package/src/execution/sequential-executor.ts +1 -0
- package/src/pipeline/stages/verify.ts +27 -23
- package/src/pipeline/types.ts +2 -0
- package/src/review/runner.ts +39 -4
- package/src/routing/router.ts +3 -3
- package/src/routing/strategies/keyword.ts +5 -2
- package/src/routing/strategies/llm.ts +27 -1
- package/src/utils/git.ts +49 -25
- package/src/verification/executor.ts +8 -2
- package/src/verification/smart-runner.ts +58 -10
- package/test/integration/plugin-routing.test.ts +1 -1
- package/test/integration/rectification-flow.test.ts +3 -3
- package/test/integration/review-config-commands.test.ts +1 -1
- package/test/integration/verify-stage.test.ts +9 -0
- package/test/unit/agents/claude.test.ts +106 -0
- package/test/unit/config/defaults.test.ts +69 -0
- package/test/unit/config/regression-gate-schema.test.ts +159 -0
- package/test/unit/context.test.ts +6 -3
- package/test/unit/execution/lifecycle/run-completion.test.ts +239 -0
- package/test/unit/execution/lifecycle/run-regression.test.ts +418 -0
- package/test/unit/execution/post-verify-regression.test.ts +31 -84
- package/test/unit/execution/post-verify.test.ts +28 -48
- package/test/unit/pipeline/stages/verify.test.ts +266 -0
- package/test/unit/pipeline/verify-smart-runner.test.ts +2 -1
- package/test/unit/prd-auto-default.test.ts +2 -2
- package/test/unit/routing/routing-stability.test.ts +1 -1
- package/test/unit/routing/strategies/llm.test.ts +250 -0
- package/test/unit/routing.test.ts +7 -7
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deferred Regression Gate
|
|
3
|
+
*
|
|
4
|
+
* Runs full test suite once after all stories complete, then attempts
|
|
5
|
+
* targeted rectification per responsible story. Handles edge cases:
|
|
6
|
+
* - Partial completion: only check stories marked passed
|
|
7
|
+
* - Overlapping file changes: try last modified story first
|
|
8
|
+
* - Unmapped tests: warn and mark all passed stories for re-verification
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import type { NaxConfig } from "../../config";
|
|
12
|
+
import { getSafeLogger } from "../../logger";
|
|
13
|
+
import type { PRD, UserStory } from "../../prd";
|
|
14
|
+
import { countStories } from "../../prd";
|
|
15
|
+
import { hasCommitsForStory } from "../../utils/git";
|
|
16
|
+
import { parseBunTestOutput } from "../../verification";
|
|
17
|
+
import { reverseMapTestToSource } from "../../verification/smart-runner";
|
|
18
|
+
import { runRectificationLoop } from "../post-verify-rectification";
|
|
19
|
+
import { runVerification } from "../verification";
|
|
20
|
+
|
|
21
|
+
/**
 * Collaborators of the deferred regression gate, gathered on one mutable object so tests can
 * replace individual entries instead of calling mock.module() (which leaks in Bun 1.x).
 *
 * The gate always calls these through `_regressionDeps.<name>`, so a replacement assigned
 * before a call is the one that gets used.
 *
 * @internal - test use only.
 */
export const _regressionDeps = {
  // Runs the test command for the initial full-suite run and for the confirmation run.
  runVerification,
  // Attempts a fix for a single affected story.
  runRectificationLoop,
  // Turns the failing run's raw output into a summary with pass/fail counts and failures.
  parseBunTestOutput,
  // Maps the failing test files to source files.
  reverseMapTestToSource,
};
|
|
31
|
+
|
|
32
|
+
/** Inputs for {@link runDeferredRegression}. */
export interface DeferredRegressionOptions {
  /** Project configuration; the gate reads `execution.regressionGate` and the `quality` section. */
  config: NaxConfig;
  /** PRD whose user stories are inspected; only stories with status "passed" are considered. */
  prd: PRD;
  /** Directory in which the test command and the story lookup are run. */
  workdir: string;
}
|
|
37
|
+
|
|
38
|
+
/** Outcome of {@link runDeferredRegression}. */
export interface DeferredRegressionResult {
  /** True when the gate was skipped, the suite passed, or a timeout was accepted as a pass. */
  success: boolean;
  /** Number of failing tests reported by the last full-suite run that was evaluated (0 when skipped). */
  failedTests: number;
  /** Number of passing tests reported by the last full-suite run that was evaluated (0 when skipped). */
  passedTests: number;
  /** How many times the rectification loop was invoked, summed over all affected stories. */
  rectificationAttempts: number;
  /** IDs of the stories that failures were attributed to. */
  affectedStories: string[];
}
|
|
45
|
+
|
|
46
|
+
/**
 * Pick the passed story to hold responsible for a failing test file.
 *
 * Walks `passedStories` from last to first and resolves to the first story for which
 * `hasCommitsForStory(workdir, story.id, 50)` is truthy, or to `undefined` when no story
 * qualifies.
 *
 * NOTE(review): `testFile` is only attached to the log entry; it does not take part in the
 * lookup, so the answer does not depend on which file failed. Confirm whether the lookup was
 * meant to be scoped to the file.
 * NOTE(review): the literal 50 is presumably the number of recent commits to search; verify
 * against `hasCommitsForStory`.
 */
async function findResponsibleStory(
  testFile: string,
  workdir: string,
  passedStories: UserStory[],
): Promise<UserStory | undefined> {
  const log = getSafeLogger();

  // Last story first: iterate over a reversed copy so the caller's array is left untouched.
  const lastFirst = [...passedStories].reverse();
  for (const candidate of lastFirst) {
    const committed = await hasCommitsForStory(workdir, candidate.id, 50);
    if (!committed) continue;

    log?.info("regression", `Mapped test to story ${candidate.id}`, { testFile });
    return candidate;
  }

  return undefined;
}
|
|
71
|
+
|
|
72
|
+
/**
 * Build a passing result that reports no failures, no rectification and no affected stories.
 *
 * @param passedTests - Pass count to report; defaults to 0 for the paths where no suite result is used.
 */
function passingResult(passedTests = 0): DeferredRegressionResult {
  return {
    success: true,
    failedTests: 0,
    passedTests,
    rectificationAttempts: 0,
    affectedStories: [],
  };
}

/**
 * Run the test command once over the whole suite via `_regressionDeps.runVerification`.
 *
 * Shared by the initial run and the confirmation run so both use exactly the same options.
 * `timeoutRetryCount` is always 0 here.
 */
function runFullSuite(config: NaxConfig, workdir: string, command: string, timeoutSeconds: number) {
  return _regressionDeps.runVerification({
    workingDirectory: workdir,
    command,
    timeoutSeconds,
    forceExit: config.quality.forceExit,
    detectOpenHandles: config.quality.detectOpenHandles,
    detectOpenHandlesRetries: config.quality.detectOpenHandlesRetries,
    timeoutRetryCount: 0,
    gracePeriodMs: config.quality.gracePeriodMs,
    drainTimeoutMs: config.quality.drainTimeoutMs,
    shell: config.quality.shell,
    stripEnvVars: config.quality.stripEnvVars,
  });
}

/**
 * Run the deferred regression gate after all stories complete.
 *
 * Steps:
 * 1. Run the full test suite once.
 * 2. On failure, collect the failing test files and attribute them to passed stories.
 *    When the failures name no test file at all, every passed story is treated as affected.
 * 3. For each affected story, invoke the rectification loop up to
 *    `regressionGate.maxRectificationAttempts` times (default 2), stopping early for that
 *    story as soon as the loop reports success.
 * 4. Re-run the full suite to confirm.
 * 5. Return the counts of the confirmation run together with the affected story IDs.
 *
 * Returns a passing result without running anything when the gate mode is "disabled", when
 * the mode is anything other than "deferred", or when no story has status "passed".
 * A TIMEOUT status counts as a pass when `regressionGate.acceptOnTimeout` is true (the default).
 *
 * @param options - Configuration, PRD and working directory for the run.
 * @returns The gate outcome; `success` is false when the suite still fails, when the failing
 *   run produced no output, or when no story could be attributed to the failures.
 */
export async function runDeferredRegression(options: DeferredRegressionOptions): Promise<DeferredRegressionResult> {
  const logger = getSafeLogger();
  const { config, prd, workdir } = options;
  const gate = config.execution.regressionGate;

  // Check if regression gate is deferred
  const regressionMode = gate?.mode ?? "deferred";
  if (regressionMode === "disabled") {
    logger?.info("regression", "Deferred regression gate disabled");
    return passingResult();
  }

  if (regressionMode !== "deferred") {
    logger?.info("regression", "Regression gate mode is not deferred, skipping");
    return passingResult();
  }

  const testCommand = config.quality.commands.test ?? "bun test";
  const timeoutSeconds = gate?.timeoutSeconds ?? 120;
  const maxRectificationAttempts = gate?.maxRectificationAttempts ?? 2;
  const acceptOnTimeout = gate?.acceptOnTimeout ?? true;

  // Only check stories that have been marked as passed
  const counts = countStories(prd);
  const passedStories = prd.userStories.filter((s) => s.status === "passed");

  if (passedStories.length === 0) {
    logger?.info("regression", "No passed stories to verify (partial completion)");
    return passingResult();
  }

  logger?.info("regression", "Running deferred full-suite regression gate", {
    totalStories: counts.total,
    passedStories: passedStories.length,
  });

  // Step 1: Run full test suite
  const fullSuiteResult = await runFullSuite(config, workdir, testCommand, timeoutSeconds);

  if (fullSuiteResult.success) {
    logger?.info("regression", "Full suite passed");
    return passingResult(fullSuiteResult.passCount ?? 0);
  }

  // Handle timeout
  if (fullSuiteResult.status === "TIMEOUT" && acceptOnTimeout) {
    logger?.warn("regression", "Full-suite regression gate timed out (accepted as pass)");
    return passingResult();
  }

  // Without output there is nothing to parse or hand to rectification.
  const failureOutput = fullSuiteResult.output;
  if (!failureOutput) {
    logger?.error("regression", "Full suite failed with no output");
    return {
      success: false,
      failedTests: fullSuiteResult.failCount ?? 0,
      passedTests: fullSuiteResult.passCount ?? 0,
      rectificationAttempts: 0,
      affectedStories: [],
    };
  }

  // Step 2: Parse failures and map to source files to stories
  const testSummary = _regressionDeps.parseBunTestOutput(failureOutput);

  // Story ID -> story. A Map iterates in insertion order, so its keys give the affected-ID
  // list and its values give the rectification order.
  const affected = new Map<string, UserStory>();

  logger?.warn("regression", "Regression detected", {
    failedTests: testSummary.failed,
    passedTests: testSummary.passed,
  });

  // Extract test file paths from failures
  const testFilesInFailures = new Set<string>();
  for (const failure of testSummary.failures) {
    if (failure.file) {
      testFilesInFailures.add(failure.file);
    }
  }

  if (testFilesInFailures.size === 0) {
    logger?.warn("regression", "No test files found in failures (unmapped)");
    // Mark all passed stories for re-verification
    for (const story of passedStories) {
      affected.set(story.id, story);
    }
  } else {
    // Map test files to source files to stories
    const testFilesArray = Array.from(testFilesInFailures);
    // The source-file mapping is only reported in the log below.
    const sourceFilesArray = _regressionDeps.reverseMapTestToSource(testFilesArray, workdir);

    logger?.info("regression", "Mapped test files to source files", {
      testFiles: testFilesArray.length,
      sourceFiles: sourceFilesArray.length,
    });

    for (const testFile of testFilesArray) {
      const responsibleStory = await findResponsibleStory(testFile, workdir, passedStories);
      if (responsibleStory) {
        affected.set(responsibleStory.id, responsibleStory);
      } else {
        logger?.warn("regression", "Could not map test file to story", { testFile });
      }
    }
  }

  if (affected.size === 0) {
    logger?.warn("regression", "No stories could be mapped to failures");
    return {
      success: false,
      failedTests: testSummary.failed,
      passedTests: testSummary.passed,
      rectificationAttempts: 0,
      affectedStories: [],
    };
  }

  // Step 3: Attempt rectification per story
  let rectificationAttempts = 0;

  for (const story of affected.values()) {
    for (let attempt = 0; attempt < maxRectificationAttempts; attempt++) {
      rectificationAttempts++;

      logger?.info("regression", `Rectifying story ${story.id} (attempt ${attempt + 1}/${maxRectificationAttempts})`);

      // Every attempt receives the output of the initial failing run.
      const fixed = await _regressionDeps.runRectificationLoop({
        config,
        workdir,
        story,
        testCommand,
        timeoutSeconds,
        testOutput: failureOutput,
        promptPrefix: `# DEFERRED REGRESSION: Full-Suite Failures\n\nYour story ${story.id} broke tests in the full suite. Fix these regressions.`,
      });

      if (fixed) {
        logger?.info("regression", `Story ${story.id} rectified successfully`);
        break; // Move to next story
      }
    }
  }

  // Step 4: Re-run full suite to confirm
  logger?.info("regression", "Re-running full suite after rectification");
  const retryResult = await runFullSuite(config, workdir, testCommand, timeoutSeconds);

  const success = retryResult.success || (retryResult.status === "TIMEOUT" && acceptOnTimeout);

  if (success) {
    logger?.info("regression", "Deferred regression gate passed after rectification");
  } else {
    logger?.warn("regression", "Deferred regression gate still failing after rectification", {
      remainingFailures: retryResult.failCount,
    });
  }

  // Step 5: Report the confirmation run's counts and the affected story IDs
  return {
    success,
    failedTests: retryResult.failCount ?? 0,
    passedTests: retryResult.passCount ?? 0,
    rectificationAttempts,
    affectedStories: Array.from(affected.keys()),
  };
}
|
|
@@ -83,7 +83,6 @@ export async function handlePipelineSuccess(
|
|
|
83
83
|
storiesToExecute: ctx.storiesToExecute,
|
|
84
84
|
allStoryMetrics: ctx.allStoryMetrics,
|
|
85
85
|
timeoutRetryCountMap: ctx.timeoutRetryCountMap,
|
|
86
|
-
storyGitRef: ctx.storyGitRef ?? undefined,
|
|
87
86
|
});
|
|
88
87
|
const verificationPassed = verifyResult.passed;
|
|
89
88
|
prd = verifyResult.prd;
|
|
@@ -4,18 +4,15 @@
|
|
|
4
4
|
* Runs verification after the agent completes, reverts story state on failure.
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
|
-
import { spawn } from "bun";
|
|
8
7
|
import type { NaxConfig } from "../config";
|
|
9
8
|
import { getSafeLogger } from "../logger";
|
|
10
9
|
import type { StoryMetrics } from "../metrics";
|
|
11
10
|
import type { PRD, StructuredFailure, UserStory, VerificationStage } from "../prd";
|
|
12
11
|
import { getExpectedFiles, savePRD } from "../prd";
|
|
13
|
-
import type {
|
|
12
|
+
import type { VerificationResult } from "../verification";
|
|
14
13
|
import { parseBunTestOutput } from "../verification";
|
|
15
|
-
import { getTierConfig } from "./escalation";
|
|
16
14
|
import { revertStoriesOnFailure, runRectificationLoop } from "./post-verify-rectification";
|
|
17
|
-
import {
|
|
18
|
-
import { getEnvironmentalEscalationThreshold, parseTestOutput, runVerification } from "./verification";
|
|
15
|
+
import { runVerification } from "./verification";
|
|
19
16
|
|
|
20
17
|
/** Build a StructuredFailure from verification result and test output. */
|
|
21
18
|
function buildStructuredFailure(
|
|
@@ -44,37 +41,6 @@ function buildStructuredFailure(
|
|
|
44
41
|
};
|
|
45
42
|
}
|
|
46
43
|
|
|
47
|
-
/** Get test files changed since a git ref. Returns empty array if detection fails. */
|
|
48
|
-
async function getChangedTestFiles(workdir: string, gitRef?: string): Promise<string[]> {
|
|
49
|
-
if (!gitRef) return [];
|
|
50
|
-
try {
|
|
51
|
-
const proc = spawn({
|
|
52
|
-
cmd: ["git", "diff", "--name-only", gitRef, "HEAD"],
|
|
53
|
-
cwd: workdir,
|
|
54
|
-
stdout: "pipe",
|
|
55
|
-
stderr: "pipe",
|
|
56
|
-
});
|
|
57
|
-
const exitCode = await proc.exited;
|
|
58
|
-
if (exitCode !== 0) return [];
|
|
59
|
-
const stdout = await new Response(proc.stdout).text();
|
|
60
|
-
return stdout
|
|
61
|
-
.trim()
|
|
62
|
-
.split("\n")
|
|
63
|
-
.filter(
|
|
64
|
-
(f) =>
|
|
65
|
-
f && (f.includes("test/") || f.includes("__tests__/") || f.endsWith(".test.ts") || f.endsWith(".spec.ts")),
|
|
66
|
-
);
|
|
67
|
-
} catch {
|
|
68
|
-
return [];
|
|
69
|
-
}
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
/** Scope a test command to only run specific test files. */
|
|
73
|
-
function scopeTestCommand(baseCommand: string, testFiles: string[]): string {
|
|
74
|
-
if (testFiles.length === 0) return baseCommand;
|
|
75
|
-
return `${baseCommand} ${testFiles.join(" ")}`;
|
|
76
|
-
}
|
|
77
|
-
|
|
78
44
|
export interface PostVerifyOptions {
|
|
79
45
|
config: NaxConfig;
|
|
80
46
|
prd: PRD;
|
|
@@ -85,7 +51,6 @@ export interface PostVerifyOptions {
|
|
|
85
51
|
storiesToExecute: UserStory[];
|
|
86
52
|
allStoryMetrics: StoryMetrics[];
|
|
87
53
|
timeoutRetryCountMap: Map<string, number>;
|
|
88
|
-
storyGitRef?: string;
|
|
89
54
|
}
|
|
90
55
|
|
|
91
56
|
export interface PostVerifyResult {
|
|
@@ -100,128 +65,38 @@ export interface PostVerifyResult {
|
|
|
100
65
|
* not user/PRD input. No shell injection risk from untrusted sources.
|
|
101
66
|
*/
|
|
102
67
|
export async function runPostAgentVerification(opts: PostVerifyOptions): Promise<PostVerifyResult> {
|
|
103
|
-
const {
|
|
104
|
-
config,
|
|
105
|
-
prd,
|
|
106
|
-
prdPath,
|
|
107
|
-
workdir,
|
|
108
|
-
featureDir,
|
|
109
|
-
story,
|
|
110
|
-
storiesToExecute,
|
|
111
|
-
allStoryMetrics,
|
|
112
|
-
timeoutRetryCountMap,
|
|
113
|
-
storyGitRef,
|
|
114
|
-
} = opts;
|
|
115
|
-
const logger = getSafeLogger();
|
|
68
|
+
const { config, prd, prdPath, workdir, featureDir, story, storiesToExecute, allStoryMetrics } = opts;
|
|
116
69
|
|
|
117
70
|
if (!config.quality.commands.test) return { passed: true, prd };
|
|
118
71
|
|
|
119
|
-
// Scoped verification: only run test files changed by this story
|
|
120
|
-
const changedTestFiles = await getChangedTestFiles(workdir, storyGitRef);
|
|
121
|
-
const testCommand = scopeTestCommand(config.quality.commands.test, changedTestFiles);
|
|
122
|
-
const timeoutRetryCount = timeoutRetryCountMap.get(story.id) || 0;
|
|
123
|
-
|
|
124
|
-
const verificationResult = await _postVerifyDeps.runVerification({
|
|
125
|
-
workingDirectory: workdir,
|
|
126
|
-
expectedFiles: _postVerifyDeps.getExpectedFiles(story),
|
|
127
|
-
command: testCommand,
|
|
128
|
-
timeoutSeconds: config.execution.verificationTimeoutSeconds,
|
|
129
|
-
forceExit: config.quality.forceExit,
|
|
130
|
-
detectOpenHandles: config.quality.detectOpenHandles,
|
|
131
|
-
detectOpenHandlesRetries: config.quality.detectOpenHandlesRetries,
|
|
132
|
-
timeoutRetryCount,
|
|
133
|
-
gracePeriodMs: config.quality.gracePeriodMs,
|
|
134
|
-
drainTimeoutMs: config.quality.drainTimeoutMs,
|
|
135
|
-
shell: config.quality.shell,
|
|
136
|
-
stripEnvVars: config.quality.stripEnvVars,
|
|
137
|
-
});
|
|
138
|
-
|
|
139
72
|
const rectificationEnabled = config.execution.rectification?.enabled ?? false;
|
|
73
|
+
const regressionMode = config.execution.regressionGate?.mode;
|
|
140
74
|
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
const analysis = _postVerifyDeps.parseTestOutput(verificationResult.output, 0);
|
|
145
|
-
if (analysis.passCount > 0) {
|
|
146
|
-
logger?.debug("verification", "Scoped test results", {
|
|
147
|
-
passCount: analysis.passCount,
|
|
148
|
-
failCount: analysis.failCount,
|
|
149
|
-
});
|
|
150
|
-
}
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
// Regression Gate (BUG-009): run full suite after scoped tests pass
|
|
154
|
-
const regressionGateResult = await runRegressionGate(
|
|
155
|
-
config,
|
|
156
|
-
workdir,
|
|
157
|
-
story,
|
|
158
|
-
changedTestFiles,
|
|
159
|
-
rectificationEnabled,
|
|
160
|
-
);
|
|
161
|
-
if (regressionGateResult.status === "passed" || regressionGateResult.status === "skipped") {
|
|
162
|
-
return { passed: true, prd };
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
// Regression failed -- build StructuredFailure and revert stories
|
|
166
|
-
// verificationResult is always set when status === "failed" (see RegressionGateResult)
|
|
167
|
-
const regressionVerificationResult = regressionGateResult.verificationResult ?? {
|
|
168
|
-
status: "TEST_FAILURE" as const,
|
|
169
|
-
success: false,
|
|
170
|
-
countsTowardEscalation: true,
|
|
171
|
-
};
|
|
172
|
-
const regressionFailure = buildStructuredFailure(
|
|
173
|
-
story,
|
|
174
|
-
"regression",
|
|
175
|
-
regressionVerificationResult,
|
|
176
|
-
"Full-suite regression detected",
|
|
177
|
-
);
|
|
178
|
-
const updatedPrd = await _postVerifyDeps.revertStoriesOnFailure({
|
|
179
|
-
prd,
|
|
180
|
-
prdPath,
|
|
181
|
-
story,
|
|
182
|
-
storiesToExecute,
|
|
183
|
-
allStoryMetrics,
|
|
184
|
-
featureDir,
|
|
185
|
-
diagnosticContext: "REGRESSION: full-suite regression detected",
|
|
186
|
-
countsTowardEscalation: true,
|
|
187
|
-
priorFailure: regressionFailure,
|
|
188
|
-
});
|
|
189
|
-
return { passed: false, prd: updatedPrd };
|
|
190
|
-
}
|
|
191
|
-
|
|
192
|
-
// --- Verification failed ---
|
|
193
|
-
// Attempt rectification if enabled and tests failed (not timeout/env)
|
|
194
|
-
const isTestFailure = verificationResult.status === "TEST_FAILURE" && verificationResult.output;
|
|
195
|
-
if (rectificationEnabled && isTestFailure && verificationResult.output) {
|
|
196
|
-
const fixed = await _postVerifyDeps.runRectificationLoop({
|
|
197
|
-
config,
|
|
198
|
-
workdir,
|
|
199
|
-
story,
|
|
200
|
-
testCommand,
|
|
201
|
-
timeoutSeconds: config.execution.verificationTimeoutSeconds,
|
|
202
|
-
testOutput: verificationResult.output,
|
|
203
|
-
});
|
|
204
|
-
if (fixed) return { passed: true, prd };
|
|
205
|
-
}
|
|
206
|
-
|
|
207
|
-
// Track timeout retries for --detectOpenHandles escalation
|
|
208
|
-
if (verificationResult.status === "TIMEOUT") {
|
|
209
|
-
timeoutRetryCountMap.set(story.id, timeoutRetryCount + 1);
|
|
75
|
+
// Skip per-story regression gate only when explicitly set to deferred
|
|
76
|
+
if (regressionMode === "deferred") {
|
|
77
|
+
return { passed: true, prd };
|
|
210
78
|
}
|
|
211
79
|
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
error: verificationResult.error?.split("\n")[0],
|
|
215
|
-
});
|
|
80
|
+
// Run full-suite regression gate (per-story mode)
|
|
81
|
+
const regressionGateResult = await runRegressionGate(config, workdir, story, rectificationEnabled);
|
|
216
82
|
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
checkEnvironmentalEscalation(config, story, prd, logger);
|
|
83
|
+
if (regressionGateResult.status === "passed" || regressionGateResult.status === "skipped") {
|
|
84
|
+
return { passed: true, prd };
|
|
220
85
|
}
|
|
221
86
|
|
|
222
|
-
//
|
|
223
|
-
|
|
224
|
-
const
|
|
87
|
+
// Regression failed -- build StructuredFailure and revert stories
|
|
88
|
+
// verificationResult is always set when status === "failed" (see RegressionGateResult)
|
|
89
|
+
const regressionVerificationResult = regressionGateResult.verificationResult ?? {
|
|
90
|
+
status: "TEST_FAILURE" as const,
|
|
91
|
+
success: false,
|
|
92
|
+
countsTowardEscalation: true,
|
|
93
|
+
};
|
|
94
|
+
const regressionFailure = buildStructuredFailure(
|
|
95
|
+
story,
|
|
96
|
+
"regression",
|
|
97
|
+
regressionVerificationResult,
|
|
98
|
+
"Full-suite regression detected",
|
|
99
|
+
);
|
|
225
100
|
const updatedPrd = await _postVerifyDeps.revertStoriesOnFailure({
|
|
226
101
|
prd,
|
|
227
102
|
prdPath,
|
|
@@ -229,11 +104,10 @@ export async function runPostAgentVerification(opts: PostVerifyOptions): Promise
|
|
|
229
104
|
storiesToExecute,
|
|
230
105
|
allStoryMetrics,
|
|
231
106
|
featureDir,
|
|
232
|
-
diagnosticContext,
|
|
233
|
-
countsTowardEscalation:
|
|
234
|
-
priorFailure:
|
|
107
|
+
diagnosticContext: "REGRESSION: full-suite regression detected",
|
|
108
|
+
countsTowardEscalation: true,
|
|
109
|
+
priorFailure: regressionFailure,
|
|
235
110
|
});
|
|
236
|
-
|
|
237
111
|
return { passed: false, prd: updatedPrd };
|
|
238
112
|
}
|
|
239
113
|
|
|
@@ -242,22 +116,17 @@ interface RegressionGateResult {
|
|
|
242
116
|
verificationResult?: VerificationResult;
|
|
243
117
|
}
|
|
244
118
|
|
|
245
|
-
/** Run
|
|
119
|
+
/** Run full-suite regression gate. */
|
|
246
120
|
async function runRegressionGate(
|
|
247
121
|
config: NaxConfig,
|
|
248
122
|
workdir: string,
|
|
249
123
|
story: UserStory,
|
|
250
|
-
changedTestFiles: string[],
|
|
251
124
|
rectificationEnabled: boolean,
|
|
252
125
|
): Promise<RegressionGateResult> {
|
|
253
126
|
const logger = getSafeLogger();
|
|
254
127
|
const regressionGateEnabled = config.execution.regressionGate?.enabled ?? true;
|
|
255
|
-
const scopedTestsWereRun = changedTestFiles.length > 0;
|
|
256
128
|
|
|
257
|
-
if (!regressionGateEnabled
|
|
258
|
-
if (regressionGateEnabled && !scopedTestsWereRun) {
|
|
259
|
-
logger?.debug("regression-gate", "Skipping regression gate (full suite already run in scoped verification)");
|
|
260
|
-
}
|
|
129
|
+
if (!regressionGateEnabled) {
|
|
261
130
|
return { status: "skipped" };
|
|
262
131
|
}
|
|
263
132
|
|
|
@@ -286,9 +155,7 @@ async function runRegressionGate(
|
|
|
286
155
|
// Handle timeout: accept as pass if configured (BUG-026)
|
|
287
156
|
const acceptOnTimeout = config.execution.regressionGate?.acceptOnTimeout ?? true;
|
|
288
157
|
if (regressionResult.status === "TIMEOUT" && acceptOnTimeout) {
|
|
289
|
-
logger?.warn("regression-gate", "[BUG-026] Full-suite regression gate timed out (accepted as pass)"
|
|
290
|
-
reason: "Timeout is not evidence of regression — scoped verification already passed",
|
|
291
|
-
});
|
|
158
|
+
logger?.warn("regression-gate", "[BUG-026] Full-suite regression gate timed out (accepted as pass)");
|
|
292
159
|
return { status: "passed" };
|
|
293
160
|
}
|
|
294
161
|
|
|
@@ -305,7 +172,7 @@ async function runRegressionGate(
|
|
|
305
172
|
timeoutSeconds: config.execution.regressionGate.timeoutSeconds,
|
|
306
173
|
testOutput: regressionResult.output,
|
|
307
174
|
promptPrefix:
|
|
308
|
-
"# REGRESSION:
|
|
175
|
+
"# REGRESSION: Full-Suite Test Failures\n\nYour changes broke tests in the full suite. Fix these regressions.",
|
|
309
176
|
});
|
|
310
177
|
if (fixed) return { status: "passed" };
|
|
311
178
|
}
|
|
@@ -313,44 +180,14 @@ async function runRegressionGate(
|
|
|
313
180
|
return { status: "failed", verificationResult: regressionResult };
|
|
314
181
|
}
|
|
315
182
|
|
|
316
|
-
/** Check if environmental failure should trigger early escalation. */
|
|
317
|
-
function checkEnvironmentalEscalation(
|
|
318
|
-
config: NaxConfig,
|
|
319
|
-
story: UserStory,
|
|
320
|
-
prd: PRD,
|
|
321
|
-
logger: ReturnType<typeof getSafeLogger>,
|
|
322
|
-
): void {
|
|
323
|
-
const currentTier = story.routing?.modelTier || config.autoMode.escalation.tierOrder[0]?.tier;
|
|
324
|
-
const tierCfg = currentTier
|
|
325
|
-
? _postVerifyDeps.getTierConfig(currentTier, config.autoMode.escalation.tierOrder)
|
|
326
|
-
: undefined;
|
|
327
|
-
if (!tierCfg) return;
|
|
328
|
-
|
|
329
|
-
const threshold = _postVerifyDeps.getEnvironmentalEscalationThreshold(
|
|
330
|
-
tierCfg.attempts,
|
|
331
|
-
config.quality.environmentalEscalationDivisor,
|
|
332
|
-
);
|
|
333
|
-
const currentAttempts = prd.userStories.find((s) => s.id === story.id)?.attempts ?? 0;
|
|
334
|
-
if (currentAttempts >= threshold) {
|
|
335
|
-
logger?.warn("verification", "Environmental failure hit early escalation threshold", {
|
|
336
|
-
currentAttempts,
|
|
337
|
-
threshold,
|
|
338
|
-
});
|
|
339
|
-
}
|
|
340
|
-
}
|
|
341
|
-
|
|
342
183
|
/**
|
|
343
184
|
* Swappable dependencies for testing (avoids mock.module() which leaks in Bun 1.x).
|
|
344
185
|
*/
|
|
345
186
|
export const _postVerifyDeps = {
|
|
346
187
|
parseBunTestOutput,
|
|
347
|
-
parseTestOutput,
|
|
348
188
|
runVerification,
|
|
349
189
|
getExpectedFiles,
|
|
350
190
|
savePRD,
|
|
351
191
|
revertStoriesOnFailure,
|
|
352
192
|
runRectificationLoop,
|
|
353
|
-
appendProgress,
|
|
354
|
-
getTierConfig,
|
|
355
|
-
getEnvironmentalEscalationThreshold,
|
|
356
193
|
};
|
package/src/execution/runner.ts
CHANGED
|
@@ -271,6 +271,7 @@ export async function executeSequential(
|
|
|
271
271
|
hooks: ctx.hooks,
|
|
272
272
|
plugins: ctx.pluginRegistry,
|
|
273
273
|
storyStartTime,
|
|
274
|
+
storyGitRef: storyGitRef ?? undefined, // FEAT-010: per-attempt baseRef for precise smart-runner diff
|
|
274
275
|
interaction: ctx.interactionChain ?? undefined,
|
|
275
276
|
};
|
|
276
277
|
|