@nathapp/nax 0.18.1 → 0.18.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.gitlab-ci.yml +12 -6
- package/bun.lock +1 -1
- package/bunfig.toml +2 -1
- package/docker-compose.test.yml +17 -0
- package/docs/ROADMAP.md +121 -36
- package/docs/specs/verification-architecture-v2.md +343 -0
- package/nax/config.json +13 -10
- package/nax/features/smart-test-runner/plan.md +7 -0
- package/nax/features/smart-test-runner/prd.json +203 -0
- package/nax/features/smart-test-runner/progress.txt +13 -0
- package/nax/features/smart-test-runner/spec.md +7 -0
- package/nax/features/smart-test-runner/tasks.md +8 -0
- package/nax/features/v0.18.3-execution-reliability/prd.json +80 -0
- package/nax/features/v0.18.3-execution-reliability/progress.txt +3 -0
- package/package.json +2 -2
- package/src/config/defaults.ts +2 -0
- package/src/config/schema.ts +1 -0
- package/src/config/schemas.ts +24 -0
- package/src/config/types.ts +16 -1
- package/src/context/builder.ts +11 -0
- package/src/context/elements.ts +38 -1
- package/src/execution/escalation/tier-escalation.ts +28 -3
- package/src/execution/post-verify-rectification.ts +4 -2
- package/src/execution/post-verify.ts +73 -9
- package/src/execution/progress.ts +2 -0
- package/src/pipeline/stages/review.ts +5 -3
- package/src/pipeline/stages/routing.ts +14 -9
- package/src/pipeline/stages/verify.ts +54 -1
- package/src/prd/index.ts +16 -1
- package/src/prd/types.ts +33 -0
- package/src/precheck/index.ts +9 -4
- package/src/routing/strategies/llm.ts +5 -0
- package/src/verification/gate.ts +2 -1
- package/src/verification/smart-runner.ts +214 -0
- package/src/verification/types.ts +2 -0
- package/test/US-002-orchestrator.test.ts +5 -5
- package/test/context/prior-failures.test.ts +462 -0
- package/test/execution/post-verify-bug026.test.ts +443 -0
- package/test/execution/post-verify.test.ts +32 -0
- package/test/execution/structured-failure.test.ts +414 -0
- package/test/integration/logger.test.ts +1 -1
- package/test/integration/review-plugin-integration.test.ts +2 -1
- package/test/integration/story-id-in-events.test.ts +1 -1
- package/test/unit/config/smart-runner-flag.test.ts +249 -0
- package/test/unit/pipeline/routing-partial-override.test.ts +141 -0
- package/test/unit/pipeline/verify-smart-runner.test.ts +344 -0
- package/test/unit/prd-get-next-story.test.ts +28 -0
- package/test/unit/routing.test.ts +102 -0
- package/test/unit/smart-test-runner.test.ts +512 -0
- package/test/unit/verification/smart-runner.test.ts +246 -0
|
@@ -10,7 +10,7 @@ import type { NaxConfig } from "../config";
|
|
|
10
10
|
import { resolveModel } from "../config";
|
|
11
11
|
import { getSafeLogger } from "../logger";
|
|
12
12
|
import type { StoryMetrics } from "../metrics";
|
|
13
|
-
import type { PRD, UserStory } from "../prd";
|
|
13
|
+
import type { PRD, StructuredFailure, UserStory } from "../prd";
|
|
14
14
|
import { getExpectedFiles, savePRD } from "../prd";
|
|
15
15
|
import { appendProgress } from "./progress";
|
|
16
16
|
import { type RectificationState, createRectificationPrompt, shouldRetryRectification } from "./rectification";
|
|
@@ -147,6 +147,7 @@ export interface RevertStoriesOptions {
|
|
|
147
147
|
featureDir?: string;
|
|
148
148
|
diagnosticContext: string;
|
|
149
149
|
countsTowardEscalation: boolean;
|
|
150
|
+
priorFailure?: StructuredFailure;
|
|
150
151
|
}
|
|
151
152
|
|
|
152
153
|
/** Revert stories to pending on verification failure and save PRD. */
|
|
@@ -160,12 +161,13 @@ export async function revertStoriesOnFailure(opts: RevertStoriesOptions): Promis
|
|
|
160
161
|
}
|
|
161
162
|
}
|
|
162
163
|
|
|
163
|
-
// Revert stories to pending with diagnostic context
|
|
164
|
+
// Revert stories to pending with diagnostic context and priorFailures
|
|
164
165
|
opts.prd.userStories = opts.prd.userStories.map((s) =>
|
|
165
166
|
storyIds.has(s.id)
|
|
166
167
|
? {
|
|
167
168
|
...s,
|
|
168
169
|
priorErrors: [...(s.priorErrors || []), opts.diagnosticContext],
|
|
170
|
+
priorFailures: opts.priorFailure ? [...(s.priorFailures || []), opts.priorFailure] : s.priorFailures,
|
|
169
171
|
status: "pending" as const,
|
|
170
172
|
passes: false,
|
|
171
173
|
}
|
|
@@ -8,13 +8,42 @@ import { spawn } from "bun";
|
|
|
8
8
|
import type { NaxConfig } from "../config";
|
|
9
9
|
import { getSafeLogger } from "../logger";
|
|
10
10
|
import type { StoryMetrics } from "../metrics";
|
|
11
|
-
import type { PRD, UserStory } from "../prd";
|
|
11
|
+
import type { PRD, StructuredFailure, UserStory, VerificationStage } from "../prd";
|
|
12
12
|
import { getExpectedFiles, savePRD } from "../prd";
|
|
13
|
+
import type { TestFailure, VerificationResult } from "../verification";
|
|
14
|
+
import { parseBunTestOutput } from "../verification";
|
|
13
15
|
import { getTierConfig } from "./escalation";
|
|
14
16
|
import { revertStoriesOnFailure, runRectificationLoop } from "./post-verify-rectification";
|
|
15
17
|
import { appendProgress } from "./progress";
|
|
16
18
|
import { getEnvironmentalEscalationThreshold, parseTestOutput, runVerification } from "./verification";
|
|
17
19
|
|
|
20
|
+
/**
 * Assemble the StructuredFailure record that is stored on a story after a
 * failed verification.
 *
 * @param story - Story that failed; its `attempts` counter and cached routing tier are read.
 * @param stage - Pipeline stage the failure is attributed to.
 * @param verificationResult - Outcome of the verification run. Its `output` is parsed
 *   only when the status is "TEST_FAILURE" and output is non-empty.
 * @param summary - Short description of what failed.
 * @returns A failure record stamped with the current time as an ISO string.
 */
function buildStructuredFailure(
  story: UserStory,
  stage: VerificationStage,
  verificationResult: VerificationResult,
  summary: string,
): StructuredFailure {
  const { status, output } = verificationResult;

  // Stays undefined unless the run was a test failure that yielded at least one parsed failure.
  let parsedFailures: StructuredFailure["testFailures"];
  if (status === "TEST_FAILURE" && output) {
    const extracted = parseBunTestOutput(output).failures.map(({ file, testName, error, stackTrace }) => ({
      file,
      testName,
      error,
      stackTrace,
    }));
    if (extracted.length > 0) {
      parsedFailures = extracted;
    }
  }

  const previousAttempts = story.attempts ?? 0;
  return {
    attempt: previousAttempts + 1,
    modelTier: story.routing?.modelTier ?? "unknown",
    stage,
    summary,
    testFailures: parsedFailures,
    timestamp: new Date().toISOString(),
  };
}
|
|
46
|
+
|
|
18
47
|
/** Get test files changed since a git ref. Returns empty array if detection fails. */
|
|
19
48
|
async function getChangedTestFiles(workdir: string, gitRef?: string): Promise<string[]> {
|
|
20
49
|
if (!gitRef) return [];
|
|
@@ -122,12 +151,30 @@ export async function runPostAgentVerification(opts: PostVerifyOptions): Promise
|
|
|
122
151
|
}
|
|
123
152
|
|
|
124
153
|
// Regression Gate (BUG-009): run full suite after scoped tests pass
|
|
125
|
-
const
|
|
126
|
-
|
|
154
|
+
const regressionGateResult = await runRegressionGate(
|
|
155
|
+
config,
|
|
156
|
+
workdir,
|
|
157
|
+
story,
|
|
158
|
+
changedTestFiles,
|
|
159
|
+
rectificationEnabled,
|
|
160
|
+
);
|
|
161
|
+
if (regressionGateResult.status === "passed" || regressionGateResult.status === "skipped") {
|
|
127
162
|
return { passed: true, prd };
|
|
128
163
|
}
|
|
129
164
|
|
|
130
|
-
// Regression failed -- revert stories
|
|
165
|
+
// Regression failed -- build StructuredFailure and revert stories
|
|
166
|
+
// runRegressionGate always returns verificationResult together with status "failed"; the fallback below is purely defensive
|
|
167
|
+
const regressionVerificationResult = regressionGateResult.verificationResult ?? {
|
|
168
|
+
status: "TEST_FAILURE" as const,
|
|
169
|
+
success: false,
|
|
170
|
+
countsTowardEscalation: true,
|
|
171
|
+
};
|
|
172
|
+
const regressionFailure = buildStructuredFailure(
|
|
173
|
+
story,
|
|
174
|
+
"regression",
|
|
175
|
+
regressionVerificationResult,
|
|
176
|
+
"Full-suite regression detected",
|
|
177
|
+
);
|
|
131
178
|
const updatedPrd = await revertStoriesOnFailure({
|
|
132
179
|
prd,
|
|
133
180
|
prdPath,
|
|
@@ -137,6 +184,7 @@ export async function runPostAgentVerification(opts: PostVerifyOptions): Promise
|
|
|
137
184
|
featureDir,
|
|
138
185
|
diagnosticContext: "REGRESSION: full-suite regression detected",
|
|
139
186
|
countsTowardEscalation: true,
|
|
187
|
+
priorFailure: regressionFailure,
|
|
140
188
|
});
|
|
141
189
|
return { passed: false, prd: updatedPrd };
|
|
142
190
|
}
|
|
@@ -173,6 +221,7 @@ export async function runPostAgentVerification(opts: PostVerifyOptions): Promise
|
|
|
173
221
|
|
|
174
222
|
// Revert stories and save
|
|
175
223
|
const diagnosticContext = verificationResult.error || `Verification failed: ${verificationResult.status}`;
|
|
224
|
+
const verifyFailure = buildStructuredFailure(story, "verify", verificationResult, diagnosticContext);
|
|
176
225
|
const updatedPrd = await revertStoriesOnFailure({
|
|
177
226
|
prd,
|
|
178
227
|
prdPath,
|
|
@@ -182,11 +231,17 @@ export async function runPostAgentVerification(opts: PostVerifyOptions): Promise
|
|
|
182
231
|
featureDir,
|
|
183
232
|
diagnosticContext,
|
|
184
233
|
countsTowardEscalation: verificationResult.countsTowardEscalation ?? false,
|
|
234
|
+
priorFailure: verifyFailure,
|
|
185
235
|
});
|
|
186
236
|
|
|
187
237
|
return { passed: false, prd: updatedPrd };
|
|
188
238
|
}
|
|
189
239
|
|
|
240
|
+
/**
 * Outcome of the full-suite regression gate.
 *
 * Modelled as a discriminated union on `status` so that a "failed" outcome
 * always carries the verification result that produced it, while "passed"
 * and "skipped" carry none. The `verificationResult?: undefined` member keeps
 * un-narrowed property access valid for existing callers.
 */
type RegressionGateResult =
  | { status: "passed" | "skipped"; verificationResult?: undefined }
  | { status: "failed"; verificationResult: VerificationResult };
|
|
244
|
+
|
|
190
245
|
/** Run regression gate (full suite) after scoped tests pass. */
|
|
191
246
|
async function runRegressionGate(
|
|
192
247
|
config: NaxConfig,
|
|
@@ -194,7 +249,7 @@ async function runRegressionGate(
|
|
|
194
249
|
story: UserStory,
|
|
195
250
|
changedTestFiles: string[],
|
|
196
251
|
rectificationEnabled: boolean,
|
|
197
|
-
): Promise<
|
|
252
|
+
): Promise<RegressionGateResult> {
|
|
198
253
|
const logger = getSafeLogger();
|
|
199
254
|
const regressionGateEnabled = config.execution.regressionGate?.enabled ?? true;
|
|
200
255
|
const scopedTestsWereRun = changedTestFiles.length > 0;
|
|
@@ -203,7 +258,7 @@ async function runRegressionGate(
|
|
|
203
258
|
if (regressionGateEnabled && !scopedTestsWereRun) {
|
|
204
259
|
logger?.debug("regression-gate", "Skipping regression gate (full suite already run in scoped verification)");
|
|
205
260
|
}
|
|
206
|
-
return "skipped";
|
|
261
|
+
return { status: "skipped" };
|
|
207
262
|
}
|
|
208
263
|
|
|
209
264
|
logger?.info("regression-gate", "Running full-suite regression gate");
|
|
@@ -225,7 +280,16 @@ async function runRegressionGate(
|
|
|
225
280
|
|
|
226
281
|
if (regressionResult.success) {
|
|
227
282
|
logger?.info("regression-gate", "Full-suite regression gate passed");
|
|
228
|
-
return "passed";
|
|
283
|
+
return { status: "passed" };
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
// Handle timeout: accept as pass if configured (BUG-026)
|
|
287
|
+
const acceptOnTimeout = config.execution.regressionGate?.acceptOnTimeout ?? true;
|
|
288
|
+
if (regressionResult.status === "TIMEOUT" && acceptOnTimeout) {
|
|
289
|
+
logger?.warn("regression-gate", "[BUG-026] Full-suite regression gate timed out (accepted as pass)", {
|
|
290
|
+
reason: "Timeout is not evidence of regression — scoped verification already passed",
|
|
291
|
+
});
|
|
292
|
+
return { status: "passed" };
|
|
229
293
|
}
|
|
230
294
|
|
|
231
295
|
logger?.warn("regression-gate", "Full-suite regression detected", { status: regressionResult.status });
|
|
@@ -243,10 +307,10 @@ async function runRegressionGate(
|
|
|
243
307
|
promptPrefix:
|
|
244
308
|
"# REGRESSION: Cross-Story Test Failures\n\nYour changes passed scoped tests but broke unrelated tests. Fix these regressions.",
|
|
245
309
|
});
|
|
246
|
-
if (fixed) return "passed";
|
|
310
|
+
if (fixed) return { status: "passed" };
|
|
247
311
|
}
|
|
248
312
|
|
|
249
|
-
return "failed";
|
|
313
|
+
return { status: "failed", verificationResult: regressionResult };
|
|
250
314
|
}
|
|
251
315
|
|
|
252
316
|
/** Check if environmental failure should trigger early escalation. */
|
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
* Append timestamped entries to progress.txt after story completion.
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
|
+
import { mkdirSync } from "node:fs";
|
|
7
8
|
import { join } from "node:path";
|
|
8
9
|
import type { StoryStatus } from "../prd";
|
|
9
10
|
|
|
@@ -14,6 +15,7 @@ export async function appendProgress(
|
|
|
14
15
|
status: StoryStatus,
|
|
15
16
|
message: string,
|
|
16
17
|
): Promise<void> {
|
|
18
|
+
mkdirSync(featureDir, { recursive: true });
|
|
17
19
|
const progressPath = join(featureDir, "progress.txt");
|
|
18
20
|
const timestamp = new Date().toISOString();
|
|
19
21
|
const entry = `[${timestamp}] ${storyId} — ${status.toUpperCase()} — ${message}\n`;
|
|
@@ -83,13 +83,15 @@ export const reviewStage: PipelineStage = {
|
|
|
83
83
|
const reviewResult = await runReview(ctx.config.review, ctx.workdir, ctx.config.execution);
|
|
84
84
|
ctx.reviewResult = reviewResult;
|
|
85
85
|
|
|
86
|
-
//
|
|
86
|
+
// BUG-030: Review failure (lint/typecheck) should escalate, not hard-fail.
|
|
87
|
+
// Lint/typecheck errors are auto-fixable — give the agent a retry with error context.
|
|
88
|
+
// Only plugin reviewer rejections are hard failures.
|
|
87
89
|
if (!reviewResult.success) {
|
|
88
|
-
logger.
|
|
90
|
+
logger.warn("review", "Review failed (built-in checks) — escalating for retry", {
|
|
89
91
|
reason: reviewResult.failureReason,
|
|
90
92
|
storyId: ctx.story.id,
|
|
91
93
|
});
|
|
92
|
-
return { action: "
|
|
94
|
+
return { action: "escalate", reason: `Review failed: ${reviewResult.failureReason}` };
|
|
93
95
|
}
|
|
94
96
|
|
|
95
97
|
// Run plugin reviewers if any are registered
|
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
* Routing Stage
|
|
3
3
|
*
|
|
4
4
|
* Classifies story complexity and determines model tier + test strategy.
|
|
5
|
-
* Uses cached complexity/testStrategy from story if available
|
|
6
|
-
*
|
|
5
|
+
* Uses cached complexity/testStrategy/modelTier from story if available.
|
|
6
|
+
* modelTier: uses escalated tier if explicitly set (BUG-032), otherwise derives from config.
|
|
7
7
|
*
|
|
8
8
|
* @returns
|
|
9
9
|
* - `continue`: Routing determined, proceed to next stage
|
|
@@ -30,17 +30,22 @@ export const routingStage: PipelineStage = {
|
|
|
30
30
|
async execute(ctx: PipelineContext): Promise<StageResult> {
|
|
31
31
|
const logger = getLogger();
|
|
32
32
|
|
|
33
|
-
// If story has cached routing, use
|
|
33
|
+
// If story has cached routing, use cached values (escalated modelTier takes priority)
|
|
34
34
|
// Otherwise, perform fresh classification
|
|
35
35
|
let routing: { complexity: string; testStrategy: string; modelTier: string; reasoning?: string };
|
|
36
36
|
if (ctx.story.routing) {
|
|
37
|
-
// Use cached complexity/testStrategy
|
|
37
|
+
// Use cached complexity/testStrategy/modelTier
|
|
38
38
|
routing = await routeStory(ctx.story, { config: ctx.config }, ctx.workdir, ctx.plugins);
|
|
39
|
-
// Override with cached
|
|
40
|
-
routing.complexity = ctx.story.routing.complexity;
|
|
41
|
-
routing.testStrategy = ctx.story.routing.testStrategy;
|
|
42
|
-
//
|
|
43
|
-
|
|
39
|
+
// Override with cached values only when they are actually set
|
|
40
|
+
if (ctx.story.routing?.complexity) routing.complexity = ctx.story.routing.complexity;
|
|
41
|
+
if (ctx.story.routing?.testStrategy) routing.testStrategy = ctx.story.routing.testStrategy;
|
|
42
|
+
// BUG-032: Use escalated modelTier if explicitly set (by handleTierEscalation),
|
|
43
|
+
// otherwise derive from complexity + current config
|
|
44
|
+
if (ctx.story.routing?.modelTier) {
|
|
45
|
+
routing.modelTier = ctx.story.routing.modelTier;
|
|
46
|
+
} else {
|
|
47
|
+
routing.modelTier = complexityToModelTier(routing.complexity as import("../../config").Complexity, ctx.config);
|
|
48
|
+
}
|
|
44
49
|
} else {
|
|
45
50
|
// Fresh classification
|
|
46
51
|
routing = await routeStory(ctx.story, { config: ctx.config }, ctx.workdir, ctx.plugins);
|
|
@@ -20,10 +20,24 @@
|
|
|
20
20
|
* ```
|
|
21
21
|
*/
|
|
22
22
|
|
|
23
|
+
import type { SmartTestRunnerConfig } from "../../config/types";
|
|
23
24
|
import { getLogger } from "../../logger";
|
|
24
25
|
import { regression } from "../../verification/gate";
|
|
26
|
+
import { _smartRunnerDeps } from "../../verification/smart-runner";
|
|
25
27
|
import type { PipelineContext, PipelineStage, StageResult } from "../types";
|
|
26
28
|
|
|
29
|
+
/** Smart-runner settings used when the config omits the option or sets it to `true`. */
const DEFAULT_SMART_RUNNER_CONFIG: SmartTestRunnerConfig = {
  enabled: true,
  testFilePatterns: ["test/**/*.test.ts"],
  fallback: "import-grep",
};

/**
 * Normalise the `smartTestRunner` config value into a complete config object.
 *
 * @param val - `undefined` or `true` selects the defaults, `false` selects the
 *   defaults with `enabled: false`, and an object is merged over the defaults.
 * @returns A config in which `enabled`, `testFilePatterns` and `fallback` are always defined.
 */
function coerceSmartTestRunner(val: boolean | SmartTestRunnerConfig | undefined): SmartTestRunnerConfig {
  if (val === undefined || val === true) return DEFAULT_SMART_RUNNER_CONFIG;
  if (val === false) return { ...DEFAULT_SMART_RUNNER_CONFIG, enabled: false };

  // Merge rather than return `val` verbatim: a partial object (e.g. `{ enabled: true }`)
  // would otherwise hand `undefined` patterns to the import-grep pass and skip the fallback.
  return {
    ...DEFAULT_SMART_RUNNER_CONFIG,
    ...val,
    enabled: val.enabled ?? DEFAULT_SMART_RUNNER_CONFIG.enabled,
    testFilePatterns: val.testFilePatterns ?? DEFAULT_SMART_RUNNER_CONFIG.testFilePatterns,
    fallback: val.fallback ?? DEFAULT_SMART_RUNNER_CONFIG.fallback,
  };
}
|
|
40
|
+
|
|
27
41
|
export const verifyStage: PipelineStage = {
|
|
28
42
|
name: "verify",
|
|
29
43
|
enabled: () => true,
|
|
@@ -46,11 +60,50 @@ export const verifyStage: PipelineStage = {
|
|
|
46
60
|
|
|
47
61
|
logger.info("verify", "Running verification", { storyId: ctx.story.id });
|
|
48
62
|
|
|
63
|
+
// Determine effective test command (smart runner or full suite)
|
|
64
|
+
let effectiveCommand = testCommand;
|
|
65
|
+
const smartRunnerConfig = coerceSmartTestRunner(ctx.config.execution.smartTestRunner);
|
|
66
|
+
|
|
67
|
+
if (smartRunnerConfig.enabled) {
|
|
68
|
+
const sourceFiles = await _smartRunnerDeps.getChangedSourceFiles(ctx.workdir);
|
|
69
|
+
|
|
70
|
+
// Pass 1: path convention mapping
|
|
71
|
+
const pass1Files = await _smartRunnerDeps.mapSourceToTests(sourceFiles, ctx.workdir);
|
|
72
|
+
if (pass1Files.length > 0) {
|
|
73
|
+
logger.info("verify", `[smart-runner] Pass 1: path convention matched ${pass1Files.length} test files`, {
|
|
74
|
+
storyId: ctx.story.id,
|
|
75
|
+
});
|
|
76
|
+
effectiveCommand = _smartRunnerDeps.buildSmartTestCommand(pass1Files, testCommand);
|
|
77
|
+
} else if (smartRunnerConfig.fallback === "import-grep") {
|
|
78
|
+
// Pass 2: import-grep fallback
|
|
79
|
+
const pass2Files = await _smartRunnerDeps.importGrepFallback(
|
|
80
|
+
sourceFiles,
|
|
81
|
+
ctx.workdir,
|
|
82
|
+
smartRunnerConfig.testFilePatterns,
|
|
83
|
+
);
|
|
84
|
+
if (pass2Files.length > 0) {
|
|
85
|
+
logger.info("verify", `[smart-runner] Pass 2: import-grep matched ${pass2Files.length} test files`, {
|
|
86
|
+
storyId: ctx.story.id,
|
|
87
|
+
});
|
|
88
|
+
effectiveCommand = _smartRunnerDeps.buildSmartTestCommand(pass2Files, testCommand);
|
|
89
|
+
} else {
|
|
90
|
+
logger.info("verify", "[smart-runner] No mapped tests — falling back to full suite", {
|
|
91
|
+
storyId: ctx.story.id,
|
|
92
|
+
});
|
|
93
|
+
}
|
|
94
|
+
} else {
|
|
95
|
+
logger.info("verify", "[smart-runner] No mapped tests — falling back to full suite", {
|
|
96
|
+
storyId: ctx.story.id,
|
|
97
|
+
});
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
|
|
49
101
|
// Use unified regression gate (includes 2s wait for agent process cleanup)
|
|
50
102
|
const result = await regression({
|
|
51
103
|
workdir: ctx.workdir,
|
|
52
|
-
command:
|
|
104
|
+
command: effectiveCommand,
|
|
53
105
|
timeoutSeconds: ctx.config.execution.verificationTimeoutSeconds,
|
|
106
|
+
acceptOnTimeout: ctx.config.execution.regressionGate?.acceptOnTimeout ?? true,
|
|
54
107
|
});
|
|
55
108
|
|
|
56
109
|
// HARD FAILURE: Tests must pass for story to be marked complete
|
package/src/prd/index.ts
CHANGED
|
@@ -6,7 +6,16 @@ import { existsSync, statSync } from "node:fs";
|
|
|
6
6
|
import type { FailureCategory } from "../tdd/types";
|
|
7
7
|
import type { PRD, UserStory } from "./types";
|
|
8
8
|
|
|
9
|
-
export type {
|
|
9
|
+
export type {
|
|
10
|
+
PRD,
|
|
11
|
+
UserStory,
|
|
12
|
+
StoryRouting,
|
|
13
|
+
StoryStatus,
|
|
14
|
+
EscalationAttempt,
|
|
15
|
+
StructuredFailure,
|
|
16
|
+
TestFailureContext,
|
|
17
|
+
VerificationStage,
|
|
18
|
+
} from "./types";
|
|
10
19
|
export { isStalled, markStoryAsBlocked, generateHumanHaltSummary, getContextFiles, getExpectedFiles } from "./types";
|
|
11
20
|
export type { FailureCategory } from "../tdd/types";
|
|
12
21
|
|
|
@@ -36,6 +45,7 @@ export async function loadPRD(path: string): Promise<PRD> {
|
|
|
36
45
|
for (const story of prd.userStories) {
|
|
37
46
|
story.attempts = story.attempts ?? 0;
|
|
38
47
|
story.priorErrors = story.priorErrors ?? [];
|
|
48
|
+
story.priorFailures = story.priorFailures ?? [];
|
|
39
49
|
story.escalations = story.escalations ?? [];
|
|
40
50
|
story.dependencies = story.dependencies ?? [];
|
|
41
51
|
story.tags = story.tags ?? [];
|
|
@@ -73,6 +83,11 @@ export function getNextStory(prd: PRD, currentStoryId?: string | null, maxRetrie
|
|
|
73
83
|
if (currentStory && currentStory.status === "failed" && (currentStory.attempts ?? 0) <= maxRetries) {
|
|
74
84
|
return currentStory;
|
|
75
85
|
}
|
|
86
|
+
// BUG-029: After tier escalation, story is set to "pending" (not "failed").
|
|
87
|
+
// Prioritize current story if it was escalated (pending + has prior attempts).
|
|
88
|
+
if (currentStory && currentStory.status === "pending" && (currentStory.attempts ?? 0) > 0) {
|
|
89
|
+
return currentStory;
|
|
90
|
+
}
|
|
76
91
|
}
|
|
77
92
|
|
|
78
93
|
const completedIds = new Set(
|
package/src/prd/types.ts
CHANGED
|
@@ -11,6 +11,37 @@ import type { FailureCategory } from "../tdd/types";
|
|
|
11
11
|
/** User story status */
|
|
12
12
|
export type StoryStatus = "pending" | "in-progress" | "passed" | "failed" | "skipped" | "blocked" | "paused";
|
|
13
13
|
|
|
14
|
+
/** Verification stage where failure occurred */
|
|
15
|
+
export type VerificationStage = "verify" | "review" | "regression" | "rectification" | "agent-session" | "escalation";
|
|
16
|
+
|
|
17
|
+
/** Test failure context from parsed test output */
|
|
18
|
+
export interface TestFailureContext {
|
|
19
|
+
/** Test file path */
|
|
20
|
+
file: string;
|
|
21
|
+
/** Full test name (including describe blocks) */
|
|
22
|
+
testName: string;
|
|
23
|
+
/** Error message */
|
|
24
|
+
error: string;
|
|
25
|
+
/** Stack trace lines */
|
|
26
|
+
stackTrace: string[];
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
/** Structured failure context for escalated tiers */
|
|
30
|
+
export interface StructuredFailure {
|
|
31
|
+
/** Attempt number when failure occurred */
|
|
32
|
+
attempt: number;
|
|
33
|
+
/** Model tier that was running */
|
|
34
|
+
modelTier: string;
|
|
35
|
+
/** Stage where failure occurred */
|
|
36
|
+
stage: VerificationStage;
|
|
37
|
+
/** Summary of what failed */
|
|
38
|
+
summary: string;
|
|
39
|
+
/** Parsed test failures (if applicable) */
|
|
40
|
+
testFailures?: TestFailureContext[];
|
|
41
|
+
/** ISO timestamp when failure was recorded */
|
|
42
|
+
timestamp: string;
|
|
43
|
+
}
|
|
44
|
+
|
|
14
45
|
/** Routing metadata per story */
|
|
15
46
|
export interface StoryRouting {
|
|
16
47
|
complexity: Complexity;
|
|
@@ -71,6 +102,8 @@ export interface UserStory {
|
|
|
71
102
|
expectedFiles?: string[];
|
|
72
103
|
/** Prior error messages from failed attempts */
|
|
73
104
|
priorErrors?: string[];
|
|
105
|
+
/** Structured failure context for escalated tiers */
|
|
106
|
+
priorFailures?: StructuredFailure[];
|
|
74
107
|
/** Custom context strings */
|
|
75
108
|
customContext?: string[];
|
|
76
109
|
/** Category of the last failure (set when story is marked failed) */
|
package/src/precheck/index.ts
CHANGED
|
@@ -62,6 +62,8 @@ export interface PrecheckOptions {
|
|
|
62
62
|
format?: "human" | "json";
|
|
63
63
|
/** Working directory */
|
|
64
64
|
workdir: string;
|
|
65
|
+
/** Suppress console output (for programmatic use) */
|
|
66
|
+
silent?: boolean;
|
|
65
67
|
}
|
|
66
68
|
|
|
67
69
|
/** Extended result with exit code for CLI usage */
|
|
@@ -87,6 +89,7 @@ export async function runPrecheck(
|
|
|
87
89
|
): Promise<PrecheckResultWithCode> {
|
|
88
90
|
const workdir = options?.workdir || process.cwd();
|
|
89
91
|
const format = options?.format || "human";
|
|
92
|
+
const silent = options?.silent ?? false;
|
|
90
93
|
|
|
91
94
|
const passed: Check[] = [];
|
|
92
95
|
const blockers: Check[] = [];
|
|
@@ -196,10 +199,12 @@ export async function runPrecheck(
|
|
|
196
199
|
exitCode = hasPRDError ? EXIT_CODES.INVALID_PRD : EXIT_CODES.BLOCKER;
|
|
197
200
|
}
|
|
198
201
|
|
|
199
|
-
if (
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
202
|
+
if (!silent) {
|
|
203
|
+
if (format === "json") {
|
|
204
|
+
console.log(JSON.stringify(output, null, 2));
|
|
205
|
+
} else {
|
|
206
|
+
printSummary(output);
|
|
207
|
+
}
|
|
203
208
|
}
|
|
204
209
|
|
|
205
210
|
return {
|
|
@@ -36,6 +36,11 @@ export function getCacheSize(): number {
|
|
|
36
36
|
return cachedDecisions.size;
|
|
37
37
|
}
|
|
38
38
|
|
|
39
|
+
/** Clear routing cache entry for a specific story (used on tier escalation) */
|
|
40
|
+
export function clearCacheForStory(storyId: string): void {
|
|
41
|
+
cachedDecisions.delete(storyId);
|
|
42
|
+
}
|
|
43
|
+
|
|
39
44
|
/** Evict oldest entry when cache is full (LRU) */
|
|
40
45
|
function evictOldest(): void {
|
|
41
46
|
const firstKey = cachedDecisions.keys().next().value;
|
package/src/verification/gate.ts
CHANGED
|
@@ -95,9 +95,10 @@ async function runVerificationCore(options: VerificationGateOptions): Promise<Ve
|
|
|
95
95
|
});
|
|
96
96
|
|
|
97
97
|
if (execution.timeout) {
|
|
98
|
+
const success = options.acceptOnTimeout ?? false;
|
|
98
99
|
return {
|
|
99
100
|
status: "TIMEOUT",
|
|
100
|
-
success
|
|
101
|
+
success,
|
|
101
102
|
countsTowardEscalation: false, // Timeout is environmental, not code failure
|
|
102
103
|
error: execution.error,
|
|
103
104
|
output: execution.output,
|