@nathapp/nax 0.18.1 → 0.18.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/.gitlab-ci.yml +12 -6
  2. package/bun.lock +1 -1
  3. package/bunfig.toml +2 -1
  4. package/docker-compose.test.yml +17 -0
  5. package/docs/ROADMAP.md +121 -36
  6. package/docs/specs/verification-architecture-v2.md +343 -0
  7. package/nax/config.json +13 -10
  8. package/nax/features/smart-test-runner/plan.md +7 -0
  9. package/nax/features/smart-test-runner/prd.json +203 -0
  10. package/nax/features/smart-test-runner/progress.txt +13 -0
  11. package/nax/features/smart-test-runner/spec.md +7 -0
  12. package/nax/features/smart-test-runner/tasks.md +8 -0
  13. package/nax/features/v0.18.3-execution-reliability/prd.json +80 -0
  14. package/nax/features/v0.18.3-execution-reliability/progress.txt +3 -0
  15. package/package.json +2 -2
  16. package/src/config/defaults.ts +2 -0
  17. package/src/config/schema.ts +1 -0
  18. package/src/config/schemas.ts +24 -0
  19. package/src/config/types.ts +16 -1
  20. package/src/context/builder.ts +11 -0
  21. package/src/context/elements.ts +38 -1
  22. package/src/execution/escalation/tier-escalation.ts +28 -3
  23. package/src/execution/post-verify-rectification.ts +4 -2
  24. package/src/execution/post-verify.ts +73 -9
  25. package/src/execution/progress.ts +2 -0
  26. package/src/pipeline/stages/review.ts +5 -3
  27. package/src/pipeline/stages/routing.ts +14 -9
  28. package/src/pipeline/stages/verify.ts +54 -1
  29. package/src/prd/index.ts +16 -1
  30. package/src/prd/types.ts +33 -0
  31. package/src/precheck/index.ts +9 -4
  32. package/src/routing/strategies/llm.ts +5 -0
  33. package/src/verification/gate.ts +2 -1
  34. package/src/verification/smart-runner.ts +214 -0
  35. package/src/verification/types.ts +2 -0
  36. package/test/US-002-orchestrator.test.ts +5 -5
  37. package/test/context/prior-failures.test.ts +462 -0
  38. package/test/execution/post-verify-bug026.test.ts +443 -0
  39. package/test/execution/post-verify.test.ts +32 -0
  40. package/test/execution/structured-failure.test.ts +414 -0
  41. package/test/integration/logger.test.ts +1 -1
  42. package/test/integration/review-plugin-integration.test.ts +2 -1
  43. package/test/integration/story-id-in-events.test.ts +1 -1
  44. package/test/unit/config/smart-runner-flag.test.ts +249 -0
  45. package/test/unit/pipeline/routing-partial-override.test.ts +141 -0
  46. package/test/unit/pipeline/verify-smart-runner.test.ts +344 -0
  47. package/test/unit/prd-get-next-story.test.ts +28 -0
  48. package/test/unit/routing.test.ts +102 -0
  49. package/test/unit/smart-test-runner.test.ts +512 -0
  50. package/test/unit/verification/smart-runner.test.ts +246 -0
package/src/execution/post-verify-rectification.ts CHANGED
@@ -10,7 +10,7 @@ import type { NaxConfig } from "../config";
10
10
  import { resolveModel } from "../config";
11
11
  import { getSafeLogger } from "../logger";
12
12
  import type { StoryMetrics } from "../metrics";
13
- import type { PRD, UserStory } from "../prd";
13
+ import type { PRD, StructuredFailure, UserStory } from "../prd";
14
14
  import { getExpectedFiles, savePRD } from "../prd";
15
15
  import { appendProgress } from "./progress";
16
16
  import { type RectificationState, createRectificationPrompt, shouldRetryRectification } from "./rectification";
@@ -147,6 +147,7 @@ export interface RevertStoriesOptions {
147
147
  featureDir?: string;
148
148
  diagnosticContext: string;
149
149
  countsTowardEscalation: boolean;
150
+ priorFailure?: StructuredFailure;
150
151
  }
151
152
 
152
153
  /** Revert stories to pending on verification failure and save PRD. */
@@ -160,12 +161,13 @@ export async function revertStoriesOnFailure(opts: RevertStoriesOptions): Promis
160
161
  }
161
162
  }
162
163
 
163
- // Revert stories to pending with diagnostic context
164
+ // Revert stories to pending with diagnostic context and priorFailures
164
165
  opts.prd.userStories = opts.prd.userStories.map((s) =>
165
166
  storyIds.has(s.id)
166
167
  ? {
167
168
  ...s,
168
169
  priorErrors: [...(s.priorErrors || []), opts.diagnosticContext],
170
+ priorFailures: opts.priorFailure ? [...(s.priorFailures || []), opts.priorFailure] : s.priorFailures,
169
171
  status: "pending" as const,
170
172
  passes: false,
171
173
  }
package/src/execution/post-verify.ts CHANGED
@@ -8,13 +8,42 @@ import { spawn } from "bun";
8
8
  import type { NaxConfig } from "../config";
9
9
  import { getSafeLogger } from "../logger";
10
10
  import type { StoryMetrics } from "../metrics";
11
- import type { PRD, UserStory } from "../prd";
11
+ import type { PRD, StructuredFailure, UserStory, VerificationStage } from "../prd";
12
12
  import { getExpectedFiles, savePRD } from "../prd";
13
+ import type { TestFailure, VerificationResult } from "../verification";
14
+ import { parseBunTestOutput } from "../verification";
13
15
  import { getTierConfig } from "./escalation";
14
16
  import { revertStoriesOnFailure, runRectificationLoop } from "./post-verify-rectification";
15
17
  import { appendProgress } from "./progress";
16
18
  import { getEnvironmentalEscalationThreshold, parseTestOutput, runVerification } from "./verification";
17
19
 
20
+ /** Build a StructuredFailure from verification result and test output. */
21
+ function buildStructuredFailure(
22
+ story: UserStory,
23
+ stage: VerificationStage,
24
+ verificationResult: VerificationResult,
25
+ summary: string,
26
+ ): StructuredFailure {
27
+ const testFailures =
28
+ verificationResult.status === "TEST_FAILURE" && verificationResult.output
29
+ ? parseBunTestOutput(verificationResult.output).failures.map((f) => ({
30
+ file: f.file,
31
+ testName: f.testName,
32
+ error: f.error,
33
+ stackTrace: f.stackTrace,
34
+ }))
35
+ : undefined;
36
+
37
+ return {
38
+ attempt: (story.attempts ?? 0) + 1,
39
+ modelTier: story.routing?.modelTier ?? "unknown",
40
+ stage,
41
+ summary,
42
+ testFailures: testFailures && testFailures.length > 0 ? testFailures : undefined,
43
+ timestamp: new Date().toISOString(),
44
+ };
45
+ }
46
+
18
47
  /** Get test files changed since a git ref. Returns empty array if detection fails. */
19
48
  async function getChangedTestFiles(workdir: string, gitRef?: string): Promise<string[]> {
20
49
  if (!gitRef) return [];
@@ -122,12 +151,30 @@ export async function runPostAgentVerification(opts: PostVerifyOptions): Promise
122
151
  }
123
152
 
124
153
  // Regression Gate (BUG-009): run full suite after scoped tests pass
125
- const regressionResult = await runRegressionGate(config, workdir, story, changedTestFiles, rectificationEnabled);
126
- if (regressionResult === "passed" || regressionResult === "skipped") {
154
+ const regressionGateResult = await runRegressionGate(
155
+ config,
156
+ workdir,
157
+ story,
158
+ changedTestFiles,
159
+ rectificationEnabled,
160
+ );
161
+ if (regressionGateResult.status === "passed" || regressionGateResult.status === "skipped") {
127
162
  return { passed: true, prd };
128
163
  }
129
164
 
130
- // Regression failed -- revert stories
165
+ // Regression failed -- build StructuredFailure and revert stories
166
+ // verificationResult is always set when status === "failed" (see RegressionGateResult)
167
+ const regressionVerificationResult = regressionGateResult.verificationResult ?? {
168
+ status: "TEST_FAILURE" as const,
169
+ success: false,
170
+ countsTowardEscalation: true,
171
+ };
172
+ const regressionFailure = buildStructuredFailure(
173
+ story,
174
+ "regression",
175
+ regressionVerificationResult,
176
+ "Full-suite regression detected",
177
+ );
131
178
  const updatedPrd = await revertStoriesOnFailure({
132
179
  prd,
133
180
  prdPath,
@@ -137,6 +184,7 @@ export async function runPostAgentVerification(opts: PostVerifyOptions): Promise
137
184
  featureDir,
138
185
  diagnosticContext: "REGRESSION: full-suite regression detected",
139
186
  countsTowardEscalation: true,
187
+ priorFailure: regressionFailure,
140
188
  });
141
189
  return { passed: false, prd: updatedPrd };
142
190
  }
@@ -173,6 +221,7 @@ export async function runPostAgentVerification(opts: PostVerifyOptions): Promise
173
221
 
174
222
  // Revert stories and save
175
223
  const diagnosticContext = verificationResult.error || `Verification failed: ${verificationResult.status}`;
224
+ const verifyFailure = buildStructuredFailure(story, "verify", verificationResult, diagnosticContext);
176
225
  const updatedPrd = await revertStoriesOnFailure({
177
226
  prd,
178
227
  prdPath,
@@ -182,11 +231,17 @@ export async function runPostAgentVerification(opts: PostVerifyOptions): Promise
182
231
  featureDir,
183
232
  diagnosticContext,
184
233
  countsTowardEscalation: verificationResult.countsTowardEscalation ?? false,
234
+ priorFailure: verifyFailure,
185
235
  });
186
236
 
187
237
  return { passed: false, prd: updatedPrd };
188
238
  }
189
239
 
240
+ interface RegressionGateResult {
241
+ status: "passed" | "skipped" | "failed";
242
+ verificationResult?: VerificationResult;
243
+ }
244
+
190
245
  /** Run regression gate (full suite) after scoped tests pass. */
191
246
  async function runRegressionGate(
192
247
  config: NaxConfig,
@@ -194,7 +249,7 @@ async function runRegressionGate(
194
249
  story: UserStory,
195
250
  changedTestFiles: string[],
196
251
  rectificationEnabled: boolean,
197
- ): Promise<"passed" | "skipped" | "failed"> {
252
+ ): Promise<RegressionGateResult> {
198
253
  const logger = getSafeLogger();
199
254
  const regressionGateEnabled = config.execution.regressionGate?.enabled ?? true;
200
255
  const scopedTestsWereRun = changedTestFiles.length > 0;
@@ -203,7 +258,7 @@ async function runRegressionGate(
203
258
  if (regressionGateEnabled && !scopedTestsWereRun) {
204
259
  logger?.debug("regression-gate", "Skipping regression gate (full suite already run in scoped verification)");
205
260
  }
206
- return "skipped";
261
+ return { status: "skipped" };
207
262
  }
208
263
 
209
264
  logger?.info("regression-gate", "Running full-suite regression gate");
@@ -225,7 +280,16 @@ async function runRegressionGate(
225
280
 
226
281
  if (regressionResult.success) {
227
282
  logger?.info("regression-gate", "Full-suite regression gate passed");
228
- return "passed";
283
+ return { status: "passed" };
284
+ }
285
+
286
+ // Handle timeout: accept as pass if configured (BUG-026)
287
+ const acceptOnTimeout = config.execution.regressionGate?.acceptOnTimeout ?? true;
288
+ if (regressionResult.status === "TIMEOUT" && acceptOnTimeout) {
289
+ logger?.warn("regression-gate", "[BUG-026] Full-suite regression gate timed out (accepted as pass)", {
290
+ reason: "Timeout is not evidence of regression — scoped verification already passed",
291
+ });
292
+ return { status: "passed" };
229
293
  }
230
294
 
231
295
  logger?.warn("regression-gate", "Full-suite regression detected", { status: regressionResult.status });
@@ -243,10 +307,10 @@ async function runRegressionGate(
243
307
  promptPrefix:
244
308
  "# REGRESSION: Cross-Story Test Failures\n\nYour changes passed scoped tests but broke unrelated tests. Fix these regressions.",
245
309
  });
246
- if (fixed) return "passed";
310
+ if (fixed) return { status: "passed" };
247
311
  }
248
312
 
249
- return "failed";
313
+ return { status: "failed", verificationResult: regressionResult };
250
314
  }
251
315
 
252
316
  /** Check if environmental failure should trigger early escalation. */
package/src/execution/progress.ts CHANGED
@@ -4,6 +4,7 @@
4
4
  * Append timestamped entries to progress.txt after story completion.
5
5
  */
6
6
 
7
+ import { mkdirSync } from "node:fs";
7
8
  import { join } from "node:path";
8
9
  import type { StoryStatus } from "../prd";
9
10
 
@@ -14,6 +15,7 @@ export async function appendProgress(
14
15
  status: StoryStatus,
15
16
  message: string,
16
17
  ): Promise<void> {
18
+ mkdirSync(featureDir, { recursive: true });
17
19
  const progressPath = join(featureDir, "progress.txt");
18
20
  const timestamp = new Date().toISOString();
19
21
  const entry = `[${timestamp}] ${storyId} — ${status.toUpperCase()} — ${message}\n`;
package/src/pipeline/stages/review.ts CHANGED
@@ -83,13 +83,15 @@ export const reviewStage: PipelineStage = {
83
83
  const reviewResult = await runReview(ctx.config.review, ctx.workdir, ctx.config.execution);
84
84
  ctx.reviewResult = reviewResult;
85
85
 
86
- // HARD FAILURE: Review failure means code quality gate not met
86
+ // BUG-030: Review failure (lint/typecheck) should escalate, not hard-fail.
87
+ // Lint/typecheck errors are auto-fixable — give the agent a retry with error context.
88
+ // Only plugin reviewer rejections are hard failures.
87
89
  if (!reviewResult.success) {
88
- logger.error("review", "Review failed (built-in checks)", {
90
+ logger.warn("review", "Review failed (built-in checks) — escalating for retry", {
89
91
  reason: reviewResult.failureReason,
90
92
  storyId: ctx.story.id,
91
93
  });
92
- return { action: "fail", reason: `Review failed: ${reviewResult.failureReason}` };
94
+ return { action: "escalate", reason: `Review failed: ${reviewResult.failureReason}` };
93
95
  }
94
96
 
95
97
  // Run plugin reviewers if any are registered
package/src/pipeline/stages/routing.ts CHANGED
@@ -2,8 +2,8 @@
2
2
  * Routing Stage
3
3
  *
4
4
  * Classifies story complexity and determines model tier + test strategy.
5
- * Uses cached complexity/testStrategy from story if available, but ALWAYS
6
- * derives modelTier from current config (never cached).
5
+ * Uses cached complexity/testStrategy/modelTier from story if available.
6
+ * modelTier: uses escalated tier if explicitly set (BUG-032), otherwise derives from config.
7
7
  *
8
8
  * @returns
9
9
  * - `continue`: Routing determined, proceed to next stage
@@ -30,17 +30,22 @@ export const routingStage: PipelineStage = {
30
30
  async execute(ctx: PipelineContext): Promise<StageResult> {
31
31
  const logger = getLogger();
32
32
 
33
- // If story has cached routing, use it but re-derive modelTier from current config
33
+ // If story has cached routing, use cached values (escalated modelTier takes priority)
34
34
  // Otherwise, perform fresh classification
35
35
  let routing: { complexity: string; testStrategy: string; modelTier: string; reasoning?: string };
36
36
  if (ctx.story.routing) {
37
- // Use cached complexity/testStrategy, but re-derive modelTier from current config
37
+ // Use cached complexity/testStrategy/modelTier
38
38
  routing = await routeStory(ctx.story, { config: ctx.config }, ctx.workdir, ctx.plugins);
39
- // Override with cached complexity if available
40
- routing.complexity = ctx.story.routing.complexity;
41
- routing.testStrategy = ctx.story.routing.testStrategy;
42
- // Re-derive modelTier from cached complexity and current config
43
- routing.modelTier = complexityToModelTier(routing.complexity as import("../../config").Complexity, ctx.config);
39
+ // Override with cached values only when they are actually set
40
+ if (ctx.story.routing?.complexity) routing.complexity = ctx.story.routing.complexity;
41
+ if (ctx.story.routing?.testStrategy) routing.testStrategy = ctx.story.routing.testStrategy;
42
+ // BUG-032: Use escalated modelTier if explicitly set (by handleTierEscalation),
43
+ // otherwise derive from complexity + current config
44
+ if (ctx.story.routing?.modelTier) {
45
+ routing.modelTier = ctx.story.routing.modelTier;
46
+ } else {
47
+ routing.modelTier = complexityToModelTier(routing.complexity as import("../../config").Complexity, ctx.config);
48
+ }
44
49
  } else {
45
50
  // Fresh classification
46
51
  routing = await routeStory(ctx.story, { config: ctx.config }, ctx.workdir, ctx.plugins);
package/src/pipeline/stages/verify.ts CHANGED
@@ -20,10 +20,24 @@
20
20
  * ```
21
21
  */
22
22
 
23
+ import type { SmartTestRunnerConfig } from "../../config/types";
23
24
  import { getLogger } from "../../logger";
24
25
  import { regression } from "../../verification/gate";
26
+ import { _smartRunnerDeps } from "../../verification/smart-runner";
25
27
  import type { PipelineContext, PipelineStage, StageResult } from "../types";
26
28
 
29
+ const DEFAULT_SMART_RUNNER_CONFIG: SmartTestRunnerConfig = {
30
+ enabled: true,
31
+ testFilePatterns: ["test/**/*.test.ts"],
32
+ fallback: "import-grep",
33
+ };
34
+
35
+ function coerceSmartTestRunner(val: boolean | SmartTestRunnerConfig | undefined): SmartTestRunnerConfig {
36
+ if (val === undefined || val === true) return DEFAULT_SMART_RUNNER_CONFIG;
37
+ if (val === false) return { ...DEFAULT_SMART_RUNNER_CONFIG, enabled: false };
38
+ return val;
39
+ }
40
+
27
41
  export const verifyStage: PipelineStage = {
28
42
  name: "verify",
29
43
  enabled: () => true,
@@ -46,11 +60,50 @@ export const verifyStage: PipelineStage = {
46
60
 
47
61
  logger.info("verify", "Running verification", { storyId: ctx.story.id });
48
62
 
63
+ // Determine effective test command (smart runner or full suite)
64
+ let effectiveCommand = testCommand;
65
+ const smartRunnerConfig = coerceSmartTestRunner(ctx.config.execution.smartTestRunner);
66
+
67
+ if (smartRunnerConfig.enabled) {
68
+ const sourceFiles = await _smartRunnerDeps.getChangedSourceFiles(ctx.workdir);
69
+
70
+ // Pass 1: path convention mapping
71
+ const pass1Files = await _smartRunnerDeps.mapSourceToTests(sourceFiles, ctx.workdir);
72
+ if (pass1Files.length > 0) {
73
+ logger.info("verify", `[smart-runner] Pass 1: path convention matched ${pass1Files.length} test files`, {
74
+ storyId: ctx.story.id,
75
+ });
76
+ effectiveCommand = _smartRunnerDeps.buildSmartTestCommand(pass1Files, testCommand);
77
+ } else if (smartRunnerConfig.fallback === "import-grep") {
78
+ // Pass 2: import-grep fallback
79
+ const pass2Files = await _smartRunnerDeps.importGrepFallback(
80
+ sourceFiles,
81
+ ctx.workdir,
82
+ smartRunnerConfig.testFilePatterns,
83
+ );
84
+ if (pass2Files.length > 0) {
85
+ logger.info("verify", `[smart-runner] Pass 2: import-grep matched ${pass2Files.length} test files`, {
86
+ storyId: ctx.story.id,
87
+ });
88
+ effectiveCommand = _smartRunnerDeps.buildSmartTestCommand(pass2Files, testCommand);
89
+ } else {
90
+ logger.info("verify", "[smart-runner] No mapped tests — falling back to full suite", {
91
+ storyId: ctx.story.id,
92
+ });
93
+ }
94
+ } else {
95
+ logger.info("verify", "[smart-runner] No mapped tests — falling back to full suite", {
96
+ storyId: ctx.story.id,
97
+ });
98
+ }
99
+ }
100
+
49
101
  // Use unified regression gate (includes 2s wait for agent process cleanup)
50
102
  const result = await regression({
51
103
  workdir: ctx.workdir,
52
- command: testCommand,
104
+ command: effectiveCommand,
53
105
  timeoutSeconds: ctx.config.execution.verificationTimeoutSeconds,
106
+ acceptOnTimeout: ctx.config.execution.regressionGate?.acceptOnTimeout ?? true,
54
107
  });
55
108
 
56
109
  // HARD FAILURE: Tests must pass for story to be marked complete
package/src/prd/index.ts CHANGED
@@ -6,7 +6,16 @@ import { existsSync, statSync } from "node:fs";
6
6
  import type { FailureCategory } from "../tdd/types";
7
7
  import type { PRD, UserStory } from "./types";
8
8
 
9
- export type { PRD, UserStory, StoryRouting, StoryStatus, EscalationAttempt } from "./types";
9
+ export type {
10
+ PRD,
11
+ UserStory,
12
+ StoryRouting,
13
+ StoryStatus,
14
+ EscalationAttempt,
15
+ StructuredFailure,
16
+ TestFailureContext,
17
+ VerificationStage,
18
+ } from "./types";
10
19
  export { isStalled, markStoryAsBlocked, generateHumanHaltSummary, getContextFiles, getExpectedFiles } from "./types";
11
20
  export type { FailureCategory } from "../tdd/types";
12
21
 
@@ -36,6 +45,7 @@ export async function loadPRD(path: string): Promise<PRD> {
36
45
  for (const story of prd.userStories) {
37
46
  story.attempts = story.attempts ?? 0;
38
47
  story.priorErrors = story.priorErrors ?? [];
48
+ story.priorFailures = story.priorFailures ?? [];
39
49
  story.escalations = story.escalations ?? [];
40
50
  story.dependencies = story.dependencies ?? [];
41
51
  story.tags = story.tags ?? [];
@@ -73,6 +83,11 @@ export function getNextStory(prd: PRD, currentStoryId?: string | null, maxRetrie
73
83
  if (currentStory && currentStory.status === "failed" && (currentStory.attempts ?? 0) <= maxRetries) {
74
84
  return currentStory;
75
85
  }
86
+ // BUG-029: After tier escalation, story is set to "pending" (not "failed").
87
+ // Prioritize current story if it was escalated (pending + has prior attempts).
88
+ if (currentStory && currentStory.status === "pending" && (currentStory.attempts ?? 0) > 0) {
89
+ return currentStory;
90
+ }
76
91
  }
77
92
 
78
93
  const completedIds = new Set(
package/src/prd/types.ts CHANGED
@@ -11,6 +11,37 @@ import type { FailureCategory } from "../tdd/types";
11
11
  /** User story status */
12
12
  export type StoryStatus = "pending" | "in-progress" | "passed" | "failed" | "skipped" | "blocked" | "paused";
13
13
 
14
+ /** Verification stage where failure occurred */
15
+ export type VerificationStage = "verify" | "review" | "regression" | "rectification" | "agent-session" | "escalation";
16
+
17
+ /** Test failure context from parsed test output */
18
+ export interface TestFailureContext {
19
+ /** Test file path */
20
+ file: string;
21
+ /** Full test name (including describe blocks) */
22
+ testName: string;
23
+ /** Error message */
24
+ error: string;
25
+ /** Stack trace lines */
26
+ stackTrace: string[];
27
+ }
28
+
29
+ /** Structured failure context for escalated tiers */
30
+ export interface StructuredFailure {
31
+ /** Attempt number when failure occurred */
32
+ attempt: number;
33
+ /** Model tier that was running */
34
+ modelTier: string;
35
+ /** Stage where failure occurred */
36
+ stage: VerificationStage;
37
+ /** Summary of what failed */
38
+ summary: string;
39
+ /** Parsed test failures (if applicable) */
40
+ testFailures?: TestFailureContext[];
41
+ /** ISO timestamp when failure was recorded */
42
+ timestamp: string;
43
+ }
44
+
14
45
  /** Routing metadata per story */
15
46
  export interface StoryRouting {
16
47
  complexity: Complexity;
@@ -71,6 +102,8 @@ export interface UserStory {
71
102
  expectedFiles?: string[];
72
103
  /** Prior error messages from failed attempts */
73
104
  priorErrors?: string[];
105
+ /** Structured failure context for escalated tiers */
106
+ priorFailures?: StructuredFailure[];
74
107
  /** Custom context strings */
75
108
  customContext?: string[];
76
109
  /** Category of the last failure (set when story is marked failed) */
package/src/precheck/index.ts CHANGED
@@ -62,6 +62,8 @@ export interface PrecheckOptions {
62
62
  format?: "human" | "json";
63
63
  /** Working directory */
64
64
  workdir: string;
65
+ /** Suppress console output (for programmatic use) */
66
+ silent?: boolean;
65
67
  }
66
68
 
67
69
  /** Extended result with exit code for CLI usage */
@@ -87,6 +89,7 @@ export async function runPrecheck(
87
89
  ): Promise<PrecheckResultWithCode> {
88
90
  const workdir = options?.workdir || process.cwd();
89
91
  const format = options?.format || "human";
92
+ const silent = options?.silent ?? false;
90
93
 
91
94
  const passed: Check[] = [];
92
95
  const blockers: Check[] = [];
@@ -196,10 +199,12 @@ export async function runPrecheck(
196
199
  exitCode = hasPRDError ? EXIT_CODES.INVALID_PRD : EXIT_CODES.BLOCKER;
197
200
  }
198
201
 
199
- if (format === "json") {
200
- console.log(JSON.stringify(output, null, 2));
201
- } else {
202
- printSummary(output);
202
+ if (!silent) {
203
+ if (format === "json") {
204
+ console.log(JSON.stringify(output, null, 2));
205
+ } else {
206
+ printSummary(output);
207
+ }
203
208
  }
204
209
 
205
210
  return {
package/src/routing/strategies/llm.ts CHANGED
@@ -36,6 +36,11 @@ export function getCacheSize(): number {
36
36
  return cachedDecisions.size;
37
37
  }
38
38
 
39
+ /** Clear routing cache entry for a specific story (used on tier escalation) */
40
+ export function clearCacheForStory(storyId: string): void {
41
+ cachedDecisions.delete(storyId);
42
+ }
43
+
39
44
  /** Evict oldest entry when cache is full (LRU) */
40
45
  function evictOldest(): void {
41
46
  const firstKey = cachedDecisions.keys().next().value;
package/src/verification/gate.ts CHANGED
@@ -95,9 +95,10 @@ async function runVerificationCore(options: VerificationGateOptions): Promise<Ve
95
95
  });
96
96
 
97
97
  if (execution.timeout) {
98
+ const success = options.acceptOnTimeout ?? false;
98
99
  return {
99
100
  status: "TIMEOUT",
100
- success: false,
101
+ success,
101
102
  countsTowardEscalation: false, // Timeout is environmental, not code failure
102
103
  error: execution.error,
103
104
  output: execution.output,