@nathapp/nax 0.32.2 → 0.34.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/README.md +191 -6
  2. package/dist/nax.js +1150 -382
  3. package/package.json +1 -1
  4. package/src/cli/analyze.ts +145 -0
  5. package/src/cli/config.ts +9 -0
  6. package/src/config/defaults.ts +8 -0
  7. package/src/config/schema.ts +1 -0
  8. package/src/config/schemas.ts +10 -0
  9. package/src/config/types.ts +18 -0
  10. package/src/context/elements.ts +13 -0
  11. package/src/context/greenfield.ts +1 -1
  12. package/src/decompose/apply.ts +44 -0
  13. package/src/decompose/builder.ts +181 -0
  14. package/src/decompose/index.ts +8 -0
  15. package/src/decompose/sections/codebase.ts +26 -0
  16. package/src/decompose/sections/constraints.ts +32 -0
  17. package/src/decompose/sections/index.ts +4 -0
  18. package/src/decompose/sections/sibling-stories.ts +25 -0
  19. package/src/decompose/sections/target-story.ts +31 -0
  20. package/src/decompose/types.ts +55 -0
  21. package/src/decompose/validators/complexity.ts +45 -0
  22. package/src/decompose/validators/coverage.ts +134 -0
  23. package/src/decompose/validators/dependency.ts +91 -0
  24. package/src/decompose/validators/index.ts +35 -0
  25. package/src/decompose/validators/overlap.ts +128 -0
  26. package/src/execution/crash-recovery.ts +8 -0
  27. package/src/execution/escalation/tier-escalation.ts +9 -2
  28. package/src/execution/iteration-runner.ts +2 -0
  29. package/src/execution/lifecycle/run-completion.ts +100 -15
  30. package/src/execution/parallel-executor.ts +20 -1
  31. package/src/execution/pipeline-result-handler.ts +5 -1
  32. package/src/execution/runner.ts +20 -0
  33. package/src/execution/sequential-executor.ts +2 -11
  34. package/src/hooks/types.ts +20 -10
  35. package/src/interaction/index.ts +1 -0
  36. package/src/interaction/triggers.ts +21 -0
  37. package/src/interaction/types.ts +7 -0
  38. package/src/metrics/tracker.ts +7 -0
  39. package/src/metrics/types.ts +2 -0
  40. package/src/pipeline/stages/review.ts +6 -0
  41. package/src/pipeline/stages/routing.ts +89 -0
  42. package/src/pipeline/types.ts +2 -0
  43. package/src/plugins/types.ts +33 -0
  44. package/src/prd/index.ts +7 -2
  45. package/src/prd/types.ts +17 -2
  46. package/src/review/orchestrator.ts +1 -0
  47. package/src/review/types.ts +2 -0
  48. package/src/tdd/isolation.ts +1 -1
@@ -9,9 +9,12 @@
9
9
  */
10
10
 
11
11
  import type { NaxConfig } from "../../config";
12
+ import { fireHook } from "../../hooks/runner";
13
+ import type { HooksConfig } from "../../hooks/types";
12
14
  import { getSafeLogger } from "../../logger";
13
15
  import type { StoryMetrics } from "../../metrics";
14
16
  import { saveRunMetrics } from "../../metrics";
17
+ import { pipelineEventBus } from "../../pipeline/event-bus";
15
18
  import { countStories, isComplete, isStalled } from "../../prd";
16
19
  import type { PRD } from "../../prd";
17
20
  import type { StatusWriter } from "../status-writer";
@@ -23,6 +26,7 @@ import { runDeferredRegression } from "./run-regression";
23
26
  */
24
27
  export const _runCompletionDeps = {
25
28
  runDeferredRegression,
29
+ fireHook,
26
30
  };
27
31
 
28
32
  export interface RunCompletionOptions {
@@ -38,6 +42,9 @@ export interface RunCompletionOptions {
38
42
  workdir: string;
39
43
  statusWriter: StatusWriter;
40
44
  config: NaxConfig;
45
+ hooksConfig?: HooksConfig;
46
+ /** Whether the run used sequential (non-parallel) execution. Defaults to true. */
47
+ isSequential?: boolean;
41
48
  }
42
49
 
43
50
  export interface RunCompletionResult {
@@ -52,6 +59,32 @@ export interface RunCompletionResult {
52
59
  };
53
60
  }
54
61
 
62
+ /**
63
+ * Check if deferred regression should be skipped (RL-006).
64
+ *
65
+ * Smart-skip applies when:
66
+ * 1. All stories have fullSuiteGatePassed === true
67
+ * 2. Execution is sequential (or defaults to sequential when not specified)
68
+ * 3. There is at least one story metric
69
+ */
70
+ function shouldSkipDeferredRegression(allStoryMetrics: StoryMetrics[], isSequential: boolean | undefined): boolean {
71
+ // Default to sequential mode
72
+ const effectiveSequential = isSequential !== false;
73
+
74
+ // Must be sequential mode
75
+ if (!effectiveSequential) {
76
+ return false;
77
+ }
78
+
79
+ // Must have at least one story metric
80
+ if (allStoryMetrics.length === 0) {
81
+ return false;
82
+ }
83
+
84
+ // All stories must have fullSuiteGatePassed === true
85
+ return allStoryMetrics.every((m) => m.fullSuiteGatePassed === true);
86
+ }
87
+
55
88
  /**
56
89
  * Handle final run completion: save metrics, log summary, update status
57
90
  */
@@ -70,28 +103,77 @@ export async function handleRunCompletion(options: RunCompletionOptions): Promis
70
103
  workdir,
71
104
  statusWriter,
72
105
  config,
106
+ hooksConfig,
107
+ isSequential,
73
108
  } = options;
74
109
 
75
110
  // Run deferred regression gate before final metrics
76
111
  const regressionMode = config.execution.regressionGate?.mode;
77
112
  if (regressionMode === "deferred" && config.quality.commands.test) {
78
- const regressionResult = await _runCompletionDeps.runDeferredRegression({
79
- config,
80
- prd,
81
- workdir,
82
- });
83
-
84
- logger?.info("regression", "Deferred regression gate completed", {
85
- success: regressionResult.success,
86
- failedTests: regressionResult.failedTests,
87
- affectedStories: regressionResult.affectedStories,
88
- });
113
+ if (shouldSkipDeferredRegression(allStoryMetrics, isSequential)) {
114
+ logger?.info(
115
+ "regression",
116
+ "Smart-skip: skipping deferred regression (all stories passed full-suite gate in sequential mode)",
117
+ );
118
+ } else {
119
+ const regressionResult = await _runCompletionDeps.runDeferredRegression({
120
+ config,
121
+ prd,
122
+ workdir,
123
+ });
124
+
125
+ logger?.info("regression", "Deferred regression gate completed", {
126
+ success: regressionResult.success,
127
+ failedTests: regressionResult.failedTests,
128
+ affectedStories: regressionResult.affectedStories,
129
+ });
130
+
131
+ if (!regressionResult.success) {
132
+ // Mark affected stories as regression-failed (RL-004)
133
+ for (const storyId of regressionResult.affectedStories) {
134
+ const story = prd.userStories.find((s) => s.id === storyId);
135
+ if (story) {
136
+ story.status = "regression-failed";
137
+ }
138
+ }
139
+ // Reflect regression gate failure in run status (RL-004)
140
+ statusWriter.setRunStatus("failed");
141
+
142
+ if (hooksConfig) {
143
+ await _runCompletionDeps.fireHook(
144
+ hooksConfig as import("../../hooks/runner").LoadedHooksConfig,
145
+ "on-final-regression-fail",
146
+ {
147
+ event: "on-final-regression-fail",
148
+ feature,
149
+ status: "failed",
150
+ failedTests: regressionResult.failedTests,
151
+ affectedStories: regressionResult.affectedStories,
152
+ },
153
+ workdir,
154
+ );
155
+ }
156
+ }
157
+ }
89
158
  }
90
159
 
91
160
  const durationMs = Date.now() - startTime;
92
161
  const runCompletedAt = new Date().toISOString();
93
162
 
94
- // Save run metrics
163
+ // Compute final story counts before emitting completion event (RL-002)
164
+ const finalCounts = countStories(prd);
165
+
166
+ // Emit run:completed after regression gate with real story counts (RL-002)
167
+ pipelineEventBus.emit({
168
+ type: "run:completed",
169
+ totalStories: finalCounts.total,
170
+ passedStories: finalCounts.passed,
171
+ failedStories: finalCounts.failed,
172
+ durationMs,
173
+ totalCost,
174
+ });
175
+
176
+ // Save run metrics (best-effort — disk write errors do not fail the run)
95
177
  const runMetrics = {
96
178
  runId,
97
179
  feature,
@@ -100,15 +182,18 @@ export async function handleRunCompletion(options: RunCompletionOptions): Promis
100
182
  totalCost,
101
183
  totalStories: allStoryMetrics.length,
102
184
  storiesCompleted,
103
- storiesFailed: countStories(prd).failed,
185
+ storiesFailed: finalCounts.failed,
104
186
  totalDurationMs: durationMs,
105
187
  stories: allStoryMetrics,
106
188
  };
107
189
 
108
- await saveRunMetrics(workdir, runMetrics);
190
+ try {
191
+ await saveRunMetrics(workdir, runMetrics);
192
+ } catch (err) {
193
+ logger?.warn("run.complete", "Failed to save run metrics", { error: String(err) });
194
+ }
109
195
 
110
196
  // Log run completion
111
- const finalCounts = countStories(prd);
112
197
 
113
198
  // Prepare per-story metrics summary
114
199
  const storyMetricsSummary = allStoryMetrics.map((sm) => ({
@@ -22,6 +22,14 @@ import { getAllReadyStories, hookCtx } from "./helpers";
22
22
  import { executeParallel } from "./parallel";
23
23
  import type { StatusWriter } from "./status-writer";
24
24
 
25
+ /**
26
+ * Injectable dependencies for testing (avoids mock.module() which leaks in Bun 1.x).
27
+ * @internal - test use only.
28
+ */
29
+ export const _parallelExecutorDeps = {
30
+ fireHook,
31
+ };
32
+
25
33
  export interface ParallelExecutorOptions {
26
34
  prdPath: string;
27
35
  workdir: string;
@@ -158,7 +166,18 @@ export async function runParallelExecution(
158
166
  feature,
159
167
  totalCost,
160
168
  });
161
- await fireHook(hooks, "on-complete", hookCtx(feature, { status: "complete", cost: totalCost }), workdir);
169
+ await _parallelExecutorDeps.fireHook(
170
+ hooks,
171
+ "on-all-stories-complete",
172
+ hookCtx(feature, { status: "passed", cost: totalCost }),
173
+ workdir,
174
+ );
175
+ await _parallelExecutorDeps.fireHook(
176
+ hooks,
177
+ "on-complete",
178
+ hookCtx(feature, { status: "complete", cost: totalCost }),
179
+ workdir,
180
+ );
162
181
 
163
182
  // Skip to metrics and cleanup
164
183
  const durationMs = Date.now() - startTime;
@@ -39,6 +39,7 @@ export interface PipelineHandlerContext {
39
39
  allStoryMetrics: StoryMetrics[];
40
40
  storyGitRef: string | null | undefined;
41
41
  interactionChain?: InteractionChain | null;
42
+ storyStartTime?: number;
42
43
  }
43
44
 
44
45
  export interface PipelineSuccessResult {
@@ -62,11 +63,13 @@ export async function handlePipelineSuccess(
62
63
 
63
64
  const storiesCompletedDelta = ctx.storiesToExecute.length;
64
65
  for (const completedStory of ctx.storiesToExecute) {
66
+ const now = Date.now();
65
67
  logger?.info("story.complete", "Story completed successfully", {
66
68
  storyId: completedStory.id,
67
69
  storyTitle: completedStory.title,
68
70
  totalCost: ctx.totalCost + costDelta,
69
- durationMs: Date.now() - ctx.startTime,
71
+ durationMs: now - ctx.startTime,
72
+ storyDurationMs: ctx.storyStartTime ? now - ctx.storyStartTime : undefined,
70
73
  });
71
74
 
72
75
  pipelineEventBus.emit({
@@ -90,6 +93,7 @@ export async function handlePipelineSuccess(
90
93
  totalCost: ctx.totalCost + costDelta,
91
94
  costLimit: ctx.config.execution.costLimit,
92
95
  elapsedMs: Date.now() - ctx.startTime,
96
+ storyDurationMs: ctx.storyStartTime ? Date.now() - ctx.storyStartTime : undefined,
93
97
  });
94
98
 
95
99
  return { storiesCompletedDelta, costDelta, prd, prdDirty: true };
@@ -10,6 +10,7 @@
10
10
 
11
11
  import type { NaxConfig } from "../config";
12
12
  import type { LoadedHooksConfig } from "../hooks";
13
+ import { fireHook } from "../hooks";
13
14
  import { getSafeLogger } from "../logger";
14
15
  import type { StoryMetrics } from "../metrics";
15
16
  import type { PipelineEventEmitter } from "../pipeline/events";
@@ -20,6 +21,15 @@ import { clearCache as clearLlmCache, routeBatch as llmRouteBatch } from "../rou
20
21
  import { precomputeBatchPlan } from "./batching";
21
22
  import { stopHeartbeat, writeExitSummary } from "./crash-recovery";
22
23
  import { getAllReadyStories } from "./helpers";
24
+ import { hookCtx } from "./story-context";
25
+
26
+ /**
27
+ * Injectable dependencies for testing (avoids mock.module() which leaks in Bun 1.x).
28
+ * @internal - test use only.
29
+ */
30
+ export const _runnerDeps = {
31
+ fireHook,
32
+ };
23
33
 
24
34
  // Re-export for backward compatibility
25
35
  export { resolveMaxAttemptsOutcome } from "./escalation";
@@ -289,6 +299,16 @@ export async function run(options: RunOptions): Promise<RunResult> {
289
299
  storiesCompleted = acceptanceResult.storiesCompleted;
290
300
  }
291
301
 
302
+ // Fire on-all-stories-complete before regression gate (RL-001)
303
+ if (isComplete(prd)) {
304
+ await _runnerDeps.fireHook(
305
+ hooks,
306
+ "on-all-stories-complete",
307
+ hookCtx(feature, { status: "passed", cost: totalCost }),
308
+ workdir,
309
+ );
310
+ }
311
+
292
312
  // Handle run completion: save metrics, log summary, update status
293
313
  const { handleRunCompletion } = await import("./lifecycle/run-completion");
294
314
  const completionResult = await handleRunCompletion({
@@ -14,7 +14,7 @@ import { wireReporters } from "../pipeline/subscribers/reporters";
14
14
  import type { PipelineContext } from "../pipeline/types";
15
15
  import { generateHumanHaltSummary, isComplete, isStalled, loadPRD } from "../prd";
16
16
  import type { PRD } from "../prd/types";
17
- import { startHeartbeat, stopHeartbeat, writeExitSummary } from "./crash-recovery";
17
+ import { startHeartbeat } from "./crash-recovery";
18
18
  import type { SequentialExecutionContext, SequentialExecutionResult } from "./executor-types";
19
19
  import { runIteration } from "./iteration-runner";
20
20
  import { selectNextStories } from "./story-selector";
@@ -82,14 +82,6 @@ export async function executeSequential(
82
82
  return buildResult("pre-merge-aborted");
83
83
  }
84
84
  }
85
- pipelineEventBus.emit({
86
- type: "run:completed",
87
- totalStories: 0,
88
- passedStories: 0,
89
- failedStories: 0,
90
- durationMs: Date.now() - ctx.startTime,
91
- totalCost,
92
- });
93
85
  return buildResult("completed");
94
86
  }
95
87
 
@@ -181,7 +173,6 @@ export async function executeSequential(
181
173
 
182
174
  return buildResult("max-iterations");
183
175
  } finally {
184
- stopHeartbeat();
185
- writeExitSummary(ctx.logFilePath, totalCost, iterations, storiesCompleted, Date.now() - ctx.startTime);
176
+ // Cleanup moved to runner.ts (RL-007): exit summary and heartbeat stop are owned by runner
186
177
  }
187
178
  }
@@ -4,17 +4,23 @@
4
4
  * Script-based lifecycle hooks configured via hooks.json.
5
5
  */
6
6
 
7
+ /** All supported hook events — runtime array used for validation */
8
+ export const HOOK_EVENTS = [
9
+ "on-start",
10
+ "on-story-start",
11
+ "on-story-complete",
12
+ "on-story-fail",
13
+ "on-pause",
14
+ "on-resume",
15
+ "on-session-end",
16
+ "on-all-stories-complete",
17
+ "on-complete",
18
+ "on-error",
19
+ "on-final-regression-fail",
20
+ ] as const;
21
+
7
22
  /** All supported hook events */
8
- export type HookEvent =
9
- | "on-start"
10
- | "on-story-start"
11
- | "on-story-complete"
12
- | "on-story-fail"
13
- | "on-pause"
14
- | "on-resume"
15
- | "on-session-end"
16
- | "on-complete"
17
- | "on-error";
23
+ export type HookEvent = (typeof HOOK_EVENTS)[number];
18
24
 
19
25
  /** Single hook definition */
20
26
  export interface HookDef {
@@ -64,4 +70,8 @@ export interface HookContext {
64
70
  agent?: string;
65
71
  /** Current iteration number */
66
72
  iteration?: number;
73
+ /** Number of failed tests (on-final-regression-fail) */
74
+ failedTests?: number;
75
+ /** Stories affected by regression failure (on-final-regression-fail) */
76
+ affectedStories?: string[];
67
77
  }
@@ -53,6 +53,7 @@ export {
53
53
  checkPreMerge,
54
54
  checkStoryAmbiguity,
55
55
  checkReviewGate,
56
+ checkStoryOversized,
56
57
  } from "./triggers";
57
58
  export type { TriggerContext } from "./triggers";
58
59
 
@@ -227,3 +227,24 @@ export async function checkReviewGate(
227
227
  const response = await executeTrigger("review-gate", context, config, chain);
228
228
  return response.action === "approve";
229
229
  }
230
+
231
+ /**
232
+ * Check story-oversized trigger (decompose, skip, or continue)
233
+ */
234
+ export async function checkStoryOversized(
235
+ context: TriggerContext,
236
+ config: NaxConfig,
237
+ chain: InteractionChain,
238
+ ): Promise<"decompose" | "skip" | "continue"> {
239
+ if (!isTriggerEnabled("story-oversized", config)) return "continue";
240
+
241
+ try {
242
+ const response = await executeTrigger("story-oversized", context, config, chain);
243
+ if (response.action === "approve") return "decompose";
244
+ if (response.action === "skip") return "skip";
245
+ return "continue";
246
+ } catch {
247
+ // No plugin registered or all plugins failed — apply default fallback
248
+ return "continue";
249
+ }
250
+ }
@@ -83,6 +83,7 @@ export type TriggerName =
83
83
  | "max-retries" // skip (yellow) — max retries reached
84
84
  | "pre-merge" // escalate (yellow) — before merging to main
85
85
  | "human-review" // skip (yellow) — human review required on max retries / critical failure
86
+ | "story-oversized" // continue (yellow) — story has too many acceptance criteria
86
87
  | "story-ambiguity" // continue (green) — story requirements unclear
87
88
  | "review-gate"; // continue (green) — code review checkpoint
88
89
 
@@ -150,6 +151,12 @@ export const TRIGGER_METADATA: Record<TriggerName, TriggerMetadata> = {
150
151
  safety: "yellow",
151
152
  defaultSummary: "Human review required for story {{storyId}} — skip and continue?",
152
153
  },
154
+ "story-oversized": {
155
+ defaultFallback: "continue",
156
+ safety: "yellow",
157
+ defaultSummary:
158
+ "Story {{storyId}} is oversized ({{criteriaCount}} acceptance criteria) — decompose into smaller stories?",
159
+ },
153
160
  "story-ambiguity": {
154
161
  defaultFallback: "continue",
155
162
  safety: "green",
@@ -62,6 +62,11 @@ export function collectStoryMetrics(ctx: PipelineContext, storyStartTime: string
62
62
  // fall back to routing.complexity for backward compat
63
63
  const initialComplexity = story.routing?.initialComplexity ?? routing.complexity;
64
64
 
65
+ // fullSuiteGatePassed: true only for TDD strategies when gate passes
66
+ const isTddStrategy =
67
+ routing.testStrategy === "three-session-tdd" || routing.testStrategy === "three-session-tdd-lite";
68
+ const fullSuiteGatePassed = isTddStrategy ? (ctx.fullSuiteGatePassed ?? false) : false;
69
+
65
70
  return {
66
71
  storyId: story.id,
67
72
  complexity: routing.complexity,
@@ -76,6 +81,7 @@ export function collectStoryMetrics(ctx: PipelineContext, storyStartTime: string
76
81
  firstPassSuccess,
77
82
  startedAt: storyStartTime,
78
83
  completedAt: new Date().toISOString(),
84
+ fullSuiteGatePassed,
79
85
  };
80
86
  }
81
87
 
@@ -132,6 +138,7 @@ export function collectBatchMetrics(ctx: PipelineContext, storyStartTime: string
132
138
  firstPassSuccess: true, // batch = first pass success
133
139
  startedAt: storyStartTime,
134
140
  completedAt: new Date().toISOString(),
141
+ fullSuiteGatePassed: false, // batches are not TDD-gated
135
142
  };
136
143
  });
137
144
  }
@@ -34,6 +34,8 @@ export interface StoryMetrics {
34
34
  startedAt: string;
35
35
  /** Timestamp when completed */
36
36
  completedAt: string;
37
+ /** Whether TDD full-suite gate passed (only true for TDD strategies when gate passes) */
38
+ fullSuiteGatePassed?: boolean;
37
39
  }
38
40
 
39
41
  /**
@@ -30,6 +30,12 @@ export const reviewStage: PipelineStage = {
30
30
  ctx.reviewResult = result.builtIn;
31
31
 
32
32
  if (!result.success) {
33
+ // Collect structured findings from plugin reviewers for escalation context
34
+ const allFindings = result.builtIn.pluginReviewers?.flatMap((pr) => pr.findings ?? []) ?? [];
35
+ if (allFindings.length > 0) {
36
+ ctx.reviewFindings = allFindings;
37
+ }
38
+
33
39
  if (result.pluginFailed) {
34
40
  // security-review trigger: prompt before permanently failing
35
41
  if (ctx.interaction && isTriggerEnabled("security-review", ctx.config)) {
@@ -8,8 +8,13 @@
8
8
  * RRP-003: contentHash staleness detection — if story.routing.contentHash is missing or
9
9
  * does not match the current story content, treats cached routing as a miss and re-classifies.
10
10
  *
11
+ * SD-004: Oversized story detection — after routing, checks if story exceeds
12
+ * config.decompose.maxAcceptanceCriteria with complex/expert complexity. Decomposes
13
+ * based on trigger mode (auto / confirm / disabled).
14
+ *
11
15
  * @returns
12
16
  * - `continue`: Routing determined, proceed to next stage
17
+ * - `skip`: Story was decomposed into substories; runner should pick up first substory
13
18
  *
14
19
  * @example
15
20
  * ```ts
@@ -20,13 +25,42 @@
20
25
  * ```
21
26
  */
22
27
 
28
+ import type { NaxConfig } from "../../config";
23
29
  import { isGreenfieldStory } from "../../context/greenfield";
30
+ import { applyDecomposition } from "../../decompose/apply";
31
+ import { DecomposeBuilder } from "../../decompose/builder";
32
+ import type { DecomposeConfig as BuilderDecomposeConfig, DecomposeResult } from "../../decompose/types";
33
+ import { checkStoryOversized } from "../../interaction/triggers";
24
34
  import { getLogger } from "../../logger";
25
35
  import { savePRD } from "../../prd";
36
+ import type { PRD, UserStory } from "../../prd";
26
37
  import { complexityToModelTier, computeStoryContentHash, routeStory } from "../../routing";
27
38
  import { clearCache, routeBatch } from "../../routing/strategies/llm";
28
39
  import type { PipelineContext, PipelineStage, RoutingResult, StageResult } from "../types";
29
40
 
41
+ /**
42
+ * Run story decomposition using DecomposeBuilder.
43
+ * Used as the default implementation in _routingDeps.runDecompose.
44
+ * In production, replace with an LLM-backed adapter.
45
+ */
46
+ async function runDecompose(story: UserStory, prd: PRD, config: NaxConfig, _workdir: string): Promise<DecomposeResult> {
47
+ const naxDecompose = config.decompose;
48
+ const builderConfig: BuilderDecomposeConfig = {
49
+ maxSubStories: naxDecompose?.maxSubstories ?? 5,
50
+ maxComplexity: naxDecompose?.maxSubstoryComplexity ?? "medium",
51
+ maxRetries: naxDecompose?.maxRetries ?? 2,
52
+ };
53
+
54
+ // Stub adapter — replaced in tests via _routingDeps injection.
55
+ const adapter = {
56
+ async decompose(_prompt: string): Promise<string> {
57
+ throw new Error("[decompose] No LLM adapter configured for story decomposition");
58
+ },
59
+ };
60
+
61
+ return DecomposeBuilder.for(story).prd(prd).config(builderConfig).decompose(adapter);
62
+ }
63
+
30
64
  export const routingStage: PipelineStage = {
31
65
  name: "routing",
32
66
  enabled: () => true,
@@ -116,6 +150,58 @@ export const routingStage: PipelineStage = {
116
150
  logger.debug("routing", ctx.routing.reasoning);
117
151
  }
118
152
 
153
+ // SD-004: Oversized story detection and decomposition
154
+ const decomposeConfig = ctx.config.decompose;
155
+ if (decomposeConfig) {
156
+ const acCount = ctx.story.acceptanceCriteria.length;
157
+ const complexity = ctx.routing.complexity;
158
+ const isOversized =
159
+ acCount > decomposeConfig.maxAcceptanceCriteria && (complexity === "complex" || complexity === "expert");
160
+
161
+ if (isOversized) {
162
+ if (decomposeConfig.trigger === "disabled") {
163
+ logger.warn(
164
+ "routing",
165
+ `Story ${ctx.story.id} is oversized (${acCount} ACs) but decompose is disabled — continuing with original`,
166
+ );
167
+ } else if (decomposeConfig.trigger === "auto") {
168
+ const result = await _routingDeps.runDecompose(ctx.story, ctx.prd, ctx.config, ctx.workdir);
169
+ if (result.validation.valid) {
170
+ _routingDeps.applyDecomposition(ctx.prd, result);
171
+ if (ctx.prdPath) {
172
+ await _routingDeps.savePRD(ctx.prd, ctx.prdPath);
173
+ }
174
+ logger.info("routing", `Story ${ctx.story.id} decomposed into ${result.subStories.length} substories`);
175
+ return { action: "skip", reason: `Decomposed into ${result.subStories.length} substories` };
176
+ }
177
+ logger.warn("routing", `Story ${ctx.story.id} decompose failed after retries — continuing with original`, {
178
+ errors: result.validation.errors,
179
+ });
180
+ } else if (decomposeConfig.trigger === "confirm") {
181
+ const action = await _routingDeps.checkStoryOversized(
182
+ { featureName: ctx.prd.feature, storyId: ctx.story.id, criteriaCount: acCount },
183
+ ctx.config,
184
+ // biome-ignore lint/style/noNonNullAssertion: confirm mode is only reached when interaction chain is present in production; tests mock checkStoryOversized directly
185
+ ctx.interaction!,
186
+ );
187
+ if (action === "decompose") {
188
+ const result = await _routingDeps.runDecompose(ctx.story, ctx.prd, ctx.config, ctx.workdir);
189
+ if (result.validation.valid) {
190
+ _routingDeps.applyDecomposition(ctx.prd, result);
191
+ if (ctx.prdPath) {
192
+ await _routingDeps.savePRD(ctx.prd, ctx.prdPath);
193
+ }
194
+ logger.info("routing", `Story ${ctx.story.id} decomposed into ${result.subStories.length} substories`);
195
+ return { action: "skip", reason: `Decomposed into ${result.subStories.length} substories` };
196
+ }
197
+ logger.warn("routing", `Story ${ctx.story.id} decompose failed after retries — continuing with original`, {
198
+ errors: result.validation.errors,
199
+ });
200
+ }
201
+ }
202
+ }
203
+ }
204
+
119
205
  return { action: "continue" };
120
206
  },
121
207
  };
@@ -131,4 +217,7 @@ export const _routingDeps = {
131
217
  clearCache,
132
218
  savePRD,
133
219
  computeStoryContentHash,
220
+ applyDecomposition,
221
+ runDecompose,
222
+ checkStoryOversized,
134
223
  };
@@ -110,6 +110,8 @@ export interface PipelineContext {
110
110
  tddFailureCategory?: FailureCategory;
111
111
  /** Set to true when TDD full-suite gate already passed — verify stage skips to avoid redundant run (BUG-054) */
112
112
  fullSuiteGatePassed?: boolean;
113
+ /** Structured review findings from plugin reviewers — passed to escalation for retry context */
114
+ reviewFindings?: import("../plugins/types").ReviewFinding[];
113
115
  }
114
116
 
115
117
  /**
@@ -119,6 +119,37 @@ export type { IPromptOptimizer } from "../optimizer/types";
119
119
  // Review Extension
120
120
  // ============================================================================
121
121
 
122
+ /**
123
+ * A single structured finding from a review check.
124
+ *
125
+ * Designed to be service-agnostic — works with Semgrep, ESLint, SonarQube,
126
+ * Snyk, CodeQL, and other SAST/DAST/linting tools.
127
+ */
128
+ export interface ReviewFinding {
129
+ /** Rule or check ID (e.g., "detect-non-literal-regexp", "no-unused-vars") */
130
+ ruleId: string;
131
+ /** Severity level (tool-agnostic scale) */
132
+ severity: "critical" | "error" | "warning" | "info" | "low";
133
+ /** File path (relative to workdir) */
134
+ file: string;
135
+ /** Line number (1-indexed) */
136
+ line: number;
137
+ /** Column number (1-indexed, optional) */
138
+ column?: number;
139
+ /** End line number (optional, for multi-line findings) */
140
+ endLine?: number;
141
+ /** End column number (optional) */
142
+ endColumn?: number;
143
+ /** Human-readable message */
144
+ message: string;
145
+ /** Optional URL for rule documentation or details */
146
+ url?: string;
147
+ /** Source tool that produced this finding (e.g., "semgrep", "eslint", "snyk") */
148
+ source?: string;
149
+ /** Finding category (e.g., "security", "performance", "style", "bug") */
150
+ category?: string;
151
+ }
152
+
122
153
  /**
123
154
  * Result from a review check.
124
155
  */
@@ -129,6 +160,8 @@ export interface ReviewCheckResult {
129
160
  output: string;
130
161
  /** Exit code from the check process (if applicable) */
131
162
  exitCode?: number;
163
+ /** Structured findings (optional — plugins can provide machine-readable results) */
164
+ findings?: ReviewFinding[];
132
165
  }
133
166
 
134
167
  /**