@nathapp/nax 0.24.0 → 0.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/CLAUDE.md +70 -56
  2. package/docs/ROADMAP.md +45 -15
  3. package/docs/specs/trigger-completion.md +145 -0
  4. package/nax/features/routing-persistence/prd.json +104 -0
  5. package/nax/features/routing-persistence/progress.txt +1 -0
  6. package/nax/features/trigger-completion/prd.json +150 -0
  7. package/nax/features/trigger-completion/progress.txt +7 -0
  8. package/nax/status.json +15 -16
  9. package/package.json +1 -1
  10. package/src/config/types.ts +3 -1
  11. package/src/execution/crash-recovery.ts +11 -0
  12. package/src/execution/executor-types.ts +1 -1
  13. package/src/execution/iteration-runner.ts +1 -0
  14. package/src/execution/lifecycle/run-setup.ts +4 -0
  15. package/src/execution/sequential-executor.ts +45 -7
  16. package/src/interaction/plugins/auto.ts +10 -1
  17. package/src/metrics/aggregator.ts +2 -1
  18. package/src/metrics/tracker.ts +26 -14
  19. package/src/metrics/types.ts +2 -0
  20. package/src/pipeline/event-bus.ts +14 -1
  21. package/src/pipeline/stages/completion.ts +20 -0
  22. package/src/pipeline/stages/execution.ts +62 -0
  23. package/src/pipeline/stages/review.ts +25 -1
  24. package/src/pipeline/stages/routing.ts +42 -8
  25. package/src/pipeline/subscribers/hooks.ts +32 -0
  26. package/src/pipeline/subscribers/interaction.ts +36 -1
  27. package/src/pipeline/types.ts +2 -0
  28. package/src/prd/types.ts +4 -0
  29. package/src/routing/content-hash.ts +25 -0
  30. package/src/routing/index.ts +3 -0
  31. package/src/routing/router.ts +3 -2
  32. package/src/routing/strategies/keyword.ts +2 -1
  33. package/src/routing/strategies/llm-prompts.ts +29 -28
  34. package/src/utils/git.ts +21 -0
  35. package/test/integration/routing/plugin-routing-core.test.ts +1 -1
  36. package/test/unit/execution/sequential-executor.test.ts +235 -0
  37. package/test/unit/interaction/auto-plugin.test.ts +162 -0
  38. package/test/unit/interaction-plugins.test.ts +308 -1
  39. package/test/unit/metrics/aggregator.test.ts +164 -0
  40. package/test/unit/metrics/tracker.test.ts +186 -0
  41. package/test/unit/pipeline/stages/completion-review-gate.test.ts +218 -0
  42. package/test/unit/pipeline/stages/execution-ambiguity.test.ts +311 -0
  43. package/test/unit/pipeline/stages/execution-merge-conflict.test.ts +218 -0
  44. package/test/unit/pipeline/stages/review.test.ts +201 -0
  45. package/test/unit/pipeline/stages/routing-idempotence.test.ts +139 -0
  46. package/test/unit/pipeline/stages/routing-initial-complexity.test.ts +321 -0
  47. package/test/unit/pipeline/stages/routing-persistence.test.ts +380 -0
  48. package/test/unit/pipeline/subscribers/hooks.test.ts +43 -4
  49. package/test/unit/pipeline/subscribers/interaction.test.ts +284 -2
  50. package/test/unit/prd-auto-default.test.ts +2 -2
  51. package/test/unit/routing/content-hash.test.ts +99 -0
  52. package/test/unit/routing/routing-stability.test.ts +1 -1
  53. package/test/unit/routing-core.test.ts +5 -5
  54. package/test/unit/routing-strategies.test.ts +1 -3
  55. package/test/unit/utils/git.test.ts +50 -0
@@ -0,0 +1,150 @@
1
+ {
2
+ "project": "nax",
3
+ "branchName": "feat/trigger-completion",
4
+ "feature": "trigger-completion",
5
+ "version": "0.25.0",
6
+ "description": "Wire all 8 unwired interaction triggers to correct pipeline decision points, add 3 missing hook events, and add integration tests for auto/telegram/webhook plugins.",
7
+ "userStories": [
8
+ {
9
+ "id": "TC-001",
10
+ "title": "Wire cost-exceeded and cost-warning triggers",
11
+ "description": "In src/execution/sequential-executor.ts: (1) Before exiting on cost limit (line ~93), call checkCostExceeded({featureName: ctx.feature, cost: totalCost, limit: ctx.config.execution.costLimit}, ctx.config, ctx.interactionChain). Import checkCostExceeded from src/interaction/triggers.ts. If isTriggerEnabled(\"cost-exceeded\", config) is false or chain is null, keep today behavior. Trigger abort = exit \"cost-limit\". Trigger skip/continue = allow run to proceed past limit. (2) Add cost-warning: track a boolean warningSent=false. In the iteration loop, when totalCost >= costLimit * (interaction.triggers[\"cost-warning\"]?.threshold ?? 0.8) and !warningSent, call checkCostWarning({featureName, cost, limit}, config, interactionChain), set warningSent=true. isTriggerEnabled guards the call. Default fallback continue = proceed silently. Both calls must be best-effort: guard with if(interactionChain) check.",
12
+ "complexity": "medium",
13
+ "status": "passed",
14
+ "acceptanceCriteria": [
15
+ "When cost hits 80% of limit and cost-warning trigger is enabled, checkCostWarning fires once",
16
+ "Warning fires only once per run even if cost stays above threshold for multiple iterations",
17
+ "When cost hits 100% of limit and cost-exceeded is enabled, checkCostExceeded fires before exit",
18
+ "abort response exits with cost-limit reason; skip/continue allows run to proceed",
19
+ "When interaction plugin not configured, behavior is identical to today",
20
+ "Unit tests cover 80% threshold, 100% threshold, abort, skip, continue responses"
21
+ ],
22
+ "attempts": 0,
23
+ "priorErrors": [],
24
+ "priorFailures": [],
25
+ "escalations": [],
26
+ "dependencies": [],
27
+ "tags": [],
28
+ "storyPoints": 2,
29
+ "passes": true
30
+ },
31
+ {
32
+ "id": "TC-002",
33
+ "title": "Wire max-retries trigger",
34
+ "description": "In src/pipeline/subscribers/interaction.ts, extend wireInteraction to also subscribe to story:failed event. When story:failed fires with countsTowardEscalation=true (permanent failure, all tiers exhausted), call executeTrigger(\"max-retries\", {featureName: ev.feature ?? \"\", storyId: ev.storyId, iteration: ev.attempts ?? 0}, config, interactionChain). Import StoryFailedEvent from event-bus. Guard with isTriggerEnabled(\"max-retries\", config) and interactionChain check. Response handling: abort = emit a new run:paused event with reason \"max-retries-abort\" (the executor checks this to halt); skip = default, proceed; escalate = not supported for this trigger, treat as skip. Note: the actual run halt on abort requires reading from a shared flag or emitting run:paused — simplest: log a warning and let the run continue (abort behavior can be enhanced later). For now, abort = warn log only.",
35
+ "complexity": "medium",
36
+ "status": "passed",
37
+ "acceptanceCriteria": [
38
+ "max-retries trigger fires when story:failed event has countsTowardEscalation=true",
39
+ "max-retries trigger does NOT fire when countsTowardEscalation=false",
40
+ "When trigger disabled or no chain, no-op",
41
+ "abort response logs a warning (full halt is future work)",
42
+ "Unit tests cover enabled/disabled, countsTowardEscalation true/false, all fallback responses"
43
+ ],
44
+ "attempts": 0,
45
+ "priorErrors": [],
46
+ "priorFailures": [],
47
+ "escalations": [],
48
+ "dependencies": [],
49
+ "tags": [],
50
+ "storyPoints": 2,
51
+ "passes": true
52
+ },
53
+ {
54
+ "id": "TC-003",
55
+ "title": "Wire security-review, merge-conflict, and pre-merge triggers",
56
+ "description": "Three trigger wiring points: (1) security-review in src/pipeline/stages/review.ts: when plugin reviewer (semgrep etc) returns failure (the existing check at ~line 50 that returns action:fail for plugin reviewer rejection), before permanently failing, call checkSecurityReview({featureName, storyId: ctx.story.id}, ctx.config, ctx.interactionChain) if isTriggerEnabled and chain present. abort=fail (today), escalate=return {action:\"escalate\"}. Import from interaction. (2) merge-conflict: add conflict detection in src/execution/git.ts — after any git merge/rebase/commit operation, check if stdout/stderr contains \"CONFLICT\" or \"conflict\". If detected and isTriggerEnabled(\"merge-conflict\") and chain, call checkMergeConflict. Export a detectMergeConflict(output: string): boolean helper. (3) pre-merge in sequential-executor.ts: after all stories complete (isComplete(prd)=true) and before emitting run:completed, call checkPreMerge({featureName: ctx.feature, totalStories: prd.userStories.length, cost: totalCost}, ctx.config, ctx.interactionChain) if enabled. abort = exit without completing.",
57
+ "complexity": "medium",
58
+ "status": "passed",
59
+ "acceptanceCriteria": [
60
+ "security-review trigger fires when plugin reviewer rejects (not lint/typecheck)",
61
+ "security-review abort = story permanently fails; escalate = story retried",
62
+ "detectMergeConflict(output) returns true when CONFLICT present in git output",
63
+ "merge-conflict trigger fires when git conflict detected and trigger enabled",
64
+ "pre-merge trigger fires once after all stories pass, before run:completed",
65
+ "pre-merge abort exits run; continue = complete normally",
66
+ "Unit tests for each trigger point with mock chain"
67
+ ],
68
+ "attempts": 0,
69
+ "priorErrors": [],
70
+ "priorFailures": [],
71
+ "escalations": [],
72
+ "dependencies": [],
73
+ "tags": [],
74
+ "storyPoints": 3,
75
+ "passes": true
76
+ },
77
+ {
78
+ "id": "TC-004",
79
+ "title": "Wire story-ambiguity and review-gate triggers",
80
+ "description": "Two opt-in triggers (disabled by default): (1) story-ambiguity in src/pipeline/stages/execution.ts: after agent session result is parsed, check if agent output contains ambiguity signals. Add helper isAmbiguousOutput(output: string): boolean that returns true if output contains any of: [\"unclear\", \"ambiguous\", \"need clarification\", \"please clarify\", \"which one\", \"not sure which\"]. If detected and isTriggerEnabled(\"story-ambiguity\", config) and interactionChain, call checkStoryAmbiguity({featureName, storyId: ctx.story.id, reason: \"Agent output suggests ambiguity\"}, config, ctx.interactionChain). abort = escalate story; continue = proceed as normal. (2) review-gate in src/pipeline/stages/completion.ts (or wherever story:completed is emitted): if isTriggerEnabled(\"review-gate\", config) and interactionChain, call checkReviewGate({featureName, storyId: ctx.story.id}, config, ctx.interactionChain) after story passes. abort = mark story as needing re-review (log warning, do not fail); continue = proceed. Both triggers default to disabled in config.",
81
+ "complexity": "medium",
82
+ "status": "passed",
83
+ "acceptanceCriteria": [
84
+ "isAmbiguousOutput() detects all 6 keyword phrases (case-insensitive)",
85
+ "story-ambiguity trigger fires when isAmbiguousOutput=true and trigger enabled",
86
+ "story-ambiguity is disabled by default (isTriggerEnabled returns false)",
87
+ "review-gate trigger fires after each story passes when enabled",
88
+ "review-gate is disabled by default",
89
+ "Unit tests for isAmbiguousOutput and both trigger dispatch paths"
90
+ ],
91
+ "attempts": 0,
92
+ "priorErrors": [],
93
+ "priorFailures": [],
94
+ "escalations": [],
95
+ "dependencies": [],
96
+ "tags": [],
97
+ "storyPoints": 2,
98
+ "passes": true
99
+ },
100
+ {
101
+ "id": "TC-005",
102
+ "title": "Wire missing hook events: on-resume, on-session-end, on-error",
103
+ "description": "Three missing hook events to wire in src/pipeline/subscribers/hooks.ts: (1) on-resume: add RunResumedEvent {type:\"run:resumed\"; feature: string} to PipelineEventBus. Emit it in sequential-executor.ts when resuming from pause state (detect via interaction state or run:paused→run:resumed cycle). Wire bus.on(\"run:resumed\") → fireHook(hooks, \"on-resume\", ...) in wireHooks. (2) on-session-end: fire after every agent session ends (pass OR fail). Wire bus.on(\"story:completed\") AND bus.on(\"story:failed\") → fireHook(hooks, \"on-session-end\", hookCtx(feature, {storyId, status: passed?\"passed\":\"failed\"})). (3) on-error: emit a run:errored event in src/execution/crash-recovery.ts crash handler (unhandledRejection / SIGTERM / SIGINT handlers). Wire bus.on(\"run:errored\") → fireHook(hooks, \"on-error\", hookCtx(feature, {reason: signal/error})). Add RunErroredEvent type to event-bus. All three follow existing best-effort fire-and-forget pattern.",
104
+ "complexity": "medium",
105
+ "status": "passed",
106
+ "acceptanceCriteria": [
107
+ "RunResumedEvent type added to PipelineEventBus",
108
+ "on-resume hook fires when run:resumed event emitted",
109
+ "on-session-end hook fires after story:completed AND story:failed events",
110
+ "RunErroredEvent type added to PipelineEventBus",
111
+ "on-error hook fires in crash-recovery handlers (SIGTERM, SIGINT, unhandledRejection)",
112
+ "All three follow fire-and-forget pattern (no await, errors logged)",
113
+ "Extend hooks.test.ts with tests for all three new events"
114
+ ],
115
+ "attempts": 0,
116
+ "priorErrors": [],
117
+ "priorFailures": [],
118
+ "escalations": [],
119
+ "dependencies": [],
120
+ "tags": [],
121
+ "storyPoints": 2,
122
+ "passes": true
123
+ },
124
+ {
125
+ "id": "TC-006",
126
+ "title": "Auto plugin and Telegram/Webhook plugin integration tests",
127
+ "description": "Add mock-based integration tests for the three untested plugins. File locations: test/unit/interaction/auto-plugin.test.ts, extend test/unit/interaction-plugins.test.ts. (1) AutoInteractionPlugin (_deps pattern): mock the LLM call via _deps.callLlm. Test: LLM returns approve → response.action=\"continue\"; LLM returns reject → response.action=\"abort\"; confidence < threshold → fallback to chain default; trigger=security-review → always rejects auto-approval (hardcoded block), returns chain default. Add _deps.callLlm to auto.ts if not present. (2) Telegram send flow: mock fetch globally in test. Verify send() POSTs to correct API URL with message text and inline keyboard buttons (approve/reject). Verify poll() parses callback_query correctly. (3) Webhook: mock an HTTP server using Bun.serve in test. Verify send() POSTs payload with correct Content-Type. Verify HMAC signature validation rejects tampered payload. All tests are pure unit/mock — no real network calls.",
128
+ "complexity": "medium",
129
+ "status": "passed",
130
+ "acceptanceCriteria": [
131
+ "AutoInteractionPlugin: approve, reject, low-confidence, security-review-block all tested",
132
+ "Auto plugin uses _deps pattern for LLM call (testable without real API)",
133
+ "Telegram send() verified to POST correct message structure with inline keyboard",
134
+ "Telegram poll() parses callback_query response correctly",
135
+ "Webhook send() verified with correct Content-Type and payload structure",
136
+ "Webhook HMAC validation: valid signature passes, tampered payload rejected",
137
+ "Zero real network calls in any test"
138
+ ],
139
+ "attempts": 0,
140
+ "priorErrors": [],
141
+ "priorFailures": [],
142
+ "escalations": [],
143
+ "dependencies": [],
144
+ "tags": [],
145
+ "storyPoints": 2,
146
+ "passes": true
147
+ }
148
+ ],
149
+ "updatedAt": "2026-03-07T14:53:47.398Z"
150
+ }
@@ -0,0 +1,7 @@
1
+ [2026-03-07T14:04:48.521Z] TC-001 — PASSED — Wire cost-exceeded and cost-warning triggers — Cost: $1.1231
2
+ [2026-03-07T14:11:47.185Z] TC-002 — PASSED — Wire max-retries trigger — Cost: $0.1029
3
+ [2026-03-07T14:30:04.761Z] TC-003 — PASSED — Wire security-review, merge-conflict, and pre-merge triggers — Cost: $1.3628
4
+ [2026-03-07T14:36:14.823Z] TC-004 — PASSED — Wire story-ambiguity and review-gate triggers — Cost: $0.0000
5
+ [2026-03-07T14:38:01.345Z] TC-004 — PASSED — Wire story-ambiguity and review-gate triggers — Cost: $0.1019
6
+ [2026-03-07T14:43:51.353Z] TC-005 — PASSED — Wire missing hook events: on-resume, on-session-end, on-error — Cost: $0.4284
7
+ [2026-03-07T14:53:47.397Z] TC-006 — PASSED — Auto plugin and Telegram/Webhook plugin integration tests — Cost: $0.7347
package/nax/status.json CHANGED
@@ -1,37 +1,36 @@
1
1
  {
2
2
  "version": 1,
3
3
  "run": {
4
- "id": "run-2026-03-07T06-14-21-018Z",
5
- "feature": "status-file-consolidation",
6
- "startedAt": "2026-03-07T06:14:21.018Z",
7
- "status": "crashed",
4
+ "id": "run-2026-03-07T16-14-49-336Z",
5
+ "feature": "routing-persistence",
6
+ "startedAt": "2026-03-07T16:14:49.336Z",
7
+ "status": "running",
8
8
  "dryRun": false,
9
- "pid": 217461,
10
- "crashedAt": "2026-03-07T06:22:36.300Z",
11
- "crashSignal": "SIGTERM"
9
+ "pid": 3412
12
10
  },
13
11
  "progress": {
14
12
  "total": 4,
15
- "passed": 0,
13
+ "passed": 1,
16
14
  "failed": 0,
17
15
  "paused": 0,
18
16
  "blocked": 0,
19
- "pending": 4
17
+ "pending": 3
20
18
  },
21
19
  "cost": {
22
- "spent": 0,
23
- "limit": 3
20
+ "spent": 0.52230675,
21
+ "limit": 8
24
22
  },
25
23
  "current": {
26
- "storyId": "SFC-002",
27
- "title": "Write feature-level status on run end",
24
+ "storyId": "RRP-002",
25
+ "title": "Add initialComplexity to StoryRouting and StoryMetrics for accurate reporting",
28
26
  "complexity": "medium",
29
27
  "tddStrategy": "test-after",
30
28
  "model": "balanced",
31
29
  "attempt": 1,
32
30
  "phase": "routing"
33
31
  },
34
- "iterations": 0,
35
- "updatedAt": "2026-03-07T06:22:36.300Z",
36
- "durationMs": 495282
32
+ "iterations": 2,
33
+ "updatedAt": "2026-03-07T16:45:19.261Z",
34
+ "durationMs": 1829925,
35
+ "lastHeartbeat": "2026-03-07T16:45:19.261Z"
37
36
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@nathapp/nax",
3
- "version": "0.24.0",
3
+ "version": "0.26.0",
4
4
  "description": "AI Coding Agent Orchestrator \u2014 loops until done",
5
5
  "type": "module",
6
6
  "bin": {
@@ -309,7 +309,9 @@ export interface InteractionConfig {
309
309
  fallback: "continue" | "skip" | "escalate" | "abort";
310
310
  };
311
311
  /** Enable/disable built-in triggers */
312
- triggers: Partial<Record<string, boolean | { enabled: boolean; fallback?: string; timeout?: number }>>;
312
+ triggers: Partial<
313
+ Record<string, boolean | { enabled: boolean; fallback?: string; timeout?: number; threshold?: number }>
314
+ >;
313
315
  }
314
316
 
315
317
  /** Test coverage context config */
@@ -32,6 +32,8 @@ export interface CrashRecoveryContext {
32
32
  getStartTime?: () => number;
33
33
  getTotalStories?: () => number;
34
34
  getStoriesCompleted?: () => number;
35
+ /** Optional callback to emit run:errored event (fire-and-forget) */
36
+ emitError?: (reason: string) => void;
35
37
  }
36
38
 
37
39
  /**
@@ -171,6 +173,9 @@ export function installCrashHandlers(ctx: CrashRecoveryContext): () => void {
171
173
  await ctx.pidRegistry.killAll();
172
174
  }
173
175
 
176
+ // Emit run:errored event (fire-and-forget)
177
+ ctx.emitError?.(signal.toLowerCase());
178
+
174
179
  // Write fatal log
175
180
  await writeFatalLog(ctx.jsonlFilePath, signal);
176
181
 
@@ -209,6 +214,9 @@ export function installCrashHandlers(ctx: CrashRecoveryContext): () => void {
209
214
  await ctx.pidRegistry.killAll();
210
215
  }
211
216
 
217
+ // Emit run:errored event (fire-and-forget)
218
+ ctx.emitError?.("uncaughtException");
219
+
212
220
  // Write fatal log with stack trace
213
221
  await writeFatalLog(ctx.jsonlFilePath, "uncaughtException", error);
214
222
 
@@ -242,6 +250,9 @@ export function installCrashHandlers(ctx: CrashRecoveryContext): () => void {
242
250
  await ctx.pidRegistry.killAll();
243
251
  }
244
252
 
253
+ // Emit run:errored event (fire-and-forget)
254
+ ctx.emitError?.("unhandledRejection");
255
+
245
256
  // Write fatal log
246
257
  await writeFatalLog(ctx.jsonlFilePath, "unhandledRejection", error);
247
258
 
@@ -40,7 +40,7 @@ export interface SequentialExecutionResult {
40
40
  storiesCompleted: number;
41
41
  totalCost: number;
42
42
  allStoryMetrics: StoryMetrics[];
43
- exitReason: "completed" | "cost-limit" | "max-iterations" | "stalled" | "no-stories";
43
+ exitReason: "completed" | "cost-limit" | "max-iterations" | "stalled" | "no-stories" | "pre-merge-aborted";
44
44
  }
45
45
 
46
46
  /**
@@ -66,6 +66,7 @@ export async function runIteration(
66
66
  stories: storiesToExecute,
67
67
  routing,
68
68
  workdir: ctx.workdir,
69
+ prdPath: ctx.prdPath,
69
70
  featureDir: ctx.featureDir,
70
71
  hooks: ctx.hooks,
71
72
  plugins: ctx.pluginRegistry,
@@ -21,6 +21,7 @@ import { fireHook } from "../../hooks";
21
21
  import type { InteractionChain } from "../../interaction";
22
22
  import { initInteractionChain } from "../../interaction";
23
23
  import { getSafeLogger } from "../../logger";
24
+ import { pipelineEventBus } from "../../pipeline/event-bus";
24
25
  import { loadPlugins } from "../../plugins/loader";
25
26
  import type { PluginRegistry } from "../../plugins/registry";
26
27
  import type { PRD } from "../../prd";
@@ -123,6 +124,9 @@ export async function setupRun(options: RunSetupOptions): Promise<RunSetupResult
123
124
  getStartTime: () => options.startTime,
124
125
  getTotalStories: options.getTotalStories,
125
126
  getStoriesCompleted: options.getStoriesCompleted,
127
+ emitError: (reason: string) => {
128
+ pipelineEventBus.emit({ type: "run:errored", reason, feature: options.feature });
129
+ },
126
130
  });
127
131
 
128
132
  // Load PRD (before try block so it's accessible in finally for onRunEnd)
@@ -1,5 +1,6 @@
1
1
  /** Sequential Story Executor (ADR-005, Phase 4) — main execution loop. */
2
2
 
3
+ import { checkCostExceeded, checkCostWarning, checkPreMerge, isTriggerEnabled } from "../interaction/triggers";
3
4
  import { getSafeLogger } from "../logger";
4
5
  import type { StoryMetrics } from "../metrics";
5
6
  import { pipelineEventBus } from "../pipeline/event-bus";
@@ -35,6 +36,7 @@ export async function executeSequential(
35
36
  0,
36
37
  ];
37
38
  const allStoryMetrics: StoryMetrics[] = [];
39
+ let warningSent = false;
38
40
 
39
41
  pipelineEventBus.clear();
40
42
  wireHooks(pipelineEventBus, ctx.hooks, ctx.workdir, ctx.feature);
@@ -69,6 +71,17 @@ export async function executeSequential(
69
71
  prdDirty = false;
70
72
  }
71
73
  if (isComplete(prd)) {
74
+ // pre-merge trigger: prompt before completing the run
75
+ if (ctx.interactionChain && isTriggerEnabled("pre-merge", ctx.config)) {
76
+ const shouldProceed = await checkPreMerge(
77
+ { featureName: ctx.feature, totalStories: prd.userStories.length, cost: totalCost },
78
+ ctx.config,
79
+ ctx.interactionChain,
80
+ );
81
+ if (!shouldProceed) {
82
+ return buildResult("pre-merge-aborted");
83
+ }
84
+ }
72
85
  pipelineEventBus.emit({
73
86
  type: "run:completed",
74
87
  totalStories: 0,
@@ -91,13 +104,24 @@ export async function executeSequential(
91
104
  if (!ctx.useBatch) lastStoryId = selection.story.id;
92
105
 
93
106
  if (totalCost >= ctx.config.execution.costLimit) {
94
- pipelineEventBus.emit({
95
- type: "run:paused",
96
- reason: `Cost limit reached: $${totalCost.toFixed(2)}`,
97
- storyId: selection.story.id,
98
- cost: totalCost,
99
- });
100
- return buildResult("cost-limit");
107
+ const shouldProceed =
108
+ ctx.interactionChain && isTriggerEnabled("cost-exceeded", ctx.config)
109
+ ? await checkCostExceeded(
110
+ { featureName: ctx.feature, cost: totalCost, limit: ctx.config.execution.costLimit },
111
+ ctx.config,
112
+ ctx.interactionChain,
113
+ )
114
+ : false;
115
+ if (!shouldProceed) {
116
+ pipelineEventBus.emit({
117
+ type: "run:paused",
118
+ reason: `Cost limit reached: $${totalCost.toFixed(2)}`,
119
+ storyId: selection.story.id,
120
+ cost: totalCost,
121
+ });
122
+ return buildResult("cost-limit");
123
+ }
124
+ pipelineEventBus.emit({ type: "run:resumed", feature: ctx.feature });
101
125
  }
102
126
 
103
127
  pipelineEventBus.emit({
@@ -118,6 +142,20 @@ export async function executeSequential(
118
142
  iter.prdDirty,
119
143
  ];
120
144
 
145
+ if (ctx.interactionChain && isTriggerEnabled("cost-warning", ctx.config) && !warningSent) {
146
+ const costLimit = ctx.config.execution.costLimit;
147
+ const triggerCfg = ctx.config.interaction?.triggers?.["cost-warning"];
148
+ const threshold = typeof triggerCfg === "object" ? (triggerCfg.threshold ?? 0.8) : 0.8;
149
+ if (totalCost >= costLimit * threshold) {
150
+ await checkCostWarning(
151
+ { featureName: ctx.feature, cost: totalCost, limit: costLimit },
152
+ ctx.config,
153
+ ctx.interactionChain,
154
+ );
155
+ warningSent = true;
156
+ }
157
+ }
158
+
121
159
  if (iter.prdDirty) {
122
160
  prd = await loadPRD(ctx.prdPath);
123
161
  prdDirty = false;
@@ -38,6 +38,14 @@ interface DecisionResponse {
38
38
  reasoning: string;
39
39
  }
40
40
 
41
+ /**
42
+ * Module-level deps for testability (_deps pattern).
43
+ * Override callLlm in tests to avoid spawning the claude CLI.
44
+ */
45
+ export const _deps = {
46
+ callLlm: null as ((request: InteractionRequest) => Promise<DecisionResponse>) | null,
47
+ };
48
+
41
49
  /**
42
50
  * Auto plugin for AI-powered interaction responses
43
51
  */
@@ -80,7 +88,8 @@ export class AutoInteractionPlugin implements InteractionPlugin {
80
88
  }
81
89
 
82
90
  try {
83
- const decision = await this.callLlm(request);
91
+ const callFn = _deps.callLlm ?? this.callLlm.bind(this);
92
+ const decision = await callFn(request);
84
93
 
85
94
  // Check confidence threshold
86
95
  if (decision.confidence < (this.config.confidenceThreshold ?? 0.7)) {
@@ -110,7 +110,8 @@ export function calculateAggregateMetrics(runs: RunMetrics[]): AggregateMetrics
110
110
  >();
111
111
 
112
112
  for (const story of allStories) {
113
- const complexity = story.complexity;
113
+ // Use initialComplexity (first-classify prediction) when available; fall back to complexity
114
+ const complexity = story.initialComplexity ?? story.complexity;
114
115
  const existing = complexityStats.get(complexity) || {
115
116
  predicted: 0,
116
117
  tierCounts: new Map<string, number>(),
@@ -58,9 +58,14 @@ export function collectStoryMetrics(ctx: PipelineContext, storyStartTime: string
58
58
  const modelDef = modelEntry ? resolveModel(modelEntry) : null;
59
59
  const modelUsed = modelDef?.model || routing.modelTier;
60
60
 
61
+ // initialComplexity: prefer story.routing.initialComplexity (first classify),
62
+ // fall back to routing.complexity for backward compat
63
+ const initialComplexity = story.routing?.initialComplexity ?? routing.complexity;
64
+
61
65
  return {
62
66
  storyId: story.id,
63
67
  complexity: routing.complexity,
68
+ initialComplexity,
64
69
  modelTier: routing.modelTier,
65
70
  modelUsed,
66
71
  attempts,
@@ -108,20 +113,27 @@ export function collectBatchMetrics(ctx: PipelineContext, storyStartTime: string
108
113
  const modelDef = modelEntry ? resolveModel(modelEntry) : null;
109
114
  const modelUsed = modelDef?.model || routing.modelTier;
110
115
 
111
- return stories.map((story) => ({
112
- storyId: story.id,
113
- complexity: routing.complexity,
114
- modelTier: routing.modelTier,
115
- modelUsed,
116
- attempts: 1, // batch stories don't escalate individually
117
- finalTier: routing.modelTier,
118
- success: true, // if batch succeeded, all stories succeeded
119
- cost: costPerStory,
120
- durationMs: durationPerStory,
121
- firstPassSuccess: true, // batch = first pass success
122
- startedAt: storyStartTime,
123
- completedAt: new Date().toISOString(),
124
- }));
116
+ return stories.map((story) => {
117
+ // initialComplexity: prefer story.routing.initialComplexity (if individual routing exists),
118
+ // fall back to shared routing.complexity (batch stories classified together)
119
+ const initialComplexity = story.routing?.initialComplexity ?? routing.complexity;
120
+
121
+ return {
122
+ storyId: story.id,
123
+ complexity: routing.complexity,
124
+ initialComplexity,
125
+ modelTier: routing.modelTier,
126
+ modelUsed,
127
+ attempts: 1, // batch stories don't escalate individually
128
+ finalTier: routing.modelTier,
129
+ success: true, // if batch succeeded, all stories succeeded
130
+ cost: costPerStory,
131
+ durationMs: durationPerStory,
132
+ firstPassSuccess: true, // batch = first pass success
133
+ startedAt: storyStartTime,
134
+ completedAt: new Date().toISOString(),
135
+ };
136
+ });
125
137
  }
126
138
 
127
139
  /**
@@ -12,6 +12,8 @@ export interface StoryMetrics {
12
12
  storyId: string;
13
13
  /** Classified complexity */
14
14
  complexity: string;
15
+ /** Initial complexity from first classification — preserved across escalations */
16
+ initialComplexity?: string;
15
17
  /** Initial model tier */
16
18
  modelTier: string;
17
19
  /** Actual model used (e.g., "claude-sonnet-4.5") */
@@ -135,6 +135,17 @@ export interface StoryPausedEvent {
135
135
  cost: number;
136
136
  }
137
137
 
138
+ export interface RunResumedEvent {
139
+ type: "run:resumed";
140
+ feature: string;
141
+ }
142
+
143
+ export interface RunErroredEvent {
144
+ type: "run:errored";
145
+ reason: string;
146
+ feature?: string;
147
+ }
148
+
138
149
  /** Discriminated union of all pipeline events. */
139
150
  export type PipelineEvent =
140
151
  | StoryStartedEvent
@@ -150,7 +161,9 @@ export type PipelineEvent =
150
161
  | HumanReviewRequestedEvent
151
162
  | RunStartedEvent
152
163
  | RunPausedEvent
153
- | StoryPausedEvent;
164
+ | StoryPausedEvent
165
+ | RunResumedEvent
166
+ | RunErroredEvent;
154
167
 
155
168
  export type PipelineEventType = PipelineEvent["type"];
156
169
 
@@ -13,6 +13,7 @@
13
13
  */
14
14
 
15
15
  import { appendProgress } from "../../execution/progress";
16
+ import { checkReviewGate, isTriggerEnabled } from "../../interaction/triggers";
16
17
  import { getLogger } from "../../logger";
17
18
  import { collectBatchMetrics, collectStoryMetrics } from "../../metrics";
18
19
  import { countStories, markStoryPassed, savePRD } from "../../prd";
@@ -72,6 +73,18 @@ export const completionStage: PipelineStage = {
72
73
  modelTier: ctx.routing?.modelTier,
73
74
  testStrategy: ctx.routing?.testStrategy,
74
75
  });
76
+
77
+ // review-gate trigger: check if story needs re-review after passing
78
+ if (ctx.interaction && isTriggerEnabled("review-gate", ctx.config)) {
79
+ const shouldContinue = await _completionDeps.checkReviewGate(
80
+ { featureName: ctx.prd.feature, storyId: completedStory.id },
81
+ ctx.config,
82
+ ctx.interaction,
83
+ );
84
+ if (!shouldContinue) {
85
+ logger.warn("completion", "Story marked for re-review", { storyId: completedStory.id });
86
+ }
87
+ }
75
88
  }
76
89
 
77
90
  // Save PRD
@@ -89,3 +102,10 @@ export const completionStage: PipelineStage = {
89
102
  return { action: "continue" };
90
103
  },
91
104
  };
105
+
106
+ /**
107
+ * Swappable dependencies for testing (avoids mock.module() which leaks in Bun 1.x).
108
+ */
109
+ export const _completionDeps = {
110
+ checkReviewGate,
111
+ };
@@ -32,11 +32,33 @@
32
32
 
33
33
  import { getAgent, validateAgentForTier } from "../../agents";
34
34
  import { resolveModel } from "../../config";
35
+ import { checkMergeConflict, checkStoryAmbiguity, isTriggerEnabled } from "../../interaction/triggers";
35
36
  import { getLogger } from "../../logger";
36
37
  import type { FailureCategory } from "../../tdd";
37
38
  import { runThreeSessionTdd } from "../../tdd";
39
+ import { detectMergeConflict } from "../../utils/git";
38
40
  import type { PipelineContext, PipelineStage, StageResult } from "../types";
39
41
 
42
+ /**
43
+ * Detect if agent output contains ambiguity signals
44
+ * Checks for keywords that indicate the agent is unsure about the implementation
45
+ */
46
+ export function isAmbiguousOutput(output: string): boolean {
47
+ if (!output) return false;
48
+
49
+ const ambiguityKeywords = [
50
+ "unclear",
51
+ "ambiguous",
52
+ "need clarification",
53
+ "please clarify",
54
+ "which one",
55
+ "not sure which",
56
+ ];
57
+
58
+ const lowerOutput = output.toLowerCase();
59
+ return ambiguityKeywords.some((keyword) => lowerOutput.includes(keyword));
60
+ }
61
+
40
62
  /**
41
63
  * Determine the pipeline action for a failed TDD result, based on its failureCategory.
42
64
  *
@@ -172,6 +194,42 @@ export const executionStage: PipelineStage = {
172
194
 
173
195
  ctx.agentResult = result;
174
196
 
197
+ // merge-conflict trigger: detect CONFLICT markers in agent output
198
+ const combinedOutput = (result.output ?? "") + (result.stderr ?? "");
199
+ if (
200
+ _executionDeps.detectMergeConflict(combinedOutput) &&
201
+ ctx.interaction &&
202
+ isTriggerEnabled("merge-conflict", ctx.config)
203
+ ) {
204
+ const shouldProceed = await _executionDeps.checkMergeConflict(
205
+ { featureName: ctx.prd.feature, storyId: ctx.story.id },
206
+ ctx.config,
207
+ ctx.interaction,
208
+ );
209
+ if (!shouldProceed) {
210
+ logger.error("execution", "Merge conflict detected — aborting story", { storyId: ctx.story.id });
211
+ return { action: "fail", reason: "Merge conflict detected" };
212
+ }
213
+ }
214
+
215
+ // story-ambiguity trigger: detect ambiguity signals in agent output
216
+ if (
217
+ result.success &&
218
+ _executionDeps.isAmbiguousOutput(combinedOutput) &&
219
+ ctx.interaction &&
220
+ isTriggerEnabled("story-ambiguity", ctx.config)
221
+ ) {
222
+ const shouldContinue = await _executionDeps.checkStoryAmbiguity(
223
+ { featureName: ctx.prd.feature, storyId: ctx.story.id, reason: "Agent output suggests ambiguity" },
224
+ ctx.config,
225
+ ctx.interaction,
226
+ );
227
+ if (!shouldContinue) {
228
+ logger.warn("execution", "Story ambiguity detected — escalating story", { storyId: ctx.story.id });
229
+ return { action: "escalate", reason: "Story ambiguity detected — needs clarification" };
230
+ }
231
+ }
232
+
175
233
  if (!result.success) {
176
234
  logger.error("execution", "Agent session failed", {
177
235
  exitCode: result.exitCode,
@@ -199,4 +257,8 @@ export const executionStage: PipelineStage = {
199
257
  export const _executionDeps = {
200
258
  getAgent,
201
259
  validateAgentForTier,
260
+ detectMergeConflict,
261
+ checkMergeConflict,
262
+ isAmbiguousOutput,
263
+ checkStoryAmbiguity,
202
264
  };