@nathapp/nax 0.24.0 → 0.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +70 -56
- package/docs/ROADMAP.md +45 -15
- package/docs/specs/trigger-completion.md +145 -0
- package/nax/features/routing-persistence/prd.json +104 -0
- package/nax/features/routing-persistence/progress.txt +1 -0
- package/nax/features/trigger-completion/prd.json +150 -0
- package/nax/features/trigger-completion/progress.txt +7 -0
- package/nax/status.json +15 -16
- package/package.json +1 -1
- package/src/config/types.ts +3 -1
- package/src/execution/crash-recovery.ts +11 -0
- package/src/execution/executor-types.ts +1 -1
- package/src/execution/iteration-runner.ts +1 -0
- package/src/execution/lifecycle/run-setup.ts +4 -0
- package/src/execution/sequential-executor.ts +45 -7
- package/src/interaction/plugins/auto.ts +10 -1
- package/src/metrics/aggregator.ts +2 -1
- package/src/metrics/tracker.ts +26 -14
- package/src/metrics/types.ts +2 -0
- package/src/pipeline/event-bus.ts +14 -1
- package/src/pipeline/stages/completion.ts +20 -0
- package/src/pipeline/stages/execution.ts +62 -0
- package/src/pipeline/stages/review.ts +25 -1
- package/src/pipeline/stages/routing.ts +42 -8
- package/src/pipeline/subscribers/hooks.ts +32 -0
- package/src/pipeline/subscribers/interaction.ts +36 -1
- package/src/pipeline/types.ts +2 -0
- package/src/prd/types.ts +4 -0
- package/src/routing/content-hash.ts +25 -0
- package/src/routing/index.ts +3 -0
- package/src/routing/router.ts +3 -2
- package/src/routing/strategies/keyword.ts +2 -1
- package/src/routing/strategies/llm-prompts.ts +29 -28
- package/src/utils/git.ts +21 -0
- package/test/integration/routing/plugin-routing-core.test.ts +1 -1
- package/test/unit/execution/sequential-executor.test.ts +235 -0
- package/test/unit/interaction/auto-plugin.test.ts +162 -0
- package/test/unit/interaction-plugins.test.ts +308 -1
- package/test/unit/metrics/aggregator.test.ts +164 -0
- package/test/unit/metrics/tracker.test.ts +186 -0
- package/test/unit/pipeline/stages/completion-review-gate.test.ts +218 -0
- package/test/unit/pipeline/stages/execution-ambiguity.test.ts +311 -0
- package/test/unit/pipeline/stages/execution-merge-conflict.test.ts +218 -0
- package/test/unit/pipeline/stages/review.test.ts +201 -0
- package/test/unit/pipeline/stages/routing-idempotence.test.ts +139 -0
- package/test/unit/pipeline/stages/routing-initial-complexity.test.ts +321 -0
- package/test/unit/pipeline/stages/routing-persistence.test.ts +380 -0
- package/test/unit/pipeline/subscribers/hooks.test.ts +43 -4
- package/test/unit/pipeline/subscribers/interaction.test.ts +284 -2
- package/test/unit/prd-auto-default.test.ts +2 -2
- package/test/unit/routing/content-hash.test.ts +99 -0
- package/test/unit/routing/routing-stability.test.ts +1 -1
- package/test/unit/routing-core.test.ts +5 -5
- package/test/unit/routing-strategies.test.ts +1 -3
- package/test/unit/utils/git.test.ts +50 -0
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
{
|
|
2
|
+
"project": "nax",
|
|
3
|
+
"branchName": "feat/trigger-completion",
|
|
4
|
+
"feature": "trigger-completion",
|
|
5
|
+
"version": "0.25.0",
|
|
6
|
+
"description": "Wire all 8 unwired interaction triggers to correct pipeline decision points, add 3 missing hook events, and add integration tests for auto/telegram/webhook plugins.",
|
|
7
|
+
"userStories": [
|
|
8
|
+
{
|
|
9
|
+
"id": "TC-001",
|
|
10
|
+
"title": "Wire cost-exceeded and cost-warning triggers",
|
|
11
|
+
"description": "In src/execution/sequential-executor.ts: (1) Before exiting on cost limit (line ~93), call checkCostExceeded({featureName: ctx.feature, cost: totalCost, limit: ctx.config.execution.costLimit}, ctx.config, ctx.interactionChain). Import checkCostExceeded from src/interaction/triggers.ts. If isTriggerEnabled(\"cost-exceeded\", config) is false or chain is null, keep today's behavior. Trigger abort = exit \"cost-limit\". Trigger skip/continue = allow run to proceed past limit. (2) Add cost-warning: track a boolean warningSent=false. In the iteration loop, when totalCost >= costLimit * (interaction.triggers[\"cost-warning\"]?.threshold ?? 0.8) and !warningSent, call checkCostWarning({featureName, cost, limit}, config, interactionChain), set warningSent=true. isTriggerEnabled guards the call. Default fallback continue = proceed silently. Both calls must be best-effort: guard with if(interactionChain) check.",
|
|
12
|
+
"complexity": "medium",
|
|
13
|
+
"status": "passed",
|
|
14
|
+
"acceptanceCriteria": [
|
|
15
|
+
"When cost hits 80% of limit and cost-warning trigger is enabled, checkCostWarning fires once",
|
|
16
|
+
"Warning fires only once per run even if cost stays above threshold for multiple iterations",
|
|
17
|
+
"When cost hits 100% of limit and cost-exceeded is enabled, checkCostExceeded fires before exit",
|
|
18
|
+
"abort response exits with cost-limit reason; skip/continue allows run to proceed",
|
|
19
|
+
"When interaction plugin not configured, behavior is identical to today",
|
|
20
|
+
"Unit tests cover 80% threshold, 100% threshold, abort, skip, continue responses"
|
|
21
|
+
],
|
|
22
|
+
"attempts": 0,
|
|
23
|
+
"priorErrors": [],
|
|
24
|
+
"priorFailures": [],
|
|
25
|
+
"escalations": [],
|
|
26
|
+
"dependencies": [],
|
|
27
|
+
"tags": [],
|
|
28
|
+
"storyPoints": 2,
|
|
29
|
+
"passes": true
|
|
30
|
+
},
|
|
31
|
+
{
|
|
32
|
+
"id": "TC-002",
|
|
33
|
+
"title": "Wire max-retries trigger",
|
|
34
|
+
"description": "In src/pipeline/subscribers/interaction.ts, extend wireInteraction to also subscribe to the story:failed event. When story:failed fires with countsTowardEscalation=true (permanent failure, all tiers exhausted), call executeTrigger(\"max-retries\", {featureName: ev.feature ?? \"\", storyId: ev.storyId, iteration: ev.attempts ?? 0}, config, interactionChain). Import StoryFailedEvent from event-bus. Guard with isTriggerEnabled(\"max-retries\", config) and interactionChain check. Response handling: abort = for now, log a warning only and let the run continue; skip = default, proceed; escalate = not supported for this trigger, treat as skip. Note: actually halting the run on abort would require reading from a shared flag or emitting a new run:paused event with reason \"max-retries-abort\" that the executor checks — this is future work and can be enhanced later.",
|
|
35
|
+
"complexity": "medium",
|
|
36
|
+
"status": "passed",
|
|
37
|
+
"acceptanceCriteria": [
|
|
38
|
+
"max-retries trigger fires when story:failed event has countsTowardEscalation=true",
|
|
39
|
+
"max-retries trigger does NOT fire when countsTowardEscalation=false",
|
|
40
|
+
"When trigger disabled or no chain, no-op",
|
|
41
|
+
"abort response logs a warning (full halt is future work)",
|
|
42
|
+
"Unit tests cover enabled/disabled, countsTowardEscalation true/false, all fallback responses"
|
|
43
|
+
],
|
|
44
|
+
"attempts": 0,
|
|
45
|
+
"priorErrors": [],
|
|
46
|
+
"priorFailures": [],
|
|
47
|
+
"escalations": [],
|
|
48
|
+
"dependencies": [],
|
|
49
|
+
"tags": [],
|
|
50
|
+
"storyPoints": 2,
|
|
51
|
+
"passes": true
|
|
52
|
+
},
|
|
53
|
+
{
|
|
54
|
+
"id": "TC-003",
|
|
55
|
+
"title": "Wire security-review, merge-conflict, and pre-merge triggers",
|
|
56
|
+
"description": "Three trigger wiring points: (1) security-review in src/pipeline/stages/review.ts: when plugin reviewer (semgrep etc.) returns failure (the existing check at ~line 50 that returns action:fail for plugin reviewer rejection), before permanently failing, call checkSecurityReview({featureName, storyId: ctx.story.id}, ctx.config, ctx.interactionChain) if isTriggerEnabled and chain present. abort=fail (today), escalate=return {action:\"escalate\"}. Import from interaction. (2) merge-conflict: add conflict detection in src/utils/git.ts — after any git merge/rebase/commit operation, check if stdout/stderr contains \"CONFLICT\" or \"conflict\". If detected and isTriggerEnabled(\"merge-conflict\") and chain, call checkMergeConflict. Export a detectMergeConflict(output: string): boolean helper. (3) pre-merge in sequential-executor.ts: after all stories complete (isComplete(prd)=true) and before emitting run:completed, call checkPreMerge({featureName: ctx.feature, totalStories: prd.userStories.length, cost: totalCost}, ctx.config, ctx.interactionChain) if enabled. abort = exit without completing.",
|
|
57
|
+
"complexity": "medium",
|
|
58
|
+
"status": "passed",
|
|
59
|
+
"acceptanceCriteria": [
|
|
60
|
+
"security-review trigger fires when plugin reviewer rejects (not lint/typecheck)",
|
|
61
|
+
"security-review abort = story permanently fails; escalate = story retried",
|
|
62
|
+
"detectMergeConflict(output) returns true when CONFLICT present in git output",
|
|
63
|
+
"merge-conflict trigger fires when git conflict detected and trigger enabled",
|
|
64
|
+
"pre-merge trigger fires once after all stories pass, before run:completed",
|
|
65
|
+
"pre-merge abort exits run; continue = complete normally",
|
|
66
|
+
"Unit tests for each trigger point with mock chain"
|
|
67
|
+
],
|
|
68
|
+
"attempts": 0,
|
|
69
|
+
"priorErrors": [],
|
|
70
|
+
"priorFailures": [],
|
|
71
|
+
"escalations": [],
|
|
72
|
+
"dependencies": [],
|
|
73
|
+
"tags": [],
|
|
74
|
+
"storyPoints": 3,
|
|
75
|
+
"passes": true
|
|
76
|
+
},
|
|
77
|
+
{
|
|
78
|
+
"id": "TC-004",
|
|
79
|
+
"title": "Wire story-ambiguity and review-gate triggers",
|
|
80
|
+
"description": "Two opt-in triggers (disabled by default): (1) story-ambiguity in src/pipeline/stages/execution.ts: after agent session result is parsed, check if agent output contains ambiguity signals. Add helper isAmbiguousOutput(output: string): boolean that returns true if output contains any of: [\"unclear\", \"ambiguous\", \"need clarification\", \"please clarify\", \"which one\", \"not sure which\"]. If detected and isTriggerEnabled(\"story-ambiguity\", config) and interactionChain, call checkStoryAmbiguity({featureName, storyId: ctx.story.id, reason: \"Agent output suggests ambiguity\"}, config, ctx.interactionChain). abort = escalate story; continue = proceed as normal. (2) review-gate in src/pipeline/stages/completion.ts (or wherever story:completed is emitted): if isTriggerEnabled(\"review-gate\", config) and interactionChain, call checkReviewGate({featureName, storyId: ctx.story.id}, config, ctx.interactionChain) after story passes. abort = mark story as needing re-review (log warning, do not fail); continue = proceed. Both triggers default to disabled in config.",
|
|
81
|
+
"complexity": "medium",
|
|
82
|
+
"status": "passed",
|
|
83
|
+
"acceptanceCriteria": [
|
|
84
|
+
"isAmbiguousOutput() detects all 6 keyword phrases (case-insensitive)",
|
|
85
|
+
"story-ambiguity trigger fires when isAmbiguousOutput=true and trigger enabled",
|
|
86
|
+
"story-ambiguity is disabled by default (isTriggerEnabled returns false)",
|
|
87
|
+
"review-gate trigger fires after each story passes when enabled",
|
|
88
|
+
"review-gate is disabled by default",
|
|
89
|
+
"Unit tests for isAmbiguousOutput and both trigger dispatch paths"
|
|
90
|
+
],
|
|
91
|
+
"attempts": 0,
|
|
92
|
+
"priorErrors": [],
|
|
93
|
+
"priorFailures": [],
|
|
94
|
+
"escalations": [],
|
|
95
|
+
"dependencies": [],
|
|
96
|
+
"tags": [],
|
|
97
|
+
"storyPoints": 2,
|
|
98
|
+
"passes": true
|
|
99
|
+
},
|
|
100
|
+
{
|
|
101
|
+
"id": "TC-005",
|
|
102
|
+
"title": "Wire missing hook events: on-resume, on-session-end, on-error",
|
|
103
|
+
"description": "Three missing hook events to wire in src/pipeline/subscribers/hooks.ts: (1) on-resume: add RunResumedEvent {type:\"run:resumed\"; feature: string} to PipelineEventBus. Emit it in sequential-executor.ts when resuming from pause state (detect via interaction state or run:paused→run:resumed cycle). Wire bus.on(\"run:resumed\") → fireHook(hooks, \"on-resume\", ...) in wireHooks. (2) on-session-end: fire after every agent session ends (pass OR fail). Wire bus.on(\"story:completed\") AND bus.on(\"story:failed\") → fireHook(hooks, \"on-session-end\", hookCtx(feature, {storyId, status: passed?\"passed\":\"failed\"})). (3) on-error: emit a run:errored event in src/execution/crash-recovery.ts crash handler (unhandledRejection / SIGTERM / SIGINT handlers). Wire bus.on(\"run:errored\") → fireHook(hooks, \"on-error\", hookCtx(feature, {reason: signal/error})). Add RunErroredEvent type to event-bus. All three follow existing best-effort fire-and-forget pattern.",
|
|
104
|
+
"complexity": "medium",
|
|
105
|
+
"status": "passed",
|
|
106
|
+
"acceptanceCriteria": [
|
|
107
|
+
"RunResumedEvent type added to PipelineEventBus",
|
|
108
|
+
"on-resume hook fires when run:resumed event emitted",
|
|
109
|
+
"on-session-end hook fires after story:completed AND story:failed events",
|
|
110
|
+
"RunErroredEvent type added to PipelineEventBus",
|
|
111
|
+
"on-error hook fires in crash-recovery handlers (SIGTERM, SIGINT, unhandledRejection)",
|
|
112
|
+
"All three follow fire-and-forget pattern (no await, errors logged)",
|
|
113
|
+
"Extend hooks.test.ts with tests for all three new events"
|
|
114
|
+
],
|
|
115
|
+
"attempts": 0,
|
|
116
|
+
"priorErrors": [],
|
|
117
|
+
"priorFailures": [],
|
|
118
|
+
"escalations": [],
|
|
119
|
+
"dependencies": [],
|
|
120
|
+
"tags": [],
|
|
121
|
+
"storyPoints": 2,
|
|
122
|
+
"passes": true
|
|
123
|
+
},
|
|
124
|
+
{
|
|
125
|
+
"id": "TC-006",
|
|
126
|
+
"title": "Auto plugin and Telegram/Webhook plugin integration tests",
|
|
127
|
+
"description": "Add mock-based integration tests for the three untested plugins. File locations: test/unit/interaction/auto-plugin.test.ts, extend test/unit/interaction-plugins.test.ts. (1) AutoInteractionPlugin (_deps pattern): mock the LLM call via _deps.callLlm. Test: LLM returns approve → response.action=\"continue\"; LLM returns reject → response.action=\"abort\"; confidence < threshold → fallback to chain default; trigger=security-review → always rejects auto-approval (hardcoded block), returns chain default. Add _deps.callLlm to auto.ts if not present. (2) Telegram send flow: mock fetch globally in test. Verify send() POSTs to correct API URL with message text and inline keyboard buttons (approve/reject). Verify poll() parses callback_query correctly. (3) Webhook: mock an HTTP server using Bun.serve in test. Verify send() POSTs payload with correct Content-Type. Verify HMAC signature validation rejects tampered payload. All tests are pure unit/mock — no real network calls.",
|
|
128
|
+
"complexity": "medium",
|
|
129
|
+
"status": "passed",
|
|
130
|
+
"acceptanceCriteria": [
|
|
131
|
+
"AutoInteractionPlugin: approve, reject, low-confidence, security-review-block all tested",
|
|
132
|
+
"Auto plugin uses _deps pattern for LLM call (testable without real API)",
|
|
133
|
+
"Telegram send() verified to POST correct message structure with inline keyboard",
|
|
134
|
+
"Telegram poll() parses callback_query response correctly",
|
|
135
|
+
"Webhook send() verified with correct Content-Type and payload structure",
|
|
136
|
+
"Webhook HMAC validation: valid signature passes, tampered payload rejected",
|
|
137
|
+
"Zero real network calls in any test"
|
|
138
|
+
],
|
|
139
|
+
"attempts": 0,
|
|
140
|
+
"priorErrors": [],
|
|
141
|
+
"priorFailures": [],
|
|
142
|
+
"escalations": [],
|
|
143
|
+
"dependencies": [],
|
|
144
|
+
"tags": [],
|
|
145
|
+
"storyPoints": 2,
|
|
146
|
+
"passes": true
|
|
147
|
+
}
|
|
148
|
+
],
|
|
149
|
+
"updatedAt": "2026-03-07T14:53:47.398Z"
|
|
150
|
+
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
[2026-03-07T14:04:48.521Z] TC-001 — PASSED — Wire cost-exceeded and cost-warning triggers — Cost: $1.1231
|
|
2
|
+
[2026-03-07T14:11:47.185Z] TC-002 — PASSED — Wire max-retries trigger — Cost: $0.1029
|
|
3
|
+
[2026-03-07T14:30:04.761Z] TC-003 — PASSED — Wire security-review, merge-conflict, and pre-merge triggers — Cost: $1.3628
|
|
4
|
+
[2026-03-07T14:36:14.823Z] TC-004 — PASSED — Wire story-ambiguity and review-gate triggers — Cost: $0.0000
|
|
5
|
+
[2026-03-07T14:38:01.345Z] TC-004 — PASSED — Wire story-ambiguity and review-gate triggers — Cost: $0.1019
|
|
6
|
+
[2026-03-07T14:43:51.353Z] TC-005 — PASSED — Wire missing hook events: on-resume, on-session-end, on-error — Cost: $0.4284
|
|
7
|
+
[2026-03-07T14:53:47.397Z] TC-006 — PASSED — Auto plugin and Telegram/Webhook plugin integration tests — Cost: $0.7347
|
package/nax/status.json
CHANGED
|
@@ -1,37 +1,36 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 1,
|
|
3
3
|
"run": {
|
|
4
|
-
"id": "run-2026-03-
|
|
5
|
-
"feature": "
|
|
6
|
-
"startedAt": "2026-03-
|
|
7
|
-
"status": "
|
|
4
|
+
"id": "run-2026-03-07T16-14-49-336Z",
|
|
5
|
+
"feature": "routing-persistence",
|
|
6
|
+
"startedAt": "2026-03-07T16:14:49.336Z",
|
|
7
|
+
"status": "running",
|
|
8
8
|
"dryRun": false,
|
|
9
|
-
"pid":
|
|
10
|
-
"crashedAt": "2026-03-07T06:22:36.300Z",
|
|
11
|
-
"crashSignal": "SIGTERM"
|
|
9
|
+
"pid": 3412
|
|
12
10
|
},
|
|
13
11
|
"progress": {
|
|
14
12
|
"total": 4,
|
|
15
|
-
"passed":
|
|
13
|
+
"passed": 1,
|
|
16
14
|
"failed": 0,
|
|
17
15
|
"paused": 0,
|
|
18
16
|
"blocked": 0,
|
|
19
|
-
"pending":
|
|
17
|
+
"pending": 3
|
|
20
18
|
},
|
|
21
19
|
"cost": {
|
|
22
|
-
"spent": 0,
|
|
23
|
-
"limit":
|
|
20
|
+
"spent": 0.52230675,
|
|
21
|
+
"limit": 8
|
|
24
22
|
},
|
|
25
23
|
"current": {
|
|
26
|
-
"storyId": "
|
|
27
|
-
"title": "
|
|
24
|
+
"storyId": "RRP-002",
|
|
25
|
+
"title": "Add initialComplexity to StoryRouting and StoryMetrics for accurate reporting",
|
|
28
26
|
"complexity": "medium",
|
|
29
27
|
"tddStrategy": "test-after",
|
|
30
28
|
"model": "balanced",
|
|
31
29
|
"attempt": 1,
|
|
32
30
|
"phase": "routing"
|
|
33
31
|
},
|
|
34
|
-
"iterations":
|
|
35
|
-
"updatedAt": "2026-03-
|
|
36
|
-
"durationMs":
|
|
32
|
+
"iterations": 2,
|
|
33
|
+
"updatedAt": "2026-03-07T16:45:19.261Z",
|
|
34
|
+
"durationMs": 1829925,
|
|
35
|
+
"lastHeartbeat": "2026-03-07T16:45:19.261Z"
|
|
37
36
|
}
|
package/package.json
CHANGED
package/src/config/types.ts
CHANGED
|
@@ -309,7 +309,9 @@ export interface InteractionConfig {
|
|
|
309
309
|
fallback: "continue" | "skip" | "escalate" | "abort";
|
|
310
310
|
};
|
|
311
311
|
/** Enable/disable built-in triggers */
|
|
312
|
-
triggers: Partial<
|
|
312
|
+
triggers: Partial<
|
|
313
|
+
Record<string, boolean | { enabled: boolean; fallback?: string; timeout?: number; threshold?: number }>
|
|
314
|
+
>;
|
|
313
315
|
}
|
|
314
316
|
|
|
315
317
|
/** Test coverage context config */
|
|
@@ -32,6 +32,8 @@ export interface CrashRecoveryContext {
|
|
|
32
32
|
getStartTime?: () => number;
|
|
33
33
|
getTotalStories?: () => number;
|
|
34
34
|
getStoriesCompleted?: () => number;
|
|
35
|
+
/** Optional callback to emit run:errored event (fire-and-forget) */
|
|
36
|
+
emitError?: (reason: string) => void;
|
|
35
37
|
}
|
|
36
38
|
|
|
37
39
|
/**
|
|
@@ -171,6 +173,9 @@ export function installCrashHandlers(ctx: CrashRecoveryContext): () => void {
|
|
|
171
173
|
await ctx.pidRegistry.killAll();
|
|
172
174
|
}
|
|
173
175
|
|
|
176
|
+
// Emit run:errored event (fire-and-forget)
|
|
177
|
+
ctx.emitError?.(signal.toLowerCase());
|
|
178
|
+
|
|
174
179
|
// Write fatal log
|
|
175
180
|
await writeFatalLog(ctx.jsonlFilePath, signal);
|
|
176
181
|
|
|
@@ -209,6 +214,9 @@ export function installCrashHandlers(ctx: CrashRecoveryContext): () => void {
|
|
|
209
214
|
await ctx.pidRegistry.killAll();
|
|
210
215
|
}
|
|
211
216
|
|
|
217
|
+
// Emit run:errored event (fire-and-forget)
|
|
218
|
+
ctx.emitError?.("uncaughtException");
|
|
219
|
+
|
|
212
220
|
// Write fatal log with stack trace
|
|
213
221
|
await writeFatalLog(ctx.jsonlFilePath, "uncaughtException", error);
|
|
214
222
|
|
|
@@ -242,6 +250,9 @@ export function installCrashHandlers(ctx: CrashRecoveryContext): () => void {
|
|
|
242
250
|
await ctx.pidRegistry.killAll();
|
|
243
251
|
}
|
|
244
252
|
|
|
253
|
+
// Emit run:errored event (fire-and-forget)
|
|
254
|
+
ctx.emitError?.("unhandledRejection");
|
|
255
|
+
|
|
245
256
|
// Write fatal log
|
|
246
257
|
await writeFatalLog(ctx.jsonlFilePath, "unhandledRejection", error);
|
|
247
258
|
|
|
@@ -40,7 +40,7 @@ export interface SequentialExecutionResult {
|
|
|
40
40
|
storiesCompleted: number;
|
|
41
41
|
totalCost: number;
|
|
42
42
|
allStoryMetrics: StoryMetrics[];
|
|
43
|
-
exitReason: "completed" | "cost-limit" | "max-iterations" | "stalled" | "no-stories";
|
|
43
|
+
exitReason: "completed" | "cost-limit" | "max-iterations" | "stalled" | "no-stories" | "pre-merge-aborted";
|
|
44
44
|
}
|
|
45
45
|
|
|
46
46
|
/**
|
|
@@ -21,6 +21,7 @@ import { fireHook } from "../../hooks";
|
|
|
21
21
|
import type { InteractionChain } from "../../interaction";
|
|
22
22
|
import { initInteractionChain } from "../../interaction";
|
|
23
23
|
import { getSafeLogger } from "../../logger";
|
|
24
|
+
import { pipelineEventBus } from "../../pipeline/event-bus";
|
|
24
25
|
import { loadPlugins } from "../../plugins/loader";
|
|
25
26
|
import type { PluginRegistry } from "../../plugins/registry";
|
|
26
27
|
import type { PRD } from "../../prd";
|
|
@@ -123,6 +124,9 @@ export async function setupRun(options: RunSetupOptions): Promise<RunSetupResult
|
|
|
123
124
|
getStartTime: () => options.startTime,
|
|
124
125
|
getTotalStories: options.getTotalStories,
|
|
125
126
|
getStoriesCompleted: options.getStoriesCompleted,
|
|
127
|
+
emitError: (reason: string) => {
|
|
128
|
+
pipelineEventBus.emit({ type: "run:errored", reason, feature: options.feature });
|
|
129
|
+
},
|
|
126
130
|
});
|
|
127
131
|
|
|
128
132
|
// Load PRD (before try block so it's accessible in finally for onRunEnd)
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
/** Sequential Story Executor (ADR-005, Phase 4) — main execution loop. */
|
|
2
2
|
|
|
3
|
+
import { checkCostExceeded, checkCostWarning, checkPreMerge, isTriggerEnabled } from "../interaction/triggers";
|
|
3
4
|
import { getSafeLogger } from "../logger";
|
|
4
5
|
import type { StoryMetrics } from "../metrics";
|
|
5
6
|
import { pipelineEventBus } from "../pipeline/event-bus";
|
|
@@ -35,6 +36,7 @@ export async function executeSequential(
|
|
|
35
36
|
0,
|
|
36
37
|
];
|
|
37
38
|
const allStoryMetrics: StoryMetrics[] = [];
|
|
39
|
+
let warningSent = false;
|
|
38
40
|
|
|
39
41
|
pipelineEventBus.clear();
|
|
40
42
|
wireHooks(pipelineEventBus, ctx.hooks, ctx.workdir, ctx.feature);
|
|
@@ -69,6 +71,17 @@ export async function executeSequential(
|
|
|
69
71
|
prdDirty = false;
|
|
70
72
|
}
|
|
71
73
|
if (isComplete(prd)) {
|
|
74
|
+
// pre-merge trigger: prompt before completing the run
|
|
75
|
+
if (ctx.interactionChain && isTriggerEnabled("pre-merge", ctx.config)) {
|
|
76
|
+
const shouldProceed = await checkPreMerge(
|
|
77
|
+
{ featureName: ctx.feature, totalStories: prd.userStories.length, cost: totalCost },
|
|
78
|
+
ctx.config,
|
|
79
|
+
ctx.interactionChain,
|
|
80
|
+
);
|
|
81
|
+
if (!shouldProceed) {
|
|
82
|
+
return buildResult("pre-merge-aborted");
|
|
83
|
+
}
|
|
84
|
+
}
|
|
72
85
|
pipelineEventBus.emit({
|
|
73
86
|
type: "run:completed",
|
|
74
87
|
totalStories: 0,
|
|
@@ -91,13 +104,24 @@ export async function executeSequential(
|
|
|
91
104
|
if (!ctx.useBatch) lastStoryId = selection.story.id;
|
|
92
105
|
|
|
93
106
|
if (totalCost >= ctx.config.execution.costLimit) {
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
107
|
+
const shouldProceed =
|
|
108
|
+
ctx.interactionChain && isTriggerEnabled("cost-exceeded", ctx.config)
|
|
109
|
+
? await checkCostExceeded(
|
|
110
|
+
{ featureName: ctx.feature, cost: totalCost, limit: ctx.config.execution.costLimit },
|
|
111
|
+
ctx.config,
|
|
112
|
+
ctx.interactionChain,
|
|
113
|
+
)
|
|
114
|
+
: false;
|
|
115
|
+
if (!shouldProceed) {
|
|
116
|
+
pipelineEventBus.emit({
|
|
117
|
+
type: "run:paused",
|
|
118
|
+
reason: `Cost limit reached: $${totalCost.toFixed(2)}`,
|
|
119
|
+
storyId: selection.story.id,
|
|
120
|
+
cost: totalCost,
|
|
121
|
+
});
|
|
122
|
+
return buildResult("cost-limit");
|
|
123
|
+
}
|
|
124
|
+
pipelineEventBus.emit({ type: "run:resumed", feature: ctx.feature });
|
|
101
125
|
}
|
|
102
126
|
|
|
103
127
|
pipelineEventBus.emit({
|
|
@@ -118,6 +142,20 @@ export async function executeSequential(
|
|
|
118
142
|
iter.prdDirty,
|
|
119
143
|
];
|
|
120
144
|
|
|
145
|
+
if (ctx.interactionChain && isTriggerEnabled("cost-warning", ctx.config) && !warningSent) {
|
|
146
|
+
const costLimit = ctx.config.execution.costLimit;
|
|
147
|
+
const triggerCfg = ctx.config.interaction?.triggers?.["cost-warning"];
|
|
148
|
+
const threshold = typeof triggerCfg === "object" ? (triggerCfg.threshold ?? 0.8) : 0.8;
|
|
149
|
+
if (totalCost >= costLimit * threshold) {
|
|
150
|
+
await checkCostWarning(
|
|
151
|
+
{ featureName: ctx.feature, cost: totalCost, limit: costLimit },
|
|
152
|
+
ctx.config,
|
|
153
|
+
ctx.interactionChain,
|
|
154
|
+
);
|
|
155
|
+
warningSent = true;
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
|
|
121
159
|
if (iter.prdDirty) {
|
|
122
160
|
prd = await loadPRD(ctx.prdPath);
|
|
123
161
|
prdDirty = false;
|
|
@@ -38,6 +38,14 @@ interface DecisionResponse {
|
|
|
38
38
|
reasoning: string;
|
|
39
39
|
}
|
|
40
40
|
|
|
41
|
+
/**
 * Test seam for the auto plugin's LLM call (module-level `_deps` pattern).
 *
 * `callLlm` is `null` by default, in which case the plugin falls back to its
 * own `callLlm` method. Tests assign a stub here to avoid spawning the
 * claude CLI.
 */
export const _deps: {
  callLlm: ((request: InteractionRequest) => Promise<DecisionResponse>) | null;
} = {
  // Declared type instead of a `null as ...` assertion so the compiler checks the initializer.
  callLlm: null,
};
|
|
48
|
+
|
|
41
49
|
/**
|
|
42
50
|
* Auto plugin for AI-powered interaction responses
|
|
43
51
|
*/
|
|
@@ -80,7 +88,8 @@ export class AutoInteractionPlugin implements InteractionPlugin {
|
|
|
80
88
|
}
|
|
81
89
|
|
|
82
90
|
try {
|
|
83
|
-
const
|
|
91
|
+
const callFn = _deps.callLlm ?? this.callLlm.bind(this);
|
|
92
|
+
const decision = await callFn(request);
|
|
84
93
|
|
|
85
94
|
// Check confidence threshold
|
|
86
95
|
if (decision.confidence < (this.config.confidenceThreshold ?? 0.7)) {
|
|
@@ -110,7 +110,8 @@ export function calculateAggregateMetrics(runs: RunMetrics[]): AggregateMetrics
|
|
|
110
110
|
>();
|
|
111
111
|
|
|
112
112
|
for (const story of allStories) {
|
|
113
|
-
|
|
113
|
+
// Use initialComplexity (first-classify prediction) when available; fall back to complexity
|
|
114
|
+
const complexity = story.initialComplexity ?? story.complexity;
|
|
114
115
|
const existing = complexityStats.get(complexity) || {
|
|
115
116
|
predicted: 0,
|
|
116
117
|
tierCounts: new Map<string, number>(),
|
package/src/metrics/tracker.ts
CHANGED
|
@@ -58,9 +58,14 @@ export function collectStoryMetrics(ctx: PipelineContext, storyStartTime: string
|
|
|
58
58
|
const modelDef = modelEntry ? resolveModel(modelEntry) : null;
|
|
59
59
|
const modelUsed = modelDef?.model || routing.modelTier;
|
|
60
60
|
|
|
61
|
+
// initialComplexity: prefer story.routing.initialComplexity (first classify),
|
|
62
|
+
// fall back to routing.complexity for backward compat
|
|
63
|
+
const initialComplexity = story.routing?.initialComplexity ?? routing.complexity;
|
|
64
|
+
|
|
61
65
|
return {
|
|
62
66
|
storyId: story.id,
|
|
63
67
|
complexity: routing.complexity,
|
|
68
|
+
initialComplexity,
|
|
64
69
|
modelTier: routing.modelTier,
|
|
65
70
|
modelUsed,
|
|
66
71
|
attempts,
|
|
@@ -108,20 +113,27 @@ export function collectBatchMetrics(ctx: PipelineContext, storyStartTime: string
|
|
|
108
113
|
const modelDef = modelEntry ? resolveModel(modelEntry) : null;
|
|
109
114
|
const modelUsed = modelDef?.model || routing.modelTier;
|
|
110
115
|
|
|
111
|
-
return stories.map((story) =>
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
116
|
+
return stories.map((story) => {
|
|
117
|
+
// initialComplexity: prefer story.routing.initialComplexity (if individual routing exists),
|
|
118
|
+
// fall back to shared routing.complexity (batch stories classified together)
|
|
119
|
+
const initialComplexity = story.routing?.initialComplexity ?? routing.complexity;
|
|
120
|
+
|
|
121
|
+
return {
|
|
122
|
+
storyId: story.id,
|
|
123
|
+
complexity: routing.complexity,
|
|
124
|
+
initialComplexity,
|
|
125
|
+
modelTier: routing.modelTier,
|
|
126
|
+
modelUsed,
|
|
127
|
+
attempts: 1, // batch stories don't escalate individually
|
|
128
|
+
finalTier: routing.modelTier,
|
|
129
|
+
success: true, // if batch succeeded, all stories succeeded
|
|
130
|
+
cost: costPerStory,
|
|
131
|
+
durationMs: durationPerStory,
|
|
132
|
+
firstPassSuccess: true, // batch = first pass success
|
|
133
|
+
startedAt: storyStartTime,
|
|
134
|
+
completedAt: new Date().toISOString(),
|
|
135
|
+
};
|
|
136
|
+
});
|
|
125
137
|
}
|
|
126
138
|
|
|
127
139
|
/**
|
package/src/metrics/types.ts
CHANGED
|
@@ -12,6 +12,8 @@ export interface StoryMetrics {
|
|
|
12
12
|
storyId: string;
|
|
13
13
|
/** Classified complexity */
|
|
14
14
|
complexity: string;
|
|
15
|
+
/** Initial complexity from first classification — preserved across escalations */
|
|
16
|
+
initialComplexity?: string;
|
|
15
17
|
/** Initial model tier */
|
|
16
18
|
modelTier: string;
|
|
17
19
|
/** Actual model used (e.g., "claude-sonnet-4.5") */
|
|
@@ -135,6 +135,17 @@ export interface StoryPausedEvent {
|
|
|
135
135
|
cost: number;
|
|
136
136
|
}
|
|
137
137
|
|
|
138
|
+
/** Pipeline event emitted when a run continues after a pause point. */
export interface RunResumedEvent {
  /** Discriminant tag identifying this event in the pipeline event union. */
  type: "run:resumed";
  /** Name of the feature whose run resumed. */
  feature: string;
}
|
|
142
|
+
|
|
143
|
+
/** Pipeline event emitted when a run is ended by a crash or termination signal. */
export interface RunErroredEvent {
  /** Discriminant tag identifying this event in the pipeline event union. */
  type: "run:errored";
  /** What ended the run, e.g. a lower-cased signal name, "uncaughtException" or "unhandledRejection". */
  reason: string;
  /** Name of the feature being run, when the emitter knows it. */
  feature?: string;
}
|
|
148
|
+
|
|
138
149
|
/** Discriminated union of all pipeline events. */
|
|
139
150
|
export type PipelineEvent =
|
|
140
151
|
| StoryStartedEvent
|
|
@@ -150,7 +161,9 @@ export type PipelineEvent =
|
|
|
150
161
|
| HumanReviewRequestedEvent
|
|
151
162
|
| RunStartedEvent
|
|
152
163
|
| RunPausedEvent
|
|
153
|
-
| StoryPausedEvent
|
|
164
|
+
| StoryPausedEvent
|
|
165
|
+
| RunResumedEvent
|
|
166
|
+
| RunErroredEvent;
|
|
154
167
|
|
|
155
168
|
export type PipelineEventType = PipelineEvent["type"];
|
|
156
169
|
|
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
*/
|
|
14
14
|
|
|
15
15
|
import { appendProgress } from "../../execution/progress";
|
|
16
|
+
import { checkReviewGate, isTriggerEnabled } from "../../interaction/triggers";
|
|
16
17
|
import { getLogger } from "../../logger";
|
|
17
18
|
import { collectBatchMetrics, collectStoryMetrics } from "../../metrics";
|
|
18
19
|
import { countStories, markStoryPassed, savePRD } from "../../prd";
|
|
@@ -72,6 +73,18 @@ export const completionStage: PipelineStage = {
|
|
|
72
73
|
modelTier: ctx.routing?.modelTier,
|
|
73
74
|
testStrategy: ctx.routing?.testStrategy,
|
|
74
75
|
});
|
|
76
|
+
|
|
77
|
+
// review-gate trigger: check if story needs re-review after passing
|
|
78
|
+
if (ctx.interaction && isTriggerEnabled("review-gate", ctx.config)) {
|
|
79
|
+
const shouldContinue = await _completionDeps.checkReviewGate(
|
|
80
|
+
{ featureName: ctx.prd.feature, storyId: completedStory.id },
|
|
81
|
+
ctx.config,
|
|
82
|
+
ctx.interaction,
|
|
83
|
+
);
|
|
84
|
+
if (!shouldContinue) {
|
|
85
|
+
logger.warn("completion", "Story marked for re-review", { storyId: completedStory.id });
|
|
86
|
+
}
|
|
87
|
+
}
|
|
75
88
|
}
|
|
76
89
|
|
|
77
90
|
// Save PRD
|
|
@@ -89,3 +102,10 @@ export const completionStage: PipelineStage = {
|
|
|
89
102
|
return { action: "continue" };
|
|
90
103
|
},
|
|
91
104
|
};
|
|
105
|
+
|
|
106
|
+
/**
 * Swappable dependencies for testing (avoids mock.module() which leaks in Bun 1.x).
 *
 * The completion stage calls `checkReviewGate` through this object, so a test
 * can replace the property with a stub instead of mocking the module.
 */
export const _completionDeps = {
  checkReviewGate,
};
|
|
@@ -32,11 +32,33 @@
|
|
|
32
32
|
|
|
33
33
|
import { getAgent, validateAgentForTier } from "../../agents";
|
|
34
34
|
import { resolveModel } from "../../config";
|
|
35
|
+
import { checkMergeConflict, checkStoryAmbiguity, isTriggerEnabled } from "../../interaction/triggers";
|
|
35
36
|
import { getLogger } from "../../logger";
|
|
36
37
|
import type { FailureCategory } from "../../tdd";
|
|
37
38
|
import { runThreeSessionTdd } from "../../tdd";
|
|
39
|
+
import { detectMergeConflict } from "../../utils/git";
|
|
38
40
|
import type { PipelineContext, PipelineStage, StageResult } from "../types";
|
|
39
41
|
|
|
42
|
+
/**
 * Phrases that signal the agent is unsure how to implement a story.
 * Stored in lowercase because they are matched against lower-cased output.
 * Hoisted to module scope so the list is not re-allocated on every call.
 */
const AMBIGUITY_KEYWORDS: readonly string[] = [
  "unclear",
  "ambiguous",
  "need clarification",
  "please clarify",
  "which one",
  "not sure which",
];

/**
 * Detect whether agent output contains ambiguity signals.
 *
 * Performs a case-insensitive substring search for each ambiguity phrase.
 *
 * @param output - Text produced by the agent.
 * @returns `true` if any ambiguity phrase occurs in `output`; `false` for
 *   empty output or when no phrase is found.
 */
export function isAmbiguousOutput(output: string): boolean {
  // Empty (or otherwise falsy) output carries no signal.
  if (!output) return false;

  const lowerOutput = output.toLowerCase();
  return AMBIGUITY_KEYWORDS.some((keyword) => lowerOutput.includes(keyword));
}
|
|
61
|
+
|
|
40
62
|
/**
|
|
41
63
|
* Determine the pipeline action for a failed TDD result, based on its failureCategory.
|
|
42
64
|
*
|
|
@@ -172,6 +194,42 @@ export const executionStage: PipelineStage = {
|
|
|
172
194
|
|
|
173
195
|
ctx.agentResult = result;
|
|
174
196
|
|
|
197
|
+
// merge-conflict trigger: detect CONFLICT markers in agent output
|
|
198
|
+
const combinedOutput = (result.output ?? "") + (result.stderr ?? "");
|
|
199
|
+
if (
|
|
200
|
+
_executionDeps.detectMergeConflict(combinedOutput) &&
|
|
201
|
+
ctx.interaction &&
|
|
202
|
+
isTriggerEnabled("merge-conflict", ctx.config)
|
|
203
|
+
) {
|
|
204
|
+
const shouldProceed = await _executionDeps.checkMergeConflict(
|
|
205
|
+
{ featureName: ctx.prd.feature, storyId: ctx.story.id },
|
|
206
|
+
ctx.config,
|
|
207
|
+
ctx.interaction,
|
|
208
|
+
);
|
|
209
|
+
if (!shouldProceed) {
|
|
210
|
+
logger.error("execution", "Merge conflict detected — aborting story", { storyId: ctx.story.id });
|
|
211
|
+
return { action: "fail", reason: "Merge conflict detected" };
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
// story-ambiguity trigger: detect ambiguity signals in agent output
|
|
216
|
+
if (
|
|
217
|
+
result.success &&
|
|
218
|
+
_executionDeps.isAmbiguousOutput(combinedOutput) &&
|
|
219
|
+
ctx.interaction &&
|
|
220
|
+
isTriggerEnabled("story-ambiguity", ctx.config)
|
|
221
|
+
) {
|
|
222
|
+
const shouldContinue = await _executionDeps.checkStoryAmbiguity(
|
|
223
|
+
{ featureName: ctx.prd.feature, storyId: ctx.story.id, reason: "Agent output suggests ambiguity" },
|
|
224
|
+
ctx.config,
|
|
225
|
+
ctx.interaction,
|
|
226
|
+
);
|
|
227
|
+
if (!shouldContinue) {
|
|
228
|
+
logger.warn("execution", "Story ambiguity detected — escalating story", { storyId: ctx.story.id });
|
|
229
|
+
return { action: "escalate", reason: "Story ambiguity detected — needs clarification" };
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
|
|
175
233
|
if (!result.success) {
|
|
176
234
|
logger.error("execution", "Agent session failed", {
|
|
177
235
|
exitCode: result.exitCode,
|
|
@@ -199,4 +257,8 @@ export const executionStage: PipelineStage = {
|
|
|
199
257
|
export const _executionDeps = {
  // Agent lookup and tier validation.
  getAgent,
  validateAgentForTier,
  // merge-conflict trigger: conflict detection on agent output, then the interaction prompt.
  detectMergeConflict,
  checkMergeConflict,
  // story-ambiguity trigger: keyword detection on agent output, then the interaction prompt.
  isAmbiguousOutput,
  checkStoryAmbiguity,
};
|