@nathapp/nax 0.24.0 → 0.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +70 -56
- package/docs/ROADMAP.md +45 -15
- package/docs/specs/trigger-completion.md +145 -0
- package/nax/features/routing-persistence/prd.json +104 -0
- package/nax/features/routing-persistence/progress.txt +1 -0
- package/nax/features/trigger-completion/prd.json +150 -0
- package/nax/features/trigger-completion/progress.txt +7 -0
- package/nax/status.json +15 -16
- package/package.json +1 -1
- package/src/config/types.ts +3 -1
- package/src/execution/crash-recovery.ts +11 -0
- package/src/execution/executor-types.ts +1 -1
- package/src/execution/iteration-runner.ts +1 -0
- package/src/execution/lifecycle/run-setup.ts +4 -0
- package/src/execution/sequential-executor.ts +45 -7
- package/src/interaction/plugins/auto.ts +10 -1
- package/src/metrics/aggregator.ts +2 -1
- package/src/metrics/tracker.ts +26 -14
- package/src/metrics/types.ts +2 -0
- package/src/pipeline/event-bus.ts +14 -1
- package/src/pipeline/stages/completion.ts +20 -0
- package/src/pipeline/stages/execution.ts +62 -0
- package/src/pipeline/stages/review.ts +25 -1
- package/src/pipeline/stages/routing.ts +42 -8
- package/src/pipeline/subscribers/hooks.ts +32 -0
- package/src/pipeline/subscribers/interaction.ts +36 -1
- package/src/pipeline/types.ts +2 -0
- package/src/prd/types.ts +4 -0
- package/src/routing/content-hash.ts +25 -0
- package/src/routing/index.ts +3 -0
- package/src/routing/router.ts +3 -2
- package/src/routing/strategies/keyword.ts +2 -1
- package/src/routing/strategies/llm-prompts.ts +29 -28
- package/src/utils/git.ts +21 -0
- package/test/integration/routing/plugin-routing-core.test.ts +1 -1
- package/test/unit/execution/sequential-executor.test.ts +235 -0
- package/test/unit/interaction/auto-plugin.test.ts +162 -0
- package/test/unit/interaction-plugins.test.ts +308 -1
- package/test/unit/metrics/aggregator.test.ts +164 -0
- package/test/unit/metrics/tracker.test.ts +186 -0
- package/test/unit/pipeline/stages/completion-review-gate.test.ts +218 -0
- package/test/unit/pipeline/stages/execution-ambiguity.test.ts +311 -0
- package/test/unit/pipeline/stages/execution-merge-conflict.test.ts +218 -0
- package/test/unit/pipeline/stages/review.test.ts +201 -0
- package/test/unit/pipeline/stages/routing-idempotence.test.ts +139 -0
- package/test/unit/pipeline/stages/routing-initial-complexity.test.ts +321 -0
- package/test/unit/pipeline/stages/routing-persistence.test.ts +380 -0
- package/test/unit/pipeline/subscribers/hooks.test.ts +43 -4
- package/test/unit/pipeline/subscribers/interaction.test.ts +284 -2
- package/test/unit/prd-auto-default.test.ts +2 -2
- package/test/unit/routing/content-hash.test.ts +99 -0
- package/test/unit/routing/routing-stability.test.ts +1 -1
- package/test/unit/routing-core.test.ts +5 -5
- package/test/unit/routing-strategies.test.ts +1 -3
- package/test/unit/utils/git.test.ts +50 -0
|
@@ -6,10 +6,12 @@
|
|
|
6
6
|
* @returns
|
|
7
7
|
* - `continue`: Review passed
|
|
8
8
|
* - `escalate`: Built-in check failed (lint/typecheck) — autofix stage handles retry
|
|
9
|
-
* - `
|
|
9
|
+
* - `escalate`: Plugin reviewer failed and security-review trigger responded non-abort
|
|
10
|
+
* - `fail`: Plugin reviewer hard-failed (no trigger, or trigger responded abort)
|
|
10
11
|
*/
|
|
11
12
|
|
|
12
13
|
// RE-ARCH: rewrite
|
|
14
|
+
import { checkSecurityReview, isTriggerEnabled } from "../../interaction/triggers";
|
|
13
15
|
import { getLogger } from "../../logger";
|
|
14
16
|
import { reviewOrchestrator } from "../../review/orchestrator";
|
|
15
17
|
import type { PipelineContext, PipelineStage, StageResult } from "../types";
|
|
@@ -29,6 +31,21 @@ export const reviewStage: PipelineStage = {
|
|
|
29
31
|
|
|
30
32
|
if (!result.success) {
|
|
31
33
|
if (result.pluginFailed) {
|
|
34
|
+
// security-review trigger: prompt before permanently failing
|
|
35
|
+
if (ctx.interaction && isTriggerEnabled("security-review", ctx.config)) {
|
|
36
|
+
const shouldContinue = await _reviewDeps.checkSecurityReview(
|
|
37
|
+
{ featureName: ctx.prd.feature, storyId: ctx.story.id },
|
|
38
|
+
ctx.config,
|
|
39
|
+
ctx.interaction,
|
|
40
|
+
);
|
|
41
|
+
if (!shouldContinue) {
|
|
42
|
+
logger.error("review", `Plugin reviewer failed: ${result.failureReason}`, { storyId: ctx.story.id });
|
|
43
|
+
return { action: "fail", reason: `Review failed: ${result.failureReason}` };
|
|
44
|
+
}
|
|
45
|
+
logger.warn("review", "Security-review trigger escalated — retrying story", { storyId: ctx.story.id });
|
|
46
|
+
return { action: "escalate", reason: `Review failed: ${result.failureReason}` };
|
|
47
|
+
}
|
|
48
|
+
|
|
32
49
|
logger.error("review", `Plugin reviewer failed: ${result.failureReason}`, { storyId: ctx.story.id });
|
|
33
50
|
return { action: "fail", reason: `Review failed: ${result.failureReason}` };
|
|
34
51
|
}
|
|
@@ -47,3 +64,10 @@ export const reviewStage: PipelineStage = {
|
|
|
47
64
|
return { action: "continue" };
|
|
48
65
|
},
|
|
49
66
|
};
|
|
67
|
+
|
|
68
|
+
/**
 * Injection seam for the review stage's collaborators.
 *
 * The stage calls `checkSecurityReview` through this object, so a test can
 * replace the property on the object instead of using `mock.module()`
 * (which leaks in Bun 1.x).
 */
export const _reviewDeps = { checkSecurityReview };
|
|
@@ -2,15 +2,18 @@
|
|
|
2
2
|
* Routing Stage
|
|
3
3
|
*
|
|
4
4
|
* Classifies story complexity and determines model tier + test strategy.
|
|
5
|
-
* Uses cached complexity/testStrategy/modelTier from story if
|
|
5
|
+
* Uses cached complexity/testStrategy/modelTier from story if contentHash matches.
|
|
6
6
|
* modelTier: uses escalated tier if explicitly set (BUG-032), otherwise derives from config.
|
|
7
7
|
*
|
|
8
|
+
* RRP-003: contentHash staleness detection — if story.routing.contentHash is missing or
|
|
9
|
+
* does not match the current story content, treats cached routing as a miss and re-classifies.
|
|
10
|
+
*
|
|
8
11
|
* @returns
|
|
9
12
|
* - `continue`: Routing determined, proceed to next stage
|
|
10
13
|
*
|
|
11
14
|
* @example
|
|
12
15
|
* ```ts
|
|
13
|
-
* // Story has cached routing with
|
|
16
|
+
* // Story has cached routing with matching contentHash
|
|
14
17
|
* await routingStage.execute(ctx);
|
|
15
18
|
* // ctx.routing: { complexity: "simple", modelTier: "fast", testStrategy: "test-after", reasoning: "..." }
|
|
16
19
|
* // modelTier is derived from current config.autoMode.complexityRouting
|
|
@@ -19,7 +22,8 @@
|
|
|
19
22
|
|
|
20
23
|
import { isGreenfieldStory } from "../../context/greenfield";
|
|
21
24
|
import { getLogger } from "../../logger";
|
|
22
|
-
import {
|
|
25
|
+
import { savePRD } from "../../prd";
|
|
26
|
+
import { complexityToModelTier, computeStoryContentHash, routeStory } from "../../routing";
|
|
23
27
|
import { clearCache, routeBatch } from "../../routing/strategies/llm";
|
|
24
28
|
import type { PipelineContext, PipelineStage, RoutingResult, StageResult } from "../types";
|
|
25
29
|
|
|
@@ -30,11 +34,25 @@ export const routingStage: PipelineStage = {
|
|
|
30
34
|
async execute(ctx: PipelineContext): Promise<StageResult> {
|
|
31
35
|
const logger = getLogger();
|
|
32
36
|
|
|
33
|
-
//
|
|
34
|
-
//
|
|
37
|
+
// Staleness detection (RRP-003):
|
|
38
|
+
// - story.routing absent → cache miss (no prior routing)
|
|
39
|
+
// - story.routing + no contentHash → legacy cache hit (manual / pre-RRP-003 routing, honor as-is)
|
|
40
|
+
// - story.routing + contentHash matches → cache hit
|
|
41
|
+
// - story.routing + contentHash mismatches → cache miss (stale, re-classify)
|
|
42
|
+
const hasExistingRouting = ctx.story.routing !== undefined;
|
|
43
|
+
const hasContentHash = ctx.story.routing?.contentHash !== undefined;
|
|
44
|
+
let currentHash: string | undefined;
|
|
45
|
+
let hashMatch = false;
|
|
46
|
+
if (hasContentHash) {
|
|
47
|
+
currentHash = _routingDeps.computeStoryContentHash(ctx.story);
|
|
48
|
+
hashMatch = ctx.story.routing?.contentHash === currentHash;
|
|
49
|
+
}
|
|
50
|
+
const isCacheHit = hasExistingRouting && (!hasContentHash || hashMatch);
|
|
51
|
+
|
|
35
52
|
let routing: { complexity: string; testStrategy: string; modelTier: string; reasoning?: string };
|
|
36
|
-
|
|
37
|
-
|
|
53
|
+
|
|
54
|
+
if (isCacheHit) {
|
|
55
|
+
// Cache hit: legacy routing (no contentHash) or matching contentHash — use cached values
|
|
38
56
|
routing = await _routingDeps.routeStory(ctx.story, { config: ctx.config }, ctx.workdir, ctx.plugins);
|
|
39
57
|
// Override with cached values only when they are actually set
|
|
40
58
|
if (ctx.story.routing?.complexity) routing.complexity = ctx.story.routing.complexity;
|
|
@@ -50,8 +68,22 @@ export const routingStage: PipelineStage = {
|
|
|
50
68
|
);
|
|
51
69
|
}
|
|
52
70
|
} else {
|
|
53
|
-
//
|
|
71
|
+
// Cache miss: no routing, or contentHash present but mismatched — fresh classification
|
|
54
72
|
routing = await _routingDeps.routeStory(ctx.story, { config: ctx.config }, ctx.workdir, ctx.plugins);
|
|
73
|
+
// currentHash already computed if a mismatch was detected; compute now if starting fresh
|
|
74
|
+
currentHash = currentHash ?? _routingDeps.computeStoryContentHash(ctx.story);
|
|
75
|
+
ctx.story.routing = {
|
|
76
|
+
...(ctx.story.routing ?? {}),
|
|
77
|
+
complexity: routing.complexity as import("../../config").Complexity,
|
|
78
|
+
initialComplexity:
|
|
79
|
+
ctx.story.routing?.initialComplexity ?? (routing.complexity as import("../../config").Complexity),
|
|
80
|
+
testStrategy: routing.testStrategy as import("../../config").TestStrategy,
|
|
81
|
+
reasoning: routing.reasoning ?? "",
|
|
82
|
+
contentHash: currentHash,
|
|
83
|
+
};
|
|
84
|
+
if (ctx.prdPath) {
|
|
85
|
+
await _routingDeps.savePRD(ctx.prd, ctx.prdPath);
|
|
86
|
+
}
|
|
55
87
|
}
|
|
56
88
|
|
|
57
89
|
// BUG-010: Greenfield detection — force test-after if no test files exist
|
|
@@ -97,4 +129,6 @@ export const _routingDeps = {
|
|
|
97
129
|
complexityToModelTier,
|
|
98
130
|
isGreenfieldStory,
|
|
99
131
|
clearCache,
|
|
132
|
+
savePRD,
|
|
133
|
+
computeStoryContentHash,
|
|
100
134
|
};
|
|
@@ -127,6 +127,38 @@ export function wireHooks(
|
|
|
127
127
|
}),
|
|
128
128
|
);
|
|
129
129
|
|
|
130
|
+
// run:resumed → on-resume
|
|
131
|
+
unsubs.push(
|
|
132
|
+
bus.on("run:resumed", (ev) => {
|
|
133
|
+
safe("on-resume", () => fireHook(hooks, "on-resume", hookCtx(feature, { status: "running" }), workdir));
|
|
134
|
+
}),
|
|
135
|
+
);
|
|
136
|
+
|
|
137
|
+
// story:completed → on-session-end (passed)
|
|
138
|
+
unsubs.push(
|
|
139
|
+
bus.on("story:completed", (ev) => {
|
|
140
|
+
safe("on-session-end (completed)", () =>
|
|
141
|
+
fireHook(hooks, "on-session-end", hookCtx(feature, { storyId: ev.storyId, status: "passed" }), workdir),
|
|
142
|
+
);
|
|
143
|
+
}),
|
|
144
|
+
);
|
|
145
|
+
|
|
146
|
+
// story:failed → on-session-end (failed)
|
|
147
|
+
unsubs.push(
|
|
148
|
+
bus.on("story:failed", (ev) => {
|
|
149
|
+
safe("on-session-end (failed)", () =>
|
|
150
|
+
fireHook(hooks, "on-session-end", hookCtx(feature, { storyId: ev.storyId, status: "failed" }), workdir),
|
|
151
|
+
);
|
|
152
|
+
}),
|
|
153
|
+
);
|
|
154
|
+
|
|
155
|
+
// run:errored → on-error
|
|
156
|
+
unsubs.push(
|
|
157
|
+
bus.on("run:errored", (ev) => {
|
|
158
|
+
safe("on-error", () => fireHook(hooks, "on-error", hookCtx(feature, { reason: ev.reason }), workdir));
|
|
159
|
+
}),
|
|
160
|
+
);
|
|
161
|
+
|
|
130
162
|
return () => {
|
|
131
163
|
for (const u of unsubs) u();
|
|
132
164
|
};
|
|
@@ -19,7 +19,7 @@ import type { NaxConfig } from "../../config";
|
|
|
19
19
|
import type { InteractionChain } from "../../interaction/chain";
|
|
20
20
|
import { executeTrigger, isTriggerEnabled } from "../../interaction/triggers";
|
|
21
21
|
import { getSafeLogger } from "../../logger";
|
|
22
|
-
import type { PipelineEventBus } from "../event-bus";
|
|
22
|
+
import type { PipelineEventBus, StoryFailedEvent } from "../event-bus";
|
|
23
23
|
import type { UnsubscribeFn } from "./hooks";
|
|
24
24
|
|
|
25
25
|
/**
|
|
@@ -62,6 +62,41 @@ export function wireInteraction(
|
|
|
62
62
|
);
|
|
63
63
|
}
|
|
64
64
|
|
|
65
|
+
// story:failed (countsTowardEscalation=true) → executeTrigger("max-retries")
|
|
66
|
+
if (interactionChain && isTriggerEnabled("max-retries", config)) {
|
|
67
|
+
unsubs.push(
|
|
68
|
+
bus.on("story:failed", (ev: StoryFailedEvent) => {
|
|
69
|
+
if (!ev.countsTowardEscalation) {
|
|
70
|
+
return;
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
executeTrigger(
|
|
74
|
+
"max-retries",
|
|
75
|
+
{
|
|
76
|
+
featureName: ev.feature ?? "",
|
|
77
|
+
storyId: ev.storyId,
|
|
78
|
+
iteration: ev.attempts ?? 0,
|
|
79
|
+
},
|
|
80
|
+
config,
|
|
81
|
+
interactionChain,
|
|
82
|
+
)
|
|
83
|
+
.then((response) => {
|
|
84
|
+
if (response.action === "abort") {
|
|
85
|
+
logger?.warn("interaction-subscriber", "max-retries abort requested", {
|
|
86
|
+
storyId: ev.storyId,
|
|
87
|
+
});
|
|
88
|
+
}
|
|
89
|
+
})
|
|
90
|
+
.catch((err) => {
|
|
91
|
+
logger?.warn("interaction-subscriber", "max-retries trigger failed", {
|
|
92
|
+
storyId: ev.storyId,
|
|
93
|
+
error: String(err),
|
|
94
|
+
});
|
|
95
|
+
});
|
|
96
|
+
}),
|
|
97
|
+
);
|
|
98
|
+
}
|
|
99
|
+
|
|
65
100
|
return () => {
|
|
66
101
|
for (const u of unsubs) u();
|
|
67
102
|
};
|
package/src/pipeline/types.ts
CHANGED
|
@@ -65,6 +65,8 @@ export interface PipelineContext {
|
|
|
65
65
|
routing: RoutingResult;
|
|
66
66
|
/** Working directory (project root) */
|
|
67
67
|
workdir: string;
|
|
68
|
+
/** Absolute path to the prd.json file (used by routing stage to persist initial classification) */
|
|
69
|
+
prdPath?: string;
|
|
68
70
|
/** Feature directory (optional, e.g., nax/features/my-feature/) */
|
|
69
71
|
featureDir?: string;
|
|
70
72
|
/** Hooks configuration */
|
package/src/prd/types.ts
CHANGED
|
@@ -45,6 +45,10 @@ export interface StructuredFailure {
|
|
|
45
45
|
/** Routing metadata per story */
|
|
46
46
|
export interface StoryRouting {
|
|
47
47
|
complexity: Complexity;
|
|
48
|
+
/** Initial complexity from first classification — written once, never overwritten by escalation */
|
|
49
|
+
initialComplexity?: Complexity;
|
|
50
|
+
/** Content hash of story fields at time of routing — used to detect stale cached routing (RRP-003) */
|
|
51
|
+
contentHash?: string;
|
|
48
52
|
/** Model tier (derived at runtime from config, not persisted) */
|
|
49
53
|
modelTier?: ModelTier;
|
|
50
54
|
testStrategy: TestStrategy;
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Story Content Hash
|
|
3
|
+
*
|
|
4
|
+
* Computes a deterministic hash of the story content fields used for routing.
|
|
5
|
+
* Used by the routing stage (RRP-003) to detect stale cached routing.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { UserStory } from "../prd/types";
|
|
9
|
+
|
|
10
|
+
/**
 * Compute a deterministic hash of the story content fields used for routing.
 *
 * Hash input: `JSON.stringify([title, description, acceptanceCriteria, tags])`.
 *
 * JSON encoding keeps every field AND every array element delimited, so moving
 * text between fields, or between adjacent acceptance criteria / tags, changes
 * the hash. Joining array items with "" made `["ab", "c"]` and `["a", "bc"]`
 * indistinguishable, so such an edit was not detected as a content change.
 * A missing field serializes as `null` instead of throwing on `.join`.
 *
 * NOTE: hashes produced by the earlier "\0" + join("") encoding will not match
 * the values returned here; a stored hash from that encoding is reported as a
 * mismatch the first time it is compared.
 *
 * @param story - The user story to hash
 * @returns A hex string SHA-256 content hash
 */
export function computeStoryContentHash(story: UserStory): string {
  const input = JSON.stringify([story.title, story.description, story.acceptanceCriteria, story.tags]);

  const hasher = new Bun.CryptoHasher("sha256");
  hasher.update(input);
  return hasher.digest("hex");
}
|
package/src/routing/index.ts
CHANGED
|
@@ -15,3 +15,6 @@ export { keywordStrategy, llmStrategy, manualStrategy } from "./strategies";
|
|
|
15
15
|
// Custom strategy loader
|
|
16
16
|
export { loadCustomStrategy } from "./loader";
|
|
17
17
|
export { tryLlmBatchRoute } from "./batch-route";
|
|
18
|
+
|
|
19
|
+
// Content hash for staleness detection (RRP-003)
|
|
20
|
+
export { computeStoryContentHash } from "./content-hash";
|
package/src/routing/router.ts
CHANGED
|
@@ -152,7 +152,7 @@ const LITE_TAGS = ["ui", "layout", "cli", "integration", "polyglot"];
|
|
|
152
152
|
* - 'auto' → existing heuristic logic, plus:
|
|
153
153
|
* if tags include ui/layout/cli/integration/polyglot → three-session-tdd-lite
|
|
154
154
|
* if security/public-api/complex/expert → three-session-tdd
|
|
155
|
-
*
|
|
155
|
+
* simple → test-after, medium → three-session-tdd-lite (BUG-045)
|
|
156
156
|
*
|
|
157
157
|
* @param complexity - Pre-classified complexity level
|
|
158
158
|
* @param title - Story title
|
|
@@ -201,7 +201,8 @@ export function determineTestStrategy(
|
|
|
201
201
|
return hasLiteTag ? "three-session-tdd-lite" : "three-session-tdd";
|
|
202
202
|
}
|
|
203
203
|
|
|
204
|
-
//
|
|
204
|
+
// BUG-045: simple → test-after (low overhead), medium → tdd-lite (sweet spot)
|
|
205
|
+
if (complexity === "simple") return "test-after";
|
|
205
206
|
return "three-session-tdd-lite";
|
|
206
207
|
}
|
|
207
208
|
|
|
@@ -117,7 +117,8 @@ function determineTestStrategy(
|
|
|
117
117
|
return "three-session-tdd";
|
|
118
118
|
}
|
|
119
119
|
|
|
120
|
-
//
|
|
120
|
+
// BUG-045: simple → test-after (low overhead), medium → tdd-lite (sweet spot)
|
|
121
|
+
if (complexity === "simple") return "test-after";
|
|
121
122
|
return "three-session-tdd-lite";
|
|
122
123
|
}
|
|
123
124
|
|
|
@@ -5,8 +5,9 @@
|
|
|
5
5
|
* for LLM-based routing decisions.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
import type { Complexity, ModelTier, NaxConfig, TestStrategy } from "../../config";
|
|
8
|
+
import type { Complexity, ModelTier, NaxConfig, TddStrategy, TestStrategy } from "../../config";
|
|
9
9
|
import type { UserStory } from "../../prd/types";
|
|
10
|
+
import { determineTestStrategy } from "../router";
|
|
10
11
|
import type { RoutingDecision } from "../strategy";
|
|
11
12
|
|
|
12
13
|
/**
|
|
@@ -34,18 +35,13 @@ Tags: ${tags.join(", ")}
|
|
|
34
35
|
- balanced: Standard features, moderate logic, straightforward tests. 30-90 min.
|
|
35
36
|
- powerful: Complex architecture, security-critical, multi-file refactors, novel algorithms. >90 min.
|
|
36
37
|
|
|
37
|
-
## Available Test Strategies
|
|
38
|
-
- test-after: Write implementation first, add tests after. For straightforward work.
|
|
39
|
-
- three-session-tdd: Separate test-writer → implementer → verifier sessions. For complex/critical work where test design matters.
|
|
40
|
-
|
|
41
38
|
## Rules
|
|
42
|
-
- Default to the CHEAPEST
|
|
43
|
-
-
|
|
44
|
-
- Simple barrel exports, re-exports, or index files are ALWAYS test-after + fast, regardless of keywords.
|
|
39
|
+
- Default to the CHEAPEST tier that will succeed.
|
|
40
|
+
- Simple barrel exports, re-exports, or index files are ALWAYS simple + fast.
|
|
45
41
|
- A story touching many files doesn't automatically mean complex — copy-paste refactors are simple.
|
|
46
42
|
|
|
47
43
|
Respond with ONLY this JSON (no markdown, no explanation):
|
|
48
|
-
{"complexity":"simple|medium|complex|expert","modelTier":"fast|balanced|powerful","
|
|
44
|
+
{"complexity":"simple|medium|complex|expert","modelTier":"fast|balanced|powerful","reasoning":"<one line>"}`;
|
|
49
45
|
}
|
|
50
46
|
|
|
51
47
|
/**
|
|
@@ -77,18 +73,13 @@ ${storyBlocks}
|
|
|
77
73
|
- balanced: Standard features, moderate logic, straightforward tests. 30-90 min.
|
|
78
74
|
- powerful: Complex architecture, security-critical, multi-file refactors, novel algorithms. >90 min.
|
|
79
75
|
|
|
80
|
-
## Available Test Strategies
|
|
81
|
-
- test-after: Write implementation first, add tests after. For straightforward work.
|
|
82
|
-
- three-session-tdd: Separate test-writer → implementer → verifier sessions. For complex/critical work where test design matters.
|
|
83
|
-
|
|
84
76
|
## Rules
|
|
85
|
-
- Default to the CHEAPEST
|
|
86
|
-
-
|
|
87
|
-
- Simple barrel exports, re-exports, or index files are ALWAYS test-after + fast, regardless of keywords.
|
|
77
|
+
- Default to the CHEAPEST tier that will succeed.
|
|
78
|
+
- Simple barrel exports, re-exports, or index files are ALWAYS simple + fast.
|
|
88
79
|
- A story touching many files doesn't automatically mean complex — copy-paste refactors are simple.
|
|
89
80
|
|
|
90
81
|
Respond with ONLY a JSON array (no markdown, no explanation):
|
|
91
|
-
[{"id":"US-001","complexity":"simple|medium|complex|expert","modelTier":"fast|balanced|powerful","
|
|
82
|
+
[{"id":"US-001","complexity":"simple|medium|complex|expert","modelTier":"fast|balanced|powerful","reasoning":"<one line>"}]`;
|
|
92
83
|
}
|
|
93
84
|
|
|
94
85
|
/**
|
|
@@ -99,33 +90,43 @@ Respond with ONLY a JSON array (no markdown, no explanation):
|
|
|
99
90
|
* @returns Validated routing decision
|
|
100
91
|
* @throws Error if validation fails
|
|
101
92
|
*/
|
|
102
|
-
export function validateRoutingDecision(
|
|
103
|
-
|
|
104
|
-
|
|
93
|
+
export function validateRoutingDecision(
  parsed: Record<string, unknown>,
  config: NaxConfig,
  story?: UserStory,
): RoutingDecision {
  const { complexity, modelTier, reasoning } = parsed;

  // Validate required fields (testStrategy no longer required from LLM — derived via BUG-045)
  if (!complexity || !modelTier || !reasoning) {
    throw new Error(`Missing required fields in LLM response: ${JSON.stringify(parsed)}`);
  }

  // Validate field values
  const validComplexities: Complexity[] = ["simple", "medium", "complex", "expert"];
  if (!validComplexities.includes(complexity as Complexity)) {
    throw new Error(`Invalid complexity: ${complexity}`);
  }

  // Validate modelTier exists in config.
  // `parsed` is untrusted LLM output: require a real string and an OWN key of config.models,
  // so values such as ["fast"] (coerced to "fast" when used as an index) or "constructor"
  // (found on Object.prototype) cannot pass the lookup.
  if (
    typeof modelTier !== "string" ||
    !Object.prototype.hasOwnProperty.call(config.models, modelTier) ||
    !config.models[modelTier]
  ) {
    throw new Error(`Invalid modelTier: ${modelTier} (not in config.models)`);
  }

  // reasoning is returned as a string below, so reject truthy non-string values here.
  if (typeof reasoning !== "string") {
    throw new Error(`Invalid reasoning: expected a string, got ${typeof reasoning}`);
  }

  // BUG-045: Derive testStrategy from determineTestStrategy() — single source of truth.
  // LLM decides complexity; testStrategy is a policy decision, not a judgment call.
  const tddStrategy: TddStrategy = config.tdd?.strategy ?? "auto";
  const testStrategy = determineTestStrategy(
    complexity as Complexity,
    story?.title ?? "",
    story?.description ?? "",
    story?.tags ?? [],
    tddStrategy,
  );

  return {
    complexity: complexity as Complexity,
    modelTier: modelTier as ModelTier,
    testStrategy,
    reasoning,
  };
}
|
|
@@ -155,7 +156,7 @@ export function stripCodeFences(text: string): string {
|
|
|
155
156
|
export function parseRoutingResponse(output: string, story: UserStory, config: NaxConfig): RoutingDecision {
  const jsonText = stripCodeFences(output);
  const parsed: unknown = JSON.parse(jsonText);

  // JSON.parse may legally yield null, a primitive or an array. Reject those up front so
  // the caller gets a descriptive error instead of a TypeError from property access on null.
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    throw new Error(`LLM response is not a JSON object: ${jsonText.slice(0, 200)}`);
  }

  return validateRoutingDecision(parsed as Record<string, unknown>, config, story);
}
|
|
160
161
|
|
|
161
162
|
/**
|
|
@@ -201,7 +202,7 @@ export function parseBatchResponse(
|
|
|
201
202
|
}
|
|
202
203
|
|
|
203
204
|
// Validate entry directly (no re-serialization needed)
|
|
204
|
-
const decision = validateRoutingDecision(entry, config);
|
|
205
|
+
const decision = validateRoutingDecision(entry, config, story);
|
|
205
206
|
decisions.set(entry.id, decision);
|
|
206
207
|
}
|
|
207
208
|
|
package/src/utils/git.ts
CHANGED
|
@@ -105,3 +105,24 @@ export async function hasCommitsForStory(workdir: string, storyId: string, maxCo
|
|
|
105
105
|
return false;
|
|
106
106
|
}
|
|
107
107
|
}
|
|
108
|
+
|
|
109
|
+
/**
 * Report whether the output of a git operation mentions a conflict.
 *
 * This is a plain substring search. Git prints "CONFLICT" in uppercase for
 * merge/rebase conflicts; the all-lowercase "conflict" is accepted as well.
 * Mixed-case spellings such as "Conflict" do not match.
 *
 * @param output - Combined stdout/stderr output from a git operation
 * @returns true if `output` contains "CONFLICT" or "conflict"
 *
 * @example
 * ```typescript
 * if (detectMergeConflict(agentOutput)) {
 *   // fire merge-conflict trigger
 * }
 * ```
 */
export function detectMergeConflict(output: string): boolean {
  const needles = ["CONFLICT", "conflict"];
  return needles.some((needle) => output.includes(needle));
}
|
|
@@ -318,7 +318,7 @@ describe("Plugin router fallback to built-in strategy", () => {
|
|
|
318
318
|
// Keyword strategy decision (not from plugin)
|
|
319
319
|
expect(decision.complexity).toBe("simple");
|
|
320
320
|
expect(decision.modelTier).toBe("fast");
|
|
321
|
-
expect(decision.testStrategy).toBe("
|
|
321
|
+
expect(decision.testStrategy).toBe("test-after");
|
|
322
322
|
});
|
|
323
323
|
|
|
324
324
|
test("keyword strategy handles complex story when plugins return null", async () => {
|