@nathapp/nax 0.25.0 → 0.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.gitlab-ci.yml +1 -0
- package/CLAUDE.md +70 -56
- package/docs/ROADMAP.md +38 -8
- package/nax/features/review-quality/prd.json +55 -0
- package/nax/features/routing-persistence/prd.json +104 -0
- package/nax/features/routing-persistence/progress.txt +1 -0
- package/nax/status.json +22 -13
- package/package.json +1 -1
- package/src/execution/iteration-runner.ts +1 -0
- package/src/metrics/aggregator.ts +2 -1
- package/src/metrics/tracker.ts +26 -14
- package/src/metrics/types.ts +2 -0
- package/src/optimizer/index.ts +2 -1
- package/src/pipeline/stages/routing.ts +42 -8
- package/src/pipeline/types.ts +2 -0
- package/src/prd/types.ts +4 -0
- package/src/review/runner.ts +50 -1
- package/src/routing/content-hash.ts +25 -0
- package/src/routing/index.ts +3 -0
- package/src/version.ts +20 -4
- package/test/integration/review/review-plugin-integration.test.ts +12 -7
- package/test/unit/metrics/aggregator.test.ts +164 -0
- package/test/unit/metrics/tracker.test.ts +186 -0
- package/test/unit/pipeline/stages/routing-idempotence.test.ts +139 -0
- package/test/unit/pipeline/stages/routing-initial-complexity.test.ts +321 -0
- package/test/unit/pipeline/stages/routing-persistence.test.ts +380 -0
- package/test/unit/review/runner.test.ts +117 -0
- package/test/unit/routing/content-hash.test.ts +99 -0
|
@@ -2,15 +2,18 @@
|
|
|
2
2
|
* Routing Stage
|
|
3
3
|
*
|
|
4
4
|
* Classifies story complexity and determines model tier + test strategy.
|
|
5
|
-
* Uses cached complexity/testStrategy/modelTier from story if
|
|
5
|
+
* Uses cached complexity/testStrategy/modelTier from story if contentHash matches.
|
|
6
6
|
* modelTier: uses escalated tier if explicitly set (BUG-032), otherwise derives from config.
|
|
7
7
|
*
|
|
8
|
+
* RRP-003: contentHash staleness detection — if story.routing.contentHash is missing or
|
|
9
|
+
* does not match the current story content, treats cached routing as a miss and re-classifies.
|
|
10
|
+
*
|
|
8
11
|
* @returns
|
|
9
12
|
* - `continue`: Routing determined, proceed to next stage
|
|
10
13
|
*
|
|
11
14
|
* @example
|
|
12
15
|
* ```ts
|
|
13
|
-
* // Story has cached routing with
|
|
16
|
+
* // Story has cached routing with matching contentHash
|
|
14
17
|
* await routingStage.execute(ctx);
|
|
15
18
|
* // ctx.routing: { complexity: "simple", modelTier: "fast", testStrategy: "test-after", reasoning: "..." }
|
|
16
19
|
* // modelTier is derived from current config.autoMode.complexityRouting
|
|
@@ -19,7 +22,8 @@
|
|
|
19
22
|
|
|
20
23
|
import { isGreenfieldStory } from "../../context/greenfield";
|
|
21
24
|
import { getLogger } from "../../logger";
|
|
22
|
-
import {
|
|
25
|
+
import { savePRD } from "../../prd";
|
|
26
|
+
import { complexityToModelTier, computeStoryContentHash, routeStory } from "../../routing";
|
|
23
27
|
import { clearCache, routeBatch } from "../../routing/strategies/llm";
|
|
24
28
|
import type { PipelineContext, PipelineStage, RoutingResult, StageResult } from "../types";
|
|
25
29
|
|
|
@@ -30,11 +34,25 @@ export const routingStage: PipelineStage = {
|
|
|
30
34
|
async execute(ctx: PipelineContext): Promise<StageResult> {
|
|
31
35
|
const logger = getLogger();
|
|
32
36
|
|
|
33
|
-
//
|
|
34
|
-
//
|
|
37
|
+
// Staleness detection (RRP-003):
|
|
38
|
+
// - story.routing absent → cache miss (no prior routing)
|
|
39
|
+
// - story.routing + no contentHash → legacy cache hit (manual / pre-RRP-003 routing, honor as-is)
|
|
40
|
+
// - story.routing + contentHash matches → cache hit
|
|
41
|
+
// - story.routing + contentHash mismatches → cache miss (stale, re-classify)
|
|
42
|
+
const hasExistingRouting = ctx.story.routing !== undefined;
|
|
43
|
+
const hasContentHash = ctx.story.routing?.contentHash !== undefined;
|
|
44
|
+
let currentHash: string | undefined;
|
|
45
|
+
let hashMatch = false;
|
|
46
|
+
if (hasContentHash) {
|
|
47
|
+
currentHash = _routingDeps.computeStoryContentHash(ctx.story);
|
|
48
|
+
hashMatch = ctx.story.routing?.contentHash === currentHash;
|
|
49
|
+
}
|
|
50
|
+
const isCacheHit = hasExistingRouting && (!hasContentHash || hashMatch);
|
|
51
|
+
|
|
35
52
|
let routing: { complexity: string; testStrategy: string; modelTier: string; reasoning?: string };
|
|
36
|
-
|
|
37
|
-
|
|
53
|
+
|
|
54
|
+
if (isCacheHit) {
|
|
55
|
+
// Cache hit: legacy routing (no contentHash) or matching contentHash — use cached values
|
|
38
56
|
routing = await _routingDeps.routeStory(ctx.story, { config: ctx.config }, ctx.workdir, ctx.plugins);
|
|
39
57
|
// Override with cached values only when they are actually set
|
|
40
58
|
if (ctx.story.routing?.complexity) routing.complexity = ctx.story.routing.complexity;
|
|
@@ -50,8 +68,22 @@ export const routingStage: PipelineStage = {
|
|
|
50
68
|
);
|
|
51
69
|
}
|
|
52
70
|
} else {
|
|
53
|
-
//
|
|
71
|
+
// Cache miss: no routing, or contentHash present but mismatched — fresh classification
|
|
54
72
|
routing = await _routingDeps.routeStory(ctx.story, { config: ctx.config }, ctx.workdir, ctx.plugins);
|
|
73
|
+
// currentHash already computed if a mismatch was detected; compute now if starting fresh
|
|
74
|
+
currentHash = currentHash ?? _routingDeps.computeStoryContentHash(ctx.story);
|
|
75
|
+
ctx.story.routing = {
|
|
76
|
+
...(ctx.story.routing ?? {}),
|
|
77
|
+
complexity: routing.complexity as import("../../config").Complexity,
|
|
78
|
+
initialComplexity:
|
|
79
|
+
ctx.story.routing?.initialComplexity ?? (routing.complexity as import("../../config").Complexity),
|
|
80
|
+
testStrategy: routing.testStrategy as import("../../config").TestStrategy,
|
|
81
|
+
reasoning: routing.reasoning ?? "",
|
|
82
|
+
contentHash: currentHash,
|
|
83
|
+
};
|
|
84
|
+
if (ctx.prdPath) {
|
|
85
|
+
await _routingDeps.savePRD(ctx.prd, ctx.prdPath);
|
|
86
|
+
}
|
|
55
87
|
}
|
|
56
88
|
|
|
57
89
|
// BUG-010: Greenfield detection — force test-after if no test files exist
|
|
@@ -97,4 +129,6 @@ export const _routingDeps = {
|
|
|
97
129
|
complexityToModelTier,
|
|
98
130
|
isGreenfieldStory,
|
|
99
131
|
clearCache,
|
|
132
|
+
savePRD,
|
|
133
|
+
computeStoryContentHash,
|
|
100
134
|
};
|
package/src/pipeline/types.ts
CHANGED
|
@@ -65,6 +65,8 @@ export interface PipelineContext {
|
|
|
65
65
|
routing: RoutingResult;
|
|
66
66
|
/** Working directory (project root) */
|
|
67
67
|
workdir: string;
|
|
68
|
+
/** Absolute path to the prd.json file (used by routing stage to persist initial classification) */
|
|
69
|
+
prdPath?: string;
|
|
68
70
|
/** Feature directory (optional, e.g., nax/features/my-feature/) */
|
|
69
71
|
featureDir?: string;
|
|
70
72
|
/** Hooks configuration */
|
package/src/prd/types.ts
CHANGED
|
@@ -45,6 +45,10 @@ export interface StructuredFailure {
|
|
|
45
45
|
/** Routing metadata per story */
|
|
46
46
|
export interface StoryRouting {
|
|
47
47
|
complexity: Complexity;
|
|
48
|
+
/** Initial complexity from first classification — written once, never overwritten by escalation */
|
|
49
|
+
initialComplexity?: Complexity;
|
|
50
|
+
/** Content hash of story fields at time of routing — used to detect stale cached routing (RRP-003) */
|
|
51
|
+
contentHash?: string;
|
|
48
52
|
/** Model tier (derived at runtime from config, not persisted) */
|
|
49
53
|
modelTier?: ModelTier;
|
|
50
54
|
testStrategy: TestStrategy;
|
package/src/review/runner.ts
CHANGED
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
|
|
7
7
|
import { spawn } from "bun";
|
|
8
8
|
import type { ExecutionConfig } from "../config/schema";
|
|
9
|
+
import { getSafeLogger } from "../logger";
|
|
9
10
|
import type { ReviewCheckName, ReviewCheckResult, ReviewConfig, ReviewResult } from "./types";
|
|
10
11
|
|
|
11
12
|
/** Default commands for each check type */
|
|
@@ -159,6 +160,40 @@ async function runCheck(check: ReviewCheckName, command: string, workdir: string
|
|
|
159
160
|
}
|
|
160
161
|
}
|
|
161
162
|
|
|
163
|
+
/**
 * List tracked files that differ from HEAD (`git diff --name-only HEAD`).
 *
 * Best-effort: resolves to an empty array when the working tree is clean,
 * when git exits with a non-zero status, or when spawning git throws.
 *
 * @param workdir - Directory in which the git command is executed
 * @returns One entry per non-empty line printed by git
 */
async function getUncommittedFilesImpl(workdir: string): Promise<string[]> {
  try {
    const proc = spawn({
      cmd: ["git", "diff", "--name-only", "HEAD"],
      cwd: workdir,
      stdout: "pipe",
      // stderr is never inspected here, so do not leave an unread pipe behind.
      stderr: "ignore",
    });

    // Drain stdout while waiting for exit. Reading only after the process has
    // exited risks a stall when the file listing is larger than the pipe buffer.
    const [exitCode, output] = await Promise.all([proc.exited, new Response(proc.stdout).text()]);
    if (exitCode !== 0) {
      return [];
    }

    return output.trim().split("\n").filter(Boolean);
  } catch {
    // Deliberate best-effort: a missing git binary or bad workdir counts as "nothing to report".
    return [];
  }
}
|
|
187
|
+
|
|
188
|
+
/**
 * Injection seam for tests: members can be reassigned instead of using
 * mock.module(), which leaks in Bun 1.x.
 * RQ-001: getUncommittedFiles enables mocking of the git dirty-tree check.
 */
export const _deps: {
  /** Returns tracked files with uncommitted changes (git diff --name-only HEAD). */
  getUncommittedFiles: (workdir: string) => Promise<string[]>;
} = {
  getUncommittedFiles: getUncommittedFilesImpl,
};
|
|
196
|
+
|
|
162
197
|
/**
|
|
163
198
|
* Run all configured review checks
|
|
164
199
|
*/
|
|
@@ -168,16 +203,30 @@ export async function runReview(
|
|
|
168
203
|
executionConfig?: ExecutionConfig,
|
|
169
204
|
): Promise<ReviewResult> {
|
|
170
205
|
const startTime = Date.now();
|
|
206
|
+
const logger = getSafeLogger();
|
|
171
207
|
const checks: ReviewCheckResult[] = [];
|
|
172
208
|
let firstFailure: string | undefined;
|
|
173
209
|
|
|
210
|
+
// RQ-001: Check for uncommitted tracked files before running checks
|
|
211
|
+
const uncommittedFiles = await _deps.getUncommittedFiles(workdir);
|
|
212
|
+
if (uncommittedFiles.length > 0) {
|
|
213
|
+
const fileList = uncommittedFiles.join(", ");
|
|
214
|
+
logger?.warn("review", `Uncommitted changes detected before review: ${fileList}`);
|
|
215
|
+
return {
|
|
216
|
+
success: false,
|
|
217
|
+
checks: [],
|
|
218
|
+
totalDurationMs: Date.now() - startTime,
|
|
219
|
+
failureReason: `Working tree has uncommitted changes:\n${uncommittedFiles.map((f) => ` - ${f}`).join("\n")}\n\nStage and commit these files before running review.`,
|
|
220
|
+
};
|
|
221
|
+
}
|
|
222
|
+
|
|
174
223
|
for (const checkName of config.checks) {
|
|
175
224
|
// Resolve command using resolution strategy
|
|
176
225
|
const command = await resolveCommand(checkName, config, executionConfig, workdir);
|
|
177
226
|
|
|
178
227
|
// Skip if explicitly disabled or not found
|
|
179
228
|
if (command === null) {
|
|
180
|
-
|
|
229
|
+
getSafeLogger()?.warn("review", `Skipping ${checkName} check (command not configured or disabled)`);
|
|
181
230
|
continue;
|
|
182
231
|
}
|
|
183
232
|
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Story Content Hash
|
|
3
|
+
*
|
|
4
|
+
* Computes a deterministic hash of the story content fields used for routing.
|
|
5
|
+
* Used by the routing stage (RRP-003) to detect stale cached routing.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { UserStory } from "../prd/types";
|
|
9
|
+
|
|
10
|
+
/**
 * Compute a deterministic SHA-256 hash of the story content fields used for routing.
 *
 * Hash input: JSON.stringify([title, description, acceptanceCriteria, tags]).
 *
 * JSON encoding delimits every field AND every array element, so moving text
 * between fields or between list items always changes the hash. Plain
 * `join("")` would make ["ab", "c"] and ["a", "bc"] (or [] and [""]) collide.
 * A field that is undefined serializes as null instead of throwing.
 *
 * NOTE: hashes differ from those produced by a `join("")`-based input; the
 * routing stage treats a mismatching stored contentHash as stale and re-classifies.
 *
 * @param story - The user story to hash
 * @returns A lowercase hex SHA-256 digest
 */
export function computeStoryContentHash(story: UserStory): string {
  const input = JSON.stringify([story.title, story.description, story.acceptanceCriteria, story.tags]);

  const hasher = new Bun.CryptoHasher("sha256");
  hasher.update(input);
  return hasher.digest("hex");
}
|
package/src/routing/index.ts
CHANGED
|
@@ -15,3 +15,6 @@ export { keywordStrategy, llmStrategy, manualStrategy } from "./strategies";
|
|
|
15
15
|
// Custom strategy loader
|
|
16
16
|
export { loadCustomStrategy } from "./loader";
|
|
17
17
|
export { tryLlmBatchRoute } from "./batch-route";
|
|
18
|
+
|
|
19
|
+
// Content hash for staleness detection (RRP-003)
|
|
20
|
+
export { computeStoryContentHash } from "./content-hash";
|
package/src/version.ts
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Version and build info for nax.
|
|
3
3
|
*
|
|
4
4
|
* GIT_COMMIT is injected at build time via --define in the bun build script.
|
|
5
|
-
* When running from source (
|
|
5
|
+
* When running from source (bin/nax.ts), falls back to runtime git rev-parse.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
8
|
import pkg from "../package.json";
|
|
@@ -11,13 +11,29 @@ declare const GIT_COMMIT: string;
|
|
|
11
11
|
|
|
12
12
|
export const NAX_VERSION: string = pkg.version;
|
|
13
13
|
|
|
14
|
-
/** Short git commit hash
|
|
14
|
+
/**
 * Short git commit hash — injected at build time, or resolved at runtime from git.
 *
 * Resolution order:
 *  1. GIT_COMMIT, when it was injected and looks like an abbreviated hash.
 *  2. `git rev-parse --short HEAD` run from this file's directory, but only
 *     when that directory is not inside a `node_modules` tree. For an installed
 *     package git would walk up into the host project's repository and report
 *     that project's HEAD, which is not a nax commit.
 *  3. The literal "dev".
 */
export const NAX_COMMIT: string = (() => {
  // 6–10 lowercase hex characters: the accepted shape of an abbreviated hash.
  const shortHash = /^[0-9a-f]{6,10}$/;

  // Build-time injection (bun build --define GIT_COMMIT=...)
  // Guard: must be a non-empty string that looks like a real commit hash
  try {
    if (typeof GIT_COMMIT === "string" && shortHash.test(GIT_COMMIT)) return GIT_COMMIT;
  } catch {
    // not injected — fall through to runtime resolution
  }

  // Runtime fallback: resolve from the source file's git repo (Bun-native)
  try {
    const sourceDir = import.meta.dir;
    const installedAsDependency = sourceDir.split(/[\\/]/).includes("node_modules");
    if (!installedAsDependency) {
      const result = Bun.spawnSync(["git", "rev-parse", "--short", "HEAD"], {
        cwd: sourceDir,
        stderr: "ignore",
      });
      if (result.exitCode === 0) {
        const hash = result.stdout.toString().trim();
        if (shortHash.test(hash)) return hash;
      }
    }
  } catch {
    // git not available
  }
  return "dev";
})();
|
|
22
38
|
|
|
23
39
|
export const NAX_BUILD_INFO = `v${NAX_VERSION} (${NAX_COMMIT})`;
|
|
@@ -173,7 +173,7 @@ describe("Review Stage - Plugin Integration", () => {
|
|
|
173
173
|
expect(receivedWorkdir).toBe(tempDir);
|
|
174
174
|
});
|
|
175
175
|
|
|
176
|
-
test("
|
|
176
|
+
test("review fails when there are uncommitted changes (RQ-001)", async () => {
|
|
177
177
|
const tempDir = mkdtempSync(join(tmpdir(), "nax-review-plugin-"));
|
|
178
178
|
|
|
179
179
|
// Create a file first
|
|
@@ -181,15 +181,16 @@ describe("Review Stage - Plugin Integration", () => {
|
|
|
181
181
|
|
|
182
182
|
await initGitRepo(tempDir);
|
|
183
183
|
|
|
184
|
-
// Now modify the file after git init
|
|
184
|
+
// Now modify the file after git init WITHOUT committing
|
|
185
|
+
// This violates RQ-001 (dirty working tree)
|
|
185
186
|
writeFileSync(join(tempDir, "test.ts"), "// modified");
|
|
186
187
|
|
|
187
|
-
let
|
|
188
|
+
let reviewerCalled = false;
|
|
188
189
|
const mockReviewer: IReviewPlugin = {
|
|
189
190
|
name: "test-reviewer",
|
|
190
191
|
description: "Test reviewer",
|
|
191
|
-
async check(_workdir
|
|
192
|
-
|
|
192
|
+
async check(_workdir) {
|
|
193
|
+
reviewerCalled = true;
|
|
193
194
|
return { passed: true, output: "OK" };
|
|
194
195
|
},
|
|
195
196
|
};
|
|
@@ -204,9 +205,13 @@ describe("Review Stage - Plugin Integration", () => {
|
|
|
204
205
|
const registry = new PluginRegistry([mockPlugin]);
|
|
205
206
|
const ctx = createMockContext(tempDir, registry);
|
|
206
207
|
|
|
207
|
-
await reviewStage.execute(ctx);
|
|
208
|
+
const result = await reviewStage.execute(ctx);
|
|
208
209
|
|
|
209
|
-
|
|
210
|
+
// RQ-001: Review should fail with dirty working tree
|
|
211
|
+
expect(result.action).toBe("escalate");
|
|
212
|
+
expect(result.reason).toContain("Working tree has uncommitted changes");
|
|
213
|
+
// Reviewer should not be called due to dirty tree check
|
|
214
|
+
expect(reviewerCalled).toBe(false);
|
|
210
215
|
});
|
|
211
216
|
|
|
212
217
|
test("reviewer receives empty array when no files changed", async () => {
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Metrics Aggregator — RRP-002: complexityAccuracy uses initialComplexity
|
|
3
|
+
*
|
|
4
|
+
* AC-6: calculateAggregateMetrics complexityAccuracy compares
|
|
5
|
+
* initialComplexity (predicted) vs finalTier (actual), not
|
|
6
|
+
* complexity (which may reflect post-escalation state).
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { describe, expect, test } from "bun:test";
|
|
10
|
+
import { calculateAggregateMetrics } from "../../../src/metrics/aggregator";
|
|
11
|
+
import type { RunMetrics, StoryMetrics } from "../../../src/metrics/types";
|
|
12
|
+
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
// Helpers
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
|
|
17
|
+
/**
 * Build a StoryMetrics fixture: a successful single-attempt "medium"/"balanced"
 * story, with any field replaceable through `overrides` (storyId is mandatory).
 */
function makeStoryMetrics(overrides: Partial<StoryMetrics> & { storyId: string }): StoryMetrics {
  const defaults: StoryMetrics = {
    storyId: overrides.storyId,
    complexity: "medium",
    modelTier: "balanced",
    modelUsed: "claude-sonnet-4-5",
    attempts: 1,
    finalTier: "balanced",
    success: true,
    cost: 0.01,
    durationMs: 5000,
    firstPassSuccess: true,
    startedAt: "2026-01-01T00:00:00Z",
    completedAt: "2026-01-01T00:00:05Z",
  };
  return { ...defaults, ...overrides };
}
|
|
34
|
+
|
|
35
|
+
/**
 * Wrap story metrics in a RunMetrics fixture; cost and completed/failed counts
 * are derived from the given stories, everything else is fixed.
 */
function makeRun(stories: StoryMetrics[]): RunMetrics {
  let totalCost = 0;
  let storiesCompleted = 0;
  for (const story of stories) {
    totalCost += story.cost;
    if (story.success) storiesCompleted += 1;
  }

  return {
    runId: "run-001",
    feature: "test-feature",
    startedAt: "2026-01-01T00:00:00Z",
    completedAt: "2026-01-01T00:01:00Z",
    totalCost,
    totalStories: stories.length,
    storiesCompleted,
    storiesFailed: stories.length - storiesCompleted,
    totalDurationMs: 60000,
    stories,
  };
}
|
|
49
|
+
|
|
50
|
+
// ---------------------------------------------------------------------------
|
|
51
|
+
// AC-6: complexityAccuracy uses initialComplexity as predicted complexity
|
|
52
|
+
// ---------------------------------------------------------------------------
|
|
53
|
+
|
|
54
|
+
describe("calculateAggregateMetrics - complexityAccuracy uses initialComplexity", () => {
  /** Put the given stories into a single run and aggregate that run. */
  const aggregateOf = (...stories: StoryMetrics[]) => calculateAggregateMetrics([makeRun(stories)]);

  /** Story first predicted "simple" that finished on the "powerful" tier after a second attempt. */
  const escalatedSimpleStory = (): StoryMetrics =>
    makeStoryMetrics({
      storyId: "US-001",
      complexity: "medium", // post-escalation complexity
      initialComplexity: "simple", // original prediction
      modelTier: "fast",
      finalTier: "powerful",
      attempts: 2,
      firstPassSuccess: false,
    });

  /** Legacy metrics entry: no initialComplexity recorded. */
  const legacyComplexStory = (storyId: string): StoryMetrics =>
    makeStoryMetrics({
      storyId,
      complexity: "complex",
      modelTier: "powerful",
      finalTier: "powerful",
    });

  test("complexityAccuracy keyed by initialComplexity when present", () => {
    const { complexityAccuracy } = aggregateOf(escalatedSimpleStory());

    // Keyed by the original prediction ("simple"), not by the later complexity ("medium").
    expect(complexityAccuracy["simple"]).toBeDefined();
    expect(complexityAccuracy["medium"]).toBeUndefined();
  });

  test("mismatch detected when initialComplexity tier != finalTier", () => {
    const { complexityAccuracy } = aggregateOf(escalatedSimpleStory());

    // simple -> powerful: mismatch expected
    expect(complexityAccuracy["simple"].mismatchRate).toBeGreaterThan(0);
  });

  test("no mismatch when initialComplexity tier matches finalTier", () => {
    const { complexityAccuracy } = aggregateOf(
      makeStoryMetrics({
        storyId: "US-001",
        complexity: "medium",
        initialComplexity: "medium",
        modelTier: "balanced",
        finalTier: "balanced",
        attempts: 1,
        firstPassSuccess: true,
      }),
    );

    expect(complexityAccuracy["medium"].mismatchRate).toBe(0);
  });

  test("falls back to complexity when initialComplexity is absent (backward compat)", () => {
    const { complexityAccuracy } = aggregateOf(legacyComplexStory("US-001"));

    // With no initialComplexity, the complexity value is the key.
    expect(complexityAccuracy["complex"]).toBeDefined();
  });

  test("mixes initialComplexity-keyed and legacy entries correctly", () => {
    const modern = makeStoryMetrics({
      storyId: "US-001",
      complexity: "medium",
      initialComplexity: "simple",
      modelTier: "balanced",
      finalTier: "balanced",
    });

    const { complexityAccuracy } = aggregateOf(modern, legacyComplexStory("US-002"));

    expect(complexityAccuracy["simple"]).toBeDefined(); // from initialComplexity
    expect(complexityAccuracy["complex"]).toBeDefined(); // from complexity fallback
    expect(complexityAccuracy["medium"]).toBeUndefined(); // NOT used (initialComplexity takes over)
  });

  test("complexityAccuracy.predicted count matches number of stories with that initialComplexity", () => {
    const { complexityAccuracy } = aggregateOf(
      makeStoryMetrics({ storyId: "US-001", complexity: "medium", initialComplexity: "simple", finalTier: "balanced" }),
      makeStoryMetrics({ storyId: "US-002", complexity: "medium", initialComplexity: "simple", finalTier: "balanced" }),
      makeStoryMetrics({ storyId: "US-003", complexity: "complex", initialComplexity: "complex", finalTier: "powerful" }),
    );

    expect(complexityAccuracy["simple"].predicted).toBe(2);
    expect(complexityAccuracy["complex"].predicted).toBe(1);
  });
});
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Metrics Tracker — RRP-002: initialComplexity in StoryMetrics
|
|
3
|
+
*
|
|
4
|
+
* AC-4: StoryMetrics gains initialComplexity?: string field
|
|
5
|
+
* AC-5: collectStoryMetrics() reads story.routing.initialComplexity,
|
|
6
|
+
* falls back to routing.complexity for backward compat
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { describe, expect, test } from "bun:test";
|
|
10
|
+
import { DEFAULT_CONFIG } from "../../../src/config/defaults";
|
|
11
|
+
import type { NaxConfig } from "../../../src/config";
|
|
12
|
+
import type { PipelineContext } from "../../../src/pipeline/types";
|
|
13
|
+
import type { PRD, UserStory } from "../../../src/prd";
|
|
14
|
+
import type { StoryRouting } from "../../../src/prd/types";
|
|
15
|
+
import { collectStoryMetrics } from "../../../src/metrics/tracker";
|
|
16
|
+
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
// Helpers
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
|
|
21
|
+
/** Build a passed, single-attempt UserStory fixture; `overrides` replaces any field. */
function makeStory(overrides?: Partial<UserStory>): UserStory {
  const defaults: UserStory = {
    id: "US-001",
    title: "Test Story",
    description: "Test description",
    acceptanceCriteria: [],
    tags: [],
    dependencies: [],
    status: "passed",
    passes: true,
    escalations: [],
    attempts: 1,
  };
  return { ...defaults, ...overrides };
}
|
|
36
|
+
|
|
37
|
+
/** Build a PRD fixture that contains exactly the given story. */
function makePRD(story: UserStory): PRD {
  const timestamp = (): string => new Date().toISOString();

  return {
    project: "test-project",
    feature: "test-feature",
    branchName: "feat/test",
    createdAt: timestamp(),
    updatedAt: timestamp(),
    userStories: [story],
  };
}
|
|
47
|
+
|
|
48
|
+
/** Return a shallow copy of DEFAULT_CONFIG so a test never shares the top-level object. */
function makeConfig(): NaxConfig {
  return Object.assign({}, DEFAULT_CONFIG);
}
|
|
51
|
+
|
|
52
|
+
/**
 * Build a partial PipelineContext fixture for the given story.
 *
 * `routingOverrides` is merged over a default "medium"/"balanced"/"test-after"
 * routing result. Only some context fields are populated, hence the cast.
 */
function makeCtx(story: UserStory, routingOverrides?: Partial<PipelineContext["routing"]>): PipelineContext {
  const routing = {
    complexity: "medium",
    modelTier: "balanced",
    testStrategy: "test-after",
    reasoning: "test",
    ...routingOverrides,
  };

  const agentResult = {
    success: true,
    output: "",
    estimatedCost: 0.01,
    durationMs: 5000,
  };

  const ctx = {
    config: makeConfig(),
    prd: makePRD(story),
    story,
    stories: [story],
    routing,
    workdir: "/tmp/nax-tracker-test",
    hooks: { hooks: {} },
    agentResult,
  };

  return ctx as unknown as PipelineContext;
}
|
|
75
|
+
|
|
76
|
+
// ---------------------------------------------------------------------------
|
|
77
|
+
// AC-5: collectStoryMetrics reads initialComplexity from story.routing
|
|
78
|
+
// ---------------------------------------------------------------------------
|
|
79
|
+
|
|
80
|
+
describe("collectStoryMetrics - initialComplexity field", () => {
  /** Collect metrics for `story`, with the pipeline routing overridden by `ctxRouting`. */
  const collect = (story: UserStory, ctxRouting: Partial<PipelineContext["routing"]>) =>
    collectStoryMetrics(makeCtx(story, ctxRouting), new Date().toISOString());

  test("includes initialComplexity from story.routing.initialComplexity", () => {
    const storyRouting: StoryRouting = {
      complexity: "medium",
      initialComplexity: "simple", // original prediction before potential escalation
      testStrategy: "test-after",
      reasoning: "test",
    };

    const result = collect(makeStory({ routing: storyRouting }), { complexity: "medium" });

    expect(result.initialComplexity).toBe("simple");
  });

  test("initialComplexity differs from complexity when story was escalated", () => {
    const storyRouting: StoryRouting = {
      complexity: "medium", // complexity as classified
      initialComplexity: "simple", // original first-classify prediction
      modelTier: "powerful", // escalated tier
      testStrategy: "three-session-tdd",
      reasoning: "escalated",
    };
    const escalated = makeStory({
      routing: storyRouting,
      escalations: [
        {
          fromTier: "balanced",
          toTier: "powerful",
          reason: "test failure",
          timestamp: new Date().toISOString(),
        },
      ],
      attempts: 2,
    });

    const result = collect(escalated, { complexity: "medium", modelTier: "balanced" });

    expect(result.initialComplexity).toBe("simple");
    // complexity field unchanged (backward compat)
    expect(result.complexity).toBe("medium");
  });

  test("falls back to routing.complexity when story.routing.initialComplexity is absent", () => {
    // Backward compat: story.routing exists but carries no initialComplexity.
    const storyRouting: StoryRouting = {
      complexity: "complex",
      testStrategy: "three-session-tdd",
      reasoning: "legacy routing",
    };

    const result = collect(makeStory({ routing: storyRouting }), { complexity: "complex" });

    expect(result.initialComplexity).toBe("complex");
  });

  test("falls back to routing.complexity when story.routing is undefined", () => {
    const result = collect(makeStory({ routing: undefined }), { complexity: "simple" });

    expect(result.initialComplexity).toBe("simple");
  });
});
|
|
150
|
+
|
|
151
|
+
// ---------------------------------------------------------------------------
|
|
152
|
+
// AC-4: StoryMetrics type has initialComplexity?: string
|
|
153
|
+
// ---------------------------------------------------------------------------
|
|
154
|
+
|
|
155
|
+
describe("StoryMetrics type - initialComplexity field", () => {
  /** Collect metrics for a story with `storyRouting`, using the same complexity for the pipeline routing. */
  const metricsFor = (storyRouting: StoryRouting) =>
    collectStoryMetrics(
      makeCtx(makeStory({ routing: storyRouting }), { complexity: storyRouting.complexity }),
      new Date().toISOString(),
    );

  test("StoryMetrics includes initialComplexity field", () => {
    const result = metricsFor({
      complexity: "medium",
      initialComplexity: "simple",
      testStrategy: "test-after",
      reasoning: "test",
    });

    // TypeScript will error at compile time if initialComplexity is not on StoryMetrics
    expect("initialComplexity" in result).toBe(true);
  });

  test("initialComplexity is a string when present", () => {
    const result = metricsFor({
      complexity: "expert",
      initialComplexity: "expert",
      testStrategy: "three-session-tdd",
      reasoning: "test",
    });

    expect(typeof result.initialComplexity).toBe("string");
  });
});
|