@nathapp/nax 0.25.0 → 0.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,164 @@
1
+ /**
2
+ * Metrics Aggregator — RRP-002: complexityAccuracy uses initialComplexity
3
+ *
4
+ * AC-6: calculateAggregateMetrics complexityAccuracy compares
5
+ * initialComplexity (predicted) vs finalTier (actual), not
6
+ * complexity (which may reflect post-escalation state).
7
+ */
8
+
9
+ import { describe, expect, test } from "bun:test";
10
+ import { calculateAggregateMetrics } from "../../../src/metrics/aggregator";
11
+ import type { RunMetrics, StoryMetrics } from "../../../src/metrics/types";
12
+
13
+ // ---------------------------------------------------------------------------
14
+ // Helpers
15
+ // ---------------------------------------------------------------------------
16
+
17
/**
 * Builds a StoryMetrics fixture for aggregator tests.
 *
 * Starts from a fixed set of defaults and lets the caller replace any of
 * them; only `storyId` has to be supplied.
 *
 * @param overrides - Fields to replace on the default fixture (must include `storyId`).
 * @returns A StoryMetrics object with the overrides applied last.
 */
function makeStoryMetrics(overrides: Partial<StoryMetrics> & { storyId: string }): StoryMetrics {
  const defaults: StoryMetrics = {
    storyId: overrides.storyId,
    complexity: "medium",
    modelTier: "balanced",
    modelUsed: "claude-sonnet-4-5",
    attempts: 1,
    finalTier: "balanced",
    success: true,
    cost: 0.01,
    durationMs: 5000,
    firstPassSuccess: true,
    startedAt: "2026-01-01T00:00:00Z",
    completedAt: "2026-01-01T00:00:05Z",
  };

  // Caller-provided values win over every default.
  return { ...defaults, ...overrides };
}
34
+
35
/**
 * Wraps a list of story metrics in a RunMetrics fixture.
 *
 * Cost and completed/failed counts are derived from the given stories; the
 * run id, feature name, timestamps and total duration are fixed values.
 *
 * @param stories - Story metrics that belong to the run.
 * @returns A RunMetrics object describing a single run.
 */
function makeRun(stories: StoryMetrics[]): RunMetrics {
  let totalCost = 0;
  let storiesCompleted = 0;

  for (const entry of stories) {
    totalCost += entry.cost;
    if (entry.success) {
      storiesCompleted += 1;
    }
  }

  return {
    runId: "run-001",
    feature: "test-feature",
    startedAt: "2026-01-01T00:00:00Z",
    completedAt: "2026-01-01T00:01:00Z",
    totalCost,
    totalStories: stories.length,
    storiesCompleted,
    // Every story that is not counted as completed is counted as failed.
    storiesFailed: stories.length - storiesCompleted,
    totalDurationMs: 60000,
    stories,
  };
}
49
+
50
+ // ---------------------------------------------------------------------------
51
+ // AC-6: complexityAccuracy uses initialComplexity as predicted complexity
52
+ // ---------------------------------------------------------------------------
53
+
54
// Suite for AC-6: the tests expect complexityAccuracy to be bucketed by
// initialComplexity when a story has it, and by complexity otherwise.
describe("calculateAggregateMetrics - complexityAccuracy uses initialComplexity", () => {
  /** Aggregates a single run made of `stories` and returns its complexityAccuracy map. */
  const accuracyOf = (stories: StoryMetrics[]) =>
    calculateAggregateMetrics([makeRun(stories)]).complexityAccuracy;

  /** Story first predicted as "simple" that ended on the "powerful" tier after a second attempt. */
  const makeEscalatedStory = (): StoryMetrics =>
    makeStoryMetrics({
      storyId: "US-001",
      complexity: "medium", // value recorded after escalation
      initialComplexity: "simple", // first prediction
      modelTier: "fast",
      finalTier: "powerful",
      attempts: 2,
      firstPassSuccess: false,
    });

  test("complexityAccuracy keyed by initialComplexity when present", () => {
    const accuracy = accuracyOf([makeEscalatedStory()]);

    // The bucket must be "simple" (initialComplexity), not "medium" (complexity).
    expect(accuracy["simple"]).toBeDefined();
    expect(accuracy["medium"]).toBeUndefined();
  });

  test("mismatch detected when initialComplexity tier != finalTier", () => {
    const accuracy = accuracyOf([makeEscalatedStory()]);

    // Predicted "simple" but finished on "powerful": a mismatch is expected.
    expect(accuracy["simple"].mismatchRate).toBeGreaterThan(0);
  });

  test("no mismatch when initialComplexity tier matches finalTier", () => {
    const matchingStory = makeStoryMetrics({
      storyId: "US-001",
      complexity: "medium",
      initialComplexity: "medium",
      modelTier: "balanced",
      finalTier: "balanced",
      attempts: 1,
      firstPassSuccess: true,
    });

    const accuracy = accuracyOf([matchingStory]);

    expect(accuracy["medium"].mismatchRate).toBe(0);
  });

  test("falls back to complexity when initialComplexity is absent (backward compat)", () => {
    // Older metrics were recorded without initialComplexity.
    const storyWithoutInitial = makeStoryMetrics({
      storyId: "US-001",
      complexity: "complex",
      modelTier: "powerful",
      finalTier: "powerful",
    });

    const accuracy = accuracyOf([storyWithoutInitial]);

    // With nothing else to go on, the bucket is the complexity value.
    expect(accuracy["complex"]).toBeDefined();
  });

  test("mixes initialComplexity-keyed and legacy entries correctly", () => {
    const withInitial = makeStoryMetrics({
      storyId: "US-001",
      complexity: "medium",
      initialComplexity: "simple",
      modelTier: "balanced",
      finalTier: "balanced",
    });
    const withoutInitial = makeStoryMetrics({
      storyId: "US-002",
      complexity: "complex",
      modelTier: "powerful",
      finalTier: "powerful",
    });

    const accuracy = accuracyOf([withInitial, withoutInitial]);

    expect(accuracy["simple"]).toBeDefined(); // bucket taken from initialComplexity
    expect(accuracy["complex"]).toBeDefined(); // bucket taken from the complexity fallback
    expect(accuracy["medium"]).toBeUndefined(); // complexity ignored when initialComplexity exists
  });

  test("complexityAccuracy.predicted count matches number of stories with that initialComplexity", () => {
    const accuracy = accuracyOf([
      makeStoryMetrics({ storyId: "US-001", complexity: "medium", initialComplexity: "simple", finalTier: "balanced" }),
      makeStoryMetrics({ storyId: "US-002", complexity: "medium", initialComplexity: "simple", finalTier: "balanced" }),
      makeStoryMetrics({ storyId: "US-003", complexity: "complex", initialComplexity: "complex", finalTier: "powerful" }),
    ]);

    expect(accuracy["simple"].predicted).toBe(2);
    expect(accuracy["complex"].predicted).toBe(1);
  });
});
@@ -0,0 +1,186 @@
1
+ /**
2
+ * Metrics Tracker — RRP-002: initialComplexity in StoryMetrics
3
+ *
4
+ * AC-4: StoryMetrics gains initialComplexity?: string field
5
+ * AC-5: collectStoryMetrics() reads story.routing.initialComplexity,
6
+ * falls back to routing.complexity for backward compat
7
+ */
8
+
9
+ import { describe, expect, test } from "bun:test";
10
+ import { DEFAULT_CONFIG } from "../../../src/config/defaults";
11
+ import type { NaxConfig } from "../../../src/config";
12
+ import type { PipelineContext } from "../../../src/pipeline/types";
13
+ import type { PRD, UserStory } from "../../../src/prd";
14
+ import type { StoryRouting } from "../../../src/prd/types";
15
+ import { collectStoryMetrics } from "../../../src/metrics/tracker";
16
+
17
+ // ---------------------------------------------------------------------------
18
+ // Helpers
19
+ // ---------------------------------------------------------------------------
20
+
21
/**
 * Builds a UserStory fixture that is already marked as passed after one attempt.
 *
 * @param overrides - Optional fields that replace the defaults.
 * @returns The default story with the overrides applied last.
 */
function makeStory(overrides?: Partial<UserStory>): UserStory {
  const defaults: UserStory = {
    id: "US-001",
    title: "Test Story",
    description: "Test description",
    acceptanceCriteria: [],
    tags: [],
    dependencies: [],
    status: "passed",
    passes: true,
    escalations: [],
    attempts: 1,
  };

  // Spreading `undefined` adds nothing, so a missing argument leaves the defaults untouched.
  return { ...defaults, ...overrides };
}
36
+
37
/**
 * Builds a PRD fixture that contains exactly one user story.
 *
 * @param story - The story to place in `userStories`.
 * @returns A PRD whose created/updated timestamps are taken at call time.
 */
function makePRD(story: UserStory): PRD {
  // Two separate clock reads, one per timestamp field.
  const createdAt = new Date().toISOString();
  const updatedAt = new Date().toISOString();

  const prd: PRD = {
    project: "test-project",
    feature: "test-feature",
    branchName: "feat/test",
    createdAt,
    updatedAt,
    userStories: [story],
  };
  return prd;
}
47
+
48
/**
 * Returns a shallow copy of DEFAULT_CONFIG so a test can reassign top-level
 * keys without touching the shared default object.
 */
function makeConfig(): NaxConfig {
  const config: NaxConfig = { ...DEFAULT_CONFIG };
  return config;
}
51
+
52
/**
 * Builds a PipelineContext fixture around a single story.
 *
 * @param story - Story used as both `story` and the only entry of `stories`.
 * @param routingOverrides - Optional routing fields that replace the default routing values.
 * @returns The assembled context, force-cast to PipelineContext.
 */
function makeCtx(story: UserStory, routingOverrides?: Partial<PipelineContext["routing"]>): PipelineContext {
  // Default routing decision; caller overrides are applied on top.
  const routing = {
    complexity: "medium",
    modelTier: "balanced",
    testStrategy: "test-after",
    reasoning: "test",
    ...routingOverrides,
  };

  // Successful agent outcome with a fixed cost and duration.
  const agentResult = {
    success: true,
    output: "",
    estimatedCost: 0.01,
    durationMs: 5000,
  };

  const ctx = {
    config: makeConfig(),
    prd: makePRD(story),
    story,
    stories: [story],
    routing,
    workdir: "/tmp/nax-tracker-test",
    hooks: { hooks: {} },
    agentResult,
  };

  // The fixture is cast through `unknown`, so its shape is not type-checked here.
  return ctx as unknown as PipelineContext;
}
75
+
76
+ // ---------------------------------------------------------------------------
77
+ // AC-5: collectStoryMetrics reads initialComplexity from story.routing
78
+ // ---------------------------------------------------------------------------
79
+
80
// Suite for AC-5: the tests expect collectStoryMetrics to report
// story.routing.initialComplexity and to use the context routing complexity
// when the story carries no initial value.
describe("collectStoryMetrics - initialComplexity field", () => {
  /** Builds a context for `story` and collects its metrics with "now" as the start time. */
  const collect = (story: UserStory, routingOverrides?: Partial<PipelineContext["routing"]>) =>
    collectStoryMetrics(makeCtx(story, routingOverrides), new Date().toISOString());

  test("includes initialComplexity from story.routing.initialComplexity", () => {
    const storyRouting: StoryRouting = {
      complexity: "medium",
      initialComplexity: "simple", // prediction made before any escalation
      testStrategy: "test-after",
      reasoning: "test",
    };

    const collected = collect(makeStory({ routing: storyRouting }), { complexity: "medium" });

    expect(collected.initialComplexity).toBe("simple");
  });

  test("initialComplexity differs from complexity when story was escalated", () => {
    const storyRouting: StoryRouting = {
      complexity: "medium", // current classification
      initialComplexity: "simple", // first classification
      modelTier: "powerful", // tier after escalation
      testStrategy: "three-session-tdd",
      reasoning: "escalated",
    };
    const escalatedStory = makeStory({
      routing: storyRouting,
      escalations: [
        {
          fromTier: "balanced",
          toTier: "powerful",
          reason: "test failure",
          timestamp: new Date().toISOString(),
        },
      ],
      attempts: 2,
    });

    const collected = collect(escalatedStory, { complexity: "medium", modelTier: "balanced" });

    expect(collected.initialComplexity).toBe("simple");
    // The pre-existing complexity field still reports "medium".
    expect(collected.complexity).toBe("medium");
  });

  test("falls back to routing.complexity when story.routing.initialComplexity is absent", () => {
    // story.routing exists here but has no initialComplexity.
    const legacyRouting: StoryRouting = {
      complexity: "complex",
      testStrategy: "three-session-tdd",
      reasoning: "legacy routing",
    };

    const collected = collect(makeStory({ routing: legacyRouting }), { complexity: "complex" });

    expect(collected.initialComplexity).toBe("complex");
  });

  test("falls back to routing.complexity when story.routing is undefined", () => {
    const unroutedStory = makeStory({ routing: undefined });

    const collected = collect(unroutedStory, { complexity: "simple" });

    expect(collected.initialComplexity).toBe("simple");
  });
});
150
+
151
+ // ---------------------------------------------------------------------------
152
+ // AC-4: StoryMetrics type has initialComplexity?: string
153
+ // ---------------------------------------------------------------------------
154
+
155
// Suite for AC-4: checks that the object returned by collectStoryMetrics
// exposes initialComplexity, at the type level and at runtime.
describe("StoryMetrics type - initialComplexity field", () => {
  test("StoryMetrics includes initialComplexity field", () => {
    const routing: StoryRouting = {
      complexity: "medium",
      initialComplexity: "simple",
      testStrategy: "test-after",
      reasoning: "test",
    };
    const story = makeStory({ routing });
    const ctx = makeCtx(story, { complexity: "medium" });

    const metrics = collectStoryMetrics(ctx, new Date().toISOString());

    // The `in` operator is never a compile error, so it cannot guard the type.
    // This typed property read is what fails type-checking if the field is
    // removed from the metrics type.
    const initialComplexity: string | undefined = metrics.initialComplexity;

    expect("initialComplexity" in metrics).toBe(true);
    expect(initialComplexity).toBe("simple");
  });

  test("initialComplexity is a string when present", () => {
    const routing: StoryRouting = {
      complexity: "expert",
      initialComplexity: "expert",
      testStrategy: "three-session-tdd",
      reasoning: "test",
    };
    const story = makeStory({ routing });
    const ctx = makeCtx(story, { complexity: "expert" });

    const metrics = collectStoryMetrics(ctx, new Date().toISOString());

    expect(typeof metrics.initialComplexity).toBe("string");
  });
});
@@ -0,0 +1,139 @@
1
+ /**
2
+ * Routing Stage — RRP-001: Idempotence and Dependencies
3
+ *
4
+ * AC-4: Tests for idempotent persistence and dependency exposure.
5
+ */
6
+
7
+ import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
8
+ import { DEFAULT_CONFIG } from "../../../../src/config/defaults";
9
+ import type { NaxConfig } from "../../../../src/config";
10
+ import type { PRD, UserStory } from "../../../../src/prd";
11
+ import type { PipelineContext } from "../../../../src/pipeline/types";
12
+
13
+ // ---------------------------------------------------------------------------
14
+ // Helpers
15
+ // ---------------------------------------------------------------------------
16
+
17
/**
 * Builds a UserStory fixture that is still in progress and has not been attempted.
 *
 * @param overrides - Optional fields that replace the defaults.
 * @returns The default story with the overrides applied last.
 */
function makeStory(overrides?: Partial<UserStory>): UserStory {
  const defaults: UserStory = {
    id: "US-001",
    title: "Test Story",
    description: "Test description",
    acceptanceCriteria: [],
    tags: [],
    dependencies: [],
    status: "in-progress",
    passes: false,
    escalations: [],
    attempts: 0,
  };

  // Spreading `undefined` adds nothing, so a missing argument leaves the defaults untouched.
  return { ...defaults, ...overrides };
}
32
+
33
/**
 * Builds a PRD fixture that contains exactly one user story.
 *
 * @param story - The story to place in `userStories`.
 * @returns A PRD whose created/updated timestamps are taken at call time.
 */
function makePRD(story: UserStory): PRD {
  // Two separate clock reads, one per timestamp field.
  const createdAt = new Date().toISOString();
  const updatedAt = new Date().toISOString();

  const prd: PRD = {
    project: "test-project",
    feature: "test-feature",
    branchName: "feat/test",
    createdAt,
    updatedAt,
    userStories: [story],
  };
  return prd;
}
43
+
44
/**
 * Returns a copy of DEFAULT_CONFIG with `tdd.greenfieldDetection` set to false.
 *
 * The `tdd` section is copied before the flag is changed, so the shared
 * default object is not mutated.
 */
function makeConfig(): NaxConfig {
  const tdd = { ...DEFAULT_CONFIG.tdd, greenfieldDetection: false };
  return { ...DEFAULT_CONFIG, tdd };
}
53
+
54
/**
 * Builds a PipelineContext fixture (plus `prdPath`) around a single story.
 *
 * @param story - Story used as `story`, as the only entry of `stories`, and inside the PRD.
 * @param overrides - Optional top-level context fields that replace the defaults.
 * @returns The assembled context with the overrides applied last.
 */
function makeCtx(story: UserStory, overrides?: Partial<PipelineContext>): PipelineContext & { prdPath: string } {
  const ctx = {
    config: makeConfig(),
    prd: makePRD(story),
    story,
    stories: [story],
    // Default routing decision already present on the context.
    routing: {
      complexity: "simple",
      modelTier: "fast",
      testStrategy: "test-after",
      reasoning: "test",
    },
    workdir: "/tmp/nax-routing-test",
    hooks: { hooks: {} },
    prdPath: "/tmp/nax-routing-test/nax/prd.json",
    ...overrides,
  } as PipelineContext & { prdPath: string };

  return ctx;
}
73
+
74
/**
 * Routing decision handed back by the mocked `routeStory` in the tests below.
 * The first three fields are pinned to their literal types; `reasoning` is a
 * plain string.
 */
const FRESH_ROUTING_RESULT: {
  complexity: "medium";
  modelTier: "balanced";
  testStrategy: "three-session-tdd";
  reasoning: string;
} = {
  complexity: "medium",
  modelTier: "balanced",
  testStrategy: "three-session-tdd",
  reasoning: "classified by routeStory",
};
80
+
81
+ // ---------------------------------------------------------------------------
82
+ // AC-4: savePRD called once per story, not on every iteration
83
+ // ---------------------------------------------------------------------------
84
+
85
// Suite for AC-4: running the routing stage twice on the same context is
// expected to persist the PRD only once.
describe("routingStage - savePRD called exactly once per story (not per iteration)", () => {
  type RoutingDeps = typeof import("../../../../src/pipeline/stages/routing")["_routingDeps"];

  // Live deps object patched by the test, and a copy of its original members.
  // Keeping the live reference lets afterEach restore the exact object the
  // test modified without re-acquiring the module through `require`.
  let liveRoutingDeps: RoutingDeps | undefined;
  let origRoutingDeps: RoutingDeps | undefined;

  afterEach(() => {
    mock.restore();
    if (liveRoutingDeps && origRoutingDeps) {
      Object.assign(liveRoutingDeps, origRoutingDeps);
    }
    // Drop both references so a stale snapshot is never re-applied later.
    liveRoutingDeps = undefined;
    origRoutingDeps = undefined;
  });

  test("calling routingStage twice with routing already set only triggers savePRD once (first call)", async () => {
    const { routingStage, _routingDeps } = await import(
      "../../../../src/pipeline/stages/routing"
    );

    liveRoutingDeps = _routingDeps;
    origRoutingDeps = { ..._routingDeps };

    // Replace the stage's collaborators with mocks so nothing touches disk.
    const savePRDMock = mock((_prd: PRD, _path: string) => Promise.resolve());
    _routingDeps.routeStory = mock(() =>
      Promise.resolve({ ...FRESH_ROUTING_RESULT }),
    );
    _routingDeps.isGreenfieldStory = mock(() => Promise.resolve(false));
    _routingDeps.savePRD = savePRDMock;

    // First iteration: story.routing is undefined, so a save is expected.
    const story = makeStory({ routing: undefined });
    const ctx = makeCtx(story);

    await routingStage.execute(ctx as Parameters<typeof routingStage.execute>[0]);

    // Second iteration reuses the same context (simulating a resume); the
    // test expects no further save.
    await routingStage.execute(ctx as Parameters<typeof routingStage.execute>[0]);

    expect(savePRDMock).toHaveBeenCalledTimes(1);
  });
});
127
+
128
+ // ---------------------------------------------------------------------------
129
+ // Sanity: _routingDeps exposes savePRD (fail if not added to deps object)
130
+ // ---------------------------------------------------------------------------
131
+
132
// Sanity check: the injectable deps object must carry a savePRD function,
// otherwise the mocking in the idempotence test would have nothing to replace.
describe("routingStage - _routingDeps exposes savePRD", () => {
  test("_routingDeps has a savePRD function", async () => {
    const routingModule = await import("../../../../src/pipeline/stages/routing");
    const savePRDKind = typeof routingModule._routingDeps.savePRD;

    expect(savePRDKind).toBe("function");
  });
});
139
+ });