@nathapp/nax 0.25.0 → 0.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.gitlab-ci.yml CHANGED
@@ -60,6 +60,7 @@ release:
60
60
  NPM_RELEASE_TOKEN: $NPM_TOKEN
61
61
  before_script:
62
62
  - apk add --no-cache git
63
+ - git config --global safe.directory '*'
63
64
  - git config --global user.name "$GITLAB_USER_NAME"
64
65
  - git config --global user.email "$GITLAB_USER_EMAIL"
65
66
  script:
package/CLAUDE.md CHANGED
@@ -1,6 +1,16 @@
1
1
  # nax — AI Coding Agent Orchestrator
2
2
 
3
- Bun + TypeScript CLI that orchestrates AI coding agents with model routing, TDD strategies, and lifecycle hooks.
3
+ Bun + TypeScript CLI that orchestrates AI coding agents (Claude Code) with model-tier routing, TDD strategies, plugin hooks, and a Central Run Registry.
4
+
5
+ ## Tech Stack
6
+
7
+ | Layer | Choice |
8
+ |:------|:-------|
9
+ | Runtime | **Bun 1.3.7+** — Bun-native APIs only, no Node.js equivalents |
10
+ | Language | **TypeScript strict** — no `any` without explicit justification |
11
+ | Test | **`bun:test`** — describe/test/expect |
12
+ | Lint/Format | **Biome** (`bun run lint`) |
13
+ | Build | `bun run build` |
4
14
 
5
15
  ## Git Identity
6
16
 
@@ -11,14 +21,21 @@ git config user.email "subrina8080@outlook.com"
11
21
 
12
22
  ## Commands
13
23
 
14
- ```bash
15
- bun test # Full test suite
16
- bun test test/unit/foo.test.ts # Specific file
17
- bun run typecheck # tsc --noEmit
18
- bun run lint # Biome
19
- bun run build # Production build
20
- bun test && bun run typecheck # Pre-commit check
21
- ```
24
+ | Command | Purpose |
25
+ |:--------|:--------|
26
+ | `bun run typecheck` | tsc --noEmit |
27
+ | `bun run lint` | Biome |
28
+ | `bun test test/unit/foo.test.ts` | Targeted test during iteration |
29
+ | `NAX_SKIP_PRECHECK=1 bun test test/ --timeout=60000 --bail` | Full suite |
30
+
31
+ nax runs lint, typecheck, and tests automatically via the pipeline. Run these manually only when working outside a nax session.
32
+
33
+ ## Engineering Persona
34
+
35
+ - **Senior Engineer mindset**: check edge cases, null/undefined, race conditions, and error states.
36
+ - **TDD first**: write or update tests before implementation when the story calls for it.
37
+ - **Stuck rule**: if the same test fails 2+ iterations, stop, summarise failed attempts, reassess approach.
38
+ - **Never push to remote** — the human reviews and pushes.
22
39
 
23
40
  ## Architecture
24
41
 
@@ -33,67 +50,64 @@ Runner.run() [src/execution/runner.ts — thin orchestrator only]
33
50
  → registry.teardownAll()
34
51
  ```
35
52
 
36
- ### Key Directories
53
+ ### Key Source Directories
37
54
 
38
55
  | Directory | Purpose |
39
- |:---|:---|
40
- | `src/execution/` | Runner loop, agent adapters, TDD strategies |
41
- | `src/execution/lifecycle/` | Lifecycle hooks, startup/teardown |
42
- | `src/execution/escalation/` | Escalation logic on repeated failures |
43
- | `src/execution/acceptance/` | Acceptance-loop iteration |
44
- | `src/pipeline/stages/` | Pipeline stages |
45
- | `src/routing/` | Model routing — tier classification, router chain |
56
+ |:----------|:--------|
57
+ | `src/execution/` | Runner loop, agent adapters, escalation, lifecycle hooks |
58
+ | `src/execution/escalation/` | Tier escalation on repeated failures |
59
+ | `src/pipeline/stages/` | One file per pipeline stage |
60
+ | `src/pipeline/subscribers/` | Event-driven hooks (interaction, hooks.ts) |
61
+ | `src/routing/` | Model-tier routing — keyword, LLM, plugin chain |
62
+ | `src/routing/strategies/` | keyword.ts, llm.ts, llm-prompts.ts |
63
+ | `src/interaction/` | Interaction triggers + plugins (Auto, Telegram, Webhook) |
46
64
  | `src/plugins/` | Plugin system — loader, registry, validator |
47
- | `src/config/` | Config schema, loader (layered global + project) |
65
+ | `src/verification/` | Test execution, smart runner, scoped runner |
66
+ | `src/metrics/` | StoryMetrics, aggregator, tracker |
67
+ | `src/config/` | Config schema + layered loader (global → project) |
48
68
  | `src/agents/adapters/` | Agent integrations (Claude Code) |
49
- | `src/cli/` + `src/commands/` | CLI commands (check both locations) |
50
- | `src/verification/` | Test execution, smart test runner |
51
- | `src/review/` | Post-verify review (typecheck, lint, plugin reviewers) |
69
+ | `src/cli/` + `src/commands/` | CLI commands — check both locations |
70
+ | `src/prd/` | PRD types, loader, story state machine |
71
+ | `src/hooks/` | Lifecycle hook wiring |
72
+ | `src/constitution/` | Constitution loader + injection |
73
+ | `src/analyze/` | `nax analyze` — story classifier |
52
74
 
53
- ### Plugin System (4 extension points)
75
+ ### Plugin Extension Points
54
76
 
55
- | Extension | Interface | Integration Point |
56
- |:---|:---|:---|
57
- | Context Provider | `IContextProvider` | `context.ts` stage injects into prompts |
58
- | Reviewer | `IReviewer` | Review stage after built-in checks |
59
- | Reporter | `IReporter` | Runner onRunStart/onStoryComplete/onRunEnd |
60
- | Router | `IRoutingStrategy` | Router chain overrides model routing |
77
+ | Interface | Loaded By | Purpose |
78
+ |:----------|:----------|:--------|
79
+ | `IContextProvider` | `context.ts` stage | Inject context into agent prompts |
80
+ | `IReviewer` | Review stage | Post-verify quality checks |
81
+ | `IReporter` | Runner | onRunStart / onStoryComplete / onRunEnd events |
82
+ | `IRoutingStrategy` | Router chain | Override model-tier routing |
61
83
 
62
84
  ### Config
63
85
 
64
86
  - Global: `~/.nax/config.json` → Project: `<workdir>/nax/config.json`
65
- - Schema: `src/config/schema.ts` — no hardcoded flags or credentials
87
+ - Schema: `src/config/schema.ts` — no hardcoded flags or credentials anywhere
66
88
 
67
- ## Design Principles
89
+ ## Workflow Protocol
68
90
 
69
- - **`runner.ts` is a thin orchestrator.** Never add new concerns extract into focused sub-modules.
70
- - **`src/verification/` is the single test execution layer.** Don't duplicate test invocation in pipeline stages.
71
- - **Closures over values** for long-lived handlers (crash handlers, timers) — prevents stale state capture.
72
- - **New agent adapters** go in `src/agents/adapters/<name>.ts` — never inline in runner or existing adapters.
91
+ 1. **Explore first**: use `grep`, `cat`, and solograph MCP to understand context before writing code.
92
+ 2. **Plan complex tasks**: for multi-file changes, write a short plan before implementing.
93
+ 3. **Implement in small chunks**: one logical concern per commit.
73
94
 
74
- ## Rules
95
+ ## Code Intelligence (Solograph MCP)
75
96
 
76
- Detailed coding standards, test architecture, and forbidden patterns are in `.claude/rules/`. Claude Code loads these automatically.
97
+ Use **solograph** MCP tools on-demand — do not use `web_search` or `kb_search`.
77
98
 
99
+ | Tool | When |
100
+ |:-----|:-----|
101
+ | `project_code_search` | Find existing patterns before writing new code |
102
+ | `codegraph_explain` | Architecture overview before tackling unfamiliar areas |
103
+ | `codegraph_query` | Dependency/impact analysis (Cypher) |
104
+ | `project_code_reindex` | After creating or deleting source files |
78
105
 
79
- ## Code Intelligence (Solograph MCP)
106
+ ## Coding Standards & Forbidden Patterns
107
+
108
+ Full rules in `.claude/rules/` (loaded automatically):
80
109
 
81
- Use **solograph** MCP tools on-demand for code understanding. Do not use web_search, kb_search, or source_* tools.
82
-
83
- | Tool | When to use |
84
- |:-----|:------------|
85
- | `project_code_search` | Find existing patterns, symbols, or implementations before writing new code |
86
- | `codegraph_explain` | Get architecture overview of nax before tackling unfamiliar areas |
87
- | `codegraph_query` | Cypher queries — dependency analysis, impact analysis, hub files |
88
- | `codegraph_stats` | Quick graph stats (file/symbol counts) |
89
- | `codegraph_shared` | Find packages shared across projects |
90
- | `session_search` | Search prior Claude Code session history for relevant context |
91
- | `project_info` | Project registry info |
92
- | `project_code_reindex` | Reindex after creating or deleting source files, or major refactors |
93
-
94
- Single source of truth: VPS solograph instance (Mac01 tunnels to VPS — same data either way).
95
- ## IMPORTANT
96
-
97
- - Do NOT push to remote — let the human review and push.
98
- - Never hardcode API keys — agents use their own auth from env.
99
- - Agent adapters spawn external processes — always handle timeouts and cleanup.
110
+ - `01-project-conventions.md` — Bun-native APIs, 400-line limit, barrel imports, logging, commits
111
+ - `02-test-architecture.md` — directory mirroring, placement rules, file naming
112
+ - `03-test-writing.md` — `_deps` injection pattern, mock discipline, CI guards
113
+ - `04-forbidden-patterns.md` — banned APIs and test anti-patterns with alternatives
package/docs/ROADMAP.md CHANGED
@@ -135,19 +135,42 @@
135
135
 
136
136
  ---
137
137
 
138
- ## v0.25.0 — Trigger Completion
138
+ ## v0.27.0 — Review Quality ✅ Shipped (2026-03-08)
139
+
140
+ **Theme:** Fix review stage reliability — dirty working tree false-positive, stale precheck, dead config fields
141
+ **Status:** ✅ Shipped (2026-03-08)
142
+ **Spec:** `nax/features/review-quality/prd.json`
143
+
144
+ ### Stories
145
+ - [x] **RQ-001:** Assert clean working tree before running review typecheck/lint (BUG-049)
146
+ - [x] **RQ-002:** Fix `checkOptionalCommands` precheck to use correct config resolution path (BUG-050)
147
+ - [x] **RQ-003:** Consolidate dead `quality.commands.typecheck/lint` into review resolution chain (BUG-051)
148
+
149
+ ---
150
+
151
+ ## v0.26.0 — Routing Persistence ✅ Shipped (2026-03-08)
152
+
153
+ - **RRP-001:** Persist initial routing classification to `prd.json` on first classification
154
+ - **RRP-002:** Add `initialComplexity` to `StoryRouting` and `StoryMetrics` for accurate reporting
155
+ - **RRP-003:** Add `contentHash` to `StoryRouting` for staleness detection — stale cached routing is re-classified
156
+ - **RRP-004:** Unit tests for routing persistence, idempotence, staleness, content hash, metrics
157
+ - **BUG-052:** Replace `console.warn` with structured JSONL logger in `review/runner.ts` and `optimizer/index.ts`
158
+
159
+ ---
160
+
161
+ ## v0.25.0 — Trigger Completion ✅ Shipped (2026-03-07)
139
162
 
140
163
  **Theme:** Wire all 8 unwired interaction triggers, 3 missing hook events, and add plugin integration tests
141
- **Status:** 🔲 Planned
164
+ **Status:** ✅ Shipped (2026-03-07)
142
165
  **Spec:** [docs/specs/trigger-completion.md](specs/trigger-completion.md)
143
166
 
144
167
  ### Stories
145
- - [ ] **TC-001:** Wire `cost-exceeded` + `cost-warning` triggers — fire at 80%/100% of cost limit in sequential-executor.ts
146
- - [ ] **TC-002:** Wire `max-retries` trigger — fire on permanent story failure via `story:failed` event in wireInteraction
147
- - [ ] **TC-003:** Wire `security-review`, `merge-conflict`, `pre-merge` triggers — review rejection, git conflict detection, pre-completion gate
148
- - [ ] **TC-004:** Wire `story-ambiguity` + `review-gate` triggers — ambiguity keyword detection, per-story human checkpoint
149
- - [ ] **TC-005:** Wire missing hook events — `on-resume`, `on-session-end`, `on-error` to pipeline events
150
- - [ ] **TC-006:** Auto plugin + Telegram + Webhook integration tests — mock LLM/network, cover approve/reject/HMAC flows
168
+ - [x] **TC-001:** Wire `cost-exceeded` + `cost-warning` triggers — fire at 80%/100% of cost limit in sequential-executor.ts
169
+ - [x] **TC-002:** Wire `max-retries` trigger — fire on permanent story failure via `story:failed` event in wireInteraction
170
+ - [x] **TC-003:** Wire `security-review`, `merge-conflict`, `pre-merge` triggers — review rejection, git conflict detection, pre-completion gate
171
+ - [x] **TC-004:** Wire `story-ambiguity` + `review-gate` triggers — ambiguity keyword detection, per-story human checkpoint
172
+ - [x] **TC-005:** Wire missing hook events — `on-resume`, `on-session-end`, `on-error` to pipeline events
173
+ - [x] **TC-006:** Auto plugin + Telegram + Webhook integration tests — mock LLM/network, cover approve/reject/HMAC flows
151
174
 
152
175
  ---
153
176
 
@@ -239,6 +262,8 @@
239
262
 
240
263
  | Version | Theme | Date | Details |
241
264
  |:---|:---|:---|:---|
265
+ | v0.26.0 | Routing Persistence | 2026-03-08 | RRP-001–004: persist initial routing, initialComplexity, contentHash staleness detection, unit tests; BUG-052: structured logger in review/optimizer |
266
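+ | v0.25.0 | Trigger Completion | 2026-03-07 | TC-001–006: wire cost-exceeded/cost-warning, max-retries, security-review, merge-conflict, pre-merge, story-ambiguity, and review-gate triggers; wire on-resume/on-session-end/on-error hook events; Auto/Telegram/Webhook plugin integration tests |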
242
267
  | v0.24.0 | Central Run Registry | 2026-03-07 | CRR-000–003: events writer, registry, nax runs CLI, nax logs --run global resolution |
243
268
  | v0.23.0 | Status File Consolidation | 2026-03-07 | SFC-001–004: auto-write status.json, feature-level status, align readers, remove dead code; BUG-043/044: testScoped config + command logging |
244
269
  | v0.18.1 | Type Safety + CI Pipeline | 2026-03-03 | 60 TS errors + 12 lint errors fixed, GitLab CI green (1952/56/0) |
@@ -307,6 +332,11 @@
307
332
  - [x] ~~**BUG-038:** `smart-runner` over-matching when global defaults change. Fixed by FEAT-010 (v0.21.0) — per-attempt `storyGitRef` baseRef tracking; `git diff <baseRef>..HEAD` prevents cross-story file pollution.~~
308
333
  - [x] ~~**BUG-043:** Scoped test command appends files instead of replacing path — `runners.ts:scoped()` concatenates `scopedTestPaths` to full-suite command, resulting in `bun test test/ --timeout=60000 /path/to/file.ts` (runs everything). Fix: use `testScoped` config with `{{files}}` template, fall back to `buildSmartTestCommand()` heuristic. **Location:** `src/verification/runners.ts:scoped()`
309
334
  - [x] ~~**BUG-044:** Scoped/full-suite test commands not logged — no visibility into what command was actually executed during verify stage. Fix: log at info level before execution.
335
+ - [ ] **BUG-049:** Review typecheck runs on dirty working tree — false-positive pass when agent commits partial changes. If the agent stages only some files (e.g. forgets `git add types.ts`), the working tree retains the uncommitted fix and `bun run typecheck` passes — but the committed state has a type error. Discovered in routing-persistence run: RRP-003 committed `contentHash` refs in `routing.ts` without the matching `StoryRouting.contentHash` field in `types.ts`; typecheck passed because `types.ts` was locally modified but uncommitted. **Location:** `src/review/runner.ts:runCheck()`. **Fix:** Before running built-in checks, assert working tree is clean (`git diff --name-only` returns empty). If dirty, fail with "uncommitted changes detected" or log a warning and stash/restore.
336
+ - [ ] **BUG-050:** `checkOptionalCommands` precheck uses legacy config fields — misleading "not configured" warning. Checks `config.execution.lintCommand` and `config.execution.typecheckCommand` (stale/legacy fields). Actual config uses `config.review.commands.typecheck` and `config.review.commands.lint`. Result: precheck always warns "Optional commands not configured: lint, typecheck" even when correctly configured, desensitizing operators to real warnings. **Location:** `src/precheck/checks-warnings.ts:checkOptionalCommands()`. **Fix:** Update check to resolve via the same priority chain as `review/runner.ts`: `execution.*Command` → `review.commands.*` → `package.json` scripts.
337
+ - [x] **BUG-052:** `console.warn` in runtime pipeline code bypasses structured JSONL logger — invisible to log consumers. `src/review/runner.ts` and `src/optimizer/index.ts` used `console.warn()` for skip/fallback events, which print to stderr but are never written to the JSONL log file. This made review stage skip decisions invisible during post-run analysis. **Location:** `src/review/runner.ts:runReview()`, `src/optimizer/index.ts:resolveOptimizer()`. **Fix:** Replace with `getSafeLogger()?.warn()`. ✅ Fixed in feat/routing-persistence.
338
+ - [x] **BUG-052:** `console.warn` in runtime pipeline code bypasses structured JSONL logger — invisible to log consumers. `src/review/runner.ts` and `src/optimizer/index.ts` used `console.warn()` for skip/fallback events, which print to stderr but are never written to the JSONL log file. This made review stage skip decisions invisible during post-run analysis. **Location:** `src/review/runner.ts:runReview()`, `src/optimizer/index.ts:resolveOptimizer()`. **Fix:** Replace with `getSafeLogger()?.warn()`. ✅ Fixed in feat/routing-persistence.
339
+ - [ ] **BUG-051:** `quality.commands.typecheck` and `quality.commands.lint` are dead config — silently ignored. `QualityConfig.commands.{typecheck,lint}` exist in the type definition and are documented in `nax config --explain`, but are never read by any runtime code. The review runner reads only `review.commands.typecheck/lint`. Users who set `quality.commands.typecheck` get no effect. **Location:** `src/config/types.ts` (QualityConfig), `src/review/runner.ts:resolveCommand()`. **Fix:** Either (A) remove the dead fields from `QualityConfig` and update docs, or (B) consolidate — make review runner read from `quality.commands` and deprecate `review.commands`.
310
340
 
311
341
  ### Features
312
342
  - [x] ~~`nax unlock` command~~
@@ -0,0 +1,55 @@
1
+ {
2
+ "project": "nax-review-quality",
3
+ "branchName": "feat/review-quality",
4
+ "feature": "review-quality",
5
+ "updatedAt": "2026-03-08T03:03:00.000Z",
6
+ "userStories": [
7
+ {
8
+ "id": "RQ-001",
9
+ "title": "Assert clean working tree before running review typecheck/lint (BUG-049)",
10
+ "description": "The review stage runs bun run typecheck and bun run lint on the working tree, not the committed state. If the agent forgets to git add a file (e.g. types.ts with a new interface field), the uncommitted change is still on disk, typecheck passes against the local working tree, but the committed code has a type error. This was observed in the routing-persistence run: RRP-003 committed contentHash refs in routing.ts without the matching StoryRouting.contentHash field in types.ts — typecheck passed because types.ts was locally modified but not staged. Fix: before running built-in checks in review/runner.ts, assert that the working tree has no uncommitted changes to tracked files (git diff --name-only HEAD returns empty). If dirty, fail the review with a clear message listing the uncommitted files so the agent can stage and commit them.",
11
+ "acceptanceCriteria": [
12
+ "Before running typecheck or lint in runReview(), call git diff --name-only HEAD (covers both staged and unstaged tracked-file changes)",
13
+ "If output is non-empty, return a ReviewResult with success: false and failureReason listing the uncommitted files",
14
+ "Log at warn level via getSafeLogger() with stage 'review' and message 'Uncommitted changes detected before review: <files>'",
15
+ "If working tree is clean, proceed with typecheck/lint as before — no regression for normal flow",
16
+ "Unit tests: dirty working tree (mock git diff) returns review failure before running typecheck; clean working tree allows typecheck to run normally",
17
+ "Unit tests: untracked files only (git diff HEAD returns empty) — review proceeds since only tracked changes matter"
18
+ ],
19
+ "complexity": "simple",
20
+ "status": "pending",
21
+ "tags": ["bug", "review", "typecheck"]
22
+ },
23
+ {
24
+ "id": "RQ-002",
25
+ "title": "Fix checkOptionalCommands precheck to use correct config resolution path (BUG-050)",
26
+ "description": "The precheck check checkOptionalCommands() in src/precheck/checks-warnings.ts checks config.execution.lintCommand and config.execution.typecheckCommand — these are legacy fields that no longer exist in the current config schema. The actual runtime resolution chain used by review/runner.ts is: (1) execution.typecheckCommand, (2) review.commands.typecheck, (3) package.json scripts. As a result, the precheck always warns 'Optional commands not configured: lint, typecheck' even when review.commands.typecheck and review.commands.lint are properly set. Fix: update checkOptionalCommands() to resolve via the same priority chain as review/runner.ts:resolveCommand().",
27
+ "acceptanceCriteria": [
28
+ "checkOptionalCommands() resolves typecheck via: execution.typecheckCommand -> review.commands.typecheck -> package.json typecheck script",
29
+ "checkOptionalCommands() resolves lint via: execution.lintCommand -> review.commands.lint -> package.json lint script",
30
+ "If config.review.commands.typecheck is set, precheck passes with no warning",
31
+ "If neither execution field, review.commands, nor package.json script exists, precheck still warns 'not configured'",
32
+ "Unit tests: config with review.commands.typecheck set -> check passes; config with neither -> check warns; config with package.json script -> check passes"
33
+ ],
34
+ "complexity": "simple",
35
+ "status": "pending",
36
+ "tags": ["bug", "precheck", "config"]
37
+ },
38
+ {
39
+ "id": "RQ-003",
40
+ "title": "Consolidate dead quality.commands.typecheck/lint into review resolution chain (BUG-051)",
41
+ "description": "QualityConfig.commands.typecheck and QualityConfig.commands.lint are declared in src/config/types.ts and documented in nax config --explain, but are never read by runtime code. The review runner reads only review.commands.typecheck/lint. Fix: make review/runner.ts:resolveCommand() also check quality.commands as a fallback after review.commands and before package.json. This gives quality.commands.typecheck semantic meaning without a breaking change. Do NOT remove the fields from QualityConfig — backward compatibility.",
42
+ "acceptanceCriteria": [
43
+ "review/runner.ts:resolveCommand() priority chain for typecheck: (1) execution.typecheckCommand, (2) review.commands.typecheck, (3) quality.commands.typecheck, (4) package.json typecheck script",
44
+ "review/runner.ts:resolveCommand() priority chain for lint: (1) execution.lintCommand, (2) review.commands.lint, (3) quality.commands.lint, (4) package.json lint script",
45
+ "Setting quality.commands.typecheck in config.json now correctly runs that command in the review stage",
46
+ "review.commands.typecheck still takes precedence over quality.commands.typecheck when both are set",
47
+ "CLI config --explain description for quality.commands.typecheck updated to note it is used as fallback in review stage",
48
+ "Unit tests: quality.commands.typecheck set with review.commands.typecheck absent -> quality command used; both set -> review command takes precedence"
49
+ ],
50
+ "complexity": "simple",
51
+ "status": "pending",
52
+ "tags": ["bug", "config", "review"]
53
+ }
54
+ ]
55
+ }
@@ -0,0 +1,104 @@
1
+ {
2
+ "project": "nax-routing-persistence",
3
+ "branchName": "feat/routing-persistence",
4
+ "feature": "routing-persistence",
5
+ "userStories": [
6
+ {
7
+ "id": "RRP-001",
8
+ "title": "Persist initial routing to prd.json on first classification",
9
+ "description": "Currently, when nax run classifies a story for the first time (no prior nax analyze, story.routing is undefined), the result lives only in ctx.routing (in-memory). If the run crashes and resumes, the routing stage re-classifies fresh — LLM may return different complexity/testStrategy, causing silent inconsistency mid-feature. Fix: after fresh classification in routing.ts, write the result back to prd.json via savePRD so story.routing is populated from the very first iteration.",
10
+ "acceptanceCriteria": [
11
+ "When story.routing is undefined before routing stage, after classification story.routing is written to prd.json",
12
+ "Subsequent iterations (or resume after crash) use the persisted story.routing — no re-classification",
13
+ "Escalation still overwrites modelTier and testStrategy as before — only initialComplexity is protected",
14
+ "savePRD is called once per story on first classification (not on every iteration if already persisted)",
15
+ "Unit tests verify prd.json is updated after first routing stage execution"
16
+ ],
17
+ "complexity": "medium",
18
+ "status": "passed",
19
+ "tags": [],
20
+ "dependencies": [],
21
+ "escalations": [],
22
+ "attempts": 0,
23
+ "priorErrors": [],
24
+ "priorFailures": [],
25
+ "storyPoints": 1,
26
+ "passes": true
27
+ },
28
+ {
29
+ "id": "RRP-002",
30
+ "title": "Add initialComplexity to StoryRouting and StoryMetrics for accurate reporting",
31
+ "description": "StoryMetrics.complexity currently captures ctx.routing.complexity at completion time — which may reflect a post-escalation re-classification, not the original prediction. Add story.routing.initialComplexity (written once at first classify, never overwritten) and StoryMetrics.initialComplexity. Update metrics/aggregator.ts complexityAccuracy to compare initialComplexity vs finalTier instead of current complexity vs finalTier.",
32
+ "acceptanceCriteria": [
33
+ "StoryRouting interface gains initialComplexity?: Complexity field",
34
+ "Routing stage writes initialComplexity when story.routing is first created (RRP-001 path)",
35
+ "Escalation path never overwrites initialComplexity — only modelTier and testStrategy change",
36
+ "StoryMetrics gains initialComplexity?: string field",
37
+ "collectStoryMetrics() reads initialComplexity from story.routing.initialComplexity (falls back to routing.complexity for backward compat)",
38
+ "metrics/aggregator.ts complexityAccuracy uses initialComplexity for predicted vs finalTier comparison",
39
+ "Unit tests verify initialComplexity is set on first classify and unchanged after escalation"
40
+ ],
41
+ "complexity": "medium",
42
+ "status": "pending",
43
+ "tags": [],
44
+ "dependencies": [
45
+ "RRP-001"
46
+ ],
47
+ "escalations": [],
48
+ "attempts": 0,
49
+ "priorErrors": [],
50
+ "priorFailures": [],
51
+ "storyPoints": 1
52
+ },
53
+ {
54
+ "id": "RRP-003",
55
+ "title": "Add contentHash to StoryRouting for staleness detection (BUG-048)",
56
+ "description": "When nax analyze is run, it writes story.routing to prd.json. If the story is subsequently edited (more ACs, changed tags, updated description), nax run blindly trusts the existing routing — wrong complexity, wrong testStrategy. Fix: add story.routing.contentHash — a hash of title+description+acceptanceCriteria.join()+tags.join() written at classify time. Routing stage recomputes hash on each run; if mismatch, treat as cache miss and re-classify.",
57
+ "acceptanceCriteria": [
58
+ "StoryRouting interface gains contentHash?: string field",
59
+ "A helper function computeStoryContentHash(story: UserStory): string computes a hash of title+description+ACs+tags",
60
+ "Routing stage: if story.routing exists but contentHash is missing or mismatches current story content, re-classify (treat as cache miss)",
61
+ "Routing stage: after classification, write contentHash to story.routing",
62
+ "If story content unchanged, routing stage uses cached routing as before — no regression",
63
+ "Unit tests cover: hash match uses cache; hash mismatch re-classifies; missing hash re-classifies"
64
+ ],
65
+ "complexity": "medium",
66
+ "status": "pending",
67
+ "tags": [],
68
+ "dependencies": [
69
+ "RRP-001"
70
+ ],
71
+ "escalations": [],
72
+ "attempts": 0,
73
+ "priorErrors": [],
74
+ "priorFailures": [],
75
+ "storyPoints": 1
76
+ },
77
+ {
78
+ "id": "RRP-004",
79
+ "title": "Integration tests: routing persistence across simulated crash-resume and staleness",
80
+ "description": "Write integration tests that verify routing persistence end-to-end: (1) first run classifies and persists story.routing to prd.json, (2) second run uses persisted routing without re-classifying, (3) escalation preserves initialComplexity, (4) story content change triggers re-classification via contentHash mismatch.",
81
+ "acceptanceCriteria": [
82
+ "Integration test: routing stage with story.routing=undefined writes story.routing to prd.json after classification",
83
+ "Integration test: routing stage re-run with same prd.json uses cached routing — no LLM call made",
84
+ "Integration test: escalation updates modelTier in prd.json but initialComplexity remains unchanged",
85
+ "Integration test: edit story content after routing — hash mismatch detected — routing stage re-classifies",
86
+ "Integration test: story.routing with matching contentHash — no re-classification (cache hit confirmed)"
87
+ ],
88
+ "complexity": "medium",
89
+ "status": "pending",
90
+ "tags": [],
91
+ "dependencies": [
92
+ "RRP-001",
93
+ "RRP-002",
94
+ "RRP-003"
95
+ ],
96
+ "escalations": [],
97
+ "attempts": 0,
98
+ "priorErrors": [],
99
+ "priorFailures": [],
100
+ "storyPoints": 1
101
+ }
102
+ ],
103
+ "updatedAt": "2026-03-07T16:32:39.496Z"
104
+ }
@@ -0,0 +1 @@
1
+ [2026-03-07T16:32:39.495Z] RRP-001 — PASSED — Persist initial routing to prd.json on first classification — Cost: $0.5223
package/nax/status.json CHANGED
@@ -1,27 +1,36 @@
1
1
  {
2
2
  "version": 1,
3
3
  "run": {
4
- "id": "run-2026-03-07T13-49-17-400Z",
5
- "feature": "trigger-completion",
6
- "startedAt": "2026-03-07T13:49:17.400Z",
7
- "status": "completed",
4
+ "id": "run-2026-03-07T16-14-49-336Z",
5
+ "feature": "routing-persistence",
6
+ "startedAt": "2026-03-07T16:14:49.336Z",
7
+ "status": "running",
8
8
  "dryRun": false,
9
- "pid": 97007
9
+ "pid": 3412
10
10
  },
11
11
  "progress": {
12
- "total": 6,
13
- "passed": 6,
12
+ "total": 4,
13
+ "passed": 1,
14
14
  "failed": 0,
15
15
  "paused": 0,
16
16
  "blocked": 0,
17
- "pending": 0
17
+ "pending": 3
18
18
  },
19
19
  "cost": {
20
- "spent": 3.85387425,
20
+ "spent": 0.52230675,
21
21
  "limit": 8
22
22
  },
23
- "current": null,
24
- "iterations": 7,
25
- "updatedAt": "2026-03-07T14:58:57.404Z",
26
- "durationMs": 4180004
23
+ "current": {
24
+ "storyId": "RRP-002",
25
+ "title": "Add initialComplexity to StoryRouting and StoryMetrics for accurate reporting",
26
+ "complexity": "medium",
27
+ "tddStrategy": "test-after",
28
+ "model": "balanced",
29
+ "attempt": 1,
30
+ "phase": "routing"
31
+ },
32
+ "iterations": 2,
33
+ "updatedAt": "2026-03-07T16:45:19.261Z",
34
+ "durationMs": 1829925,
35
+ "lastHeartbeat": "2026-03-07T16:45:19.261Z"
27
36
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@nathapp/nax",
3
- "version": "0.25.0",
3
+ "version": "0.27.0",
4
4
  "description": "AI Coding Agent Orchestrator \u2014 loops until done",
5
5
  "type": "module",
6
6
  "bin": {
@@ -66,6 +66,7 @@ export async function runIteration(
66
66
  stories: storiesToExecute,
67
67
  routing,
68
68
  workdir: ctx.workdir,
69
+ prdPath: ctx.prdPath,
69
70
  featureDir: ctx.featureDir,
70
71
  hooks: ctx.hooks,
71
72
  plugins: ctx.pluginRegistry,
@@ -110,7 +110,8 @@ export function calculateAggregateMetrics(runs: RunMetrics[]): AggregateMetrics
110
110
  >();
111
111
 
112
112
  for (const story of allStories) {
113
- const complexity = story.complexity;
113
+ // Use initialComplexity (first-classify prediction) when available; fall back to complexity
114
+ const complexity = story.initialComplexity ?? story.complexity;
114
115
  const existing = complexityStats.get(complexity) || {
115
116
  predicted: 0,
116
117
  tierCounts: new Map<string, number>(),
@@ -58,9 +58,14 @@ export function collectStoryMetrics(ctx: PipelineContext, storyStartTime: string
58
58
  const modelDef = modelEntry ? resolveModel(modelEntry) : null;
59
59
  const modelUsed = modelDef?.model || routing.modelTier;
60
60
 
61
+ // initialComplexity: prefer story.routing.initialComplexity (first classify),
62
+ // fall back to routing.complexity for backward compat
63
+ const initialComplexity = story.routing?.initialComplexity ?? routing.complexity;
64
+
61
65
  return {
62
66
  storyId: story.id,
63
67
  complexity: routing.complexity,
68
+ initialComplexity,
64
69
  modelTier: routing.modelTier,
65
70
  modelUsed,
66
71
  attempts,
@@ -108,20 +113,27 @@ export function collectBatchMetrics(ctx: PipelineContext, storyStartTime: string
108
113
  const modelDef = modelEntry ? resolveModel(modelEntry) : null;
109
114
  const modelUsed = modelDef?.model || routing.modelTier;
110
115
 
111
- return stories.map((story) => ({
112
- storyId: story.id,
113
- complexity: routing.complexity,
114
- modelTier: routing.modelTier,
115
- modelUsed,
116
- attempts: 1, // batch stories don't escalate individually
117
- finalTier: routing.modelTier,
118
- success: true, // if batch succeeded, all stories succeeded
119
- cost: costPerStory,
120
- durationMs: durationPerStory,
121
- firstPassSuccess: true, // batch = first pass success
122
- startedAt: storyStartTime,
123
- completedAt: new Date().toISOString(),
124
- }));
116
+ return stories.map((story) => {
117
+ // initialComplexity: prefer story.routing.initialComplexity (if individual routing exists),
118
+ // fall back to shared routing.complexity (batch stories classified together)
119
+ const initialComplexity = story.routing?.initialComplexity ?? routing.complexity;
120
+
121
+ return {
122
+ storyId: story.id,
123
+ complexity: routing.complexity,
124
+ initialComplexity,
125
+ modelTier: routing.modelTier,
126
+ modelUsed,
127
+ attempts: 1, // batch stories don't escalate individually
128
+ finalTier: routing.modelTier,
129
+ success: true, // if batch succeeded, all stories succeeded
130
+ cost: costPerStory,
131
+ durationMs: durationPerStory,
132
+ firstPassSuccess: true, // batch = first pass success
133
+ startedAt: storyStartTime,
134
+ completedAt: new Date().toISOString(),
135
+ };
136
+ });
125
137
  }
126
138
 
127
139
  /**
@@ -12,6 +12,8 @@ export interface StoryMetrics {
12
12
  storyId: string;
13
13
  /** Classified complexity */
14
14
  complexity: string;
15
+ /** Initial complexity from first classification — preserved across escalations */
16
+ initialComplexity?: string;
15
17
  /** Initial model tier */
16
18
  modelTier: string;
17
19
  /** Actual model used (e.g., "claude-sonnet-4.5") */
@@ -14,6 +14,7 @@ export { NoopOptimizer } from "./noop.optimizer.js";
14
14
  export { RuleBasedOptimizer } from "./rule-based.optimizer.js";
15
15
 
16
16
  import type { NaxConfig } from "../config/schema.js";
17
+ import { getSafeLogger } from "../logger/index.js";
17
18
  import type { PluginRegistry } from "../plugins/registry.js";
18
19
  import { NoopOptimizer } from "./noop.optimizer.js";
19
20
  import { RuleBasedOptimizer } from "./rule-based.optimizer.js";
@@ -56,7 +57,7 @@ export function resolveOptimizer(config: NaxConfig, pluginRegistry?: PluginRegis
56
57
  return new NoopOptimizer();
57
58
  default:
58
59
  // Unknown strategy, fallback to noop
59
- console.warn(`[nax] Unknown optimizer strategy '${strategy}', using noop`);
60
+ getSafeLogger()?.warn("optimizer", `Unknown optimizer strategy '${strategy}', using noop`);
60
61
  return new NoopOptimizer();
61
62
  }
62
63
  }