@nathapp/nax 0.25.0 → 0.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +70 -56
- package/docs/ROADMAP.md +13 -1
- package/nax/features/routing-persistence/prd.json +104 -0
- package/nax/features/routing-persistence/progress.txt +1 -0
- package/nax/status.json +22 -13
- package/package.json +1 -1
- package/src/execution/iteration-runner.ts +1 -0
- package/src/metrics/aggregator.ts +2 -1
- package/src/metrics/tracker.ts +26 -14
- package/src/metrics/types.ts +2 -0
- package/src/pipeline/stages/routing.ts +42 -8
- package/src/pipeline/types.ts +2 -0
- package/src/prd/types.ts +4 -0
- package/src/routing/content-hash.ts +25 -0
- package/src/routing/index.ts +3 -0
- package/test/unit/metrics/aggregator.test.ts +164 -0
- package/test/unit/metrics/tracker.test.ts +186 -0
- package/test/unit/pipeline/stages/routing-idempotence.test.ts +139 -0
- package/test/unit/pipeline/stages/routing-initial-complexity.test.ts +321 -0
- package/test/unit/pipeline/stages/routing-persistence.test.ts +380 -0
- package/test/unit/routing/content-hash.test.ts +99 -0
package/CLAUDE.md
CHANGED
|
@@ -1,6 +1,16 @@
|
|
|
1
1
|
# nax — AI Coding Agent Orchestrator
|
|
2
2
|
|
|
3
|
-
Bun + TypeScript CLI that orchestrates AI coding agents with model routing, TDD strategies, and
|
|
3
|
+
Bun + TypeScript CLI that orchestrates AI coding agents (Claude Code) with model-tier routing, TDD strategies, plugin hooks, and a Central Run Registry.
|
|
4
|
+
|
|
5
|
+
## Tech Stack
|
|
6
|
+
|
|
7
|
+
| Layer | Choice |
|
|
8
|
+
|:------|:-------|
|
|
9
|
+
| Runtime | **Bun 1.3.7+** — Bun-native APIs only, no Node.js equivalents |
|
|
10
|
+
| Language | **TypeScript strict** — no `any` without explicit justification |
|
|
11
|
+
| Test | **`bun:test`** — describe/test/expect |
|
|
12
|
+
| Lint/Format | **Biome** (`bun run lint`) |
|
|
13
|
+
| Build | `bun run build` |
|
|
4
14
|
|
|
5
15
|
## Git Identity
|
|
6
16
|
|
|
@@ -11,14 +21,21 @@ git config user.email "subrina8080@outlook.com"
|
|
|
11
21
|
|
|
12
22
|
## Commands
|
|
13
23
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
bun
|
|
17
|
-
bun run
|
|
18
|
-
bun
|
|
19
|
-
bun
|
|
20
|
-
|
|
21
|
-
|
|
24
|
+
| Command | Purpose |
|
|
25
|
+
|:--------|:--------|
|
|
26
|
+
| `bun run typecheck` | tsc --noEmit |
|
|
27
|
+
| `bun run lint` | Biome |
|
|
28
|
+
| `bun test test/unit/foo.test.ts` | Targeted test during iteration |
|
|
29
|
+
| `NAX_SKIP_PRECHECK=1 bun test test/ --timeout=60000 --bail` | Full suite |
|
|
30
|
+
|
|
31
|
+
nax runs lint, typecheck, and tests automatically via the pipeline. Run these manually only when working outside a nax session.
|
|
32
|
+
|
|
33
|
+
## Engineering Persona
|
|
34
|
+
|
|
35
|
+
- **Senior Engineer mindset**: check edge cases, null/undefined, race conditions, and error states.
|
|
36
|
+
- **TDD first**: write or update tests before implementation when the story calls for it.
|
|
37
|
+
- **Stuck rule**: if the same test fails 2+ iterations, stop, summarise failed attempts, reassess approach.
|
|
38
|
+
- **Never push to remote** — the human reviews and pushes.
|
|
22
39
|
|
|
23
40
|
## Architecture
|
|
24
41
|
|
|
@@ -33,67 +50,64 @@ Runner.run() [src/execution/runner.ts — thin orchestrator only]
|
|
|
33
50
|
→ registry.teardownAll()
|
|
34
51
|
```
|
|
35
52
|
|
|
36
|
-
### Key Directories
|
|
53
|
+
### Key Source Directories
|
|
37
54
|
|
|
38
55
|
| Directory | Purpose |
|
|
39
|
-
|
|
40
|
-
| `src/execution/` | Runner loop, agent adapters,
|
|
41
|
-
| `src/execution/
|
|
42
|
-
| `src/
|
|
43
|
-
| `src/
|
|
44
|
-
| `src/
|
|
45
|
-
| `src/routing/` |
|
|
56
|
+
|:----------|:--------|
|
|
57
|
+
| `src/execution/` | Runner loop, agent adapters, escalation, lifecycle hooks |
|
|
58
|
+
| `src/execution/escalation/` | Tier escalation on repeated failures |
|
|
59
|
+
| `src/pipeline/stages/` | One file per pipeline stage |
|
|
60
|
+
| `src/pipeline/subscribers/` | Event-driven hooks (interaction, hooks.ts) |
|
|
61
|
+
| `src/routing/` | Model-tier routing — keyword, LLM, plugin chain |
|
|
62
|
+
| `src/routing/strategies/` | keyword.ts, llm.ts, llm-prompts.ts |
|
|
63
|
+
| `src/interaction/` | Interaction triggers + plugins (Auto, Telegram, Webhook) |
|
|
46
64
|
| `src/plugins/` | Plugin system — loader, registry, validator |
|
|
47
|
-
| `src/
|
|
65
|
+
| `src/verification/` | Test execution, smart runner, scoped runner |
|
|
66
|
+
| `src/metrics/` | StoryMetrics, aggregator, tracker |
|
|
67
|
+
| `src/config/` | Config schema + layered loader (global → project) |
|
|
48
68
|
| `src/agents/adapters/` | Agent integrations (Claude Code) |
|
|
49
|
-
| `src/cli/` + `src/commands/` | CLI commands
|
|
50
|
-
| `src/
|
|
51
|
-
| `src/
|
|
69
|
+
| `src/cli/` + `src/commands/` | CLI commands — check both locations |
|
|
70
|
+
| `src/prd/` | PRD types, loader, story state machine |
|
|
71
|
+
| `src/hooks/` | Lifecycle hook wiring |
|
|
72
|
+
| `src/constitution/` | Constitution loader + injection |
|
|
73
|
+
| `src/analyze/` | `nax analyze` — story classifier |
|
|
52
74
|
|
|
53
|
-
### Plugin
|
|
75
|
+
### Plugin Extension Points
|
|
54
76
|
|
|
55
|
-
|
|
|
56
|
-
|
|
57
|
-
|
|
|
58
|
-
|
|
|
59
|
-
|
|
|
60
|
-
|
|
|
77
|
+
| Interface | Loaded By | Purpose |
|
|
78
|
+
|:----------|:----------|:--------|
|
|
79
|
+
| `IContextProvider` | `context.ts` stage | Inject context into agent prompts |
|
|
80
|
+
| `IReviewer` | Review stage | Post-verify quality checks |
|
|
81
|
+
| `IReporter` | Runner | onRunStart / onStoryComplete / onRunEnd events |
|
|
82
|
+
| `IRoutingStrategy` | Router chain | Override model-tier routing |
|
|
61
83
|
|
|
62
84
|
### Config
|
|
63
85
|
|
|
64
86
|
- Global: `~/.nax/config.json` → Project: `<workdir>/nax/config.json`
|
|
65
|
-
- Schema: `src/config/schema.ts` — no hardcoded flags or credentials
|
|
87
|
+
- Schema: `src/config/schema.ts` — no hardcoded flags or credentials anywhere
|
|
66
88
|
|
|
67
|
-
##
|
|
89
|
+
## Workflow Protocol
|
|
68
90
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
- **New agent adapters** go in `src/agents/adapters/<name>.ts` — never inline in runner or existing adapters.
|
|
91
|
+
1. **Explore first**: use `grep`, `cat`, and solograph MCP to understand context before writing code.
|
|
92
|
+
2. **Plan complex tasks**: for multi-file changes, write a short plan before implementing.
|
|
93
|
+
3. **Implement in small chunks**: one logical concern per commit.
|
|
73
94
|
|
|
74
|
-
##
|
|
95
|
+
## Code Intelligence (Solograph MCP)
|
|
75
96
|
|
|
76
|
-
|
|
97
|
+
Use **solograph** MCP tools on-demand — do not use `web_search` or `kb_search`.
|
|
77
98
|
|
|
99
|
+
| Tool | When |
|
|
100
|
+
|:-----|:-----|
|
|
101
|
+
| `project_code_search` | Find existing patterns before writing new code |
|
|
102
|
+
| `codegraph_explain` | Architecture overview before tackling unfamiliar areas |
|
|
103
|
+
| `codegraph_query` | Dependency/impact analysis (Cypher) |
|
|
104
|
+
| `project_code_reindex` | After creating or deleting source files |
|
|
78
105
|
|
|
79
|
-
##
|
|
106
|
+
## Coding Standards & Forbidden Patterns
|
|
107
|
+
|
|
108
|
+
Full rules in `.claude/rules/` (loaded automatically):
|
|
80
109
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
| `project_code_search` | Find existing patterns, symbols, or implementations before writing new code |
|
|
86
|
-
| `codegraph_explain` | Get architecture overview of nax before tackling unfamiliar areas |
|
|
87
|
-
| `codegraph_query` | Cypher queries — dependency analysis, impact analysis, hub files |
|
|
88
|
-
| `codegraph_stats` | Quick graph stats (file/symbol counts) |
|
|
89
|
-
| `codegraph_shared` | Find packages shared across projects |
|
|
90
|
-
| `session_search` | Search prior Claude Code session history for relevant context |
|
|
91
|
-
| `project_info` | Project registry info |
|
|
92
|
-
| `project_code_reindex` | Reindex after creating or deleting source files, or major refactors |
|
|
93
|
-
|
|
94
|
-
Single source of truth: VPS solograph instance (Mac01 tunnels to VPS — same data either way).
|
|
95
|
-
## IMPORTANT
|
|
96
|
-
|
|
97
|
-
- Do NOT push to remote — let the human review and push.
|
|
98
|
-
- Never hardcode API keys — agents use their own auth from env.
|
|
99
|
-
- Agent adapters spawn external processes — always handle timeouts and cleanup.
|
|
110
|
+
- `01-project-conventions.md` — Bun-native APIs, 400-line limit, barrel imports, logging, commits
|
|
111
|
+
- `02-test-architecture.md` — directory mirroring, placement rules, file naming
|
|
112
|
+
- `03-test-writing.md` — `_deps` injection pattern, mock discipline, CI guards
|
|
113
|
+
- `04-forbidden-patterns.md` — banned APIs and test anti-patterns with alternatives
|
package/docs/ROADMAP.md
CHANGED
|
@@ -135,7 +135,17 @@
|
|
|
135
135
|
|
|
136
136
|
---
|
|
137
137
|
|
|
138
|
-
## v0.
|
|
138
|
+
## v0.26.0 — Routing Persistence ✅ Shipped (2026-03-08)
|
|
139
|
+
|
|
140
|
+
- **RRP-001:** Persist initial routing classification to `prd.json` on first classification
|
|
141
|
+
- **RRP-002:** Add `initialComplexity` to `StoryRouting` and `StoryMetrics` for accurate reporting
|
|
142
|
+
- **RRP-003:** Add `contentHash` to `StoryRouting` for staleness detection — stale cached routing is re-classified
|
|
143
|
+
- **RRP-004:** Unit tests for routing persistence, idempotence, staleness, content hash, metrics
|
|
144
|
+
- **BUG-052:** Replace `console.warn` with structured JSONL logger in `review/runner.ts` and `optimizer/index.ts`
|
|
145
|
+
|
|
146
|
+
---
|
|
147
|
+
|
|
148
|
+
## v0.25.0 — Trigger Completion ✅ Shipped (2026-03-07)
|
|
139
149
|
|
|
140
150
|
**Theme:** Wire all 8 unwired interaction triggers, 3 missing hook events, and add plugin integration tests
|
|
141
151
|
**Status:** ✅ Shipped
|
|
@@ -239,6 +249,8 @@
|
|
|
239
249
|
|
|
240
250
|
| Version | Theme | Date | Details |
|
|
241
251
|
|:---|:---|:---|:---|
|
|
252
|
+
| v0.26.0 | Routing Persistence | 2026-03-08 | RRP-001–004: persist initial routing, initialComplexity, contentHash staleness detection, unit tests; BUG-052: structured logger in review/optimizer |
|
|
253
|
+
| v0.25.0 | Trigger Completion | 2026-03-07 | TC-001–004: run.complete event, crash recovery, headless formatter, trigger completion |
|
|
242
254
|
| v0.24.0 | Central Run Registry | 2026-03-07 | CRR-000–003: events writer, registry, nax runs CLI, nax logs --run global resolution |
|
|
243
255
|
| v0.23.0 | Status File Consolidation | 2026-03-07 | SFC-001–004: auto-write status.json, feature-level status, align readers, remove dead code; BUG-043/044: testScoped config + command logging |
|
|
244
256
|
| v0.18.1 | Type Safety + CI Pipeline | 2026-03-03 | 60 TS errors + 12 lint errors fixed, GitLab CI green (1952/56/0) |
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
{
|
|
2
|
+
"project": "nax-routing-persistence",
|
|
3
|
+
"branchName": "feat/routing-persistence",
|
|
4
|
+
"feature": "routing-persistence",
|
|
5
|
+
"userStories": [
|
|
6
|
+
{
|
|
7
|
+
"id": "RRP-001",
|
|
8
|
+
"title": "Persist initial routing to prd.json on first classification",
|
|
9
|
+
"description": "Currently, when nax run classifies a story for the first time (no prior nax analyze, story.routing is undefined), the result lives only in ctx.routing (in-memory). If the run crashes and resumes, the routing stage re-classifies fresh — LLM may return different complexity/testStrategy, causing silent inconsistency mid-feature. Fix: after fresh classification in routing.ts, write the result back to prd.json via savePRD so story.routing is populated from the very first iteration.",
|
|
10
|
+
"acceptanceCriteria": [
|
|
11
|
+
"When story.routing is undefined before routing stage, after classification story.routing is written to prd.json",
|
|
12
|
+
"Subsequent iterations (or resume after crash) use the persisted story.routing — no re-classification",
|
|
13
|
+
"Escalation still overwrites modelTier and testStrategy as before — only initialComplexity is protected",
|
|
14
|
+
"savePRD is called once per story on first classification (not on every iteration if already persisted)",
|
|
15
|
+
"Unit tests verify prd.json is updated after first routing stage execution"
|
|
16
|
+
],
|
|
17
|
+
"complexity": "medium",
|
|
18
|
+
"status": "passed",
|
|
19
|
+
"tags": [],
|
|
20
|
+
"dependencies": [],
|
|
21
|
+
"escalations": [],
|
|
22
|
+
"attempts": 0,
|
|
23
|
+
"priorErrors": [],
|
|
24
|
+
"priorFailures": [],
|
|
25
|
+
"storyPoints": 1,
|
|
26
|
+
"passes": true
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
"id": "RRP-002",
|
|
30
|
+
"title": "Add initialComplexity to StoryRouting and StoryMetrics for accurate reporting",
|
|
31
|
+
"description": "StoryMetrics.complexity currently captures ctx.routing.complexity at completion time — which may reflect a post-escalation re-classification, not the original prediction. Add story.routing.initialComplexity (written once at first classify, never overwritten) and StoryMetrics.initialComplexity. Update metrics/aggregator.ts complexityAccuracy to compare initialComplexity vs finalTier instead of current complexity vs finalTier.",
|
|
32
|
+
"acceptanceCriteria": [
|
|
33
|
+
"StoryRouting interface gains initialComplexity?: Complexity field",
|
|
34
|
+
"Routing stage writes initialComplexity when story.routing is first created (RRP-001 path)",
|
|
35
|
+
"Escalation path never overwrites initialComplexity — only modelTier and testStrategy change",
|
|
36
|
+
"StoryMetrics gains initialComplexity?: string field",
|
|
37
|
+
"collectStoryMetrics() reads initialComplexity from story.routing.initialComplexity (falls back to routing.complexity for backward compat)",
|
|
38
|
+
"metrics/aggregator.ts complexityAccuracy uses initialComplexity for predicted vs finalTier comparison",
|
|
39
|
+
"Unit tests verify initialComplexity is set on first classify and unchanged after escalation"
|
|
40
|
+
],
|
|
41
|
+
"complexity": "medium",
|
|
42
|
+
"status": "pending",
|
|
43
|
+
"tags": [],
|
|
44
|
+
"dependencies": [
|
|
45
|
+
"RRP-001"
|
|
46
|
+
],
|
|
47
|
+
"escalations": [],
|
|
48
|
+
"attempts": 0,
|
|
49
|
+
"priorErrors": [],
|
|
50
|
+
"priorFailures": [],
|
|
51
|
+
"storyPoints": 1
|
|
52
|
+
},
|
|
53
|
+
{
|
|
54
|
+
"id": "RRP-003",
|
|
55
|
+
"title": "Add contentHash to StoryRouting for staleness detection (BUG-048)",
|
|
56
|
+
"description": "When nax analyze is run, it writes story.routing to prd.json. If the story is subsequently edited (more ACs, changed tags, updated description), nax run blindly trusts the existing routing — wrong complexity, wrong testStrategy. Fix: add story.routing.contentHash — a hash of title+description+acceptanceCriteria.join()+tags.join() written at classify time. Routing stage recomputes hash on each run; if mismatch, treat as cache miss and re-classify.",
|
|
57
|
+
"acceptanceCriteria": [
|
|
58
|
+
"StoryRouting interface gains contentHash?: string field",
|
|
59
|
+
"A helper function computeStoryContentHash(story: UserStory): string computes a hash of title+description+ACs+tags",
|
|
60
|
+
"Routing stage: if story.routing exists but contentHash is missing or mismatches current story content, re-classify (treat as cache miss)",
|
|
61
|
+
"Routing stage: after classification, write contentHash to story.routing",
|
|
62
|
+
"If story content unchanged, routing stage uses cached routing as before — no regression",
|
|
63
|
+
"Unit tests cover: hash match uses cache; hash mismatch re-classifies; missing hash re-classifies"
|
|
64
|
+
],
|
|
65
|
+
"complexity": "medium",
|
|
66
|
+
"status": "pending",
|
|
67
|
+
"tags": [],
|
|
68
|
+
"dependencies": [
|
|
69
|
+
"RRP-001"
|
|
70
|
+
],
|
|
71
|
+
"escalations": [],
|
|
72
|
+
"attempts": 0,
|
|
73
|
+
"priorErrors": [],
|
|
74
|
+
"priorFailures": [],
|
|
75
|
+
"storyPoints": 1
|
|
76
|
+
},
|
|
77
|
+
{
|
|
78
|
+
"id": "RRP-004",
|
|
79
|
+
"title": "Integration tests: routing persistence across simulated crash-resume and staleness",
|
|
80
|
+
"description": "Write integration tests that verify routing persistence end-to-end: (1) first run classifies and persists story.routing to prd.json, (2) second run uses persisted routing without re-classifying, (3) escalation preserves initialComplexity, (4) story content change triggers re-classification via contentHash mismatch.",
|
|
81
|
+
"acceptanceCriteria": [
|
|
82
|
+
"Integration test: routing stage with story.routing=undefined writes story.routing to prd.json after classification",
|
|
83
|
+
"Integration test: routing stage re-run with same prd.json uses cached routing — no LLM call made",
|
|
84
|
+
"Integration test: escalation updates modelTier in prd.json but initialComplexity remains unchanged",
|
|
85
|
+
"Integration test: edit story content after routing — hash mismatch detected — routing stage re-classifies",
|
|
86
|
+
"Integration test: story.routing with matching contentHash — no re-classification (cache hit confirmed)"
|
|
87
|
+
],
|
|
88
|
+
"complexity": "medium",
|
|
89
|
+
"status": "pending",
|
|
90
|
+
"tags": [],
|
|
91
|
+
"dependencies": [
|
|
92
|
+
"RRP-001",
|
|
93
|
+
"RRP-002",
|
|
94
|
+
"RRP-003"
|
|
95
|
+
],
|
|
96
|
+
"escalations": [],
|
|
97
|
+
"attempts": 0,
|
|
98
|
+
"priorErrors": [],
|
|
99
|
+
"priorFailures": [],
|
|
100
|
+
"storyPoints": 1
|
|
101
|
+
}
|
|
102
|
+
],
|
|
103
|
+
"updatedAt": "2026-03-07T16:32:39.496Z"
|
|
104
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
[2026-03-07T16:32:39.495Z] RRP-001 — PASSED — Persist initial routing to prd.json on first classification — Cost: $0.5223
|
package/nax/status.json
CHANGED
|
@@ -1,27 +1,36 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 1,
|
|
3
3
|
"run": {
|
|
4
|
-
"id": "run-2026-03-
|
|
5
|
-
"feature": "
|
|
6
|
-
"startedAt": "2026-03-
|
|
7
|
-
"status": "
|
|
4
|
+
"id": "run-2026-03-07T16-14-49-336Z",
|
|
5
|
+
"feature": "routing-persistence",
|
|
6
|
+
"startedAt": "2026-03-07T16:14:49.336Z",
|
|
7
|
+
"status": "running",
|
|
8
8
|
"dryRun": false,
|
|
9
|
-
"pid":
|
|
9
|
+
"pid": 3412
|
|
10
10
|
},
|
|
11
11
|
"progress": {
|
|
12
|
-
"total":
|
|
13
|
-
"passed":
|
|
12
|
+
"total": 4,
|
|
13
|
+
"passed": 1,
|
|
14
14
|
"failed": 0,
|
|
15
15
|
"paused": 0,
|
|
16
16
|
"blocked": 0,
|
|
17
|
-
"pending":
|
|
17
|
+
"pending": 3
|
|
18
18
|
},
|
|
19
19
|
"cost": {
|
|
20
|
-
"spent":
|
|
20
|
+
"spent": 0.52230675,
|
|
21
21
|
"limit": 8
|
|
22
22
|
},
|
|
23
|
-
"current":
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
23
|
+
"current": {
|
|
24
|
+
"storyId": "RRP-002",
|
|
25
|
+
"title": "Add initialComplexity to StoryRouting and StoryMetrics for accurate reporting",
|
|
26
|
+
"complexity": "medium",
|
|
27
|
+
"tddStrategy": "test-after",
|
|
28
|
+
"model": "balanced",
|
|
29
|
+
"attempt": 1,
|
|
30
|
+
"phase": "routing"
|
|
31
|
+
},
|
|
32
|
+
"iterations": 2,
|
|
33
|
+
"updatedAt": "2026-03-07T16:45:19.261Z",
|
|
34
|
+
"durationMs": 1829925,
|
|
35
|
+
"lastHeartbeat": "2026-03-07T16:45:19.261Z"
|
|
27
36
|
}
|
package/package.json
CHANGED
|
@@ -110,7 +110,8 @@ export function calculateAggregateMetrics(runs: RunMetrics[]): AggregateMetrics
|
|
|
110
110
|
>();
|
|
111
111
|
|
|
112
112
|
for (const story of allStories) {
|
|
113
|
-
|
|
113
|
+
// Use initialComplexity (first-classify prediction) when available; fall back to complexity
|
|
114
|
+
const complexity = story.initialComplexity ?? story.complexity;
|
|
114
115
|
const existing = complexityStats.get(complexity) || {
|
|
115
116
|
predicted: 0,
|
|
116
117
|
tierCounts: new Map<string, number>(),
|
package/src/metrics/tracker.ts
CHANGED
|
@@ -58,9 +58,14 @@ export function collectStoryMetrics(ctx: PipelineContext, storyStartTime: string
|
|
|
58
58
|
const modelDef = modelEntry ? resolveModel(modelEntry) : null;
|
|
59
59
|
const modelUsed = modelDef?.model || routing.modelTier;
|
|
60
60
|
|
|
61
|
+
// initialComplexity: prefer story.routing.initialComplexity (first classify),
|
|
62
|
+
// fall back to routing.complexity for backward compat
|
|
63
|
+
const initialComplexity = story.routing?.initialComplexity ?? routing.complexity;
|
|
64
|
+
|
|
61
65
|
return {
|
|
62
66
|
storyId: story.id,
|
|
63
67
|
complexity: routing.complexity,
|
|
68
|
+
initialComplexity,
|
|
64
69
|
modelTier: routing.modelTier,
|
|
65
70
|
modelUsed,
|
|
66
71
|
attempts,
|
|
@@ -108,20 +113,27 @@ export function collectBatchMetrics(ctx: PipelineContext, storyStartTime: string
|
|
|
108
113
|
const modelDef = modelEntry ? resolveModel(modelEntry) : null;
|
|
109
114
|
const modelUsed = modelDef?.model || routing.modelTier;
|
|
110
115
|
|
|
111
|
-
return stories.map((story) =>
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
116
|
+
return stories.map((story) => {
|
|
117
|
+
// initialComplexity: prefer story.routing.initialComplexity (if individual routing exists),
|
|
118
|
+
// fall back to shared routing.complexity (batch stories classified together)
|
|
119
|
+
const initialComplexity = story.routing?.initialComplexity ?? routing.complexity;
|
|
120
|
+
|
|
121
|
+
return {
|
|
122
|
+
storyId: story.id,
|
|
123
|
+
complexity: routing.complexity,
|
|
124
|
+
initialComplexity,
|
|
125
|
+
modelTier: routing.modelTier,
|
|
126
|
+
modelUsed,
|
|
127
|
+
attempts: 1, // batch stories don't escalate individually
|
|
128
|
+
finalTier: routing.modelTier,
|
|
129
|
+
success: true, // if batch succeeded, all stories succeeded
|
|
130
|
+
cost: costPerStory,
|
|
131
|
+
durationMs: durationPerStory,
|
|
132
|
+
firstPassSuccess: true, // batch = first pass success
|
|
133
|
+
startedAt: storyStartTime,
|
|
134
|
+
completedAt: new Date().toISOString(),
|
|
135
|
+
};
|
|
136
|
+
});
|
|
125
137
|
}
|
|
126
138
|
|
|
127
139
|
/**
|
package/src/metrics/types.ts
CHANGED
|
@@ -12,6 +12,8 @@ export interface StoryMetrics {
|
|
|
12
12
|
storyId: string;
|
|
13
13
|
/** Classified complexity */
|
|
14
14
|
complexity: string;
|
|
15
|
+
/** Initial complexity from first classification — preserved across escalations */
|
|
16
|
+
initialComplexity?: string;
|
|
15
17
|
/** Initial model tier */
|
|
16
18
|
modelTier: string;
|
|
17
19
|
/** Actual model used (e.g., "claude-sonnet-4.5") */
|
|
@@ -2,15 +2,18 @@
|
|
|
2
2
|
* Routing Stage
|
|
3
3
|
*
|
|
4
4
|
* Classifies story complexity and determines model tier + test strategy.
|
|
5
|
-
* Uses cached complexity/testStrategy/modelTier from story if
|
|
5
|
+
* Uses cached complexity/testStrategy/modelTier from story if contentHash matches.
|
|
6
6
|
* modelTier: uses escalated tier if explicitly set (BUG-032), otherwise derives from config.
|
|
7
7
|
*
|
|
8
|
+
* RRP-003: contentHash staleness detection — if story.routing.contentHash is present but
|
|
9
|
+
* does not match the current story content, treats cached routing as a miss and re-classifies; routing without a contentHash (legacy/manual) is honored as a cache hit.
|
|
10
|
+
*
|
|
8
11
|
* @returns
|
|
9
12
|
* - `continue`: Routing determined, proceed to next stage
|
|
10
13
|
*
|
|
11
14
|
* @example
|
|
12
15
|
* ```ts
|
|
13
|
-
* // Story has cached routing with
|
|
16
|
+
* // Story has cached routing with matching contentHash
|
|
14
17
|
* await routingStage.execute(ctx);
|
|
15
18
|
* // ctx.routing: { complexity: "simple", modelTier: "fast", testStrategy: "test-after", reasoning: "..." }
|
|
16
19
|
* // modelTier is derived from current config.autoMode.complexityRouting
|
|
@@ -19,7 +22,8 @@
|
|
|
19
22
|
|
|
20
23
|
import { isGreenfieldStory } from "../../context/greenfield";
|
|
21
24
|
import { getLogger } from "../../logger";
|
|
22
|
-
import {
|
|
25
|
+
import { savePRD } from "../../prd";
|
|
26
|
+
import { complexityToModelTier, computeStoryContentHash, routeStory } from "../../routing";
|
|
23
27
|
import { clearCache, routeBatch } from "../../routing/strategies/llm";
|
|
24
28
|
import type { PipelineContext, PipelineStage, RoutingResult, StageResult } from "../types";
|
|
25
29
|
|
|
@@ -30,11 +34,25 @@ export const routingStage: PipelineStage = {
|
|
|
30
34
|
async execute(ctx: PipelineContext): Promise<StageResult> {
|
|
31
35
|
const logger = getLogger();
|
|
32
36
|
|
|
33
|
-
//
|
|
34
|
-
//
|
|
37
|
+
// Staleness detection (RRP-003):
|
|
38
|
+
// - story.routing absent → cache miss (no prior routing)
|
|
39
|
+
// - story.routing + no contentHash → legacy cache hit (manual / pre-RRP-003 routing, honor as-is)
|
|
40
|
+
// - story.routing + contentHash matches → cache hit
|
|
41
|
+
// - story.routing + contentHash mismatches → cache miss (stale, re-classify)
|
|
42
|
+
const hasExistingRouting = ctx.story.routing !== undefined;
|
|
43
|
+
const hasContentHash = ctx.story.routing?.contentHash !== undefined;
|
|
44
|
+
let currentHash: string | undefined;
|
|
45
|
+
let hashMatch = false;
|
|
46
|
+
if (hasContentHash) {
|
|
47
|
+
currentHash = _routingDeps.computeStoryContentHash(ctx.story);
|
|
48
|
+
hashMatch = ctx.story.routing?.contentHash === currentHash;
|
|
49
|
+
}
|
|
50
|
+
const isCacheHit = hasExistingRouting && (!hasContentHash || hashMatch);
|
|
51
|
+
|
|
35
52
|
let routing: { complexity: string; testStrategy: string; modelTier: string; reasoning?: string };
|
|
36
|
-
|
|
37
|
-
|
|
53
|
+
|
|
54
|
+
if (isCacheHit) {
|
|
55
|
+
// Cache hit: legacy routing (no contentHash) or matching contentHash — use cached values
|
|
38
56
|
routing = await _routingDeps.routeStory(ctx.story, { config: ctx.config }, ctx.workdir, ctx.plugins);
|
|
39
57
|
// Override with cached values only when they are actually set
|
|
40
58
|
if (ctx.story.routing?.complexity) routing.complexity = ctx.story.routing.complexity;
|
|
@@ -50,8 +68,22 @@ export const routingStage: PipelineStage = {
|
|
|
50
68
|
);
|
|
51
69
|
}
|
|
52
70
|
} else {
|
|
53
|
-
//
|
|
71
|
+
// Cache miss: no routing, or contentHash present but mismatched — fresh classification
|
|
54
72
|
routing = await _routingDeps.routeStory(ctx.story, { config: ctx.config }, ctx.workdir, ctx.plugins);
|
|
73
|
+
// currentHash already computed if a mismatch was detected; compute now if starting fresh
|
|
74
|
+
currentHash = currentHash ?? _routingDeps.computeStoryContentHash(ctx.story);
|
|
75
|
+
ctx.story.routing = {
|
|
76
|
+
...(ctx.story.routing ?? {}),
|
|
77
|
+
complexity: routing.complexity as import("../../config").Complexity,
|
|
78
|
+
initialComplexity:
|
|
79
|
+
ctx.story.routing?.initialComplexity ?? (routing.complexity as import("../../config").Complexity),
|
|
80
|
+
testStrategy: routing.testStrategy as import("../../config").TestStrategy,
|
|
81
|
+
reasoning: routing.reasoning ?? "",
|
|
82
|
+
contentHash: currentHash,
|
|
83
|
+
};
|
|
84
|
+
if (ctx.prdPath) {
|
|
85
|
+
await _routingDeps.savePRD(ctx.prd, ctx.prdPath);
|
|
86
|
+
}
|
|
55
87
|
}
|
|
56
88
|
|
|
57
89
|
// BUG-010: Greenfield detection — force test-after if no test files exist
|
|
@@ -97,4 +129,6 @@ export const _routingDeps = {
|
|
|
97
129
|
complexityToModelTier,
|
|
98
130
|
isGreenfieldStory,
|
|
99
131
|
clearCache,
|
|
132
|
+
savePRD,
|
|
133
|
+
computeStoryContentHash,
|
|
100
134
|
};
|
package/src/pipeline/types.ts
CHANGED
|
@@ -65,6 +65,8 @@ export interface PipelineContext {
|
|
|
65
65
|
routing: RoutingResult;
|
|
66
66
|
/** Working directory (project root) */
|
|
67
67
|
workdir: string;
|
|
68
|
+
/** Absolute path to the prd.json file (used by routing stage to persist initial classification) */
|
|
69
|
+
prdPath?: string;
|
|
68
70
|
/** Feature directory (optional, e.g., nax/features/my-feature/) */
|
|
69
71
|
featureDir?: string;
|
|
70
72
|
/** Hooks configuration */
|
package/src/prd/types.ts
CHANGED
|
@@ -45,6 +45,10 @@ export interface StructuredFailure {
|
|
|
45
45
|
/** Routing metadata per story */
|
|
46
46
|
export interface StoryRouting {
|
|
47
47
|
complexity: Complexity;
|
|
48
|
+
/** Initial complexity from first classification — written once, never overwritten by escalation */
|
|
49
|
+
initialComplexity?: Complexity;
|
|
50
|
+
/** Content hash of story fields at time of routing — used to detect stale cached routing (RRP-003) */
|
|
51
|
+
contentHash?: string;
|
|
48
52
|
/** Model tier (derived at runtime from config, not persisted) */
|
|
49
53
|
modelTier?: ModelTier;
|
|
50
54
|
testStrategy: TestStrategy;
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Story Content Hash
|
|
3
|
+
*
|
|
4
|
+
* Computes a deterministic hash of the story content fields used for routing.
|
|
5
|
+
* Used by the routing stage (RRP-003) to detect stale cached routing.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { UserStory } from "../prd/types";
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* Compute a deterministic hash of the story content fields used for routing.
|
|
12
|
+
* Hash input: title + "\0" + description + "\0" + acceptanceCriteria.join("") + "\0" + tags.join("")
|
|
13
|
+
*
|
|
14
|
+
* Null-byte separators between fields prevent cross-field collisions; entries within acceptanceCriteria and within tags are concatenated with no separator.
|
|
15
|
+
*
|
|
16
|
+
* @param story - The user story to hash
|
|
17
|
+
* @returns A hex string content hash
|
|
18
|
+
*/
|
|
19
|
+
export function computeStoryContentHash(story: UserStory): string {
|
|
20
|
+
const input = `${story.title}\0${story.description}\0${story.acceptanceCriteria.join("")}\0${story.tags.join("")}`;
|
|
21
|
+
|
|
22
|
+
const hasher = new Bun.CryptoHasher("sha256");
|
|
23
|
+
hasher.update(input);
|
|
24
|
+
return hasher.digest("hex");
|
|
25
|
+
}
|
package/src/routing/index.ts
CHANGED
|
@@ -15,3 +15,6 @@ export { keywordStrategy, llmStrategy, manualStrategy } from "./strategies";
|
|
|
15
15
|
// Custom strategy loader
|
|
16
16
|
export { loadCustomStrategy } from "./loader";
|
|
17
17
|
export { tryLlmBatchRoute } from "./batch-route";
|
|
18
|
+
|
|
19
|
+
// Content hash for staleness detection (RRP-003)
|
|
20
|
+
export { computeStoryContentHash } from "./content-hash";
|