@nathapp/nax 0.24.0 → 0.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +70 -56
- package/docs/ROADMAP.md +45 -15
- package/docs/specs/trigger-completion.md +145 -0
- package/nax/features/routing-persistence/prd.json +104 -0
- package/nax/features/routing-persistence/progress.txt +1 -0
- package/nax/features/trigger-completion/prd.json +150 -0
- package/nax/features/trigger-completion/progress.txt +7 -0
- package/nax/status.json +15 -16
- package/package.json +1 -1
- package/src/config/types.ts +3 -1
- package/src/execution/crash-recovery.ts +11 -0
- package/src/execution/executor-types.ts +1 -1
- package/src/execution/iteration-runner.ts +1 -0
- package/src/execution/lifecycle/run-setup.ts +4 -0
- package/src/execution/sequential-executor.ts +45 -7
- package/src/interaction/plugins/auto.ts +10 -1
- package/src/metrics/aggregator.ts +2 -1
- package/src/metrics/tracker.ts +26 -14
- package/src/metrics/types.ts +2 -0
- package/src/pipeline/event-bus.ts +14 -1
- package/src/pipeline/stages/completion.ts +20 -0
- package/src/pipeline/stages/execution.ts +62 -0
- package/src/pipeline/stages/review.ts +25 -1
- package/src/pipeline/stages/routing.ts +42 -8
- package/src/pipeline/subscribers/hooks.ts +32 -0
- package/src/pipeline/subscribers/interaction.ts +36 -1
- package/src/pipeline/types.ts +2 -0
- package/src/prd/types.ts +4 -0
- package/src/routing/content-hash.ts +25 -0
- package/src/routing/index.ts +3 -0
- package/src/routing/router.ts +3 -2
- package/src/routing/strategies/keyword.ts +2 -1
- package/src/routing/strategies/llm-prompts.ts +29 -28
- package/src/utils/git.ts +21 -0
- package/test/integration/routing/plugin-routing-core.test.ts +1 -1
- package/test/unit/execution/sequential-executor.test.ts +235 -0
- package/test/unit/interaction/auto-plugin.test.ts +162 -0
- package/test/unit/interaction-plugins.test.ts +308 -1
- package/test/unit/metrics/aggregator.test.ts +164 -0
- package/test/unit/metrics/tracker.test.ts +186 -0
- package/test/unit/pipeline/stages/completion-review-gate.test.ts +218 -0
- package/test/unit/pipeline/stages/execution-ambiguity.test.ts +311 -0
- package/test/unit/pipeline/stages/execution-merge-conflict.test.ts +218 -0
- package/test/unit/pipeline/stages/review.test.ts +201 -0
- package/test/unit/pipeline/stages/routing-idempotence.test.ts +139 -0
- package/test/unit/pipeline/stages/routing-initial-complexity.test.ts +321 -0
- package/test/unit/pipeline/stages/routing-persistence.test.ts +380 -0
- package/test/unit/pipeline/subscribers/hooks.test.ts +43 -4
- package/test/unit/pipeline/subscribers/interaction.test.ts +284 -2
- package/test/unit/prd-auto-default.test.ts +2 -2
- package/test/unit/routing/content-hash.test.ts +99 -0
- package/test/unit/routing/routing-stability.test.ts +1 -1
- package/test/unit/routing-core.test.ts +5 -5
- package/test/unit/routing-strategies.test.ts +1 -3
- package/test/unit/utils/git.test.ts +50 -0
package/CLAUDE.md
CHANGED
|
@@ -1,6 +1,16 @@
|
|
|
1
1
|
# nax — AI Coding Agent Orchestrator
|
|
2
2
|
|
|
3
|
-
Bun + TypeScript CLI that orchestrates AI coding agents with model routing, TDD strategies, and
|
|
3
|
+
Bun + TypeScript CLI that orchestrates AI coding agents (Claude Code) with model-tier routing, TDD strategies, plugin hooks, and a Central Run Registry.
|
|
4
|
+
|
|
5
|
+
## Tech Stack
|
|
6
|
+
|
|
7
|
+
| Layer | Choice |
|
|
8
|
+
|:------|:-------|
|
|
9
|
+
| Runtime | **Bun 1.3.7+** — Bun-native APIs only, no Node.js equivalents |
|
|
10
|
+
| Language | **TypeScript strict** — no `any` without explicit justification |
|
|
11
|
+
| Test | **`bun:test`** — describe/test/expect |
|
|
12
|
+
| Lint/Format | **Biome** (`bun run lint`) |
|
|
13
|
+
| Build | `bun run build` |
|
|
4
14
|
|
|
5
15
|
## Git Identity
|
|
6
16
|
|
|
@@ -11,14 +21,21 @@ git config user.email "subrina8080@outlook.com"
|
|
|
11
21
|
|
|
12
22
|
## Commands
|
|
13
23
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
bun
|
|
17
|
-
bun run
|
|
18
|
-
bun
|
|
19
|
-
bun
|
|
20
|
-
|
|
21
|
-
|
|
24
|
+
| Command | Purpose |
|
|
25
|
+
|:--------|:--------|
|
|
26
|
+
| `bun run typecheck` | tsc --noEmit |
|
|
27
|
+
| `bun run lint` | Biome |
|
|
28
|
+
| `bun test test/unit/foo.test.ts` | Targeted test during iteration |
|
|
29
|
+
| `NAX_SKIP_PRECHECK=1 bun test test/ --timeout=60000 --bail` | Full suite |
|
|
30
|
+
|
|
31
|
+
nax runs lint, typecheck, and tests automatically via the pipeline. Run these manually only when working outside a nax session.
|
|
32
|
+
|
|
33
|
+
## Engineering Persona
|
|
34
|
+
|
|
35
|
+
- **Senior Engineer mindset**: check edge cases, null/undefined, race conditions, and error states.
|
|
36
|
+
- **TDD first**: write or update tests before implementation when the story calls for it.
|
|
37
|
+
- **Stuck rule**: if the same test fails 2+ iterations, stop, summarise failed attempts, reassess approach.
|
|
38
|
+
- **Never push to remote** — the human reviews and pushes.
|
|
22
39
|
|
|
23
40
|
## Architecture
|
|
24
41
|
|
|
@@ -33,67 +50,64 @@ Runner.run() [src/execution/runner.ts — thin orchestrator only]
|
|
|
33
50
|
→ registry.teardownAll()
|
|
34
51
|
```
|
|
35
52
|
|
|
36
|
-
### Key Directories
|
|
53
|
+
### Key Source Directories
|
|
37
54
|
|
|
38
55
|
| Directory | Purpose |
|
|
39
|
-
|
|
40
|
-
| `src/execution/` | Runner loop, agent adapters,
|
|
41
|
-
| `src/execution/
|
|
42
|
-
| `src/
|
|
43
|
-
| `src/
|
|
44
|
-
| `src/
|
|
45
|
-
| `src/routing/` |
|
|
56
|
+
|:----------|:--------|
|
|
57
|
+
| `src/execution/` | Runner loop, agent adapters, escalation, lifecycle hooks |
|
|
58
|
+
| `src/execution/escalation/` | Tier escalation on repeated failures |
|
|
59
|
+
| `src/pipeline/stages/` | One file per pipeline stage |
|
|
60
|
+
| `src/pipeline/subscribers/` | Event-driven hooks (interaction, hooks.ts) |
|
|
61
|
+
| `src/routing/` | Model-tier routing — keyword, LLM, plugin chain |
|
|
62
|
+
| `src/routing/strategies/` | keyword.ts, llm.ts, llm-prompts.ts |
|
|
63
|
+
| `src/interaction/` | Interaction triggers + plugins (Auto, Telegram, Webhook) |
|
|
46
64
|
| `src/plugins/` | Plugin system — loader, registry, validator |
|
|
47
|
-
| `src/
|
|
65
|
+
| `src/verification/` | Test execution, smart runner, scoped runner |
|
|
66
|
+
| `src/metrics/` | StoryMetrics, aggregator, tracker |
|
|
67
|
+
| `src/config/` | Config schema + layered loader (global → project) |
|
|
48
68
|
| `src/agents/adapters/` | Agent integrations (Claude Code) |
|
|
49
|
-
| `src/cli/` + `src/commands/` | CLI commands
|
|
50
|
-
| `src/
|
|
51
|
-
| `src/
|
|
69
|
+
| `src/cli/` + `src/commands/` | CLI commands — check both locations |
|
|
70
|
+
| `src/prd/` | PRD types, loader, story state machine |
|
|
71
|
+
| `src/hooks/` | Lifecycle hook wiring |
|
|
72
|
+
| `src/constitution/` | Constitution loader + injection |
|
|
73
|
+
| `src/analyze/` | `nax analyze` — story classifier |
|
|
52
74
|
|
|
53
|
-
### Plugin
|
|
75
|
+
### Plugin Extension Points
|
|
54
76
|
|
|
55
|
-
|
|
|
56
|
-
|
|
57
|
-
|
|
|
58
|
-
|
|
|
59
|
-
|
|
|
60
|
-
|
|
|
77
|
+
| Interface | Loaded By | Purpose |
|
|
78
|
+
|:----------|:----------|:--------|
|
|
79
|
+
| `IContextProvider` | `context.ts` stage | Inject context into agent prompts |
|
|
80
|
+
| `IReviewer` | Review stage | Post-verify quality checks |
|
|
81
|
+
| `IReporter` | Runner | onRunStart / onStoryComplete / onRunEnd events |
|
|
82
|
+
| `IRoutingStrategy` | Router chain | Override model-tier routing |
|
|
61
83
|
|
|
62
84
|
### Config
|
|
63
85
|
|
|
64
86
|
- Global: `~/.nax/config.json` → Project: `<workdir>/nax/config.json`
|
|
65
|
-
- Schema: `src/config/schema.ts` — no hardcoded flags or credentials
|
|
87
|
+
- Schema: `src/config/schema.ts` — no hardcoded flags or credentials anywhere
|
|
66
88
|
|
|
67
|
-
##
|
|
89
|
+
## Workflow Protocol
|
|
68
90
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
- **New agent adapters** go in `src/agents/adapters/<name>.ts` — never inline in runner or existing adapters.
|
|
91
|
+
1. **Explore first**: use `grep`, `cat`, and solograph MCP to understand context before writing code.
|
|
92
|
+
2. **Plan complex tasks**: for multi-file changes, write a short plan before implementing.
|
|
93
|
+
3. **Implement in small chunks**: one logical concern per commit.
|
|
73
94
|
|
|
74
|
-
##
|
|
95
|
+
## Code Intelligence (Solograph MCP)
|
|
75
96
|
|
|
76
|
-
|
|
97
|
+
Use **solograph** MCP tools on-demand — do not use `web_search` or `kb_search`.
|
|
77
98
|
|
|
99
|
+
| Tool | When |
|
|
100
|
+
|:-----|:-----|
|
|
101
|
+
| `project_code_search` | Find existing patterns before writing new code |
|
|
102
|
+
| `codegraph_explain` | Architecture overview before tackling unfamiliar areas |
|
|
103
|
+
| `codegraph_query` | Dependency/impact analysis (Cypher) |
|
|
104
|
+
| `project_code_reindex` | After creating or deleting source files |
|
|
78
105
|
|
|
79
|
-
##
|
|
106
|
+
## Coding Standards & Forbidden Patterns
|
|
107
|
+
|
|
108
|
+
Full rules in `.claude/rules/` (loaded automatically):
|
|
80
109
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
| `project_code_search` | Find existing patterns, symbols, or implementations before writing new code |
|
|
86
|
-
| `codegraph_explain` | Get architecture overview of nax before tackling unfamiliar areas |
|
|
87
|
-
| `codegraph_query` | Cypher queries — dependency analysis, impact analysis, hub files |
|
|
88
|
-
| `codegraph_stats` | Quick graph stats (file/symbol counts) |
|
|
89
|
-
| `codegraph_shared` | Find packages shared across projects |
|
|
90
|
-
| `session_search` | Search prior Claude Code session history for relevant context |
|
|
91
|
-
| `project_info` | Project registry info |
|
|
92
|
-
| `project_code_reindex` | Reindex after creating or deleting source files, or major refactors |
|
|
93
|
-
|
|
94
|
-
Single source of truth: VPS solograph instance (Mac01 tunnels to VPS — same data either way).
|
|
95
|
-
## IMPORTANT
|
|
96
|
-
|
|
97
|
-
- Do NOT push to remote — let the human review and push.
|
|
98
|
-
- Never hardcode API keys — agents use their own auth from env.
|
|
99
|
-
- Agent adapters spawn external processes — always handle timeouts and cleanup.
|
|
110
|
+
- `01-project-conventions.md` — Bun-native APIs, 400-line limit, barrel imports, logging, commits
|
|
111
|
+
- `02-test-architecture.md` — directory mirroring, placement rules, file naming
|
|
112
|
+
- `03-test-writing.md` — `_deps` injection pattern, mock discipline, CI guards
|
|
113
|
+
- `04-forbidden-patterns.md` — banned APIs and test anti-patterns with alternatives
|
package/docs/ROADMAP.md
CHANGED
|
@@ -118,34 +118,60 @@
|
|
|
118
118
|
|
|
119
119
|
---
|
|
120
120
|
|
|
121
|
-
## v0.23.0 — Status File Consolidation
|
|
121
|
+
## v0.23.0 — Status File Consolidation ✅
|
|
122
122
|
|
|
123
123
|
**Theme:** Auto-write status.json to well-known paths, align readers, remove dead options
|
|
124
|
-
**Status:**
|
|
124
|
+
**Status:** ✅ Shipped (2026-03-07)
|
|
125
125
|
**Spec:** [docs/specs/status-file-consolidation.md](specs/status-file-consolidation.md)
|
|
126
126
|
**Pre-requisite for:** v0.24.0 (Central Run Registry)
|
|
127
127
|
|
|
128
128
|
### Stories
|
|
129
129
|
- [x] ~~**SFC-001:** Auto-write project-level status — remove `--status-file` flag, always write to `<workdir>/nax/status.json`~~
|
|
130
|
-
- [
|
|
131
|
-
- [
|
|
132
|
-
- [
|
|
133
|
-
- [
|
|
134
|
-
- [
|
|
130
|
+
- [x] ~~**BUG-043:** Fix scoped test command construction + add `testScoped` config with `{{files}}` template~~
|
|
131
|
+
- [x] ~~**BUG-044:** Log scoped and full-suite test commands at info level in verify stage~~
|
|
132
|
+
- [x] ~~**SFC-002:** Write feature-level status on run end — copy final snapshot to `<workdir>/nax/features/<feature>/status.json`~~
|
|
133
|
+
- [x] ~~**SFC-003:** Align status readers — `nax status` + `nax diagnose` read from correct paths~~
|
|
134
|
+
- [x] ~~**SFC-004:** Clean up dead code — remove `--status-file` option, `.nax-status.json` references~~
|
|
135
135
|
|
|
136
136
|
---
|
|
137
137
|
|
|
138
|
-
## v0.
|
|
138
|
+
## v0.26.0 — Routing Persistence ✅ Shipped (2026-03-08)
|
|
139
139
|
|
|
140
|
-
**
|
|
140
|
+
- **RRP-001:** Persist initial routing classification to `prd.json` on first classification
|
|
141
|
+
- **RRP-002:** Add `initialComplexity` to `StoryRouting` and `StoryMetrics` for accurate reporting
|
|
142
|
+
- **RRP-003:** Add `contentHash` to `StoryRouting` for staleness detection — stale cached routing is re-classified
|
|
143
|
+
- **RRP-004:** Unit tests for routing persistence, idempotence, staleness, content hash, metrics
|
|
144
|
+
- **BUG-052:** Replace `console.warn` with structured JSONL logger in `review/runner.ts` and `optimizer/index.ts`
|
|
145
|
+
|
|
146
|
+
---
|
|
147
|
+
|
|
148
|
+
## v0.25.0 — Trigger Completion ✅ Shipped (2026-03-07)
|
|
149
|
+
|
|
150
|
+
**Theme:** Wire all 8 unwired interaction triggers and 3 missing hook events, and add plugin integration tests
|
|
141
151
|
**Status:** ✅ Shipped (2026-03-07)
|
|
152
|
+
**Spec:** [docs/specs/trigger-completion.md](specs/trigger-completion.md)
|
|
153
|
+
|
|
154
|
+
### Stories
|
|
155
|
+
- [ ] **TC-001:** Wire `cost-exceeded` + `cost-warning` triggers — fire at 80%/100% of cost limit in sequential-executor.ts
|
|
156
|
+
- [ ] **TC-002:** Wire `max-retries` trigger — fire on permanent story failure via `story:failed` event in wireInteraction
|
|
157
|
+
- [ ] **TC-003:** Wire `security-review`, `merge-conflict`, `pre-merge` triggers — review rejection, git conflict detection, pre-completion gate
|
|
158
|
+
- [ ] **TC-004:** Wire `story-ambiguity` + `review-gate` triggers — ambiguity keyword detection, per-story human checkpoint
|
|
159
|
+
- [ ] **TC-005:** Wire missing hook events — `on-resume`, `on-session-end`, `on-error` to pipeline events
|
|
160
|
+
- [ ] **TC-006:** Auto plugin + Telegram + Webhook integration tests — mock LLM/network, cover approve/reject/HMAC flows
|
|
161
|
+
|
|
162
|
+
---
|
|
163
|
+
|
|
164
|
+
## v0.24.0 — Central Run Registry ✅
|
|
165
|
+
|
|
166
|
+
**Theme:** Global run index across all projects — single source of truth for all nax run history
|
|
167
|
+
**Status:** ✅ Shipped (2026-03-07)
|
|
142
168
|
**Spec:** [docs/specs/central-run-registry.md](specs/central-run-registry.md)
|
|
143
169
|
|
|
144
170
|
### Stories
|
|
145
|
-
- [
|
|
146
|
-
- [
|
|
147
|
-
- [
|
|
148
|
-
- [
|
|
171
|
+
- [x] ~~**CRR-000:** `src/pipeline/subscribers/events-writer.ts` — `wireEventsWriter()`, writes lifecycle events to `~/.nax/events/<project>/events.jsonl` (machine-readable completion signal for watchdog/CI)~~
|
|
172
|
+
- [x] ~~**CRR-001:** `src/pipeline/subscribers/registry.ts` — `wireRegistry()` subscriber, listens to `run:started`, writes `~/.nax/runs/<project>-<feature>-<runId>/meta.json` (path pointers only — no data duplication, no symlinks)~~
|
|
173
|
+
- [x] ~~**CRR-002:** `src/commands/runs.ts` — `nax runs` CLI, reads `meta.json` → resolves live `status.json` from `statusPath`, displays table (project, feature, status, stories, duration, date). Filters: `--project`, `--last`, `--status`~~
|
|
174
|
+
- [x] ~~**CRR-003:** `nax logs --run <runId>` — resolve run from global registry via `eventsDir`, stream logs from any directory~~
|
|
149
175
|
|
|
150
176
|
---
|
|
151
177
|
|
|
@@ -223,6 +249,10 @@
|
|
|
223
249
|
|
|
224
250
|
| Version | Theme | Date | Details |
|
|
225
251
|
|:---|:---|:---|:---|
|
|
252
|
+
| v0.26.0 | Routing Persistence | 2026-03-08 | RRP-001–004: persist initial routing, initialComplexity, contentHash staleness detection, unit tests; BUG-052: structured logger in review/optimizer |
|
|
253
|
+
| v0.25.0 | Trigger Completion | 2026-03-07 | TC-001–004: run.complete event, crash recovery, headless formatter, trigger completion |
|
|
254
|
+
| v0.24.0 | Central Run Registry | 2026-03-07 | CRR-000–003: events writer, registry, nax runs CLI, nax logs --run global resolution |
|
|
255
|
+
| v0.23.0 | Status File Consolidation | 2026-03-07 | SFC-001–004: auto-write status.json, feature-level status, align readers, remove dead code; BUG-043/044: testScoped config + command logging |
|
|
226
256
|
| v0.18.1 | Type Safety + CI Pipeline | 2026-03-03 | 60 TS errors + 12 lint errors fixed, GitLab CI green (1952/56/0) |
|
|
227
257
|
| v0.22.2 | Routing Stability + SFC-001 | 2026-03-07 | BUG-040 floating outputPromise crash on LLM timeout retry; SFC-001 auto-write status.json |
|
|
228
258
|
| v0.22.1 | Pipeline Re-Architecture | 2026-03-07 | VerificationOrchestrator, EventBus, new stages (rectify/autofix/regression/deferred-regression), post-run SSOT. 2264 pass |
|
|
@@ -287,8 +317,8 @@
|
|
|
287
317
|
|
|
288
318
|
- [x] ~~**BUG-037:** Test output summary (verify stage) captures precheck boilerplate instead of actual `bun test` failure. Fixed: `.slice(-20)` tail — shipped in v0.22.1 (re-arch phase 2).~~
|
|
289
319
|
- [x] ~~**BUG-038:** `smart-runner` over-matching when global defaults change. Fixed by FEAT-010 (v0.21.0) — per-attempt `storyGitRef` baseRef tracking; `git diff <baseRef>..HEAD` prevents cross-story file pollution.~~
|
|
290
|
-
- [
|
|
291
|
-
- [
|
|
320
|
+
- [x] ~~**BUG-043:** Scoped test command appends files instead of replacing path — `runners.ts:scoped()` concatenates `scopedTestPaths` to full-suite command, resulting in `bun test test/ --timeout=60000 /path/to/file.ts` (runs everything). Fix: use `testScoped` config with `{{files}}` template, fall back to `buildSmartTestCommand()` heuristic. **Location:** `src/verification/runners.ts:scoped()`~~
|
|
321
|
+
- [x] ~~**BUG-044:** Scoped/full-suite test commands not logged — no visibility into what command was actually executed during verify stage. Fix: log at info level before execution.~~
|
|
292
322
|
|
|
293
323
|
### Features
|
|
294
324
|
- [x] ~~`nax unlock` command~~
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
# Trigger Completion — Spec
|
|
2
|
+
|
|
3
|
+
**Version:** v0.25.0
|
|
4
|
+
**Status:** Planned
|
|
5
|
+
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## Problem
|
|
9
|
+
|
|
10
|
+
8 of 9 interaction trigger helpers (`checkCostExceeded`, `checkCostWarning`, `checkMaxRetries`, `checkSecurityReview`, `checkMergeConflict`, `checkPreMerge`, `checkStoryAmbiguity`, `checkReviewGate`) are implemented in `src/interaction/triggers.ts` and exported but **never called** from the pipeline.
|
|
11
|
+
|
|
12
|
+
Only `human-review` is wired (via `wireInteraction` subscriber on `human-review:requested` event).
|
|
13
|
+
|
|
14
|
+
Additionally, 3 hook events (`on-resume`, `on-session-end`, `on-error`) are defined in `HookEvent` but not wired to any pipeline event.
|
|
15
|
+
|
|
16
|
+
---
|
|
17
|
+
|
|
18
|
+
## Goal
|
|
19
|
+
|
|
20
|
+
Wire all 8 remaining triggers to the correct pipeline decision points. Add 3 missing hook events. Add mock-based integration test coverage for the Auto, Telegram, and Webhook plugins (a full E2E test against a live Telegram bot is out of scope).
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## Stories
|
|
25
|
+
|
|
26
|
+
### TC-001: Wire `cost-exceeded` and `cost-warning` triggers
|
|
27
|
+
|
|
28
|
+
**Location:** `src/execution/sequential-executor.ts`
|
|
29
|
+
|
|
30
|
+
Currently at line 93, when `totalCost >= config.execution.costLimit`, the run exits with `"cost-limit"` — no interaction trigger is fired.
|
|
31
|
+
|
|
32
|
+
**Fix:**
|
|
33
|
+
- Before exiting on cost limit: call `checkCostExceeded({featureName, cost, limit}, config, interactionChain)`. If trigger returns `abort` or chain not available → exit as today. Pass `interactionChain` into `executeSequential` ctx (already present in `SequentialExecutionContext`).
|
|
34
|
+
- Add a `cost-warning` threshold check: when `totalCost >= costLimit * 0.8` (configurable via `interaction.triggers.cost-warning.threshold`, default 0.8), fire `checkCostWarning`. Fire only once per run (track with a boolean flag). Fallback: `continue`.
|
|
35
|
+
- Emit new `run:paused` event if trigger response is `escalate` (pause for human decision).
|
|
36
|
+
- Add `CostExceededEvent` and `CostWarningEvent` to `PipelineEventBus` (or reuse `run:paused` with a `reason` field — preferred, avoids new event types).
|
|
37
|
+
|
|
38
|
+
**Acceptance criteria:**
|
|
39
|
+
- When cost hits 80% of limit, `cost-warning` trigger fires once and run continues (default fallback)
|
|
40
|
+
- When cost hits 100% of limit, `cost-exceeded` trigger fires; abort kills the run, skip/continue allows proceeding past limit
|
|
41
|
+
- When no interaction plugin is configured, behavior is identical to today (no-op)
|
|
42
|
+
- Tests: unit test both thresholds with mock chain
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
### TC-002: Wire `max-retries` trigger
|
|
47
|
+
|
|
48
|
+
**Location:** `src/execution/sequential-executor.ts` or `src/pipeline/pipeline-result-handler.ts`
|
|
49
|
+
|
|
50
|
+
Currently when a story exhausts all tier escalations and is marked failed permanently (`markStoryFailed`), no trigger fires (except `human-review` which fires on `human-review:requested` event for a different condition).
|
|
51
|
+
|
|
52
|
+
**Fix:**
|
|
53
|
+
- In the story failure path (after all escalations exhausted), call `checkMaxRetries({featureName, storyId, iteration}, config, interactionChain)`.
|
|
54
|
+
- Response `skip` = proceed (today's behavior), `abort` = halt entire run, `escalate` = retry story from scratch at top tier.
|
|
55
|
+
- Wire via `story:failed` event in `wireInteraction` subscriber (add alongside `human-review:requested`).
|
|
56
|
+
|
|
57
|
+
**Acceptance criteria:**
|
|
58
|
+
- `max-retries` trigger fires when a story is permanently failed
|
|
59
|
+
- `abort` response halts the run with exit reason `"interaction-abort"`
|
|
60
|
+
- `skip` response is silent (today's behavior)
|
|
61
|
+
- Tests: unit test with mock chain for all three fallbacks
|
|
62
|
+
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
### TC-003: Wire `security-review`, `merge-conflict`, `pre-merge` triggers
|
|
66
|
+
|
|
67
|
+
**Location:** `src/pipeline/stages/review.ts` and `src/pipeline/stages/completion.ts` (post-story)
|
|
68
|
+
|
|
69
|
+
- **`security-review`**: Fire after plugin reviewer (e.g. semgrep) rejects a story in `review.ts`. Currently returns `{ action: "fail" }`. Before failing permanently, call `checkSecurityReview`. Response `abort` = fail (today), `escalate` = retry with security context injected.
|
|
70
|
+
- **`merge-conflict`**: Fire when git operations detect a merge conflict during story commit. Currently no merge-conflict detection exists — add detection in `src/execution/git.ts` (catch `CONFLICT` in git merge/rebase output) and call `checkMergeConflict`.
|
|
71
|
+
- **`pre-merge`**: Fire after all stories pass but before the run is marked complete. Call `checkPreMerge({featureName, totalStories, cost}, config, interactionChain)` in `sequential-executor.ts` final block. Response `abort` = halt, `continue` = complete normally.
|
|
72
|
+
|
|
73
|
+
**Acceptance criteria:**
|
|
74
|
+
- `security-review` trigger fires when plugin reviewer rejects (not when lint/typecheck fails)
|
|
75
|
+
- `merge-conflict` trigger fires when git detects CONFLICT markers
|
|
76
|
+
- `pre-merge` trigger fires once after all stories pass, before run:completed
|
|
77
|
+
- Tests: unit tests for each trigger point with mock chain
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
### TC-004: Wire `story-ambiguity` and `review-gate` triggers
|
|
82
|
+
|
|
83
|
+
**Location:** `src/pipeline/stages/execution.ts`
|
|
84
|
+
|
|
85
|
+
- **`story-ambiguity`**: Fire when agent session returns ambiguous/clarification-needed signal. Currently the agent exit codes and output are parsed in `execution.ts` — add a detection heuristic (e.g. agent output contains "unclear" / "ambiguous" / "need clarification" keywords, or a new `needsClarification` flag in agent result). Call `checkStoryAmbiguity` before escalating.
|
|
86
|
+
- **`review-gate`**: Fire after `story:completed` as a human checkpoint gate (configurable, disabled by default). Wire via new `review-gate:requested` event emitted in completion stage when `interaction.triggers.review-gate.enabled = true`.
|
|
87
|
+
|
|
88
|
+
**Acceptance criteria:**
|
|
89
|
+
- `story-ambiguity` trigger fires when agent signals ambiguity (keyword detection)
|
|
90
|
+
- `review-gate` trigger fires after each story passes when enabled
|
|
91
|
+
- Both default to disabled in config (opt-in)
|
|
92
|
+
- Tests: unit tests for ambiguity detection heuristic + trigger dispatch
|
|
93
|
+
|
|
94
|
+
---
|
|
95
|
+
|
|
96
|
+
### TC-005: Wire missing hook events (`on-resume`, `on-session-end`, `on-error`)
|
|
97
|
+
|
|
98
|
+
**Location:** `src/pipeline/subscribers/hooks.ts`
|
|
99
|
+
|
|
100
|
+
Three hook events are defined in `HookEvent` but never wired to pipeline events:
|
|
101
|
+
|
|
102
|
+
- **`on-resume`**: Fire when a paused run resumes. Add `run:resumed` event to `PipelineEventBus`, emit it in `sequential-executor.ts` when resuming from pause state. Wire in `wireHooks`.
|
|
103
|
+
- **`on-session-end`**: Fire when an individual agent session ends (pass or fail). Map to `story:completed` + `story:failed`. Wire in `wireHooks` on both events.
|
|
104
|
+
- **`on-error`**: Fire on unhandled errors / crash. Emit in `crash-recovery.ts` crash handler. Wire in `wireHooks`.
|
|
105
|
+
|
|
106
|
+
**Acceptance criteria:**
|
|
107
|
+
- `on-resume` hook fires when a paused run is continued
|
|
108
|
+
- `on-session-end` hook fires after every agent session (pass or fail)
|
|
109
|
+
- `on-error` hook fires in crash handler before exit
|
|
110
|
+
- Tests: extend existing `hooks.test.ts` with the three new events
|
|
111
|
+
|
|
112
|
+
---
|
|
113
|
+
|
|
114
|
+
### TC-006: Auto, Telegram, and Webhook plugin integration tests
|
|
115
|
+
|
|
116
|
+
**Location:** `test/integration/interaction/`
|
|
117
|
+
|
|
118
|
+
The `AutoInteractionPlugin` (LLM-based) has zero test coverage. The Telegram and Webhook plugins have init/config tests but no send/receive flow tests.
|
|
119
|
+
|
|
120
|
+
**Fix:**
|
|
121
|
+
- `auto.test.ts` — mock the LLM call (`_deps` pattern), test: approve decision, reject decision, confidence below threshold falls back, `security-review` is never auto-approved.
|
|
122
|
+
- Extend `interaction-plugins.test.ts` with Telegram send flow (mock `fetch`, verify message format + inline keyboard structure).
|
|
123
|
+
- Extend `interaction-plugins.test.ts` with Webhook send flow (mock HTTP server, verify HMAC signature validation).
|
|
124
|
+
|
|
125
|
+
**Acceptance criteria:**
|
|
126
|
+
- Auto plugin: LLM approve/reject/confidence-fallback/security-review-block all covered
|
|
127
|
+
- Telegram: message send format and inline keyboard structure verified
|
|
128
|
+
- Webhook: HMAC verification tested (valid + tampered signatures)
|
|
129
|
+
- All tests are unit/mock — no real network calls
|
|
130
|
+
|
|
131
|
+
---
|
|
132
|
+
|
|
133
|
+
## Out of Scope
|
|
134
|
+
|
|
135
|
+
- Full E2E test with real Telegram bot (requires live credentials)
|
|
136
|
+
- New trigger types beyond the 9 already defined
|
|
137
|
+
- Interaction state persistence (pause/resume full flow) — separate feature
|
|
138
|
+
|
|
139
|
+
---
|
|
140
|
+
|
|
141
|
+
## Notes
|
|
142
|
+
|
|
143
|
+
- All trigger calls must be best-effort guarded: if `interactionChain` is null/undefined, skip silently (today's behavior)
|
|
144
|
+
- `interactionChain` is already threaded through `SequentialExecutionContext` — no new context changes needed for most stories
|
|
145
|
+
- Config `interaction.triggers.<name>.enabled` must be `true` for any trigger to fire (`isTriggerEnabled` handles this)
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
{
|
|
2
|
+
"project": "nax-routing-persistence",
|
|
3
|
+
"branchName": "feat/routing-persistence",
|
|
4
|
+
"feature": "routing-persistence",
|
|
5
|
+
"userStories": [
|
|
6
|
+
{
|
|
7
|
+
"id": "RRP-001",
|
|
8
|
+
"title": "Persist initial routing to prd.json on first classification",
|
|
9
|
+
"description": "Currently, when nax run classifies a story for the first time (no prior nax analyze, story.routing is undefined), the result lives only in ctx.routing (in-memory). If the run crashes and resumes, the routing stage re-classifies fresh — LLM may return different complexity/testStrategy, causing silent inconsistency mid-feature. Fix: after fresh classification in routing.ts, write the result back to prd.json via savePRD so story.routing is populated from the very first iteration.",
|
|
10
|
+
"acceptanceCriteria": [
|
|
11
|
+
"When story.routing is undefined before routing stage, after classification story.routing is written to prd.json",
|
|
12
|
+
"Subsequent iterations (or resume after crash) use the persisted story.routing — no re-classification",
|
|
13
|
+
"Escalation still overwrites modelTier and testStrategy as before — only initialComplexity is protected",
|
|
14
|
+
"savePRD is called once per story on first classification (not on every iteration if already persisted)",
|
|
15
|
+
"Unit tests verify prd.json is updated after first routing stage execution"
|
|
16
|
+
],
|
|
17
|
+
"complexity": "medium",
|
|
18
|
+
"status": "passed",
|
|
19
|
+
"tags": [],
|
|
20
|
+
"dependencies": [],
|
|
21
|
+
"escalations": [],
|
|
22
|
+
"attempts": 0,
|
|
23
|
+
"priorErrors": [],
|
|
24
|
+
"priorFailures": [],
|
|
25
|
+
"storyPoints": 1,
|
|
26
|
+
"passes": true
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
"id": "RRP-002",
|
|
30
|
+
"title": "Add initialComplexity to StoryRouting and StoryMetrics for accurate reporting",
|
|
31
|
+
"description": "StoryMetrics.complexity currently captures ctx.routing.complexity at completion time — which may reflect a post-escalation re-classification, not the original prediction. Add story.routing.initialComplexity (written once at first classify, never overwritten) and StoryMetrics.initialComplexity. Update metrics/aggregator.ts complexityAccuracy to compare initialComplexity vs finalTier instead of current complexity vs finalTier.",
|
|
32
|
+
"acceptanceCriteria": [
|
|
33
|
+
"StoryRouting interface gains initialComplexity?: Complexity field",
|
|
34
|
+
"Routing stage writes initialComplexity when story.routing is first created (RRP-001 path)",
|
|
35
|
+
"Escalation path never overwrites initialComplexity — only modelTier and testStrategy change",
|
|
36
|
+
"StoryMetrics gains initialComplexity?: string field",
|
|
37
|
+
"collectStoryMetrics() reads initialComplexity from story.routing.initialComplexity (falls back to routing.complexity for backward compat)",
|
|
38
|
+
"metrics/aggregator.ts complexityAccuracy uses initialComplexity for predicted vs finalTier comparison",
|
|
39
|
+
"Unit tests verify initialComplexity is set on first classify and unchanged after escalation"
|
|
40
|
+
],
|
|
41
|
+
"complexity": "medium",
|
|
42
|
+
"status": "pending",
|
|
43
|
+
"tags": [],
|
|
44
|
+
"dependencies": [
|
|
45
|
+
"RRP-001"
|
|
46
|
+
],
|
|
47
|
+
"escalations": [],
|
|
48
|
+
"attempts": 0,
|
|
49
|
+
"priorErrors": [],
|
|
50
|
+
"priorFailures": [],
|
|
51
|
+
"storyPoints": 1
|
|
52
|
+
},
|
|
53
|
+
{
|
|
54
|
+
"id": "RRP-003",
|
|
55
|
+
"title": "Add contentHash to StoryRouting for staleness detection (BUG-048)",
|
|
56
|
+
"description": "When nax analyze is run, it writes story.routing to prd.json. If the story is subsequently edited (more ACs, changed tags, updated description), nax run blindly trusts the existing routing — wrong complexity, wrong testStrategy. Fix: add story.routing.contentHash — a hash of title+description+acceptanceCriteria.join()+tags.join() written at classify time. Routing stage recomputes hash on each run; if mismatch, treat as cache miss and re-classify.",
|
|
57
|
+
"acceptanceCriteria": [
|
|
58
|
+
"StoryRouting interface gains contentHash?: string field",
|
|
59
|
+
"A helper function computeStoryContentHash(story: UserStory): string computes a hash of title+description+ACs+tags",
|
|
60
|
+
"Routing stage: if story.routing exists but contentHash is missing or mismatches current story content, re-classify (treat as cache miss)",
|
|
61
|
+
"Routing stage: after classification, write contentHash to story.routing",
|
|
62
|
+
"If story content unchanged, routing stage uses cached routing as before — no regression",
|
|
63
|
+
"Unit tests cover: hash match uses cache; hash mismatch re-classifies; missing hash re-classifies"
|
|
64
|
+
],
|
|
65
|
+
"complexity": "medium",
|
|
66
|
+
"status": "pending",
|
|
67
|
+
"tags": [],
|
|
68
|
+
"dependencies": [
|
|
69
|
+
"RRP-001"
|
|
70
|
+
],
|
|
71
|
+
"escalations": [],
|
|
72
|
+
"attempts": 0,
|
|
73
|
+
"priorErrors": [],
|
|
74
|
+
"priorFailures": [],
|
|
75
|
+
"storyPoints": 1
|
|
76
|
+
},
|
|
77
|
+
{
|
|
78
|
+
"id": "RRP-004",
|
|
79
|
+
"title": "Integration tests: routing persistence across simulated crash-resume and staleness",
|
|
80
|
+
"description": "Write integration tests that verify routing persistence end-to-end: (1) first run classifies and persists story.routing to prd.json, (2) second run uses persisted routing without re-classifying, (3) escalation preserves initialComplexity, (4) story content change triggers re-classification via contentHash mismatch.",
|
|
81
|
+
"acceptanceCriteria": [
|
|
82
|
+
"Integration test: routing stage with story.routing=undefined writes story.routing to prd.json after classification",
|
|
83
|
+
"Integration test: routing stage re-run with same prd.json uses cached routing — no LLM call made",
|
|
84
|
+
"Integration test: escalation updates modelTier in prd.json but initialComplexity remains unchanged",
|
|
85
|
+
"Integration test: edit story content after routing — hash mismatch detected — routing stage re-classifies",
|
|
86
|
+
"Integration test: story.routing with matching contentHash — no re-classification (cache hit confirmed)"
|
|
87
|
+
],
|
|
88
|
+
"complexity": "medium",
|
|
89
|
+
"status": "pending",
|
|
90
|
+
"tags": [],
|
|
91
|
+
"dependencies": [
|
|
92
|
+
"RRP-001",
|
|
93
|
+
"RRP-002",
|
|
94
|
+
"RRP-003"
|
|
95
|
+
],
|
|
96
|
+
"escalations": [],
|
|
97
|
+
"attempts": 0,
|
|
98
|
+
"priorErrors": [],
|
|
99
|
+
"priorFailures": [],
|
|
100
|
+
"storyPoints": 1
|
|
101
|
+
}
|
|
102
|
+
],
|
|
103
|
+
"updatedAt": "2026-03-07T16:32:39.496Z"
|
|
104
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
[2026-03-07T16:32:39.495Z] RRP-001 — PASSED — Persist initial routing to prd.json on first classification — Cost: $0.5223
|