@nathapp/nax 0.49.1 → 0.49.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/README.md +280 -10
- package/dist/nax.js +43 -18
- package/package.json +1 -1
- package/src/config/test-strategy.ts +4 -4
- package/src/execution/iteration-runner.ts +1 -1
- package/src/execution/pipeline-result-handler.ts +4 -1
- package/src/execution/story-selector.ts +2 -1
- package/src/pipeline/stages/autofix.ts +26 -7
- package/src/pipeline/stages/routing.ts +1 -1
- package/src/review/runner.ts +15 -0
package/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,20 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [0.49.3] - 2026-03-18
|
|
9
|
+
|
|
10
|
+
### Fixed
|
|
11
|
+
- **Autofix `recheckReview` bug:** `reviewStage.execute()` returns `action:"continue"` for both pass AND built-in-check-failure (to hand off to autofix). Using `result.action === "continue"` always returned `true`, causing "Mechanical autofix succeeded" to log every cycle and looping until `MAX_STAGE_RETRIES` with no real fix. Fix: check `ctx.reviewResult?.success` directly after execute.
|
|
12
|
+
- **Autofix selective mechanical fix:** `lintFix`/`formatFix` cannot fix typecheck errors. Phase 1 now only runs when the `lint` check actually failed. Typecheck-only failures skip straight to agent rectification (Phase 2).
|
|
13
|
+
- **Review command logging:** `runner.ts` now logs the resolved command and workdir for every check at info level, and full output on failure at warn level — eliminates phantom failure mystery.
|
|
14
|
+
- **Re-decompose on second run:** Batch-mode story selector was missing `"decomposed"` in its status skip list (single-story path already excluded it). Stories with `status: "decomposed"` were being picked up again, triggering unnecessary LLM decompose calls. Added `"decomposed"` to batch filter and a guard in routing SD-004 block.
|
|
15
|
+
- **totalCost always 0:** `handlePipelineFailure` returned no `costDelta`; `iteration-runner` hardcoded `costDelta: 0` for failures. Agent cost for failed stories was silently dropped. Fix: extract `agentResult?.estimatedCost` in the failure path, the same as in the success path.
|
|
16
|
+
|
|
17
|
+
## [0.49.2] - 2026-03-18
|
|
18
|
+
|
|
19
|
+
### Fixed
|
|
20
|
+
- **Test strategy descriptions:** `TEST_STRATEGY_GUIDE` (used in plan and decompose prompts) had incorrect descriptions for `three-session-tdd` and `three-session-tdd-lite`. Both strategies use 3 sessions. Key distinction: `three-session-tdd` (strict) — test-writer makes no src/ changes, implementer makes no test changes; `three-session-tdd-lite` (lite) — test-writer may add minimal src/ stubs, implementer may expand coverage and replace stubs. Updated in `src/config/test-strategy.ts`, `docs/specs/test-strategy-ssot.md`, and `docs/architecture/ARCHITECTURE.md`.
|
|
21
|
+
|
|
8
22
|
## [0.49.1] - 2026-03-18
|
|
9
23
|
|
|
10
24
|
### Fixed
|
package/README.md
CHANGED
|
@@ -18,8 +18,16 @@ bun install -g @nathapp/nax
|
|
|
18
18
|
cd your-project
|
|
19
19
|
nax init
|
|
20
20
|
nax features create my-feature
|
|
21
|
-
|
|
21
|
+
|
|
22
|
+
# Option A: write prd.json manually, then run
|
|
23
|
+
nax run -f my-feature
|
|
24
|
+
|
|
25
|
+
# Option B: generate prd.json from a spec file, then run
|
|
26
|
+
nax plan -f my-feature --from spec.md
|
|
22
27
|
nax run -f my-feature
|
|
28
|
+
|
|
29
|
+
# Option C: plan + run in one command
|
|
30
|
+
nax run -f my-feature --plan --from spec.md
|
|
23
31
|
```
|
|
24
32
|
|
|
25
33
|
## How It Works
|
|
@@ -54,6 +62,14 @@ nax/
|
|
|
54
62
|
└── features/ # One folder per feature
|
|
55
63
|
```
|
|
56
64
|
|
|
65
|
+
**Monorepo — scaffold a package:**
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
nax init --package packages/api
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
Creates `packages/api/nax/context.md` for per-package agent context.
|
|
72
|
+
|
|
57
73
|
---
|
|
58
74
|
|
|
59
75
|
### `nax features create <name>`
|
|
@@ -76,20 +92,33 @@ nax features list
|
|
|
76
92
|
|
|
77
93
|
---
|
|
78
94
|
|
|
79
|
-
### `nax
|
|
95
|
+
### `nax plan -f <name> --from <spec>`
|
|
80
96
|
|
|
81
|
-
|
|
97
|
+
Generate a `prd.json` from a spec file using an LLM. Replaces the deprecated `nax analyze`.
|
|
82
98
|
|
|
83
99
|
```bash
|
|
84
|
-
nax
|
|
100
|
+
nax plan -f my-feature --from spec.md
|
|
85
101
|
```
|
|
86
102
|
|
|
87
103
|
**Flags:**
|
|
88
104
|
|
|
89
105
|
| Flag | Description |
|
|
90
106
|
|:-----|:------------|
|
|
91
|
-
|
|
|
92
|
-
| `--
|
|
107
|
+
| `-f, --feature <name>` | Feature name (required) |
|
|
108
|
+
| `--from <spec-path>` | Path to spec file (required) |
|
|
109
|
+
| `--auto` / `--one-shot` | Skip interactive Q&A — single LLM call, no back-and-forth |
|
|
110
|
+
| `-b, --branch <branch>` | Override default branch name |
|
|
111
|
+
| `-d, --dir <path>` | Project directory |
|
|
112
|
+
|
|
113
|
+
**Interactive vs one-shot:**
|
|
114
|
+
- Default (no flag): interactive planning session — nax asks clarifying questions, refines the plan iteratively
|
|
115
|
+
- `--auto` / `--one-shot`: single LLM call, faster but less precise
|
|
116
|
+
|
|
117
|
+
---
|
|
118
|
+
|
|
119
|
+
### `nax analyze` *(deprecated)*
|
|
120
|
+
|
|
121
|
+
> ⚠️ **Deprecated.** Use `nax plan` instead. `nax analyze` remains available for backward compatibility but will be removed in a future version.
|
|
93
122
|
|
|
94
123
|
---
|
|
95
124
|
|
|
@@ -105,10 +134,23 @@ nax run -f my-feature
|
|
|
105
134
|
|
|
106
135
|
| Flag | Description |
|
|
107
136
|
|:-----|:------------|
|
|
108
|
-
| `-f, --feature <name>` | Feature name
|
|
137
|
+
| `-f, --feature <name>` | Feature name |
|
|
138
|
+
| `-a, --agent <name>` | Force a specific agent (`claude`, `opencode`, `codex`, etc.) |
|
|
139
|
+
| `--plan` | Run plan phase first (requires `--from`) |
|
|
140
|
+
| `--from <spec-path>` | Spec file for `--plan` |
|
|
141
|
+
| `--one-shot` | Skip interactive Q&A during planning (ACP only) |
|
|
142
|
+
| `--force` | Overwrite existing `prd.json` when using `--plan` |
|
|
143
|
+
| `--parallel <n>` | Max parallel sessions (`0` = auto based on CPU cores; omit = sequential) |
|
|
109
144
|
| `--dry-run` | Preview story routing without running agents |
|
|
110
145
|
| `--headless` | Non-interactive output (structured logs, no TUI) |
|
|
111
|
-
|
|
|
146
|
+
| `--verbose` | Debug-level logging |
|
|
147
|
+
| `--quiet` | Warnings and errors only |
|
|
148
|
+
| `--silent` | Errors only |
|
|
149
|
+
| `--json` | Raw JSONL output to stdout (for scripting) |
|
|
150
|
+
| `--skip-precheck` | Skip precheck validations (advanced users only) |
|
|
151
|
+
| `--no-context` | Disable context builder (skip file context in prompts) |
|
|
152
|
+
| `--no-batch` | Execute all stories individually (disable batching) |
|
|
153
|
+
| `-d, --dir <path>` | Working directory |
|
|
112
154
|
|
|
113
155
|
**Examples:**
|
|
114
156
|
|
|
@@ -116,11 +158,23 @@ nax run -f my-feature
|
|
|
116
158
|
# Preview what would run (no agents spawned)
|
|
117
159
|
nax run -f user-auth --dry-run
|
|
118
160
|
|
|
119
|
-
#
|
|
120
|
-
nax run -f user-auth
|
|
161
|
+
# Plan from spec then run — one command
|
|
162
|
+
nax run -f user-auth --plan --from spec.md
|
|
163
|
+
|
|
164
|
+
# Run with parallel execution (auto concurrency)
|
|
165
|
+
nax run -f user-auth --parallel 0
|
|
166
|
+
|
|
167
|
+
# Run with up to 3 parallel worktree sessions
|
|
168
|
+
nax run -f user-auth --parallel 3
|
|
169
|
+
|
|
170
|
+
# Force a specific agent
|
|
171
|
+
nax run -f user-auth --agent opencode
|
|
121
172
|
|
|
122
173
|
# Run in CI/CD (structured output)
|
|
123
174
|
nax run -f user-auth --headless
|
|
175
|
+
|
|
176
|
+
# Raw JSONL for scripting
|
|
177
|
+
nax run -f user-auth --json
|
|
124
178
|
```
|
|
125
179
|
|
|
126
180
|
---
|
|
@@ -199,6 +253,58 @@ Output sections:
|
|
|
199
253
|
|
|
200
254
|
---
|
|
201
255
|
|
|
256
|
+
### `nax generate`
|
|
257
|
+
|
|
258
|
+
Generate agent config files from `nax/context.md`. Supports Claude Code, OpenCode, Codex, Cursor, Windsurf, Aider, and Gemini.
|
|
259
|
+
|
|
260
|
+
```bash
|
|
261
|
+
nax generate
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
**Flags:**
|
|
265
|
+
|
|
266
|
+
| Flag | Description |
|
|
267
|
+
|:-----|:------------|
|
|
268
|
+
| `-c, --context <path>` | Context file path (default: `nax/context.md`) |
|
|
269
|
+
| `-o, --output <dir>` | Output directory (default: project root) |
|
|
270
|
+
| `-a, --agent <name>` | Generate for a specific agent only (`claude`, `opencode`, `cursor`, `windsurf`, `aider`, `codex`, `gemini`) |
|
|
271
|
+
| `--dry-run` | Preview without writing files |
|
|
272
|
+
| `--no-auto-inject` | Disable auto-injection of project metadata |
|
|
273
|
+
| `--package <dir>` | Generate for a specific monorepo package (e.g. `packages/api`) |
|
|
274
|
+
| `--all-packages` | Generate for all discovered packages |
|
|
275
|
+
|
|
276
|
+
**What it generates:**
|
|
277
|
+
|
|
278
|
+
| Agent | File |
|
|
279
|
+
|:------|:-----|
|
|
280
|
+
| Claude Code | `CLAUDE.md` |
|
|
281
|
+
| OpenCode | `AGENTS.md` |
|
|
282
|
+
| Codex | `AGENTS.md` |
|
|
283
|
+
| Cursor | `.cursorrules` |
|
|
284
|
+
| Windsurf | `.windsurfrules` |
|
|
285
|
+
| Aider | `.aider.md` |
|
|
286
|
+
| Gemini | `GEMINI.md` |
|
|
287
|
+
|
|
288
|
+
**Workflow:**
|
|
289
|
+
|
|
290
|
+
1. Create `nax/context.md` — describe your project's architecture, conventions, and coding standards
|
|
291
|
+
2. Run `nax generate` — writes agent config files to the project root (and per-package if configured)
|
|
292
|
+
3. Commit the generated files — your agents will automatically pick them up
|
|
293
|
+
|
|
294
|
+
**Monorepo (per-package):**
|
|
295
|
+
|
|
296
|
+
```bash
|
|
297
|
+
# Generate CLAUDE.md for a single package
|
|
298
|
+
nax generate --package packages/api
|
|
299
|
+
|
|
300
|
+
# Generate for all packages (auto-discovers workspace packages)
|
|
301
|
+
nax generate --all-packages
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
Each package can have its own `nax/context.md` at `<package>/nax/context.md` for package-specific agent instructions.
|
|
305
|
+
|
|
306
|
+
---
|
|
307
|
+
|
|
202
308
|
### `nax prompts -f <name>`
|
|
203
309
|
|
|
204
310
|
Assemble and display the prompt that would be sent to the agent for each story role.
|
|
@@ -439,6 +545,170 @@ If the regression gate detects failures, nax maps them to the responsible story
|
|
|
439
545
|
|
|
440
546
|
---
|
|
441
547
|
|
|
548
|
+
## Parallel Execution
|
|
549
|
+
|
|
550
|
+
nax can run multiple stories concurrently using git worktrees — each story gets an isolated worktree so agents don't step on each other.
|
|
551
|
+
|
|
552
|
+
```bash
|
|
553
|
+
# Auto concurrency (based on CPU cores)
|
|
554
|
+
nax run -f my-feature --parallel 0
|
|
555
|
+
|
|
556
|
+
# Fixed concurrency
|
|
557
|
+
nax run -f my-feature --parallel 3
|
|
558
|
+
```
|
|
559
|
+
|
|
560
|
+
**How it works:**
|
|
561
|
+
|
|
562
|
+
1. Stories are grouped by dependency order (dependent stories wait for their prerequisites)
|
|
563
|
+
2. Each batch of independent stories gets its own git worktree
|
|
564
|
+
3. Agent sessions run concurrently inside those worktrees
|
|
565
|
+
4. Once a batch completes, changes are merged back in dependency order
|
|
566
|
+
5. Merge conflicts are automatically rectified by re-running the conflicted story on the updated base
|
|
567
|
+
|
|
568
|
+
**Config:**
|
|
569
|
+
|
|
570
|
+
```json
|
|
571
|
+
{
|
|
572
|
+
"execution": {
|
|
573
|
+
"maxParallelSessions": 4
|
|
574
|
+
}
|
|
575
|
+
}
|
|
576
|
+
```
|
|
577
|
+
|
|
578
|
+
> Sequential mode (no `--parallel`) is the safe default. Use parallel for large feature sets with independent stories.
|
|
579
|
+
|
|
580
|
+
---
|
|
581
|
+
|
|
582
|
+
## Agents
|
|
583
|
+
|
|
584
|
+
nax supports multiple coding agents. By default it uses Claude Code via the ACP protocol.
|
|
585
|
+
|
|
586
|
+
```bash
|
|
587
|
+
# List installed agents and their capabilities
|
|
588
|
+
nax agents
|
|
589
|
+
```
|
|
590
|
+
|
|
591
|
+
**Supported agents:**
|
|
592
|
+
|
|
593
|
+
| Agent | Protocol | Notes |
|
|
594
|
+
|:------|:---------|:------|
|
|
595
|
+
| `claude` | ACP (default) | Claude Code via acpx |
|
|
596
|
+
| `opencode` | ACP | OpenCode via acpx |
|
|
597
|
+
| `codex` | ACP | Codex via acpx |
|
|
598
|
+
| `cursor` | ACP | Cursor via acpx |
|
|
599
|
+
| `windsurf` | ACP | Windsurf via acpx |
|
|
600
|
+
| `aider` | ACP | Aider via acpx |
|
|
601
|
+
| `gemini` | ACP | Gemini CLI via acpx |
|
|
602
|
+
|
|
603
|
+
**ACP protocol (default):**
|
|
604
|
+
|
|
605
|
+
nax uses [acpx](https://github.com/nathapp/acpx) as the ACP transport. All agents run as persistent sessions — nax sends prompts and receives structured JSON-RPC responses including token counts and exact USD cost per session.
|
|
606
|
+
|
|
607
|
+
**Configuring agents:**
|
|
608
|
+
|
|
609
|
+
```json
|
|
610
|
+
{
|
|
611
|
+
"execution": {
|
|
612
|
+
"defaultAgent": "claude",
|
|
613
|
+
"protocol": "acp",
|
|
614
|
+
"fallbackOrder": ["claude", "codex", "opencode", "gemini"]
|
|
615
|
+
}
|
|
616
|
+
}
|
|
617
|
+
```
|
|
618
|
+
|
|
619
|
+
**Force a specific agent at runtime:**
|
|
620
|
+
|
|
621
|
+
```bash
|
|
622
|
+
nax run -f my-feature --agent opencode
|
|
623
|
+
```
|
|
624
|
+
|
|
625
|
+
---
|
|
626
|
+
|
|
627
|
+
## Monorepo Support
|
|
628
|
+
|
|
629
|
+
nax supports monorepos with workspace-level and per-package configuration.
|
|
630
|
+
|
|
631
|
+
### Setup
|
|
632
|
+
|
|
633
|
+
```bash
|
|
634
|
+
# Initialize nax at the repo root
|
|
635
|
+
nax init
|
|
636
|
+
|
|
637
|
+
# Scaffold per-package context for a specific package
|
|
638
|
+
nax init --package packages/api
|
|
639
|
+
nax init --package packages/web
|
|
640
|
+
```
|
|
641
|
+
|
|
642
|
+
### Per-Package Config
|
|
643
|
+
|
|
644
|
+
Each package can override specific config fields by placing a `nax/config.json` inside the package directory:
|
|
645
|
+
|
|
646
|
+
```
|
|
647
|
+
repo-root/
|
|
648
|
+
├── nax/
|
|
649
|
+
│ └── config.json # root config
|
|
650
|
+
├── packages/
|
|
651
|
+
│ ├── api/
|
|
652
|
+
│ │ └── nax/
|
|
653
|
+
│ │ ├── config.json # overrides for api package
|
|
654
|
+
│ │ └── context.md # agent context for api
|
|
655
|
+
│ └── web/
|
|
656
|
+
│ └── nax/
|
|
657
|
+
│ ├── config.json # overrides for web package
|
|
658
|
+
│ └── context.md # agent context for web
|
|
659
|
+
```
|
|
660
|
+
|
|
661
|
+
**Overridable fields per package:** `execution`, `review`, `acceptance`, `quality`, `context`
|
|
662
|
+
|
|
663
|
+
```jsonc
|
|
664
|
+
// packages/api/nax/config.json
|
|
665
|
+
{
|
|
666
|
+
"quality": {
|
|
667
|
+
"commands": {
|
|
668
|
+
"test": "turbo test --filter=@myapp/api",
|
|
669
|
+
"lint": "turbo lint --filter=@myapp/api"
|
|
670
|
+
}
|
|
671
|
+
}
|
|
672
|
+
}
|
|
673
|
+
```
|
|
674
|
+
|
|
675
|
+
### Per-Package Stories
|
|
676
|
+
|
|
677
|
+
In your `prd.json`, set `workdir` on each story to point to the package:
|
|
678
|
+
|
|
679
|
+
```json
|
|
680
|
+
{
|
|
681
|
+
"userStories": [
|
|
682
|
+
{
|
|
683
|
+
"id": "US-001",
|
|
684
|
+
"title": "Add auth endpoint",
|
|
685
|
+
"workdir": "packages/api",
|
|
686
|
+
"status": "pending"
|
|
687
|
+
}
|
|
688
|
+
]
|
|
689
|
+
}
|
|
690
|
+
```
|
|
691
|
+
|
|
692
|
+
nax will run the agent inside that package's directory and apply its config overrides automatically.
|
|
693
|
+
|
|
694
|
+
### Workspace Detection
|
|
695
|
+
|
|
696
|
+
When `nax plan` generates stories for a monorepo, it auto-discovers packages from:
|
|
697
|
+
- `turbo.json` → `packages` field
|
|
698
|
+
- `package.json` → `workspaces`
|
|
699
|
+
- `pnpm-workspace.yaml` → `packages`
|
|
700
|
+
- Existing `*/nax/context.md` files
|
|
701
|
+
|
|
702
|
+
### Generate Agent Files for All Packages
|
|
703
|
+
|
|
704
|
+
```bash
|
|
705
|
+
nax generate --all-packages
|
|
706
|
+
```
|
|
707
|
+
|
|
708
|
+
Generates a `CLAUDE.md` (or agent-specific file) in each discovered package directory, using the package's own `nax/context.md` if present.
|
|
709
|
+
|
|
710
|
+
---
|
|
711
|
+
|
|
442
712
|
## Hooks
|
|
443
713
|
|
|
444
714
|
Integrate notifications, CI triggers, or custom scripts via lifecycle hooks.
|
package/dist/nax.js
CHANGED
|
@@ -3267,10 +3267,10 @@ Security-critical functions (authentication, cryptography, tokens, sessions, cre
|
|
|
3267
3267
|
password hashing, access control) must be classified at MINIMUM "medium" complexity
|
|
3268
3268
|
regardless of LOC count. These require at minimum "tdd-simple" test strategy.`, TEST_STRATEGY_GUIDE = `## Test Strategy Guide
|
|
3269
3269
|
|
|
3270
|
-
- test-after: Simple changes with well-understood behavior. Write tests after implementation.
|
|
3271
|
-
- tdd-simple: Medium complexity. Write
|
|
3272
|
-
- three-session-tdd: Complex stories.
|
|
3273
|
-
- three-session-tdd-lite: Expert/high-risk stories.
|
|
3270
|
+
- test-after: Simple changes with well-understood behavior. Write tests after implementation in a single session.
|
|
3271
|
+
- tdd-simple: Medium complexity. Write failing tests first, then implement to pass them \u2014 all in one session.
|
|
3272
|
+
- three-session-tdd: Complex stories. 3 sessions: (1) test-writer writes failing tests \u2014 no src/ changes allowed, (2) implementer makes them pass without modifying test files, (3) verifier confirms correctness.
|
|
3273
|
+
- three-session-tdd-lite: Expert/high-risk stories. 3 sessions: (1) test-writer writes failing tests and may create minimal src/ stubs for imports, (2) implementer makes tests pass and may add missing coverage or replace stubs, (3) verifier confirms correctness.`, GROUPING_RULES = `## Grouping Rules
|
|
3274
3274
|
|
|
3275
3275
|
- Combine small, related tasks into a single "simple" or "medium" story.
|
|
3276
3276
|
- Do NOT create separate stories for every single file or function unless complex.
|
|
@@ -22250,7 +22250,7 @@ var package_default;
|
|
|
22250
22250
|
var init_package = __esm(() => {
|
|
22251
22251
|
package_default = {
|
|
22252
22252
|
name: "@nathapp/nax",
|
|
22253
|
-
version: "0.49.
|
|
22253
|
+
version: "0.49.3",
|
|
22254
22254
|
description: "AI Coding Agent Orchestrator \u2014 loops until done",
|
|
22255
22255
|
type: "module",
|
|
22256
22256
|
bin: {
|
|
@@ -22323,8 +22323,8 @@ var init_version = __esm(() => {
|
|
|
22323
22323
|
NAX_VERSION = package_default.version;
|
|
22324
22324
|
NAX_COMMIT = (() => {
|
|
22325
22325
|
try {
|
|
22326
|
-
if (/^[0-9a-f]{6,10}$/.test("
|
|
22327
|
-
return "
|
|
22326
|
+
if (/^[0-9a-f]{6,10}$/.test("30ff375"))
|
|
22327
|
+
return "30ff375";
|
|
22328
22328
|
} catch {}
|
|
22329
22329
|
try {
|
|
22330
22330
|
const result = Bun.spawnSync(["git", "rev-parse", "--short", "HEAD"], {
|
|
@@ -24357,6 +24357,8 @@ async function resolveCommand(check2, config2, executionConfig, workdir) {
|
|
|
24357
24357
|
}
|
|
24358
24358
|
async function runCheck(check2, command, workdir) {
|
|
24359
24359
|
const startTime = Date.now();
|
|
24360
|
+
const logger = getSafeLogger();
|
|
24361
|
+
logger?.info("review", `Running ${check2} check`, { check: check2, command, workdir });
|
|
24360
24362
|
try {
|
|
24361
24363
|
const parts = command.split(/\s+/);
|
|
24362
24364
|
const executable = parts[0];
|
|
@@ -24395,6 +24397,17 @@ async function runCheck(check2, command, workdir) {
|
|
|
24395
24397
|
const stderr = await new Response(proc.stderr).text();
|
|
24396
24398
|
const output = [stdout, stderr].filter(Boolean).join(`
|
|
24397
24399
|
`);
|
|
24400
|
+
if (exitCode !== 0) {
|
|
24401
|
+
logger?.warn("review", `${check2} check failed`, {
|
|
24402
|
+
check: check2,
|
|
24403
|
+
command,
|
|
24404
|
+
workdir,
|
|
24405
|
+
exitCode,
|
|
24406
|
+
output: output.slice(0, 2000)
|
|
24407
|
+
});
|
|
24408
|
+
} else {
|
|
24409
|
+
logger?.debug("review", `${check2} check passed`, { check: check2, command, durationMs: Date.now() - startTime });
|
|
24410
|
+
}
|
|
24398
24411
|
return {
|
|
24399
24412
|
check: check2,
|
|
24400
24413
|
command,
|
|
@@ -24680,8 +24693,8 @@ async function recheckReview(ctx) {
|
|
|
24680
24693
|
const { reviewStage: reviewStage2 } = await Promise.resolve().then(() => (init_review(), exports_review));
|
|
24681
24694
|
if (!reviewStage2.enabled(ctx))
|
|
24682
24695
|
return true;
|
|
24683
|
-
|
|
24684
|
-
return
|
|
24696
|
+
await reviewStage2.execute(ctx);
|
|
24697
|
+
return ctx.reviewResult?.success === true;
|
|
24685
24698
|
}
|
|
24686
24699
|
function collectFailedChecks(ctx) {
|
|
24687
24700
|
return (ctx.reviewResult?.checks ?? []).filter((c) => !c.success);
|
|
@@ -24793,11 +24806,18 @@ var init_autofix = __esm(() => {
|
|
|
24793
24806
|
const lintFixCmd = effectiveConfig.quality.commands.lintFix;
|
|
24794
24807
|
const formatFixCmd = effectiveConfig.quality.commands.formatFix;
|
|
24795
24808
|
const effectiveWorkdir = ctx.story.workdir ? join18(ctx.workdir, ctx.story.workdir) : ctx.workdir;
|
|
24796
|
-
|
|
24809
|
+
const failedCheckNames = new Set((reviewResult.checks ?? []).filter((c) => !c.success).map((c) => c.check));
|
|
24810
|
+
const hasLintFailure = failedCheckNames.has("lint");
|
|
24811
|
+
logger.info("autofix", "Starting autofix", {
|
|
24812
|
+
storyId: ctx.story.id,
|
|
24813
|
+
failedChecks: [...failedCheckNames],
|
|
24814
|
+
workdir: effectiveWorkdir
|
|
24815
|
+
});
|
|
24816
|
+
if (hasLintFailure && (lintFixCmd || formatFixCmd)) {
|
|
24797
24817
|
if (lintFixCmd) {
|
|
24798
24818
|
pipelineEventBus.emit({ type: "autofix:started", storyId: ctx.story.id, command: lintFixCmd });
|
|
24799
24819
|
const lintResult = await _autofixDeps.runCommand(lintFixCmd, effectiveWorkdir);
|
|
24800
|
-
logger.debug("autofix", `lintFix exit=${lintResult.exitCode}`, { storyId: ctx.story.id });
|
|
24820
|
+
logger.debug("autofix", `lintFix exit=${lintResult.exitCode}`, { storyId: ctx.story.id, command: lintFixCmd });
|
|
24801
24821
|
if (lintResult.exitCode !== 0) {
|
|
24802
24822
|
logger.warn("autofix", "lintFix command failed \u2014 may not have fixed all issues", {
|
|
24803
24823
|
storyId: ctx.story.id,
|
|
@@ -24808,7 +24828,10 @@ var init_autofix = __esm(() => {
|
|
|
24808
24828
|
if (formatFixCmd) {
|
|
24809
24829
|
pipelineEventBus.emit({ type: "autofix:started", storyId: ctx.story.id, command: formatFixCmd });
|
|
24810
24830
|
const fmtResult = await _autofixDeps.runCommand(formatFixCmd, effectiveWorkdir);
|
|
24811
|
-
logger.debug("autofix", `formatFix exit=${fmtResult.exitCode}`, {
|
|
24831
|
+
logger.debug("autofix", `formatFix exit=${fmtResult.exitCode}`, {
|
|
24832
|
+
storyId: ctx.story.id,
|
|
24833
|
+
command: formatFixCmd
|
|
24834
|
+
});
|
|
24812
24835
|
if (fmtResult.exitCode !== 0) {
|
|
24813
24836
|
logger.warn("autofix", "formatFix command failed \u2014 may not have fixed all issues", {
|
|
24814
24837
|
storyId: ctx.story.id,
|
|
@@ -24819,11 +24842,12 @@ var init_autofix = __esm(() => {
|
|
|
24819
24842
|
const recheckPassed = await _autofixDeps.recheckReview(ctx);
|
|
24820
24843
|
pipelineEventBus.emit({ type: "autofix:completed", storyId: ctx.story.id, fixed: recheckPassed });
|
|
24821
24844
|
if (recheckPassed) {
|
|
24822
|
-
if (ctx.reviewResult)
|
|
24823
|
-
ctx.reviewResult = { ...ctx.reviewResult, success: true };
|
|
24824
24845
|
logger.info("autofix", "Mechanical autofix succeeded \u2014 retrying review", { storyId: ctx.story.id });
|
|
24825
24846
|
return { action: "retry", fromStage: "review" };
|
|
24826
24847
|
}
|
|
24848
|
+
logger.info("autofix", "Mechanical autofix did not resolve all failures \u2014 proceeding to agent rectification", {
|
|
24849
|
+
storyId: ctx.story.id
|
|
24850
|
+
});
|
|
24827
24851
|
}
|
|
24828
24852
|
const agentFixed = await _autofixDeps.runAgentRectification(ctx);
|
|
24829
24853
|
if (agentFixed) {
|
|
@@ -29553,7 +29577,7 @@ var init_routing2 = __esm(() => {
|
|
|
29553
29577
|
logger.debug("routing", ctx.routing.reasoning);
|
|
29554
29578
|
}
|
|
29555
29579
|
const decomposeConfig = ctx.config.decompose;
|
|
29556
|
-
if (decomposeConfig) {
|
|
29580
|
+
if (decomposeConfig && ctx.story.status !== "decomposed") {
|
|
29557
29581
|
const acCount = ctx.story.acceptanceCriteria.length;
|
|
29558
29582
|
const complexity = ctx.routing.complexity;
|
|
29559
29583
|
const isOversized = acCount > decomposeConfig.maxAcceptanceCriteria && (complexity === "complex" || complexity === "expert");
|
|
@@ -34256,6 +34280,7 @@ async function handlePipelineFailure(ctx, pipelineResult) {
|
|
|
34256
34280
|
const logger = getSafeLogger();
|
|
34257
34281
|
let prd = ctx.prd;
|
|
34258
34282
|
let prdDirty = false;
|
|
34283
|
+
const costDelta = pipelineResult.context.agentResult?.estimatedCost || 0;
|
|
34259
34284
|
switch (pipelineResult.finalAction) {
|
|
34260
34285
|
case "pause":
|
|
34261
34286
|
markStoryPaused(prd, ctx.story.id);
|
|
@@ -34322,7 +34347,7 @@ async function handlePipelineFailure(ctx, pipelineResult) {
|
|
|
34322
34347
|
break;
|
|
34323
34348
|
}
|
|
34324
34349
|
}
|
|
34325
|
-
return { prd, prdDirty };
|
|
34350
|
+
return { prd, prdDirty, costDelta };
|
|
34326
34351
|
}
|
|
34327
34352
|
var init_pipeline_result_handler = __esm(() => {
|
|
34328
34353
|
init_logger2();
|
|
@@ -34427,7 +34452,7 @@ async function runIteration(ctx, prd, selection, iterations, totalCost, allStory
|
|
|
34427
34452
|
return {
|
|
34428
34453
|
prd: r.prd,
|
|
34429
34454
|
storiesCompletedDelta: 0,
|
|
34430
|
-
costDelta:
|
|
34455
|
+
costDelta: r.costDelta,
|
|
34431
34456
|
prdDirty: r.prdDirty,
|
|
34432
34457
|
finalAction: pipelineResult.finalAction,
|
|
34433
34458
|
reason: pipelineResult.reason
|
|
@@ -34465,7 +34490,7 @@ function buildPreviewRouting(story, config2) {
|
|
|
34465
34490
|
function selectNextStories(prd, config2, batchPlan, currentBatchIndex, lastStoryId, useBatch) {
|
|
34466
34491
|
if (useBatch && currentBatchIndex < batchPlan.length) {
|
|
34467
34492
|
const batch = batchPlan[currentBatchIndex];
|
|
34468
|
-
const storiesToExecute = batch.stories.filter((s) => !s.passes && s.status !== "passed" && s.status !== "skipped" && s.status !== "blocked" && s.status !== "failed" && s.status !== "paused");
|
|
34493
|
+
const storiesToExecute = batch.stories.filter((s) => !s.passes && s.status !== "passed" && s.status !== "skipped" && s.status !== "blocked" && s.status !== "failed" && s.status !== "paused" && s.status !== "decomposed");
|
|
34469
34494
|
if (storiesToExecute.length === 0) {
|
|
34470
34495
|
return { selection: null, nextBatchIndex: currentBatchIndex + 1 };
|
|
34471
34496
|
}
|
package/package.json
CHANGED
|
@@ -53,10 +53,10 @@ regardless of LOC count. These require at minimum "tdd-simple" test strategy.`;
|
|
|
53
53
|
|
|
54
54
|
export const TEST_STRATEGY_GUIDE = `## Test Strategy Guide
|
|
55
55
|
|
|
56
|
-
- test-after: Simple changes with well-understood behavior. Write tests after implementation.
|
|
57
|
-
- tdd-simple: Medium complexity. Write
|
|
58
|
-
- three-session-tdd: Complex stories.
|
|
59
|
-
- three-session-tdd-lite: Expert/high-risk stories.
|
|
56
|
+
- test-after: Simple changes with well-understood behavior. Write tests after implementation in a single session.
|
|
57
|
+
- tdd-simple: Medium complexity. Write failing tests first, then implement to pass them — all in one session.
|
|
58
|
+
- three-session-tdd: Complex stories. 3 sessions: (1) test-writer writes failing tests — no src/ changes allowed, (2) implementer makes them pass without modifying test files, (3) verifier confirms correctness.
|
|
59
|
+
- three-session-tdd-lite: Expert/high-risk stories. 3 sessions: (1) test-writer writes failing tests and may create minimal src/ stubs for imports, (2) implementer makes tests pass and may add missing coverage or replace stubs, (3) verifier confirms correctness.`;
|
|
60
60
|
|
|
61
61
|
export const GROUPING_RULES = `## Grouping Rules
|
|
62
62
|
|
|
@@ -102,6 +102,7 @@ export async function handlePipelineSuccess(
|
|
|
102
102
|
export interface PipelineFailureResult {
|
|
103
103
|
prd: PRD;
|
|
104
104
|
prdDirty: boolean;
|
|
105
|
+
costDelta: number;
|
|
105
106
|
}
|
|
106
107
|
|
|
107
108
|
export async function handlePipelineFailure(
|
|
@@ -111,6 +112,8 @@ export async function handlePipelineFailure(
|
|
|
111
112
|
const logger = getSafeLogger();
|
|
112
113
|
let prd = ctx.prd;
|
|
113
114
|
let prdDirty = false;
|
|
115
|
+
// Always capture cost even for failed stories — agent ran and spent tokens
|
|
116
|
+
const costDelta = pipelineResult.context.agentResult?.estimatedCost || 0;
|
|
114
117
|
|
|
115
118
|
switch (pipelineResult.finalAction) {
|
|
116
119
|
case "pause":
|
|
@@ -185,5 +188,5 @@ export async function handlePipelineFailure(
|
|
|
185
188
|
}
|
|
186
189
|
}
|
|
187
190
|
|
|
188
|
-
return { prd, prdDirty };
|
|
191
|
+
return { prd, prdDirty, costDelta };
|
|
189
192
|
}
|
|
@@ -61,12 +61,22 @@ export const autofixStage: PipelineStage = {
|
|
|
61
61
|
// Effective workdir for running commands (scoped to package if monorepo)
|
|
62
62
|
const effectiveWorkdir = ctx.story.workdir ? join(ctx.workdir, ctx.story.workdir) : ctx.workdir;
|
|
63
63
|
|
|
64
|
-
//
|
|
65
|
-
|
|
64
|
+
// Identify which checks failed
|
|
65
|
+
const failedCheckNames = new Set((reviewResult.checks ?? []).filter((c) => !c.success).map((c) => c.check));
|
|
66
|
+
const hasLintFailure = failedCheckNames.has("lint");
|
|
67
|
+
|
|
68
|
+
logger.info("autofix", "Starting autofix", {
|
|
69
|
+
storyId: ctx.story.id,
|
|
70
|
+
failedChecks: [...failedCheckNames],
|
|
71
|
+
workdir: effectiveWorkdir,
|
|
72
|
+
});
|
|
73
|
+
|
|
74
|
+
// Phase 1: Mechanical fix — only for lint failures (lintFix/formatFix cannot fix typecheck errors)
|
|
75
|
+
if (hasLintFailure && (lintFixCmd || formatFixCmd)) {
|
|
66
76
|
if (lintFixCmd) {
|
|
67
77
|
pipelineEventBus.emit({ type: "autofix:started", storyId: ctx.story.id, command: lintFixCmd });
|
|
68
78
|
const lintResult = await _autofixDeps.runCommand(lintFixCmd, effectiveWorkdir);
|
|
69
|
-
logger.debug("autofix", `lintFix exit=${lintResult.exitCode}`, { storyId: ctx.story.id });
|
|
79
|
+
logger.debug("autofix", `lintFix exit=${lintResult.exitCode}`, { storyId: ctx.story.id, command: lintFixCmd });
|
|
70
80
|
if (lintResult.exitCode !== 0) {
|
|
71
81
|
logger.warn("autofix", "lintFix command failed — may not have fixed all issues", {
|
|
72
82
|
storyId: ctx.story.id,
|
|
@@ -78,7 +88,10 @@ export const autofixStage: PipelineStage = {
|
|
|
78
88
|
if (formatFixCmd) {
|
|
79
89
|
pipelineEventBus.emit({ type: "autofix:started", storyId: ctx.story.id, command: formatFixCmd });
|
|
80
90
|
const fmtResult = await _autofixDeps.runCommand(formatFixCmd, effectiveWorkdir);
|
|
81
|
-
logger.debug("autofix", `formatFix exit=${fmtResult.exitCode}`, {
|
|
91
|
+
logger.debug("autofix", `formatFix exit=${fmtResult.exitCode}`, {
|
|
92
|
+
storyId: ctx.story.id,
|
|
93
|
+
command: formatFixCmd,
|
|
94
|
+
});
|
|
82
95
|
if (fmtResult.exitCode !== 0) {
|
|
83
96
|
logger.warn("autofix", "formatFix command failed — may not have fixed all issues", {
|
|
84
97
|
storyId: ctx.story.id,
|
|
@@ -91,10 +104,13 @@ export const autofixStage: PipelineStage = {
|
|
|
91
104
|
pipelineEventBus.emit({ type: "autofix:completed", storyId: ctx.story.id, fixed: recheckPassed });
|
|
92
105
|
|
|
93
106
|
if (recheckPassed) {
|
|
94
|
-
if (ctx.reviewResult) ctx.reviewResult = { ...ctx.reviewResult, success: true };
|
|
95
107
|
logger.info("autofix", "Mechanical autofix succeeded — retrying review", { storyId: ctx.story.id });
|
|
96
108
|
return { action: "retry", fromStage: "review" };
|
|
97
109
|
}
|
|
110
|
+
|
|
111
|
+
logger.info("autofix", "Mechanical autofix did not resolve all failures — proceeding to agent rectification", {
|
|
112
|
+
storyId: ctx.story.id,
|
|
113
|
+
});
|
|
98
114
|
}
|
|
99
115
|
|
|
100
116
|
// Phase 2: Agent rectification — spawn agent with review error context
|
|
@@ -134,8 +150,11 @@ async function recheckReview(ctx: PipelineContext): Promise<boolean> {
|
|
|
134
150
|
// Import reviewStage lazily to avoid circular deps
|
|
135
151
|
const { reviewStage } = await import("./review");
|
|
136
152
|
if (!reviewStage.enabled(ctx)) return true;
|
|
137
|
-
|
|
138
|
-
|
|
153
|
+
// reviewStage.execute updates ctx.reviewResult in place.
|
|
154
|
+
// We cannot use result.action here because review returns "continue" for BOTH
|
|
155
|
+
// pass and built-in-check-failure (to hand off to autofix). Check success directly.
|
|
156
|
+
await reviewStage.execute(ctx);
|
|
157
|
+
return ctx.reviewResult?.success === true;
|
|
139
158
|
}
|
|
140
159
|
|
|
141
160
|
function collectFailedChecks(ctx: PipelineContext): ReviewCheckResult[] {
|
|
@@ -196,7 +196,7 @@ export const routingStage: PipelineStage = {
|
|
|
196
196
|
|
|
197
197
|
// SD-004: Oversized story detection and decomposition
|
|
198
198
|
const decomposeConfig = ctx.config.decompose;
|
|
199
|
-
if (decomposeConfig) {
|
|
199
|
+
if (decomposeConfig && ctx.story.status !== "decomposed") {
|
|
200
200
|
const acCount = ctx.story.acceptanceCriteria.length;
|
|
201
201
|
const complexity = ctx.routing.complexity;
|
|
202
202
|
const isOversized =
|
package/src/review/runner.ts
CHANGED
|
@@ -99,6 +99,9 @@ const SIGKILL_GRACE_PERIOD_MS = 5_000;
|
|
|
99
99
|
*/
|
|
100
100
|
async function runCheck(check: ReviewCheckName, command: string, workdir: string): Promise<ReviewCheckResult> {
|
|
101
101
|
const startTime = Date.now();
|
|
102
|
+
const logger = getSafeLogger();
|
|
103
|
+
|
|
104
|
+
logger?.info("review", `Running ${check} check`, { check, command, workdir });
|
|
102
105
|
|
|
103
106
|
try {
|
|
104
107
|
// Parse command into executable and args
|
|
@@ -152,6 +155,18 @@ async function runCheck(check: ReviewCheckName, command: string, workdir: string
|
|
|
152
155
|
const stderr = await new Response(proc.stderr).text();
|
|
153
156
|
const output = [stdout, stderr].filter(Boolean).join("\n");
|
|
154
157
|
|
|
158
|
+
if (exitCode !== 0) {
|
|
159
|
+
logger?.warn("review", `${check} check failed`, {
|
|
160
|
+
check,
|
|
161
|
+
command,
|
|
162
|
+
workdir,
|
|
163
|
+
exitCode,
|
|
164
|
+
output: output.slice(0, 2000),
|
|
165
|
+
});
|
|
166
|
+
} else {
|
|
167
|
+
logger?.debug("review", `${check} check passed`, { check, command, durationMs: Date.now() - startTime });
|
|
168
|
+
}
|
|
169
|
+
|
|
155
170
|
return {
|
|
156
171
|
check,
|
|
157
172
|
command,
|