@nathapp/nax 0.49.1 → 0.49.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -5,6 +5,20 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [0.49.3] - 2026-03-18
9
+
10
+ ### Fixed
11
+ - **Autofix `recheckReview` bug:** `reviewStage.execute()` returns `action:"continue"` for both pass AND built-in-check-failure (to hand off to autofix). Using `result.action === "continue"` always returned `true`, causing "Mechanical autofix succeeded" to log every cycle and looping until `MAX_STAGE_RETRIES` with no real fix. Fix: check `ctx.reviewResult?.success` directly after execute.
12
+ - **Autofix selective mechanical fix:** `lintFix`/`formatFix` cannot fix typecheck errors. Phase 1 now only runs when the `lint` check actually failed. Typecheck-only failures skip straight to agent rectification (Phase 2).
13
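+ - **Review command logging:** `runner.ts` now logs the resolved command and workdir for every check at info level, and the check's output (truncated to the first 2,000 characters) together with the exit code at warn level on failure — so previously unexplained ("phantom") check failures can be traced to the exact command that was run.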
14
+ - **Re-decompose on second run:** Batch-mode story selector was missing `"decomposed"` in its status skip list (single-story path already excluded it). Stories with `status: "decomposed"` were being picked up again, triggering unnecessary LLM decompose calls. Added `"decomposed"` to the batch filter and a guard in the routing stage's SD-004 (oversized-story decomposition) block.
15
+ - **totalCost always 0:** `handlePipelineFailure` returned no `costDelta`; `iteration-runner` hardcoded `costDelta: 0` for failures. Agent cost for failed stories was silently dropped. Fix: extract `agentResult?.estimatedCost` in the failure path, same as the success path, and return it as `costDelta`.
16
+
17
+ ## [0.49.2] - 2026-03-18
18
+
19
+ ### Fixed
20
+ - **Test strategy descriptions:** `TEST_STRATEGY_GUIDE` (used in plan and decompose prompts) had incorrect descriptions for `three-session-tdd` and `three-session-tdd-lite`. Both strategies use 3 sessions. Key distinction: `three-session-tdd` (strict) — test-writer makes no src/ changes, implementer makes no test changes; `three-session-tdd-lite` (lite) — test-writer may add minimal src/ stubs, implementer may expand coverage and replace stubs. Updated in `src/config/test-strategy.ts`, `docs/specs/test-strategy-ssot.md`, and `docs/architecture/ARCHITECTURE.md`.
21
+
8
22
  ## [0.49.1] - 2026-03-18
9
23
 
10
24
  ### Fixed
package/README.md CHANGED
@@ -18,8 +18,16 @@ bun install -g @nathapp/nax
18
18
  cd your-project
19
19
  nax init
20
20
  nax features create my-feature
21
- # Edit nax/features/my-feature/prd.json with your user stories
21
+
22
+ # Option A: write prd.json manually, then run
23
+ nax run -f my-feature
24
+
25
+ # Option B: generate prd.json from a spec file, then run
26
+ nax plan -f my-feature --from spec.md
22
27
  nax run -f my-feature
28
+
29
+ # Option C: plan + run in one command
30
+ nax run -f my-feature --plan --from spec.md
23
31
  ```
24
32
 
25
33
  ## How It Works
@@ -54,6 +62,14 @@ nax/
54
62
  └── features/ # One folder per feature
55
63
  ```
56
64
 
65
+ **Monorepo — scaffold a package:**
66
+
67
+ ```bash
68
+ nax init --package packages/api
69
+ ```
70
+
71
+ Creates `packages/api/nax/context.md` for per-package agent context.
72
+
57
73
  ---
58
74
 
59
75
  ### `nax features create <name>`
@@ -76,20 +92,33 @@ nax features list
76
92
 
77
93
  ---
78
94
 
79
- ### `nax analyze -f <name>`
95
+ ### `nax plan -f <name> --from <spec>`
80
96
 
81
- Parse a `spec.md` file into a structured `prd.json`. Uses an LLM to decompose the spec into classified user stories.
97
+ Generate a `prd.json` from a spec file using an LLM. Replaces the deprecated `nax analyze`.
82
98
 
83
99
  ```bash
84
- nax analyze -f my-feature
100
+ nax plan -f my-feature --from spec.md
85
101
  ```
86
102
 
87
103
  **Flags:**
88
104
 
89
105
  | Flag | Description |
90
106
  |:-----|:------------|
91
- | `--from <path>` | Explicit spec path (overrides default `spec.md`) |
92
- | `--reclassify` | Re-classify existing `prd.json` without re-decomposing |
107
+ | `-f, --feature <name>` | Feature name (required) |
108
+ | `--from <spec-path>` | Path to spec file (required) |
109
+ | `--auto` / `--one-shot` | Skip interactive Q&A — single LLM call, no back-and-forth |
110
+ | `-b, --branch <branch>` | Override default branch name |
111
+ | `-d, --dir <path>` | Project directory |
112
+
113
+ **Interactive vs one-shot:**
114
+ - Default (no flag): interactive planning session — nax asks clarifying questions, refines the plan iteratively
115
+ - `--auto` / `--one-shot`: single LLM call, faster but less precise
116
+
117
+ ---
118
+
119
+ ### `nax analyze` *(deprecated)*
120
+
121
+ > ⚠️ **Deprecated.** Use `nax plan` instead. `nax analyze` remains available for backward compatibility but will be removed in a future version.
93
122
 
94
123
  ---
95
124
 
@@ -105,10 +134,23 @@ nax run -f my-feature
105
134
 
106
135
  | Flag | Description |
107
136
  |:-----|:------------|
108
- | `-f, --feature <name>` | Feature name (required) |
137
+ | `-f, --feature <name>` | Feature name |
138
+ | `-a, --agent <name>` | Force a specific agent (`claude`, `opencode`, `codex`, etc.) |
139
+ | `--plan` | Run plan phase first (requires `--from`) |
140
+ | `--from <spec-path>` | Spec file for `--plan` |
141
+ | `--one-shot` | Skip interactive Q&A during planning (ACP only) |
142
+ | `--force` | Overwrite existing `prd.json` when using `--plan` |
143
+ | `--parallel <n>` | Max parallel sessions (`0` = auto based on CPU cores; omit = sequential) |
109
144
  | `--dry-run` | Preview story routing without running agents |
110
145
  | `--headless` | Non-interactive output (structured logs, no TUI) |
111
- | `-d, --dir <path>` | Project directory (defaults to `cwd`) |
146
+ | `--verbose` | Debug-level logging |
147
+ | `--quiet` | Warnings and errors only |
148
+ | `--silent` | Errors only |
149
+ | `--json` | Raw JSONL output to stdout (for scripting) |
150
+ | `--skip-precheck` | Skip precheck validations (advanced users only) |
151
+ | `--no-context` | Disable context builder (skip file context in prompts) |
152
+ | `--no-batch` | Execute all stories individually (disable batching) |
153
+ | `-d, --dir <path>` | Working directory |
112
154
 
113
155
  **Examples:**
114
156
 
@@ -116,11 +158,23 @@ nax run -f my-feature
116
158
  # Preview what would run (no agents spawned)
117
159
  nax run -f user-auth --dry-run
118
160
 
119
- # Run in a different directory
120
- nax run -f user-auth -d /path/to/project
161
+ # Plan from spec then run — one command
162
+ nax run -f user-auth --plan --from spec.md
163
+
164
+ # Run with parallel execution (auto concurrency)
165
+ nax run -f user-auth --parallel 0
166
+
167
+ # Run with up to 3 parallel worktree sessions
168
+ nax run -f user-auth --parallel 3
169
+
170
+ # Force a specific agent
171
+ nax run -f user-auth --agent opencode
121
172
 
122
173
  # Run in CI/CD (structured output)
123
174
  nax run -f user-auth --headless
175
+
176
+ # Raw JSONL for scripting
177
+ nax run -f user-auth --json
124
178
  ```
125
179
 
126
180
  ---
@@ -199,6 +253,58 @@ Output sections:
199
253
 
200
254
  ---
201
255
 
256
+ ### `nax generate`
257
+
258
+ Generate agent config files from `nax/context.md`. Supports Claude Code, OpenCode, Codex, Cursor, Windsurf, Aider, and Gemini.
259
+
260
+ ```bash
261
+ nax generate
262
+ ```
263
+
264
+ **Flags:**
265
+
266
+ | Flag | Description |
267
+ |:-----|:------------|
268
+ | `-c, --context <path>` | Context file path (default: `nax/context.md`) |
269
+ | `-o, --output <dir>` | Output directory (default: project root) |
270
+ | `-a, --agent <name>` | Generate for a specific agent only (`claude`, `opencode`, `cursor`, `windsurf`, `aider`, `codex`, `gemini`) |
271
+ | `--dry-run` | Preview without writing files |
272
+ | `--no-auto-inject` | Disable auto-injection of project metadata |
273
+ | `--package <dir>` | Generate for a specific monorepo package (e.g. `packages/api`) |
274
+ | `--all-packages` | Generate for all discovered packages |
275
+
276
+ **What it generates:**
277
+
278
+ | Agent | File |
279
+ |:------|:-----|
280
+ | Claude Code | `CLAUDE.md` |
281
+ | OpenCode | `AGENTS.md` |
282
+ | Codex | `AGENTS.md` |
283
+ | Cursor | `.cursorrules` |
284
+ | Windsurf | `.windsurfrules` |
285
+ | Aider | `.aider.md` |
286
+ | Gemini | `GEMINI.md` |
287
+
288
+ **Workflow:**
289
+
290
+ 1. Create `nax/context.md` — describe your project's architecture, conventions, and coding standards
291
+ 2. Run `nax generate` — writes agent config files to the project root (and per-package if configured)
292
+ 3. Commit the generated files — your agents will automatically pick them up
293
+
294
+ **Monorepo (per-package):**
295
+
296
+ ```bash
297
+ # Generate agent config files (e.g. CLAUDE.md) for a single package
298
+ nax generate --package packages/api
299
+
300
+ # Generate for all packages (auto-discovers workspace packages)
301
+ nax generate --all-packages
302
+ ```
303
+
304
+ Each package can have its own `nax/context.md` at `<package>/nax/context.md` for package-specific agent instructions.
305
+
306
+ ---
307
+
202
308
  ### `nax prompts -f <name>`
203
309
 
204
310
  Assemble and display the prompt that would be sent to the agent for each story role.
@@ -439,6 +545,170 @@ If the regression gate detects failures, nax maps them to the responsible story
439
545
 
440
546
  ---
441
547
 
548
+ ## Parallel Execution
549
+
550
+ nax can run multiple stories concurrently using git worktrees — each story gets an isolated worktree so agents don't step on each other.
551
+
552
+ ```bash
553
+ # Auto concurrency (based on CPU cores)
554
+ nax run -f my-feature --parallel 0
555
+
556
+ # Fixed concurrency
557
+ nax run -f my-feature --parallel 3
558
+ ```
559
+
560
+ **How it works:**
561
+
562
+ 1. Stories are grouped by dependency order (dependent stories wait for their prerequisites)
563
+ 2. Each story in a batch of independent stories gets its own git worktree
564
+ 3. Agent sessions run concurrently inside those worktrees
565
+ 4. Once a batch completes, changes are merged back in dependency order
566
+ 5. Merge conflicts are automatically rectified by re-running the conflicted story on the updated base
567
+
568
+ **Config:**
569
+
570
+ ```json
571
+ {
572
+ "execution": {
573
+ "maxParallelSessions": 4
574
+ }
575
+ }
576
+ ```
577
+
578
+ > Sequential mode (no `--parallel`) is the safe default. Use parallel for large feature sets with independent stories.
579
+
580
+ ---
581
+
582
+ ## Agents
583
+
584
+ nax supports multiple coding agents. By default it uses Claude Code via the ACP protocol.
585
+
586
+ ```bash
587
+ # List installed agents and their capabilities
588
+ nax agents
589
+ ```
590
+
591
+ **Supported agents:**
592
+
593
+ | Agent | Protocol | Notes |
594
+ |:------|:---------|:------|
595
+ | `claude` | ACP (default) | Claude Code via acpx |
596
+ | `opencode` | ACP | OpenCode via acpx |
597
+ | `codex` | ACP | Codex via acpx |
598
+ | `cursor` | ACP | Cursor via acpx |
599
+ | `windsurf` | ACP | Windsurf via acpx |
600
+ | `aider` | ACP | Aider via acpx |
601
+ | `gemini` | ACP | Gemini CLI via acpx |
602
+
603
+ **ACP protocol (default):**
604
+
605
+ nax uses [acpx](https://github.com/nathapp/acpx) as the ACP transport. All agents run as persistent sessions — nax sends prompts and receives structured JSON-RPC responses including token counts and exact USD cost per session.
606
+
607
+ **Configuring agents:**
608
+
609
+ ```json
610
+ {
611
+ "execution": {
612
+ "defaultAgent": "claude",
613
+ "protocol": "acp",
614
+ "fallbackOrder": ["claude", "codex", "opencode", "gemini"]
615
+ }
616
+ }
617
+ ```
618
+
619
+ **Force a specific agent at runtime:**
620
+
621
+ ```bash
622
+ nax run -f my-feature --agent opencode
623
+ ```
624
+
625
+ ---
626
+
627
+ ## Monorepo Support
628
+
629
+ nax supports monorepos with workspace-level and per-package configuration.
630
+
631
+ ### Setup
632
+
633
+ ```bash
634
+ # Initialize nax at the repo root
635
+ nax init
636
+
637
+ # Scaffold per-package context for a specific package
638
+ nax init --package packages/api
639
+ nax init --package packages/web
640
+ ```
641
+
642
+ ### Per-Package Config
643
+
644
+ Each package can override specific config fields by placing a `nax/config.json` inside the package directory:
645
+
646
+ ```
647
+ repo-root/
648
+ ├── nax/
649
+ │ └── config.json # root config
650
+ ├── packages/
651
+ │ ├── api/
652
+ │ │ └── nax/
653
+ │ │ ├── config.json # overrides for api package
654
+ │ │ └── context.md # agent context for api
655
+ │ └── web/
656
+ │ └── nax/
657
+ │ ├── config.json # overrides for web package
658
+ │ └── context.md # agent context for web
659
+ ```
660
+
661
+ **Overridable fields per package:** `execution`, `review`, `acceptance`, `quality`, `context`
662
+
663
+ ```json
664
+ // packages/api/nax/config.json
665
+ {
666
+ "quality": {
667
+ "commands": {
668
+ "test": "turbo test --filter=@myapp/api",
669
+ "lint": "turbo lint --filter=@myapp/api"
670
+ }
671
+ }
672
+ }
673
+ ```
674
+
675
+ ### Per-Package Stories
676
+
677
+ In your `prd.json`, set `workdir` on each story to point to the package:
678
+
679
+ ```json
680
+ {
681
+ "userStories": [
682
+ {
683
+ "id": "US-001",
684
+ "title": "Add auth endpoint",
685
+ "workdir": "packages/api",
686
+ "status": "pending"
687
+ }
688
+ ]
689
+ }
690
+ ```
691
+
692
+ nax will run the agent inside that package's directory and apply its config overrides automatically.
693
+
694
+ ### Workspace Detection
695
+
696
+ When `nax plan` generates stories for a monorepo, it auto-discovers packages from:
697
+ - `turbo.json` → `packages` field
698
+ - `package.json` → `workspaces`
699
+ - `pnpm-workspace.yaml` → `packages`
700
+ - Existing `*/nax/context.md` files
701
+
702
+ ### Generate Agent Files for All Packages
703
+
704
+ ```bash
705
+ nax generate --all-packages
706
+ ```
707
+
708
+ Generates a `CLAUDE.md` (or agent-specific file) in each discovered package directory, using the package's own `nax/context.md` if present.
709
+
710
+ ---
711
+
442
712
  ## Hooks
443
713
 
444
714
  Integrate notifications, CI triggers, or custom scripts via lifecycle hooks.
package/dist/nax.js CHANGED
@@ -3267,10 +3267,10 @@ Security-critical functions (authentication, cryptography, tokens, sessions, cre
3267
3267
  password hashing, access control) must be classified at MINIMUM "medium" complexity
3268
3268
  regardless of LOC count. These require at minimum "tdd-simple" test strategy.`, TEST_STRATEGY_GUIDE = `## Test Strategy Guide
3269
3269
 
3270
- - test-after: Simple changes with well-understood behavior. Write tests after implementation.
3271
- - tdd-simple: Medium complexity. Write key tests first, implement, then fill coverage.
3272
- - three-session-tdd: Complex stories. Full TDD cycle with separate test-writer and implementer sessions.
3273
- - three-session-tdd-lite: Expert/high-risk stories. Full TDD with additional verifier session.`, GROUPING_RULES = `## Grouping Rules
3270
+ - test-after: Simple changes with well-understood behavior. Write tests after implementation in a single session.
3271
+ - tdd-simple: Medium complexity. Write failing tests first, then implement to pass them \u2014 all in one session.
3272
+ - three-session-tdd: Complex stories. 3 sessions: (1) test-writer writes failing tests \u2014 no src/ changes allowed, (2) implementer makes them pass without modifying test files, (3) verifier confirms correctness.
3273
+ - three-session-tdd-lite: Expert/high-risk stories. 3 sessions: (1) test-writer writes failing tests and may create minimal src/ stubs for imports, (2) implementer makes tests pass and may add missing coverage or replace stubs, (3) verifier confirms correctness.`, GROUPING_RULES = `## Grouping Rules
3274
3274
 
3275
3275
  - Combine small, related tasks into a single "simple" or "medium" story.
3276
3276
  - Do NOT create separate stories for every single file or function unless complex.
@@ -22250,7 +22250,7 @@ var package_default;
22250
22250
  var init_package = __esm(() => {
22251
22251
  package_default = {
22252
22252
  name: "@nathapp/nax",
22253
- version: "0.49.1",
22253
+ version: "0.49.3",
22254
22254
  description: "AI Coding Agent Orchestrator \u2014 loops until done",
22255
22255
  type: "module",
22256
22256
  bin: {
@@ -22323,8 +22323,8 @@ var init_version = __esm(() => {
22323
22323
  NAX_VERSION = package_default.version;
22324
22324
  NAX_COMMIT = (() => {
22325
22325
  try {
22326
- if (/^[0-9a-f]{6,10}$/.test("635a552"))
22327
- return "635a552";
22326
+ if (/^[0-9a-f]{6,10}$/.test("30ff375"))
22327
+ return "30ff375";
22328
22328
  } catch {}
22329
22329
  try {
22330
22330
  const result = Bun.spawnSync(["git", "rev-parse", "--short", "HEAD"], {
@@ -24357,6 +24357,8 @@ async function resolveCommand(check2, config2, executionConfig, workdir) {
24357
24357
  }
24358
24358
  async function runCheck(check2, command, workdir) {
24359
24359
  const startTime = Date.now();
24360
+ const logger = getSafeLogger();
24361
+ logger?.info("review", `Running ${check2} check`, { check: check2, command, workdir });
24360
24362
  try {
24361
24363
  const parts = command.split(/\s+/);
24362
24364
  const executable = parts[0];
@@ -24395,6 +24397,17 @@ async function runCheck(check2, command, workdir) {
24395
24397
  const stderr = await new Response(proc.stderr).text();
24396
24398
  const output = [stdout, stderr].filter(Boolean).join(`
24397
24399
  `);
24400
+ if (exitCode !== 0) {
24401
+ logger?.warn("review", `${check2} check failed`, {
24402
+ check: check2,
24403
+ command,
24404
+ workdir,
24405
+ exitCode,
24406
+ output: output.slice(0, 2000)
24407
+ });
24408
+ } else {
24409
+ logger?.debug("review", `${check2} check passed`, { check: check2, command, durationMs: Date.now() - startTime });
24410
+ }
24398
24411
  return {
24399
24412
  check: check2,
24400
24413
  command,
@@ -24680,8 +24693,8 @@ async function recheckReview(ctx) {
24680
24693
  const { reviewStage: reviewStage2 } = await Promise.resolve().then(() => (init_review(), exports_review));
24681
24694
  if (!reviewStage2.enabled(ctx))
24682
24695
  return true;
24683
- const result = await reviewStage2.execute(ctx);
24684
- return result.action === "continue";
24696
+ await reviewStage2.execute(ctx);
24697
+ return ctx.reviewResult?.success === true;
24685
24698
  }
24686
24699
  function collectFailedChecks(ctx) {
24687
24700
  return (ctx.reviewResult?.checks ?? []).filter((c) => !c.success);
@@ -24793,11 +24806,18 @@ var init_autofix = __esm(() => {
24793
24806
  const lintFixCmd = effectiveConfig.quality.commands.lintFix;
24794
24807
  const formatFixCmd = effectiveConfig.quality.commands.formatFix;
24795
24808
  const effectiveWorkdir = ctx.story.workdir ? join18(ctx.workdir, ctx.story.workdir) : ctx.workdir;
24796
- if (lintFixCmd || formatFixCmd) {
24809
+ const failedCheckNames = new Set((reviewResult.checks ?? []).filter((c) => !c.success).map((c) => c.check));
24810
+ const hasLintFailure = failedCheckNames.has("lint");
24811
+ logger.info("autofix", "Starting autofix", {
24812
+ storyId: ctx.story.id,
24813
+ failedChecks: [...failedCheckNames],
24814
+ workdir: effectiveWorkdir
24815
+ });
24816
+ if (hasLintFailure && (lintFixCmd || formatFixCmd)) {
24797
24817
  if (lintFixCmd) {
24798
24818
  pipelineEventBus.emit({ type: "autofix:started", storyId: ctx.story.id, command: lintFixCmd });
24799
24819
  const lintResult = await _autofixDeps.runCommand(lintFixCmd, effectiveWorkdir);
24800
- logger.debug("autofix", `lintFix exit=${lintResult.exitCode}`, { storyId: ctx.story.id });
24820
+ logger.debug("autofix", `lintFix exit=${lintResult.exitCode}`, { storyId: ctx.story.id, command: lintFixCmd });
24801
24821
  if (lintResult.exitCode !== 0) {
24802
24822
  logger.warn("autofix", "lintFix command failed \u2014 may not have fixed all issues", {
24803
24823
  storyId: ctx.story.id,
@@ -24808,7 +24828,10 @@ var init_autofix = __esm(() => {
24808
24828
  if (formatFixCmd) {
24809
24829
  pipelineEventBus.emit({ type: "autofix:started", storyId: ctx.story.id, command: formatFixCmd });
24810
24830
  const fmtResult = await _autofixDeps.runCommand(formatFixCmd, effectiveWorkdir);
24811
- logger.debug("autofix", `formatFix exit=${fmtResult.exitCode}`, { storyId: ctx.story.id });
24831
+ logger.debug("autofix", `formatFix exit=${fmtResult.exitCode}`, {
24832
+ storyId: ctx.story.id,
24833
+ command: formatFixCmd
24834
+ });
24812
24835
  if (fmtResult.exitCode !== 0) {
24813
24836
  logger.warn("autofix", "formatFix command failed \u2014 may not have fixed all issues", {
24814
24837
  storyId: ctx.story.id,
@@ -24819,11 +24842,12 @@ var init_autofix = __esm(() => {
24819
24842
  const recheckPassed = await _autofixDeps.recheckReview(ctx);
24820
24843
  pipelineEventBus.emit({ type: "autofix:completed", storyId: ctx.story.id, fixed: recheckPassed });
24821
24844
  if (recheckPassed) {
24822
- if (ctx.reviewResult)
24823
- ctx.reviewResult = { ...ctx.reviewResult, success: true };
24824
24845
  logger.info("autofix", "Mechanical autofix succeeded \u2014 retrying review", { storyId: ctx.story.id });
24825
24846
  return { action: "retry", fromStage: "review" };
24826
24847
  }
24848
+ logger.info("autofix", "Mechanical autofix did not resolve all failures \u2014 proceeding to agent rectification", {
24849
+ storyId: ctx.story.id
24850
+ });
24827
24851
  }
24828
24852
  const agentFixed = await _autofixDeps.runAgentRectification(ctx);
24829
24853
  if (agentFixed) {
@@ -29553,7 +29577,7 @@ var init_routing2 = __esm(() => {
29553
29577
  logger.debug("routing", ctx.routing.reasoning);
29554
29578
  }
29555
29579
  const decomposeConfig = ctx.config.decompose;
29556
- if (decomposeConfig) {
29580
+ if (decomposeConfig && ctx.story.status !== "decomposed") {
29557
29581
  const acCount = ctx.story.acceptanceCriteria.length;
29558
29582
  const complexity = ctx.routing.complexity;
29559
29583
  const isOversized = acCount > decomposeConfig.maxAcceptanceCriteria && (complexity === "complex" || complexity === "expert");
@@ -34256,6 +34280,7 @@ async function handlePipelineFailure(ctx, pipelineResult) {
34256
34280
  const logger = getSafeLogger();
34257
34281
  let prd = ctx.prd;
34258
34282
  let prdDirty = false;
34283
+ const costDelta = pipelineResult.context.agentResult?.estimatedCost || 0;
34259
34284
  switch (pipelineResult.finalAction) {
34260
34285
  case "pause":
34261
34286
  markStoryPaused(prd, ctx.story.id);
@@ -34322,7 +34347,7 @@ async function handlePipelineFailure(ctx, pipelineResult) {
34322
34347
  break;
34323
34348
  }
34324
34349
  }
34325
- return { prd, prdDirty };
34350
+ return { prd, prdDirty, costDelta };
34326
34351
  }
34327
34352
  var init_pipeline_result_handler = __esm(() => {
34328
34353
  init_logger2();
@@ -34427,7 +34452,7 @@ async function runIteration(ctx, prd, selection, iterations, totalCost, allStory
34427
34452
  return {
34428
34453
  prd: r.prd,
34429
34454
  storiesCompletedDelta: 0,
34430
- costDelta: 0,
34455
+ costDelta: r.costDelta,
34431
34456
  prdDirty: r.prdDirty,
34432
34457
  finalAction: pipelineResult.finalAction,
34433
34458
  reason: pipelineResult.reason
@@ -34465,7 +34490,7 @@ function buildPreviewRouting(story, config2) {
34465
34490
  function selectNextStories(prd, config2, batchPlan, currentBatchIndex, lastStoryId, useBatch) {
34466
34491
  if (useBatch && currentBatchIndex < batchPlan.length) {
34467
34492
  const batch = batchPlan[currentBatchIndex];
34468
- const storiesToExecute = batch.stories.filter((s) => !s.passes && s.status !== "passed" && s.status !== "skipped" && s.status !== "blocked" && s.status !== "failed" && s.status !== "paused");
34493
+ const storiesToExecute = batch.stories.filter((s) => !s.passes && s.status !== "passed" && s.status !== "skipped" && s.status !== "blocked" && s.status !== "failed" && s.status !== "paused" && s.status !== "decomposed");
34469
34494
  if (storiesToExecute.length === 0) {
34470
34495
  return { selection: null, nextBatchIndex: currentBatchIndex + 1 };
34471
34496
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@nathapp/nax",
3
- "version": "0.49.1",
3
+ "version": "0.49.3",
4
4
  "description": "AI Coding Agent Orchestrator — loops until done",
5
5
  "type": "module",
6
6
  "bin": {
@@ -53,10 +53,10 @@ regardless of LOC count. These require at minimum "tdd-simple" test strategy.`;
53
53
 
54
54
  export const TEST_STRATEGY_GUIDE = `## Test Strategy Guide
55
55
 
56
- - test-after: Simple changes with well-understood behavior. Write tests after implementation.
57
- - tdd-simple: Medium complexity. Write key tests first, implement, then fill coverage.
58
- - three-session-tdd: Complex stories. Full TDD cycle with separate test-writer and implementer sessions.
59
- - three-session-tdd-lite: Expert/high-risk stories. Full TDD with additional verifier session.`;
56
+ - test-after: Simple changes with well-understood behavior. Write tests after implementation in a single session.
57
+ - tdd-simple: Medium complexity. Write failing tests first, then implement to pass them — all in one session.
58
+ - three-session-tdd: Complex stories. 3 sessions: (1) test-writer writes failing tests — no src/ changes allowed, (2) implementer makes them pass without modifying test files, (3) verifier confirms correctness.
59
+ - three-session-tdd-lite: Expert/high-risk stories. 3 sessions: (1) test-writer writes failing tests and may create minimal src/ stubs for imports, (2) implementer makes tests pass and may add missing coverage or replace stubs, (3) verifier confirms correctness.`;
60
60
 
61
61
  export const GROUPING_RULES = `## Grouping Rules
62
62
 
@@ -142,7 +142,7 @@ export async function runIteration(
142
142
  return {
143
143
  prd: r.prd,
144
144
  storiesCompletedDelta: 0,
145
- costDelta: 0,
145
+ costDelta: r.costDelta,
146
146
  prdDirty: r.prdDirty,
147
147
  finalAction: pipelineResult.finalAction,
148
148
  reason: pipelineResult.reason,
@@ -102,6 +102,7 @@ export async function handlePipelineSuccess(
102
102
  export interface PipelineFailureResult {
103
103
  prd: PRD;
104
104
  prdDirty: boolean;
105
+ costDelta: number;
105
106
  }
106
107
 
107
108
  export async function handlePipelineFailure(
@@ -111,6 +112,8 @@ export async function handlePipelineFailure(
111
112
  const logger = getSafeLogger();
112
113
  let prd = ctx.prd;
113
114
  let prdDirty = false;
115
+ // Always capture cost even for failed stories — agent ran and spent tokens
116
+ const costDelta = pipelineResult.context.agentResult?.estimatedCost || 0;
114
117
 
115
118
  switch (pipelineResult.finalAction) {
116
119
  case "pause":
@@ -185,5 +188,5 @@ export async function handlePipelineFailure(
185
188
  }
186
189
  }
187
190
 
188
- return { prd, prdDirty };
191
+ return { prd, prdDirty, costDelta };
189
192
  }
@@ -39,7 +39,8 @@ export function selectNextStories(
39
39
  s.status !== "skipped" &&
40
40
  s.status !== "blocked" &&
41
41
  s.status !== "failed" &&
42
- s.status !== "paused",
42
+ s.status !== "paused" &&
43
+ s.status !== "decomposed",
43
44
  );
44
45
 
45
46
  if (storiesToExecute.length === 0) {
@@ -61,12 +61,22 @@ export const autofixStage: PipelineStage = {
61
61
  // Effective workdir for running commands (scoped to package if monorepo)
62
62
  const effectiveWorkdir = ctx.story.workdir ? join(ctx.workdir, ctx.story.workdir) : ctx.workdir;
63
63
 
64
- // Phase 1: Mechanical fix (if commands are configured)
65
- if (lintFixCmd || formatFixCmd) {
64
+ // Identify which checks failed
65
+ const failedCheckNames = new Set((reviewResult.checks ?? []).filter((c) => !c.success).map((c) => c.check));
66
+ const hasLintFailure = failedCheckNames.has("lint");
67
+
68
+ logger.info("autofix", "Starting autofix", {
69
+ storyId: ctx.story.id,
70
+ failedChecks: [...failedCheckNames],
71
+ workdir: effectiveWorkdir,
72
+ });
73
+
74
+ // Phase 1: Mechanical fix — only for lint failures (lintFix/formatFix cannot fix typecheck errors)
75
+ if (hasLintFailure && (lintFixCmd || formatFixCmd)) {
66
76
  if (lintFixCmd) {
67
77
  pipelineEventBus.emit({ type: "autofix:started", storyId: ctx.story.id, command: lintFixCmd });
68
78
  const lintResult = await _autofixDeps.runCommand(lintFixCmd, effectiveWorkdir);
69
- logger.debug("autofix", `lintFix exit=${lintResult.exitCode}`, { storyId: ctx.story.id });
79
+ logger.debug("autofix", `lintFix exit=${lintResult.exitCode}`, { storyId: ctx.story.id, command: lintFixCmd });
70
80
  if (lintResult.exitCode !== 0) {
71
81
  logger.warn("autofix", "lintFix command failed — may not have fixed all issues", {
72
82
  storyId: ctx.story.id,
@@ -78,7 +88,10 @@ export const autofixStage: PipelineStage = {
78
88
  if (formatFixCmd) {
79
89
  pipelineEventBus.emit({ type: "autofix:started", storyId: ctx.story.id, command: formatFixCmd });
80
90
  const fmtResult = await _autofixDeps.runCommand(formatFixCmd, effectiveWorkdir);
81
- logger.debug("autofix", `formatFix exit=${fmtResult.exitCode}`, { storyId: ctx.story.id });
91
+ logger.debug("autofix", `formatFix exit=${fmtResult.exitCode}`, {
92
+ storyId: ctx.story.id,
93
+ command: formatFixCmd,
94
+ });
82
95
  if (fmtResult.exitCode !== 0) {
83
96
  logger.warn("autofix", "formatFix command failed — may not have fixed all issues", {
84
97
  storyId: ctx.story.id,
@@ -91,10 +104,13 @@ export const autofixStage: PipelineStage = {
91
104
  pipelineEventBus.emit({ type: "autofix:completed", storyId: ctx.story.id, fixed: recheckPassed });
92
105
 
93
106
  if (recheckPassed) {
94
- if (ctx.reviewResult) ctx.reviewResult = { ...ctx.reviewResult, success: true };
95
107
  logger.info("autofix", "Mechanical autofix succeeded — retrying review", { storyId: ctx.story.id });
96
108
  return { action: "retry", fromStage: "review" };
97
109
  }
110
+
111
+ logger.info("autofix", "Mechanical autofix did not resolve all failures — proceeding to agent rectification", {
112
+ storyId: ctx.story.id,
113
+ });
98
114
  }
99
115
 
100
116
  // Phase 2: Agent rectification — spawn agent with review error context
@@ -134,8 +150,11 @@ async function recheckReview(ctx: PipelineContext): Promise<boolean> {
134
150
  // Import reviewStage lazily to avoid circular deps
135
151
  const { reviewStage } = await import("./review");
136
152
  if (!reviewStage.enabled(ctx)) return true;
137
- const result = await reviewStage.execute(ctx);
138
- return result.action === "continue";
153
+ // reviewStage.execute updates ctx.reviewResult in place.
154
+ // We cannot use result.action here because review returns "continue" for BOTH
155
+ // pass and built-in-check-failure (to hand off to autofix). Check success directly.
156
+ await reviewStage.execute(ctx);
157
+ return ctx.reviewResult?.success === true;
139
158
  }
140
159
 
141
160
  function collectFailedChecks(ctx: PipelineContext): ReviewCheckResult[] {
@@ -196,7 +196,7 @@ export const routingStage: PipelineStage = {
196
196
 
197
197
  // SD-004: Oversized story detection and decomposition
198
198
  const decomposeConfig = ctx.config.decompose;
199
- if (decomposeConfig) {
199
+ if (decomposeConfig && ctx.story.status !== "decomposed") {
200
200
  const acCount = ctx.story.acceptanceCriteria.length;
201
201
  const complexity = ctx.routing.complexity;
202
202
  const isOversized =
@@ -99,6 +99,9 @@ const SIGKILL_GRACE_PERIOD_MS = 5_000;
99
99
  */
100
100
  async function runCheck(check: ReviewCheckName, command: string, workdir: string): Promise<ReviewCheckResult> {
101
101
  const startTime = Date.now();
102
+ const logger = getSafeLogger();
103
+
104
+ logger?.info("review", `Running ${check} check`, { check, command, workdir });
102
105
 
103
106
  try {
104
107
  // Parse command into executable and args
@@ -152,6 +155,18 @@ async function runCheck(check: ReviewCheckName, command: string, workdir: string
152
155
  const stderr = await new Response(proc.stderr).text();
153
156
  const output = [stdout, stderr].filter(Boolean).join("\n");
154
157
 
158
+ if (exitCode !== 0) {
159
+ logger?.warn("review", `${check} check failed`, {
160
+ check,
161
+ command,
162
+ workdir,
163
+ exitCode,
164
+ output: output.slice(0, 2000),
165
+ });
166
+ } else {
167
+ logger?.debug("review", `${check} check passed`, { check, command, durationMs: Date.now() - startTime });
168
+ }
169
+
155
170
  return {
156
171
  check,
157
172
  command,