@nathapp/nax 0.49.1 → 0.49.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/README.md +280 -10
- package/dist/nax.js +43 -18
- package/package.json +1 -1
- package/src/config/test-strategy.ts +4 -4
- package/src/execution/iteration-runner.ts +1 -1
- package/src/execution/pipeline-result-handler.ts +4 -1
- package/src/execution/story-selector.ts +2 -1
- package/src/pipeline/stages/autofix.ts +26 -7
- package/src/pipeline/stages/routing.ts +1 -1
- package/src/review/runner.ts +15 -0
package/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,20 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [0.49.3] - 2026-03-18
|
|
9
|
+
|
|
10
|
+
### Fixed
|
|
11
|
+
- **Autofix `recheckReview` bug:** `reviewStage.execute()` returns `action:"continue"` for both pass AND built-in-check-failure (to hand off to autofix). Using `result.action === "continue"` always returned `true`, causing "Mechanical autofix succeeded" to log every cycle and looping until `MAX_STAGE_RETRIES` with no real fix. Fix: check `ctx.reviewResult?.success` directly after execute.
|
|
12
|
+
- **Autofix selective mechanical fix:** `lintFix`/`formatFix` cannot fix typecheck errors. Phase 1 now only runs when the `lint` check actually failed. Typecheck-only failures skip straight to agent rectification (Phase 2).
|
|
13
|
+
- **Review command logging:** `runner.ts` now logs the resolved command and workdir for every check at info level, and full output on failure at warn level — eliminates phantom failure mystery.
|
|
14
|
+
- **Re-decompose on second run:** Batch-mode story selector was missing `"decomposed"` in its status skip list (single-story path already excluded it). Stories with `status: "decomposed"` were being picked up again, triggering unnecessary LLM decompose calls. Added `"decomposed"` to batch filter and a guard in routing SD-004 block.
|
|
15
|
+
- **totalCost always 0:** `handlePipelineFailure` returned no `costDelta`; `iteration-runner` hardcoded `costDelta: 0` for failures. Agent cost for failed stories was silently dropped. Fix: extract `agentResult?.estimatedCost` in the failure path, the same as in the success path.
|
|
16
|
+
|
|
17
|
+
## [0.49.2] - 2026-03-18
|
|
18
|
+
|
|
19
|
+
### Fixed
|
|
20
|
+
- **Test strategy descriptions:** `TEST_STRATEGY_GUIDE` (used in plan and decompose prompts) had incorrect descriptions for `three-session-tdd` and `three-session-tdd-lite`. Both strategies use 3 sessions. Key distinction: `three-session-tdd` (strict) — test-writer makes no src/ changes, implementer makes no test changes; `three-session-tdd-lite` (lite) — test-writer may add minimal src/ stubs, implementer may expand coverage and replace stubs. Updated in `src/config/test-strategy.ts`, `docs/specs/test-strategy-ssot.md`, and `docs/architecture/ARCHITECTURE.md`.
|
|
21
|
+
|
|
8
22
|
## [0.49.1] - 2026-03-18
|
|
9
23
|
|
|
10
24
|
### Fixed
|
package/README.md
CHANGED
|
@@ -18,8 +18,16 @@ bun install -g @nathapp/nax
|
|
|
18
18
|
cd your-project
|
|
19
19
|
nax init
|
|
20
20
|
nax features create my-feature
|
|
21
|
-
|
|
21
|
+
|
|
22
|
+
# Option A: write prd.json manually, then run
|
|
23
|
+
nax run -f my-feature
|
|
24
|
+
|
|
25
|
+
# Option B: generate prd.json from a spec file, then run
|
|
26
|
+
nax plan -f my-feature --from spec.md
|
|
22
27
|
nax run -f my-feature
|
|
28
|
+
|
|
29
|
+
# Option C: plan + run in one command
|
|
30
|
+
nax run -f my-feature --plan --from spec.md
|
|
23
31
|
```
|
|
24
32
|
|
|
25
33
|
## How It Works
|
|
@@ -54,6 +62,14 @@ nax/
|
|
|
54
62
|
└── features/ # One folder per feature
|
|
55
63
|
```
|
|
56
64
|
|
|
65
|
+
**Monorepo — scaffold a package:**
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
nax init --package packages/api
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
Creates `packages/api/nax/context.md` for per-package agent context.
|
|
72
|
+
|
|
57
73
|
---
|
|
58
74
|
|
|
59
75
|
### `nax features create <name>`
|
|
@@ -76,20 +92,33 @@ nax features list
|
|
|
76
92
|
|
|
77
93
|
---
|
|
78
94
|
|
|
79
|
-
### `nax
|
|
95
|
+
### `nax plan -f <name> --from <spec>`
|
|
80
96
|
|
|
81
|
-
|
|
97
|
+
Generate a `prd.json` from a spec file using an LLM. Replaces the deprecated `nax analyze`.
|
|
82
98
|
|
|
83
99
|
```bash
|
|
84
|
-
nax
|
|
100
|
+
nax plan -f my-feature --from spec.md
|
|
85
101
|
```
|
|
86
102
|
|
|
87
103
|
**Flags:**
|
|
88
104
|
|
|
89
105
|
| Flag | Description |
|
|
90
106
|
|:-----|:------------|
|
|
91
|
-
|
|
|
92
|
-
| `--
|
|
107
|
+
| `-f, --feature <name>` | Feature name (required) |
|
|
108
|
+
| `--from <spec-path>` | Path to spec file (required) |
|
|
109
|
+
| `--auto` / `--one-shot` | Skip interactive Q&A — single LLM call, no back-and-forth |
|
|
110
|
+
| `-b, --branch <branch>` | Override default branch name |
|
|
111
|
+
| `-d, --dir <path>` | Project directory |
|
|
112
|
+
|
|
113
|
+
**Interactive vs one-shot:**
|
|
114
|
+
- Default (no flag): interactive planning session — nax asks clarifying questions, refines the plan iteratively
|
|
115
|
+
- `--auto` / `--one-shot`: single LLM call, faster but less precise
|
|
116
|
+
|
|
117
|
+
---
|
|
118
|
+
|
|
119
|
+
### `nax analyze` *(deprecated)*
|
|
120
|
+
|
|
121
|
+
> ⚠️ **Deprecated.** Use `nax plan` instead. `nax analyze` remains available for backward compatibility but will be removed in a future version.
|
|
93
122
|
|
|
94
123
|
---
|
|
95
124
|
|
|
@@ -105,10 +134,23 @@ nax run -f my-feature
|
|
|
105
134
|
|
|
106
135
|
| Flag | Description |
|
|
107
136
|
|:-----|:------------|
|
|
108
|
-
| `-f, --feature <name>` | Feature name
|
|
137
|
+
| `-f, --feature <name>` | Feature name |
|
|
138
|
+
| `-a, --agent <name>` | Force a specific agent (`claude`, `opencode`, `codex`, etc.) |
|
|
139
|
+
| `--plan` | Run plan phase first (requires `--from`) |
|
|
140
|
+
| `--from <spec-path>` | Spec file for `--plan` |
|
|
141
|
+
| `--one-shot` | Skip interactive Q&A during planning (ACP only) |
|
|
142
|
+
| `--force` | Overwrite existing `prd.json` when using `--plan` |
|
|
143
|
+
| `--parallel <n>` | Max parallel sessions (`0` = auto based on CPU cores; omit = sequential) |
|
|
109
144
|
| `--dry-run` | Preview story routing without running agents |
|
|
110
145
|
| `--headless` | Non-interactive output (structured logs, no TUI) |
|
|
111
|
-
|
|
|
146
|
+
| `--verbose` | Debug-level logging |
|
|
147
|
+
| `--quiet` | Warnings and errors only |
|
|
148
|
+
| `--silent` | Errors only |
|
|
149
|
+
| `--json` | Raw JSONL output to stdout (for scripting) |
|
|
150
|
+
| `--skip-precheck` | Skip precheck validations (advanced users only) |
|
|
151
|
+
| `--no-context` | Disable context builder (skip file context in prompts) |
|
|
152
|
+
| `--no-batch` | Execute all stories individually (disable batching) |
|
|
153
|
+
| `-d, --dir <path>` | Working directory |
|
|
112
154
|
|
|
113
155
|
**Examples:**
|
|
114
156
|
|
|
@@ -116,11 +158,23 @@ nax run -f my-feature
|
|
|
116
158
|
# Preview what would run (no agents spawned)
|
|
117
159
|
nax run -f user-auth --dry-run
|
|
118
160
|
|
|
119
|
-
#
|
|
120
|
-
nax run -f user-auth
|
|
161
|
+
# Plan from spec then run — one command
|
|
162
|
+
nax run -f user-auth --plan --from spec.md
|
|
163
|
+
|
|
164
|
+
# Run with parallel execution (auto concurrency)
|
|
165
|
+
nax run -f user-auth --parallel 0
|
|
166
|
+
|
|
167
|
+
# Run with up to 3 parallel worktree sessions
|
|
168
|
+
nax run -f user-auth --parallel 3
|
|
169
|
+
|
|
170
|
+
# Force a specific agent
|
|
171
|
+
nax run -f user-auth --agent opencode
|
|
121
172
|
|
|
122
173
|
# Run in CI/CD (structured output)
|
|
123
174
|
nax run -f user-auth --headless
|
|
175
|
+
|
|
176
|
+
# Raw JSONL for scripting
|
|
177
|
+
nax run -f user-auth --json
|
|
124
178
|
```
|
|
125
179
|
|
|
126
180
|
---
|
|
@@ -199,6 +253,58 @@ Output sections:
|
|
|
199
253
|
|
|
200
254
|
---
|
|
201
255
|
|
|
256
|
+
### `nax generate`
|
|
257
|
+
|
|
258
|
+
Generate agent config files from `nax/context.md`. Supports Claude Code, OpenCode, Codex, Cursor, Windsurf, Aider, and Gemini.
|
|
259
|
+
|
|
260
|
+
```bash
|
|
261
|
+
nax generate
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
**Flags:**
|
|
265
|
+
|
|
266
|
+
| Flag | Description |
|
|
267
|
+
|:-----|:------------|
|
|
268
|
+
| `-c, --context <path>` | Context file path (default: `nax/context.md`) |
|
|
269
|
+
| `-o, --output <dir>` | Output directory (default: project root) |
|
|
270
|
+
| `-a, --agent <name>` | Generate for a specific agent only (`claude`, `opencode`, `cursor`, `windsurf`, `aider`, `codex`, `gemini`) |
|
|
271
|
+
| `--dry-run` | Preview without writing files |
|
|
272
|
+
| `--no-auto-inject` | Disable auto-injection of project metadata |
|
|
273
|
+
| `--package <dir>` | Generate for a specific monorepo package (e.g. `packages/api`) |
|
|
274
|
+
| `--all-packages` | Generate for all discovered packages |
|
|
275
|
+
|
|
276
|
+
**What it generates:**
|
|
277
|
+
|
|
278
|
+
| Agent | File |
|
|
279
|
+
|:------|:-----|
|
|
280
|
+
| Claude Code | `CLAUDE.md` |
|
|
281
|
+
| OpenCode | `AGENTS.md` |
|
|
282
|
+
| Codex | `AGENTS.md` |
|
|
283
|
+
| Cursor | `.cursorrules` |
|
|
284
|
+
| Windsurf | `.windsurfrules` |
|
|
285
|
+
| Aider | `.aider.md` |
|
|
286
|
+
| Gemini | `GEMINI.md` |
|
|
287
|
+
|
|
288
|
+
**Workflow:**
|
|
289
|
+
|
|
290
|
+
1. Create `nax/context.md` — describe your project's architecture, conventions, and coding standards
|
|
291
|
+
2. Run `nax generate` — writes agent config files to the project root (and per-package if configured)
|
|
292
|
+
3. Commit the generated files — your agents will automatically pick them up
|
|
293
|
+
|
|
294
|
+
**Monorepo (per-package):**
|
|
295
|
+
|
|
296
|
+
```bash
|
|
297
|
+
# Generate CLAUDE.md for a single package
|
|
298
|
+
nax generate --package packages/api
|
|
299
|
+
|
|
300
|
+
# Generate for all packages (auto-discovers workspace packages)
|
|
301
|
+
nax generate --all-packages
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
Each package can have its own `nax/context.md` at `<package>/nax/context.md` for package-specific agent instructions.
|
|
305
|
+
|
|
306
|
+
---
|
|
307
|
+
|
|
202
308
|
### `nax prompts -f <name>`
|
|
203
309
|
|
|
204
310
|
Assemble and display the prompt that would be sent to the agent for each story role.
|
|
@@ -439,6 +545,170 @@ If the regression gate detects failures, nax maps them to the responsible story
|
|
|
439
545
|
|
|
440
546
|
---
|
|
441
547
|
|
|
548
|
+
## Parallel Execution
|
|
549
|
+
|
|
550
|
+
nax can run multiple stories concurrently using git worktrees — each story gets an isolated worktree so agents don't step on each other.
|
|
551
|
+
|
|
552
|
+
```bash
|
|
553
|
+
# Auto concurrency (based on CPU cores)
|
|
554
|
+
nax run -f my-feature --parallel 0
|
|
555
|
+
|
|
556
|
+
# Fixed concurrency
|
|
557
|
+
nax run -f my-feature --parallel 3
|
|
558
|
+
```
|
|
559
|
+
|
|
560
|
+
**How it works:**
|
|
561
|
+
|
|
562
|
+
1. Stories are grouped by dependency order (dependent stories wait for their prerequisites)
|
|
563
|
+
2. Each batch of independent stories gets its own git worktree
|
|
564
|
+
3. Agent sessions run concurrently inside those worktrees
|
|
565
|
+
4. Once a batch completes, changes are merged back in dependency order
|
|
566
|
+
5. Merge conflicts are automatically rectified by re-running the conflicted story on the updated base
|
|
567
|
+
|
|
568
|
+
**Config:**
|
|
569
|
+
|
|
570
|
+
```json
|
|
571
|
+
{
|
|
572
|
+
"execution": {
|
|
573
|
+
"maxParallelSessions": 4
|
|
574
|
+
}
|
|
575
|
+
}
|
|
576
|
+
```
|
|
577
|
+
|
|
578
|
+
> Sequential mode (no `--parallel`) is the safe default. Use parallel for large feature sets with independent stories.
|
|
579
|
+
|
|
580
|
+
---
|
|
581
|
+
|
|
582
|
+
## Agents
|
|
583
|
+
|
|
584
|
+
nax supports multiple coding agents. By default it uses Claude Code via the ACP protocol.
|
|
585
|
+
|
|
586
|
+
```bash
|
|
587
|
+
# List installed agents and their capabilities
|
|
588
|
+
nax agents
|
|
589
|
+
```
|
|
590
|
+
|
|
591
|
+
**Supported agents:**
|
|
592
|
+
|
|
593
|
+
| Agent | Protocol | Notes |
|
|
594
|
+
|:------|:---------|:------|
|
|
595
|
+
| `claude` | ACP (default) | Claude Code via acpx |
|
|
596
|
+
| `opencode` | ACP | OpenCode via acpx |
|
|
597
|
+
| `codex` | ACP | Codex via acpx |
|
|
598
|
+
| `cursor` | ACP | Cursor via acpx |
|
|
599
|
+
| `windsurf` | ACP | Windsurf via acpx |
|
|
600
|
+
| `aider` | ACP | Aider via acpx |
|
|
601
|
+
| `gemini` | ACP | Gemini CLI via acpx |
|
|
602
|
+
|
|
603
|
+
**ACP protocol (default):**
|
|
604
|
+
|
|
605
|
+
nax uses [acpx](https://github.com/nathapp/acpx) as the ACP transport. All agents run as persistent sessions — nax sends prompts and receives structured JSON-RPC responses including token counts and exact USD cost per session.
|
|
606
|
+
|
|
607
|
+
**Configuring agents:**
|
|
608
|
+
|
|
609
|
+
```json
|
|
610
|
+
{
|
|
611
|
+
"execution": {
|
|
612
|
+
"defaultAgent": "claude",
|
|
613
|
+
"protocol": "acp",
|
|
614
|
+
"fallbackOrder": ["claude", "codex", "opencode", "gemini"]
|
|
615
|
+
}
|
|
616
|
+
}
|
|
617
|
+
```
|
|
618
|
+
|
|
619
|
+
**Force a specific agent at runtime:**
|
|
620
|
+
|
|
621
|
+
```bash
|
|
622
|
+
nax run -f my-feature --agent opencode
|
|
623
|
+
```
|
|
624
|
+
|
|
625
|
+
---
|
|
626
|
+
|
|
627
|
+
## Monorepo Support
|
|
628
|
+
|
|
629
|
+
nax supports monorepos with workspace-level and per-package configuration.
|
|
630
|
+
|
|
631
|
+
### Setup
|
|
632
|
+
|
|
633
|
+
```bash
|
|
634
|
+
# Initialize nax at the repo root
|
|
635
|
+
nax init
|
|
636
|
+
|
|
637
|
+
# Scaffold per-package context for a specific package
|
|
638
|
+
nax init --package packages/api
|
|
639
|
+
nax init --package packages/web
|
|
640
|
+
```
|
|
641
|
+
|
|
642
|
+
### Per-Package Config
|
|
643
|
+
|
|
644
|
+
Each package can override specific config fields by placing a `nax/config.json` inside the package directory:
|
|
645
|
+
|
|
646
|
+
```
|
|
647
|
+
repo-root/
|
|
648
|
+
├── nax/
|
|
649
|
+
│ └── config.json # root config
|
|
650
|
+
├── packages/
|
|
651
|
+
│ ├── api/
|
|
652
|
+
│ │ └── nax/
|
|
653
|
+
│ │ ├── config.json # overrides for api package
|
|
654
|
+
│ │ └── context.md # agent context for api
|
|
655
|
+
│ └── web/
|
|
656
|
+
│ └── nax/
|
|
657
|
+
│ ├── config.json # overrides for web package
|
|
658
|
+
│ └── context.md # agent context for web
|
|
659
|
+
```
|
|
660
|
+
|
|
661
|
+
**Overridable fields per package:** `execution`, `review`, `acceptance`, `quality`, `context`
|
|
662
|
+
|
|
663
|
+
```jsonc
|
|
664
|
+
// packages/api/nax/config.json
|
|
665
|
+
{
|
|
666
|
+
"quality": {
|
|
667
|
+
"commands": {
|
|
668
|
+
"test": "turbo test --filter=@myapp/api",
|
|
669
|
+
"lint": "turbo lint --filter=@myapp/api"
|
|
670
|
+
}
|
|
671
|
+
}
|
|
672
|
+
}
|
|
673
|
+
```
|
|
674
|
+
|
|
675
|
+
### Per-Package Stories
|
|
676
|
+
|
|
677
|
+
In your `prd.json`, set `workdir` on each story to point to the package:
|
|
678
|
+
|
|
679
|
+
```json
|
|
680
|
+
{
|
|
681
|
+
"userStories": [
|
|
682
|
+
{
|
|
683
|
+
"id": "US-001",
|
|
684
|
+
"title": "Add auth endpoint",
|
|
685
|
+
"workdir": "packages/api",
|
|
686
|
+
"status": "pending"
|
|
687
|
+
}
|
|
688
|
+
]
|
|
689
|
+
}
|
|
690
|
+
```
|
|
691
|
+
|
|
692
|
+
nax will run the agent inside that package's directory and apply its config overrides automatically.
|
|
693
|
+
|
|
694
|
+
### Workspace Detection
|
|
695
|
+
|
|
696
|
+
When `nax plan` generates stories for a monorepo, it auto-discovers packages from:
|
|
697
|
+
- `turbo.json` → `packages` field
|
|
698
|
+
- `package.json` → `workspaces`
|
|
699
|
+
- `pnpm-workspace.yaml` → `packages`
|
|
700
|
+
- Existing `*/nax/context.md` files
|
|
701
|
+
|
|
702
|
+
### Generate Agent Files for All Packages
|
|
703
|
+
|
|
704
|
+
```bash
|
|
705
|
+
nax generate --all-packages
|
|
706
|
+
```
|
|
707
|
+
|
|
708
|
+
Generates a `CLAUDE.md` (or agent-specific file) in each discovered package directory, using the package's own `nax/context.md` if present.
|
|
709
|
+
|
|
710
|
+
---
|
|
711
|
+
|
|
442
712
|
## Hooks
|
|
443
713
|
|
|
444
714
|
Integrate notifications, CI triggers, or custom scripts via lifecycle hooks.
|
package/dist/nax.js
CHANGED
|
@@ -3267,10 +3267,10 @@ Security-critical functions (authentication, cryptography, tokens, sessions, cre
|
|
|
3267
3267
|
password hashing, access control) must be classified at MINIMUM "medium" complexity
|
|
3268
3268
|
regardless of LOC count. These require at minimum "tdd-simple" test strategy.`, TEST_STRATEGY_GUIDE = `## Test Strategy Guide
|
|
3269
3269
|
|
|
3270
|
-
- test-after: Simple changes with well-understood behavior. Write tests after implementation.
|
|
3271
|
-
- tdd-simple: Medium complexity. Write
|
|
3272
|
-
- three-session-tdd: Complex stories.
|
|
3273
|
-
- three-session-tdd-lite: Expert/high-risk stories.
|
|
3270
|
+
- test-after: Simple changes with well-understood behavior. Write tests after implementation in a single session.
|
|
3271
|
+
- tdd-simple: Medium complexity. Write failing tests first, then implement to pass them \u2014 all in one session.
|
|
3272
|
+
- three-session-tdd: Complex stories. 3 sessions: (1) test-writer writes failing tests \u2014 no src/ changes allowed, (2) implementer makes them pass without modifying test files, (3) verifier confirms correctness.
|
|
3273
|
+
- three-session-tdd-lite: Expert/high-risk stories. 3 sessions: (1) test-writer writes failing tests and may create minimal src/ stubs for imports, (2) implementer makes tests pass and may add missing coverage or replace stubs, (3) verifier confirms correctness.`, GROUPING_RULES = `## Grouping Rules
|
|
3274
3274
|
|
|
3275
3275
|
- Combine small, related tasks into a single "simple" or "medium" story.
|
|
3276
3276
|
- Do NOT create separate stories for every single file or function unless complex.
|
|
@@ -22250,7 +22250,7 @@ var package_default;
|
|
|
22250
22250
|
var init_package = __esm(() => {
|
|
22251
22251
|
package_default = {
|
|
22252
22252
|
name: "@nathapp/nax",
|
|
22253
|
-
version: "0.49.
|
|
22253
|
+
version: "0.49.3",
|
|
22254
22254
|
description: "AI Coding Agent Orchestrator \u2014 loops until done",
|
|
22255
22255
|
type: "module",
|
|
22256
22256
|
bin: {
|
|
@@ -22323,8 +22323,8 @@ var init_version = __esm(() => {
|
|
|
22323
22323
|
NAX_VERSION = package_default.version;
|
|
22324
22324
|
NAX_COMMIT = (() => {
|
|
22325
22325
|
try {
|
|
22326
|
-
if (/^[0-9a-f]{6,10}$/.test("
|
|
22327
|
-
return "
|
|
22326
|
+
if (/^[0-9a-f]{6,10}$/.test("30ff375"))
|
|
22327
|
+
return "30ff375";
|
|
22328
22328
|
} catch {}
|
|
22329
22329
|
try {
|
|
22330
22330
|
const result = Bun.spawnSync(["git", "rev-parse", "--short", "HEAD"], {
|
|
@@ -24357,6 +24357,8 @@ async function resolveCommand(check2, config2, executionConfig, workdir) {
|
|
|
24357
24357
|
}
|
|
24358
24358
|
async function runCheck(check2, command, workdir) {
|
|
24359
24359
|
const startTime = Date.now();
|
|
24360
|
+
const logger = getSafeLogger();
|
|
24361
|
+
logger?.info("review", `Running ${check2} check`, { check: check2, command, workdir });
|
|
24360
24362
|
try {
|
|
24361
24363
|
const parts = command.split(/\s+/);
|
|
24362
24364
|
const executable = parts[0];
|
|
@@ -24395,6 +24397,17 @@ async function runCheck(check2, command, workdir) {
|
|
|
24395
24397
|
const stderr = await new Response(proc.stderr).text();
|
|
24396
24398
|
const output = [stdout, stderr].filter(Boolean).join(`
|
|
24397
24399
|
`);
|
|
24400
|
+
if (exitCode !== 0) {
|
|
24401
|
+
logger?.warn("review", `${check2} check failed`, {
|
|
24402
|
+
check: check2,
|
|
24403
|
+
command,
|
|
24404
|
+
workdir,
|
|
24405
|
+
exitCode,
|
|
24406
|
+
output: output.slice(0, 2000)
|
|
24407
|
+
});
|
|
24408
|
+
} else {
|
|
24409
|
+
logger?.debug("review", `${check2} check passed`, { check: check2, command, durationMs: Date.now() - startTime });
|
|
24410
|
+
}
|
|
24398
24411
|
return {
|
|
24399
24412
|
check: check2,
|
|
24400
24413
|
command,
|
|
@@ -24680,8 +24693,8 @@ async function recheckReview(ctx) {
|
|
|
24680
24693
|
const { reviewStage: reviewStage2 } = await Promise.resolve().then(() => (init_review(), exports_review));
|
|
24681
24694
|
if (!reviewStage2.enabled(ctx))
|
|
24682
24695
|
return true;
|
|
24683
|
-
|
|
24684
|
-
return
|
|
24696
|
+
await reviewStage2.execute(ctx);
|
|
24697
|
+
return ctx.reviewResult?.success === true;
|
|
24685
24698
|
}
|
|
24686
24699
|
function collectFailedChecks(ctx) {
|
|
24687
24700
|
return (ctx.reviewResult?.checks ?? []).filter((c) => !c.success);
|
|
@@ -24793,11 +24806,18 @@ var init_autofix = __esm(() => {
|
|
|
24793
24806
|
const lintFixCmd = effectiveConfig.quality.commands.lintFix;
|
|
24794
24807
|
const formatFixCmd = effectiveConfig.quality.commands.formatFix;
|
|
24795
24808
|
const effectiveWorkdir = ctx.story.workdir ? join18(ctx.workdir, ctx.story.workdir) : ctx.workdir;
|
|
24796
|
-
|
|
24809
|
+
const failedCheckNames = new Set((reviewResult.checks ?? []).filter((c) => !c.success).map((c) => c.check));
|
|
24810
|
+
const hasLintFailure = failedCheckNames.has("lint");
|
|
24811
|
+
logger.info("autofix", "Starting autofix", {
|
|
24812
|
+
storyId: ctx.story.id,
|
|
24813
|
+
failedChecks: [...failedCheckNames],
|
|
24814
|
+
workdir: effectiveWorkdir
|
|
24815
|
+
});
|
|
24816
|
+
if (hasLintFailure && (lintFixCmd || formatFixCmd)) {
|
|
24797
24817
|
if (lintFixCmd) {
|
|
24798
24818
|
pipelineEventBus.emit({ type: "autofix:started", storyId: ctx.story.id, command: lintFixCmd });
|
|
24799
24819
|
const lintResult = await _autofixDeps.runCommand(lintFixCmd, effectiveWorkdir);
|
|
24800
|
-
logger.debug("autofix", `lintFix exit=${lintResult.exitCode}`, { storyId: ctx.story.id });
|
|
24820
|
+
logger.debug("autofix", `lintFix exit=${lintResult.exitCode}`, { storyId: ctx.story.id, command: lintFixCmd });
|
|
24801
24821
|
if (lintResult.exitCode !== 0) {
|
|
24802
24822
|
logger.warn("autofix", "lintFix command failed \u2014 may not have fixed all issues", {
|
|
24803
24823
|
storyId: ctx.story.id,
|
|
@@ -24808,7 +24828,10 @@ var init_autofix = __esm(() => {
|
|
|
24808
24828
|
if (formatFixCmd) {
|
|
24809
24829
|
pipelineEventBus.emit({ type: "autofix:started", storyId: ctx.story.id, command: formatFixCmd });
|
|
24810
24830
|
const fmtResult = await _autofixDeps.runCommand(formatFixCmd, effectiveWorkdir);
|
|
24811
|
-
logger.debug("autofix", `formatFix exit=${fmtResult.exitCode}`, {
|
|
24831
|
+
logger.debug("autofix", `formatFix exit=${fmtResult.exitCode}`, {
|
|
24832
|
+
storyId: ctx.story.id,
|
|
24833
|
+
command: formatFixCmd
|
|
24834
|
+
});
|
|
24812
24835
|
if (fmtResult.exitCode !== 0) {
|
|
24813
24836
|
logger.warn("autofix", "formatFix command failed \u2014 may not have fixed all issues", {
|
|
24814
24837
|
storyId: ctx.story.id,
|
|
@@ -24819,11 +24842,12 @@ var init_autofix = __esm(() => {
|
|
|
24819
24842
|
const recheckPassed = await _autofixDeps.recheckReview(ctx);
|
|
24820
24843
|
pipelineEventBus.emit({ type: "autofix:completed", storyId: ctx.story.id, fixed: recheckPassed });
|
|
24821
24844
|
if (recheckPassed) {
|
|
24822
|
-
if (ctx.reviewResult)
|
|
24823
|
-
ctx.reviewResult = { ...ctx.reviewResult, success: true };
|
|
24824
24845
|
logger.info("autofix", "Mechanical autofix succeeded \u2014 retrying review", { storyId: ctx.story.id });
|
|
24825
24846
|
return { action: "retry", fromStage: "review" };
|
|
24826
24847
|
}
|
|
24848
|
+
logger.info("autofix", "Mechanical autofix did not resolve all failures \u2014 proceeding to agent rectification", {
|
|
24849
|
+
storyId: ctx.story.id
|
|
24850
|
+
});
|
|
24827
24851
|
}
|
|
24828
24852
|
const agentFixed = await _autofixDeps.runAgentRectification(ctx);
|
|
24829
24853
|
if (agentFixed) {
|
|
@@ -29553,7 +29577,7 @@ var init_routing2 = __esm(() => {
|
|
|
29553
29577
|
logger.debug("routing", ctx.routing.reasoning);
|
|
29554
29578
|
}
|
|
29555
29579
|
const decomposeConfig = ctx.config.decompose;
|
|
29556
|
-
if (decomposeConfig) {
|
|
29580
|
+
if (decomposeConfig && ctx.story.status !== "decomposed") {
|
|
29557
29581
|
const acCount = ctx.story.acceptanceCriteria.length;
|
|
29558
29582
|
const complexity = ctx.routing.complexity;
|
|
29559
29583
|
const isOversized = acCount > decomposeConfig.maxAcceptanceCriteria && (complexity === "complex" || complexity === "expert");
|
|
@@ -34256,6 +34280,7 @@ async function handlePipelineFailure(ctx, pipelineResult) {
|
|
|
34256
34280
|
const logger = getSafeLogger();
|
|
34257
34281
|
let prd = ctx.prd;
|
|
34258
34282
|
let prdDirty = false;
|
|
34283
|
+
const costDelta = pipelineResult.context.agentResult?.estimatedCost || 0;
|
|
34259
34284
|
switch (pipelineResult.finalAction) {
|
|
34260
34285
|
case "pause":
|
|
34261
34286
|
markStoryPaused(prd, ctx.story.id);
|
|
@@ -34322,7 +34347,7 @@ async function handlePipelineFailure(ctx, pipelineResult) {
|
|
|
34322
34347
|
break;
|
|
34323
34348
|
}
|
|
34324
34349
|
}
|
|
34325
|
-
return { prd, prdDirty };
|
|
34350
|
+
return { prd, prdDirty, costDelta };
|
|
34326
34351
|
}
|
|
34327
34352
|
var init_pipeline_result_handler = __esm(() => {
|
|
34328
34353
|
init_logger2();
|
|
@@ -34427,7 +34452,7 @@ async function runIteration(ctx, prd, selection, iterations, totalCost, allStory
|
|
|
34427
34452
|
return {
|
|
34428
34453
|
prd: r.prd,
|
|
34429
34454
|
storiesCompletedDelta: 0,
|
|
34430
|
-
costDelta:
|
|
34455
|
+
costDelta: r.costDelta,
|
|
34431
34456
|
prdDirty: r.prdDirty,
|
|
34432
34457
|
finalAction: pipelineResult.finalAction,
|
|
34433
34458
|
reason: pipelineResult.reason
|
|
@@ -34465,7 +34490,7 @@ function buildPreviewRouting(story, config2) {
|
|
|
34465
34490
|
function selectNextStories(prd, config2, batchPlan, currentBatchIndex, lastStoryId, useBatch) {
|
|
34466
34491
|
if (useBatch && currentBatchIndex < batchPlan.length) {
|
|
34467
34492
|
const batch = batchPlan[currentBatchIndex];
|
|
34468
|
-
const storiesToExecute = batch.stories.filter((s) => !s.passes && s.status !== "passed" && s.status !== "skipped" && s.status !== "blocked" && s.status !== "failed" && s.status !== "paused");
|
|
34493
|
+
const storiesToExecute = batch.stories.filter((s) => !s.passes && s.status !== "passed" && s.status !== "skipped" && s.status !== "blocked" && s.status !== "failed" && s.status !== "paused" && s.status !== "decomposed");
|
|
34469
34494
|
if (storiesToExecute.length === 0) {
|
|
34470
34495
|
return { selection: null, nextBatchIndex: currentBatchIndex + 1 };
|
|
34471
34496
|
}
|
package/package.json
CHANGED
|
@@ -53,10 +53,10 @@ regardless of LOC count. These require at minimum "tdd-simple" test strategy.`;
|
|
|
53
53
|
|
|
54
54
|
export const TEST_STRATEGY_GUIDE = `## Test Strategy Guide
|
|
55
55
|
|
|
56
|
-
- test-after: Simple changes with well-understood behavior. Write tests after implementation.
|
|
57
|
-
- tdd-simple: Medium complexity. Write
|
|
58
|
-
- three-session-tdd: Complex stories.
|
|
59
|
-
- three-session-tdd-lite: Expert/high-risk stories.
|
|
56
|
+
- test-after: Simple changes with well-understood behavior. Write tests after implementation in a single session.
|
|
57
|
+
- tdd-simple: Medium complexity. Write failing tests first, then implement to pass them — all in one session.
|
|
58
|
+
- three-session-tdd: Complex stories. 3 sessions: (1) test-writer writes failing tests — no src/ changes allowed, (2) implementer makes them pass without modifying test files, (3) verifier confirms correctness.
|
|
59
|
+
- three-session-tdd-lite: Expert/high-risk stories. 3 sessions: (1) test-writer writes failing tests and may create minimal src/ stubs for imports, (2) implementer makes tests pass and may add missing coverage or replace stubs, (3) verifier confirms correctness.`;
|
|
60
60
|
|
|
61
61
|
export const GROUPING_RULES = `## Grouping Rules
|
|
62
62
|
|
|
@@ -102,6 +102,7 @@ export async function handlePipelineSuccess(
|
|
|
102
102
|
export interface PipelineFailureResult {
|
|
103
103
|
prd: PRD;
|
|
104
104
|
prdDirty: boolean;
|
|
105
|
+
costDelta: number;
|
|
105
106
|
}
|
|
106
107
|
|
|
107
108
|
export async function handlePipelineFailure(
|
|
@@ -111,6 +112,8 @@ export async function handlePipelineFailure(
|
|
|
111
112
|
const logger = getSafeLogger();
|
|
112
113
|
let prd = ctx.prd;
|
|
113
114
|
let prdDirty = false;
|
|
115
|
+
// Always capture cost even for failed stories — agent ran and spent tokens
|
|
116
|
+
const costDelta = pipelineResult.context.agentResult?.estimatedCost || 0;
|
|
114
117
|
|
|
115
118
|
switch (pipelineResult.finalAction) {
|
|
116
119
|
case "pause":
|
|
@@ -185,5 +188,5 @@ export async function handlePipelineFailure(
|
|
|
185
188
|
}
|
|
186
189
|
}
|
|
187
190
|
|
|
188
|
-
return { prd, prdDirty };
|
|
191
|
+
return { prd, prdDirty, costDelta };
|
|
189
192
|
}
|
|
@@ -61,12 +61,22 @@ export const autofixStage: PipelineStage = {
|
|
|
61
61
|
// Effective workdir for running commands (scoped to package if monorepo)
|
|
62
62
|
const effectiveWorkdir = ctx.story.workdir ? join(ctx.workdir, ctx.story.workdir) : ctx.workdir;
|
|
63
63
|
|
|
64
|
-
//
|
|
65
|
-
|
|
64
|
+
// Identify which checks failed
|
|
65
|
+
const failedCheckNames = new Set((reviewResult.checks ?? []).filter((c) => !c.success).map((c) => c.check));
|
|
66
|
+
const hasLintFailure = failedCheckNames.has("lint");
|
|
67
|
+
|
|
68
|
+
logger.info("autofix", "Starting autofix", {
|
|
69
|
+
storyId: ctx.story.id,
|
|
70
|
+
failedChecks: [...failedCheckNames],
|
|
71
|
+
workdir: effectiveWorkdir,
|
|
72
|
+
});
|
|
73
|
+
|
|
74
|
+
// Phase 1: Mechanical fix — only for lint failures (lintFix/formatFix cannot fix typecheck errors)
|
|
75
|
+
if (hasLintFailure && (lintFixCmd || formatFixCmd)) {
|
|
66
76
|
if (lintFixCmd) {
|
|
67
77
|
pipelineEventBus.emit({ type: "autofix:started", storyId: ctx.story.id, command: lintFixCmd });
|
|
68
78
|
const lintResult = await _autofixDeps.runCommand(lintFixCmd, effectiveWorkdir);
|
|
69
|
-
logger.debug("autofix", `lintFix exit=${lintResult.exitCode}`, { storyId: ctx.story.id });
|
|
79
|
+
logger.debug("autofix", `lintFix exit=${lintResult.exitCode}`, { storyId: ctx.story.id, command: lintFixCmd });
|
|
70
80
|
if (lintResult.exitCode !== 0) {
|
|
71
81
|
logger.warn("autofix", "lintFix command failed — may not have fixed all issues", {
|
|
72
82
|
storyId: ctx.story.id,
|
|
@@ -78,7 +88,10 @@ export const autofixStage: PipelineStage = {
|
|
|
78
88
|
if (formatFixCmd) {
|
|
79
89
|
pipelineEventBus.emit({ type: "autofix:started", storyId: ctx.story.id, command: formatFixCmd });
|
|
80
90
|
const fmtResult = await _autofixDeps.runCommand(formatFixCmd, effectiveWorkdir);
|
|
81
|
-
logger.debug("autofix", `formatFix exit=${fmtResult.exitCode}`, {
|
|
91
|
+
logger.debug("autofix", `formatFix exit=${fmtResult.exitCode}`, {
|
|
92
|
+
storyId: ctx.story.id,
|
|
93
|
+
command: formatFixCmd,
|
|
94
|
+
});
|
|
82
95
|
if (fmtResult.exitCode !== 0) {
|
|
83
96
|
logger.warn("autofix", "formatFix command failed — may not have fixed all issues", {
|
|
84
97
|
storyId: ctx.story.id,
|
|
@@ -91,10 +104,13 @@ export const autofixStage: PipelineStage = {
|
|
|
91
104
|
pipelineEventBus.emit({ type: "autofix:completed", storyId: ctx.story.id, fixed: recheckPassed });
|
|
92
105
|
|
|
93
106
|
if (recheckPassed) {
|
|
94
|
-
if (ctx.reviewResult) ctx.reviewResult = { ...ctx.reviewResult, success: true };
|
|
95
107
|
logger.info("autofix", "Mechanical autofix succeeded — retrying review", { storyId: ctx.story.id });
|
|
96
108
|
return { action: "retry", fromStage: "review" };
|
|
97
109
|
}
|
|
110
|
+
|
|
111
|
+
logger.info("autofix", "Mechanical autofix did not resolve all failures — proceeding to agent rectification", {
|
|
112
|
+
storyId: ctx.story.id,
|
|
113
|
+
});
|
|
98
114
|
}
|
|
99
115
|
|
|
100
116
|
// Phase 2: Agent rectification — spawn agent with review error context
|
|
@@ -134,8 +150,11 @@ async function recheckReview(ctx: PipelineContext): Promise<boolean> {
|
|
|
134
150
|
// Import reviewStage lazily to avoid circular deps
|
|
135
151
|
const { reviewStage } = await import("./review");
|
|
136
152
|
if (!reviewStage.enabled(ctx)) return true;
|
|
137
|
-
|
|
138
|
-
|
|
153
|
+
// reviewStage.execute updates ctx.reviewResult in place.
|
|
154
|
+
// We cannot use result.action here because review returns "continue" for BOTH
|
|
155
|
+
// pass and built-in-check-failure (to hand off to autofix). Check success directly.
|
|
156
|
+
await reviewStage.execute(ctx);
|
|
157
|
+
return ctx.reviewResult?.success === true;
|
|
139
158
|
}
|
|
140
159
|
|
|
141
160
|
function collectFailedChecks(ctx: PipelineContext): ReviewCheckResult[] {
|
|
@@ -196,7 +196,7 @@ export const routingStage: PipelineStage = {
|
|
|
196
196
|
|
|
197
197
|
// SD-004: Oversized story detection and decomposition
|
|
198
198
|
const decomposeConfig = ctx.config.decompose;
|
|
199
|
-
if (decomposeConfig) {
|
|
199
|
+
if (decomposeConfig && ctx.story.status !== "decomposed") {
|
|
200
200
|
const acCount = ctx.story.acceptanceCriteria.length;
|
|
201
201
|
const complexity = ctx.routing.complexity;
|
|
202
202
|
const isOversized =
|
package/src/review/runner.ts
CHANGED
|
@@ -99,6 +99,9 @@ const SIGKILL_GRACE_PERIOD_MS = 5_000;
|
|
|
99
99
|
*/
|
|
100
100
|
async function runCheck(check: ReviewCheckName, command: string, workdir: string): Promise<ReviewCheckResult> {
|
|
101
101
|
const startTime = Date.now();
|
|
102
|
+
const logger = getSafeLogger();
|
|
103
|
+
|
|
104
|
+
logger?.info("review", `Running ${check} check`, { check, command, workdir });
|
|
102
105
|
|
|
103
106
|
try {
|
|
104
107
|
// Parse command into executable and args
|
|
@@ -152,6 +155,18 @@ async function runCheck(check: ReviewCheckName, command: string, workdir: string
|
|
|
152
155
|
const stderr = await new Response(proc.stderr).text();
|
|
153
156
|
const output = [stdout, stderr].filter(Boolean).join("\n");
|
|
154
157
|
|
|
158
|
+
if (exitCode !== 0) {
|
|
159
|
+
logger?.warn("review", `${check} check failed`, {
|
|
160
|
+
check,
|
|
161
|
+
command,
|
|
162
|
+
workdir,
|
|
163
|
+
exitCode,
|
|
164
|
+
output: output.slice(0, 2000),
|
|
165
|
+
});
|
|
166
|
+
} else {
|
|
167
|
+
logger?.debug("review", `${check} check passed`, { check, command, durationMs: Date.now() - startTime });
|
|
168
|
+
}
|
|
169
|
+
|
|
155
170
|
return {
|
|
156
171
|
check,
|
|
157
172
|
command,
|