@nathapp/nax 0.27.0 → 0.27.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CLAUDE.md CHANGED
@@ -92,16 +92,46 @@ Runner.run() [src/execution/runner.ts — thin orchestrator only]
92
92
  2. **Plan complex tasks**: for multi-file changes, write a short plan before implementing.
93
93
  3. **Implement in small chunks**: one logical concern per commit.
94
94
 
95
- ## Code Intelligence (Solograph MCP)
95
+ ## Code Intelligence (Solograph MCP) — MANDATORY
96
96
 
97
- Use **solograph** MCP tools on-demand do not use `web_search` or `kb_search`.
97
+ **Always use solograph MCP tools before writing code or analyzing architecture.** Do NOT use `web_search` or `kb_search` as substitutes.
98
98
 
99
- | Tool | When |
100
- |:-----|:-----|
101
- | `project_code_search` | Find existing patterns before writing new code |
102
- | `codegraph_explain` | Architecture overview before tackling unfamiliar areas |
103
- | `codegraph_query` | Dependency/impact analysis (Cypher) |
104
- | `project_code_reindex` | After creating or deleting source files |
99
+ ### Tool Selection Guide
100
+
101
+ | Tool | Capability | When to Use | Availability |
102
+ |:-----|:-----------|:-----------|:-------------|
103
+ | `codegraph_query` | Structural queries (Cypher) — find calls, dependencies, imports | **Preferred for dependency analysis, call tracing, symbol lookup** | ✅ Always works (in-memory graph) |
104
+ | `project_code_search` | Semantic search (Redis vector DB) — pattern matching by meaning | Natural language queries like "find auth patterns" | ⚠️ Requires explicit `project_code_reindex` + Redis daemon |
105
+ | `codegraph_explain` | Architecture overview for unfamiliar subsystems | Understand module relationships before major changes | ✅ Always works |
106
+ | `project_code_reindex` | Index project for semantic search | After creating/deleting source files | ✅ Always works |
107
+
108
+ ### Recommended Workflow
109
+
110
+ For nax, **prefer `codegraph_query`** for routine tasks:
111
+ - Finding where functions are called (`calculateAggregateMetrics` called by `status-cost.ts`)
112
+ - Analyzing dependencies before refactoring
113
+ - Tracing import/export chains
114
+ - Querying symbol definitions and relationships
115
+
116
+ **Use `project_code_search` only if:**
117
+ - You need semantic similarity ("find authentication patterns")
118
+ - Redis is indexed and running (not guaranteed in all sessions)
119
+
120
+ ### Example Queries
121
+
122
+ ```cypher
123
+ // Find files calling calculateAggregateMetrics
124
+ MATCH (f:File)-[:CALLS]->(s:Symbol {name: "calculateAggregateMetrics"})
125
+ RETURN f.path
126
+
127
+ // Find all imports of aggregator.ts
128
+ MATCH (f:File)-[:IMPORTS]->(target:File {path: "src/metrics/aggregator.ts"})
129
+ RETURN f.path
130
+
131
+ // Find symbols defined in a file
132
+ MATCH (f:File {path: "src/metrics/aggregator.ts"})-[:DEFINES]->(s:Symbol)
133
+ RETURN s.name, s.type
134
+ ```
105
135
 
106
136
  ## Coding Standards & Forbidden Patterns
107
137
 
package/docs/ROADMAP.md CHANGED
@@ -321,22 +321,21 @@
321
321
  - [x] ~~**BUG-022:** Story interleaving — `getNextStory()` round-robins instead of exhausting retries on current story → fixed in v0.18.0~~
322
322
  - [x] ~~**BUG-023:** Agent failure silent — no exitCode/stderr in JSONL → fixed in v0.18.0~~
323
323
  - [x] ~~**BUG-025:** `needsHumanReview` not triggering interactive plugin → fixed in v0.18.0~~
324
-
325
- - [x] **BUG-029:** Escalation resets story to `pending` → bypasses BUG-022 retry priority. `handleTierEscalation()` sets `status: "pending"` after escalation, but `getNextStory()` Priority 1 only checks `status === "failed"`. Result: after BUG-026 escalated (iter 1), nax moved to BUG-028 (iter 2) instead of retrying BUG-026 immediately. **Location:** `src/prd/index.ts:getNextStory()` + `src/execution/escalation/tier-escalation.ts`. **Fix:** `getNextStory()` should also prioritize stories with `story.routing.modelTier` that changed since last attempt (escalation marker), or `handleTierEscalation` should use a distinct status like `"retry-pending"` that Priority 1 recognizes.
326
- - [x] **BUG-030:** Review lint failure → hard `"fail"`, no rectification or retry. `src/pipeline/stages/review.ts:92` returns `{ action: "fail" }` for all review failures including lint. In `pipeline-result-handler.ts`, `"fail"` calls `markStoryFailed()` — permanently dead. But lint errors are auto-fixable (agent can run `biome check --fix`). Contrast with verify stage which returns `"escalate"` on test failure, allowing retry. SFC-001 and SFC-002 both hit this — tests passed but 5 Biome lint errors killed the stories permanently. **Fix:** Review stage should return `"escalate"` (not `"fail"`) for lint/typecheck failures, or add a review-rectification loop (like verify has) that gives the agent one retry with the lint output as context. Reserve `"fail"` for unfixable review issues (e.g. plugin reviewer rejection).
327
- - [x] **BUG-031:** Keyword fallback classifier gives inconsistent strategy across retries for same story. BUG-026 was classified as `test-after` on iter 1 (keyword fallback), but `three-session-tdd-lite` on iter 5 (same keyword fallback). The keyword classifier in `src/routing/strategies/keyword.ts:classifyComplexity()` may be influenced by `priorErrors` text added between attempts, shifting the keyword match result. **Location:** `src/routing/strategies/keyword.ts`. **Fix:** Keyword classifier should only consider the story's original title + description + acceptance criteria, not accumulated `priorErrors` or `priorFailures`. Alternatively, once a strategy is set in `story.routing.testStrategy`, the routing stage should preserve it across retries (already partially done in `routing.ts:40-41` but may not apply when LLM falls back to keyword).
328
- - [x] **BUG-032:** Routing stage overrides escalated `modelTier` with complexity-derived tier. `src/pipeline/stages/routing.ts:43` always runs `complexityToModelTier(routing.complexity, config)` even when `story.routing.modelTier` was explicitly set by `handleTierEscalation()`. BUG-026 was escalated to `balanced` (logged in iteration header), but `Task classified` shows `modelTier=fast` because `complexityToModelTier("simple", config)` → `"fast"`. Related to BUG-013 (escalation routing not applied) which was marked fixed, but the fix in `applyCachedRouting()` in `pipeline-result-handler.ts:295-310` runs **after** the routing stage — too late. **Location:** `src/pipeline/stages/routing.ts:43`. **Fix:** When `story.routing.modelTier` is explicitly set (by escalation), skip `complexityToModelTier()` and use the cached tier directly. Only derive from complexity when `story.routing.modelTier` is absent.
329
- - [x] **BUG-033:** LLM routing has no retry on timeout — single attempt with hardcoded 15s default. All 5 LLM routing attempts in the v0.18.3 run timed out at 15s, forcing keyword fallback every time. `src/routing/strategies/llm.ts:63` reads `llmConfig?.timeoutMs ?? 15000` but there's no retry logic — one timeout = immediate fallback. **Location:** `src/routing/strategies/llm.ts:callLlm()`. **Fix:** Add `routing.llm.retries` config (default: 1) with backoff. Also surface `routing.llm.timeoutMs` in `nax config --explain` and consider raising default to 30s for batch routing which processes multiple stories.
330
-
331
- - [x] ~~**BUG-037:** Test output summary (verify stage) captures precheck boilerplate instead of actual `bun test` failure. Fixed: `.slice(-20)` tail — shipped in v0.22.1 (re-arch phase 2).~~
332
- - [x] ~~**BUG-038:** `smart-runner` over-matching when global defaults change. Fixed by FEAT-010 (v0.21.0) — per-attempt `storyGitRef` baseRef tracking; `git diff <baseRef>..HEAD` prevents cross-story file pollution.~~
333
- - [x] ~~**BUG-043:** Scoped test command appends files instead of replacing path — `runners.ts:scoped()` concatenates `scopedTestPaths` to full-suite command, resulting in `bun test test/ --timeout=60000 /path/to/file.ts` (runs everything). Fix: use `testScoped` config with `{{files}}` template, fall back to `buildSmartTestCommand()` heuristic. **Location:** `src/verification/runners.ts:scoped()`~~
334
- - [x] ~~**BUG-044:** Scoped/full-suite test commands not logged — no visibility into what command was actually executed during verify stage. Fix: log at info level before execution.~~
335
- - [ ] **BUG-049:** Review typecheck runs on dirty working tree — false-positive pass when agent commits partial changes. If the agent stages only some files (e.g. forgets `git add types.ts`), the working tree retains the uncommitted fix and `bun run typecheck` passes — but the committed state has a type error. Discovered in routing-persistence run: RRP-003 committed `contentHash` refs in `routing.ts` without the matching `StoryRouting.contentHash` field in `types.ts`; typecheck passed because `types.ts` was locally modified but uncommitted. **Location:** `src/review/runner.ts:runCheck()`. **Fix:** Before running built-in checks, assert working tree is clean (`git diff --name-only` returns empty). If dirty, fail with "uncommitted changes detected" or log a warning and stash/restore.
336
- - [ ] **BUG-050:** `checkOptionalCommands` precheck uses legacy config fields — misleading "not configured" warning. Checks `config.execution.lintCommand` and `config.execution.typecheckCommand` (stale/legacy fields). Actual config uses `config.review.commands.typecheck` and `config.review.commands.lint`. Result: precheck always warns "Optional commands not configured: lint, typecheck" even when correctly configured, desensitizing operators to real warnings. **Location:** `src/precheck/checks-warnings.ts:checkOptionalCommands()`. **Fix:** Update check to resolve via the same priority chain as `review/runner.ts`: `execution.*Command` → `review.commands.*` → `package.json` scripts.
337
- - [ ] **BUG-052:** `console.warn` in runtime pipeline code bypasses structured JSONL logger invisible to log consumers. `src/review/runner.ts` and `src/optimizer/index.ts` used `console.warn()` for skip/fallback events, which print to stderr but are never written to the JSONL log file. This made review stage skip decisions invisible during post-run analysis. **Location:** `src/review/runner.ts:runReview()`, `src/optimizer/index.ts:resolveOptimizer()`. **Fix:** Replace with `getSafeLogger()?.warn()`. Fixed in feat/routing-persistence.
338
- - [ ] **BUG-052:** `console.warn` in runtime pipeline code bypasses structured JSONL logger — invisible to log consumers. `src/review/runner.ts` and `src/optimizer/index.ts` used `console.warn()` for skip/fallback events, which print to stderr but are never written to the JSONL log file. This made review stage skip decisions invisible during post-run analysis. **Location:** `src/review/runner.ts:runReview()`, `src/optimizer/index.ts:resolveOptimizer()`. **Fix:** Replace with `getSafeLogger()?.warn()`. ✅ Fixed in feat/routing-persistence.
339
- - [ ] **BUG-051:** `quality.commands.typecheck` and `quality.commands.lint` are dead config — silently ignored. `QualityConfig.commands.{typecheck,lint}` exist in the type definition and are documented in `nax config --explain`, but are never read by any runtime code. The review runner reads only `review.commands.typecheck/lint`. Users who set `quality.commands.typecheck` get no effect. **Location:** `src/config/types.ts` (QualityConfig), `src/review/runner.ts:resolveCommand()`. **Fix:** Either (A) remove the dead fields from `QualityConfig` and update docs, or (B) consolidate — make review runner read from `quality.commands` and deprecate `review.commands`.
324
+ - [x] ~~**BUG-029:** Escalation resets story to `pending`. Fixed.~~
325
+ - [x] ~~**BUG-030:** Review lint failure → hard `"fail"` with no rectification or retry. Fixed.~~
326
+ - [x] ~~**BUG-031:** Keyword fallback classifier inconsistency. Fixed.~~
327
+ - [x] ~~**BUG-032:** Routing stage overrides escalated modelTier. Fixed.~~
328
+ - [x] ~~**BUG-033:** LLM routing timeout/retry. Fixed.~~
329
+ - [x] ~~**BUG-037:** Test output summary (verify stage) tail. Fixed.~~
330
+ - [x] ~~**BUG-038:** smart-runner over-matching. Fixed.~~
331
+ - [x] ~~**BUG-043:** Scoped test command construction. Fixed.~~
332
+ - [x] ~~**BUG-044:** Scoped/full-suite test command logging. Fixed.~~
333
+ - [x] ~~**BUG-049:** Review typecheck runs on dirty working tree. Fixed in v0.27.0.~~
334
+ - [x] ~~**BUG-050:** `checkOptionalCommands` precheck uses legacy config fields. Fixed in v0.27.0.~~
335
+ - [x] ~~**BUG-051:** `quality.commands.typecheck/lint` are dead config. Fixed in v0.27.0.~~
336
+ - [x] ~~**BUG-052:** `console.warn` in runtime pipeline code bypasses JSONL logger. Fixed in v0.26.0.~~
337
+ - [ ] **BUG-054:** Redundant scoped verify after TDD full-suite gate passes. When rectification gate runs full test suite and passes, the pipeline verify stage re-runs scoped tests (subset). **Fix:** Skip verify if full-suite gate already passed.
338
+ - [ ] **BUG-055:** Pipeline skip messages conflate "not needed" with "disabled". `runner.ts:54` logs "skipped (disabled)" for all stages where `enabled()` returns false, even if just because tests passed. **Fix:** Differentiate log message.
340
339
 
341
340
  ### Features
342
341
  - [x] ~~`nax unlock` command~~
@@ -362,4 +361,4 @@ Sequential canary → stable: `v0.12.0-canary.0` → `canary.N` → `v0.12.0`
362
361
  Canary: `npm publish --tag canary`
363
362
  Stable: `npm publish` (latest)
364
363
 
365
- *Last updated: 2026-03-07 (v0.22.1 shipped — Pipeline Re-Architecture: VerificationOrchestrator, EventBus, new stages, post-run SSOT)*
364
+ *Last updated: 2026-03-08 (v0.27.0 shipped — Review Quality)*
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@nathapp/nax",
3
- "version": "0.27.0",
3
+ "version": "0.27.1",
4
4
  "description": "AI Coding Agent Orchestrator \u2014 loops until done",
5
5
  "type": "module",
6
6
  "bin": {
@@ -51,7 +51,8 @@ export async function runPipeline(
51
51
 
52
52
  // Skip disabled stages
53
53
  if (!stage.enabled(context)) {
54
- logger.debug("pipeline", `Stage "${stage.name}" skipped (disabled)`);
54
+ const reason = stage.skipReason?.(context) ?? "disabled";
55
+ logger.debug("pipeline", `Stage "${stage.name}" skipped (${reason})`);
55
56
  i++;
56
57
  continue;
57
58
  }
@@ -29,6 +29,11 @@ export const autofixStage: PipelineStage = {
29
29
  return autofixEnabled;
30
30
  },
31
31
 
32
+ skipReason(ctx: PipelineContext): string {
33
+ if (!ctx.reviewResult || ctx.reviewResult.success) return "not needed (review passed)";
34
+ return "disabled (autofix not enabled in config)";
35
+ },
36
+
32
37
  async execute(ctx: PipelineContext): Promise<StageResult> {
33
38
  const logger = getLogger();
34
39
  const { reviewResult } = ctx;
@@ -27,6 +27,11 @@ export const rectifyStage: PipelineStage = {
27
27
  return ctx.config.execution.rectification?.enabled ?? false;
28
28
  },
29
29
 
30
+ skipReason(ctx: PipelineContext): string {
31
+ if (!ctx.verifyResult || ctx.verifyResult.success) return "not needed (verify passed)";
32
+ return "disabled (rectification not enabled in config)";
33
+ },
34
+
30
35
  async execute(ctx: PipelineContext): Promise<StageResult> {
31
36
  const logger = getLogger();
32
37
  const { verifyResult } = ctx;
@@ -26,12 +26,17 @@ export const regressionStage: PipelineStage = {
26
26
  const mode = ctx.config.execution.regressionGate?.mode ?? "deferred";
27
27
  if (mode !== "per-story") return false;
28
28
  // Only run when verify passed (or was skipped/not set)
29
- // Only run when verify passed (or was skipped/not set)
30
29
  if (ctx.verifyResult && !ctx.verifyResult.success) return false;
31
30
  const gateEnabled = ctx.config.execution.regressionGate?.enabled ?? true;
32
31
  return gateEnabled;
33
32
  },
34
33
 
34
+ skipReason(ctx: PipelineContext): string {
35
+ const mode = ctx.config.execution.regressionGate?.mode ?? "deferred";
36
+ if (mode !== "per-story") return `not needed (regression mode is '${mode}', not 'per-story')`;
37
+ return "disabled (regression gate not enabled in config)";
38
+ },
39
+
35
40
  async execute(ctx: PipelineContext): Promise<StageResult> {
36
41
  const logger = getLogger();
37
42
  const testCommand = ctx.config.review?.commands?.test ?? ctx.config.quality.commands.test ?? "bun test";
@@ -45,7 +45,8 @@ function buildScopedCommand(testFiles: string[], baseCommand: string, testScoped
45
45
 
46
46
  export const verifyStage: PipelineStage = {
47
47
  name: "verify",
48
- enabled: () => true,
48
+ enabled: (ctx: PipelineContext) => !ctx.fullSuiteGatePassed,
49
+ skipReason: () => "not needed (full-suite gate already passed)",
49
50
 
50
51
  async execute(ctx: PipelineContext): Promise<StageResult> {
51
52
  const logger = getLogger();
@@ -108,6 +108,8 @@ export interface PipelineContext {
108
108
  retryAsLite?: boolean;
109
109
  /** Failure category from TDD orchestrator (set by executionStage on TDD failure) */
110
110
  tddFailureCategory?: FailureCategory;
111
+ /** Set to true when TDD full-suite gate already passed — verify stage skips to avoid redundant run (BUG-054) */
112
+ fullSuiteGatePassed?: boolean;
111
113
  }
112
114
 
113
115
  /**
@@ -167,6 +169,13 @@ export interface PipelineStage {
167
169
  */
168
170
  enabled: (ctx: PipelineContext) => boolean;
169
171
 
172
+ /**
173
+ * Optional human-readable reason why the stage was skipped.
174
+ * Distinguishes "not needed" (conditions not met) from "disabled" (config).
175
+ * Used by the pipeline runner for better observability (BUG-055).
176
+ */
177
+ skipReason?: (ctx: PipelineContext) => string;
178
+
170
179
  /**
171
180
  * Execute the stage logic.
172
181
  *
@@ -255,7 +255,16 @@ export async function runThreeSessionTdd(options: ThreeSessionTddOptions): Promi
255
255
  }
256
256
 
257
257
  // Full-Suite Gate (v0.11 Rectification)
258
- await runFullSuiteGate(story, config, workdir, agent, implementerTier, contextMarkdown, lite, logger);
258
+ const fullSuiteGatePassed = await runFullSuiteGate(
259
+ story,
260
+ config,
261
+ workdir,
262
+ agent,
263
+ implementerTier,
264
+ contextMarkdown,
265
+ lite,
266
+ logger,
267
+ );
259
268
 
260
269
  // Session 3: Verifier
261
270
  const session3Ref = (await captureGitRef(workdir)) ?? "HEAD";
@@ -379,5 +388,6 @@ export async function runThreeSessionTdd(options: ThreeSessionTddOptions): Promi
379
388
  verdict,
380
389
  totalCost,
381
390
  lite,
391
+ fullSuiteGatePassed,
382
392
  };
383
393
  }
@@ -34,9 +34,9 @@ export async function runFullSuiteGate(
34
34
  contextMarkdown: string | undefined,
35
35
  lite: boolean,
36
36
  logger: ReturnType<typeof getLogger>,
37
- ): Promise<void> {
37
+ ): Promise<boolean> {
38
38
  const rectificationEnabled = config.execution.rectification?.enabled ?? false;
39
- if (!rectificationEnabled) return;
39
+ if (!rectificationEnabled) return false;
40
40
 
41
41
  const rectificationConfig = config.execution.rectification;
42
42
  const testCmd = config.quality?.commands?.test ?? "bun test";
@@ -54,7 +54,7 @@ export async function runFullSuiteGate(
54
54
  const testSummary = parseBunTestOutput(fullSuiteResult.output);
55
55
 
56
56
  if (testSummary.failed > 0) {
57
- await runRectificationLoop(
57
+ return await runRectificationLoop(
58
58
  story,
59
59
  config,
60
60
  workdir,
@@ -69,14 +69,18 @@ export async function runFullSuiteGate(
69
69
  fullSuiteTimeout,
70
70
  );
71
71
  }
72
- } else if (fullSuitePassed) {
72
+ // No failures detected despite non-zero exit — treat as passed
73
+ return true;
74
+ }
75
+ if (fullSuitePassed) {
73
76
  logger.info("tdd", "Full suite gate passed", { storyId: story.id });
74
- } else {
75
- logger.warn("tdd", "Full suite gate execution failed (no output)", {
76
- storyId: story.id,
77
- exitCode: fullSuiteResult.exitCode,
78
- });
77
+ return true;
79
78
  }
79
+ logger.warn("tdd", "Full suite gate execution failed (no output)", {
80
+ storyId: story.id,
81
+ exitCode: fullSuiteResult.exitCode,
82
+ });
83
+ return false;
80
84
  }
81
85
 
82
86
  /** Run the rectification retry loop when full suite gate detects regressions. */
@@ -93,7 +97,7 @@ async function runRectificationLoop(
93
97
  rectificationConfig: NonNullable<NaxConfig["execution"]["rectification"]>,
94
98
  testCmd: string,
95
99
  fullSuiteTimeout: number,
96
- ): Promise<void> {
100
+ ): Promise<boolean> {
97
101
  const rectificationState: RectificationState = {
98
102
  attempt: 0,
99
103
  initialFailures: testSummary.failed,
@@ -156,7 +160,7 @@ async function runRectificationLoop(
156
160
  storyId: story.id,
157
161
  attempt: rectificationState.attempt,
158
162
  });
159
- break;
163
+ return true;
160
164
  }
161
165
 
162
166
  if (retryFullSuite.output) {
@@ -177,7 +181,8 @@ async function runRectificationLoop(
177
181
  attempts: rectificationState.attempt,
178
182
  remainingFailures: rectificationState.currentFailures,
179
183
  });
180
- } else {
181
- logger.info("tdd", "Full suite gate passed", { storyId: story.id });
184
+ return false;
182
185
  }
186
+ logger.info("tdd", "Full suite gate passed", { storyId: story.id });
187
+ return true;
183
188
  }
package/src/tdd/types.ts CHANGED
@@ -78,4 +78,6 @@ export interface ThreeSessionTddResult {
78
78
  * undefined = verdict was not attempted (e.g. early-exit before session 3 ran)
79
79
  */
80
80
  verdict?: import("./verdict").VerifierVerdict | null;
81
+ /** Whether the TDD full-suite gate passed (used by verify stage to skip redundant run, BUG-054) */
82
+ fullSuiteGatePassed?: boolean;
81
83
  }