npm - @nathapp/nax - Versions diffs - 0.19.0 → 0.20.0 - Mend

@nathapp/nax 0.19.0 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

package/docs/ROADMAP.md +2 -0
package/nax/config.json +2 -2
package/nax/features/verify-v2/prd.json +79 -0
package/nax/features/verify-v2/progress.txt +3 -0
package/package.json +1 -1
package/src/config/defaults.ts +2 -1
package/src/config/schemas.ts +2 -0
package/src/config/types.ts +4 -0
package/src/execution/lifecycle/index.ts +1 -0
package/src/execution/lifecycle/run-completion.ts +29 -0
package/src/execution/lifecycle/run-regression.ts +301 -0
package/src/execution/pipeline-result-handler.ts +0 -1
package/src/execution/post-verify.ts +31 -194
package/src/execution/runner.ts +1 -0
package/src/pipeline/stages/verify.ts +26 -22
package/src/verification/smart-runner.ts +52 -0
package/test/integration/rectification-flow.test.ts +3 -3
package/test/integration/review-config-commands.test.ts +1 -1
package/test/integration/verify-stage.test.ts +9 -0
package/test/unit/config/defaults.test.ts +69 -0
package/test/unit/config/regression-gate-schema.test.ts +159 -0
package/test/unit/execution/lifecycle/run-completion.test.ts +239 -0
package/test/unit/execution/lifecycle/run-regression.test.ts +418 -0
package/test/unit/execution/post-verify-regression.test.ts +31 -84
package/test/unit/execution/post-verify.test.ts +28 -48
package/test/unit/pipeline/stages/verify.test.ts +266 -0
package/test/unit/pipeline/verify-smart-runner.test.ts +1 -0

package/docs/ROADMAP.md CHANGED Viewed

@@ -224,6 +224,8 @@
 - [x] **BUG-032:** Routing stage overrides escalated `modelTier` with complexity-derived tier. `src/pipeline/stages/routing.ts:43` always runs `complexityToModelTier(routing.complexity, config)` even when `story.routing.modelTier` was explicitly set by `handleTierEscalation()`. BUG-026 was escalated to `balanced` (logged in iteration header), but `Task classified` shows `modelTier=fast` because `complexityToModelTier("simple", config)` → `"fast"`. Related to BUG-013 (escalation routing not applied) which was marked fixed, but the fix in `applyCachedRouting()` in `pipeline-result-handler.ts:295-310` runs **after** the routing stage — too late. **Location:** `src/pipeline/stages/routing.ts:43`. **Fix:** When `story.routing.modelTier` is explicitly set (by escalation), skip `complexityToModelTier()` and use the cached tier directly. Only derive from complexity when `story.routing.modelTier` is absent.
 - [x] **BUG-033:** LLM routing has no retry on timeout — single attempt with hardcoded 15s default. All 5 LLM routing attempts in the v0.18.3 run timed out at 15s, forcing keyword fallback every time. `src/routing/strategies/llm.ts:63` reads `llmConfig?.timeoutMs ?? 15000` but there's no retry logic — one timeout = immediate fallback. **Location:** `src/routing/strategies/llm.ts:callLlm()`. **Fix:** Add `routing.llm.retries` config (default: 1) with backoff. Also surface `routing.llm.timeoutMs` in `nax config --explain` and consider raising default to 30s for batch routing which processes multiple stories.
+- [ ] **BUG-037:** Test output summary (verify stage) captures precheck boilerplate instead of the actual `bun test` failure. **Symptom:** Logs show the successful prechecks at the head of the output instead of the failed tests at its tail. **Fix:** Change the `Test output preview` log to show the last 20 lines of output instead of the first 10.
+- [ ] **BUG-038:** `smart-runner` over-matching when global defaults change. **Symptom:** Changing `DEFAULT_CONFIG` matches broad integration tests that fail due to environment/precheck side effects, obscuring targeted results. **Fix:** Refine path mapping to prioritize direct unit tests and exclude known heavy integration tests from default smart-runner matches unless explicitly relevant.
 ### Features
 - [x] ~~`nax unlock` command~~
 - [x] ~~Constitution file support~~

package/nax/config.json CHANGED Viewed

@@ -59,7 +59,7 @@
     "maxIterations": 6,
     "iterationDelayMs": 2000,
     "costLimit": 8.0,
-    "sessionTimeoutSeconds": 600,
+    "sessionTimeoutSeconds": 7200,
     "verificationTimeoutSeconds": 300,
     "maxStoriesPerFeature": 15,
     "rectification": {
@@ -147,4 +147,4 @@
       "scopeToStory": true
     }
   }
-}
+}

package/nax/features/verify-v2/prd.json ADDED Viewed

@@ -0,0 +1,79 @@
+{
+  "project": "nax",
+  "branchName": "feat/v0.20.0-verify-v2",
+  "feature": "verify-v2",
+  "userStories": [
+    {
+      "id": "US-001",
+      "title": "Remove test from review defaults",
+      "description": "Change review.checks default from ['typecheck', 'lint', 'test'] to ['typecheck', 'lint'] in src/config/defaults.ts. The test check in review duplicates the pipeline verify stage. Keep 'test' as a valid enum value in the schema for backwards compatibility but remove it from the default config. Update any tests that assert on the default review checks array.",
+      "complexity": "simple",
+      "status": "passed",
+      "attempts": 0,
+      "priorErrors": [
+        "Attempt 1 failed with model tier: balanced: Stage requested escalation to higher tier"
+      ],
+      "priorFailures": [
+        {
+          "attempt": 1,
+          "modelTier": "balanced",
+          "stage": "escalation",
+          "summary": "Failed with tier balanced, escalating to next tier",
+          "timestamp": "2026-03-05T11:14:42.773Z"
+        }
+      ],
+      "escalations": [],
+      "dependencies": [],
+      "tags": [],
+      "acceptanceCriteria": [],
+      "storyPoints": 1,
+      "routing": {
+        "complexity": "simple",
+        "modelTier": "powerful",
+        "testStrategy": "test-after",
+        "reasoning": "override: simple config default change, tests already exist"
+      },
+      "passes": true
+    },
+    {
+      "id": "US-002",
+      "title": "Remove post-verify scoped duplicate",
+      "description": "In src/execution/post-verify.ts, remove the scoped verification logic (getChangedTestFiles + runVerification for scoped tests) from runPostAgentVerification(). The pipeline verify stage already runs Smart Test Runner scoped tests. post-verify should ONLY run the regression gate (full suite) and handle failure revert with StructuredFailure. Remove getChangedTestFiles() and scopeTestCommand() helper functions. Remove the scoped rectification loop call. Update the function signature to no longer need storyGitRef. Update all tests for post-verify accordingly.",
+      "complexity": "medium",
+      "status": "passed",
+      "attempts": 0,
+      "priorErrors": [],
+      "priorFailures": [],
+      "escalations": [],
+      "dependencies": [],
+      "tags": [],
+      "acceptanceCriteria": [],
+      "storyPoints": 1,
+      "passes": true
+    },
+    {
+      "id": "US-003",
+      "title": "Deferred regression gate",
+      "description": "Create new src/execution/lifecycle/run-regression.ts that implements a deferred regression gate. Instead of running the full test suite after every story, run it once after all stories complete. Steps: (1) Add 'mode' field to RegressionGateConfigSchema with values 'deferred' | 'per-story' | 'disabled' (default: 'deferred'). (2) Add 'maxRectificationAttempts' field (default: 2). (3) In run-regression.ts: run full suite once, parse failures, use reverse Smart Test Runner mapping (test file -> source file -> responsible story via git log), attempt targeted rectification per responsible story, re-run full suite to confirm. (4) Call deferred regression from run-completion.ts before final metrics, only when mode is 'deferred'. (5) When mode is 'deferred', skip the per-story regression gate in post-verify.ts. (6) Add reverseMapTestToSource() to smart-runner.ts. (7) Handle edge cases: partial completion (only check passed stories), overlapping file changes (try last story first), unmapped tests (warn and mark all passed stories for re-verification).",
+      "complexity": "complex",
+      "status": "passed",
+      "attempts": 0,
+      "priorErrors": [],
+      "priorFailures": [],
+      "escalations": [],
+      "dependencies": [],
+      "tags": [],
+      "acceptanceCriteria": [],
+      "storyPoints": 1,
+      "routing": {
+        "complexity": "complex",
+        "modelTier": "balanced",
+        "testStrategy": "three-session-tdd-lite",
+        "reasoning": "override: complex new file + schema changes, needs powerful model"
+      },
+      "failureCategory": "session-failure",
+      "passes": true
+    }
+  ],
+  "updatedAt": "2026-03-05T11:58:46.858Z"
+}

package/nax/features/verify-v2/progress.txt ADDED Viewed

@@ -0,0 +1,3 @@
+[2026-03-05T07:05:07.935Z] US-002 — PASSED — Remove post-verify scoped duplicate — Cost: $0.1170
+[2026-03-05T08:08:59.656Z] US-003 — FAILED — Deferred regression gate — Execution failed
+[2026-03-05T08:13:50.586Z] US-001 — FAILED — Remove test from review defaults — Execution failed

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@nathapp/nax",
-  "version": "0.19.0",
+  "version": "0.20.0",
   "description": "AI Coding Agent Orchestrator \u2014 loops until done",
   "type": "module",
   "bin": {

package/src/config/defaults.ts CHANGED Viewed

@@ -67,6 +67,7 @@ export const DEFAULT_CONFIG: NaxConfig = {
       enabled: true,
       timeoutSeconds: 120,
       acceptOnTimeout: true,
+      maxRectificationAttempts: 2,
     },
     contextProviderTokenBudget: 2000,
     smartTestRunner: true,
@@ -113,7 +114,7 @@ export const DEFAULT_CONFIG: NaxConfig = {
   },
   review: {
     enabled: true,
-    checks: ["typecheck", "lint", "test"],
+    checks: ["typecheck", "lint"],
     commands: {},
   },
   plan: {

package/src/config/schemas.ts CHANGED Viewed

@@ -63,6 +63,8 @@ const RegressionGateConfigSchema = z.object({
   enabled: z.boolean().default(true),
   timeoutSeconds: z.number().int().min(10).max(600).default(120),
   acceptOnTimeout: z.boolean().default(true),
+  mode: z.enum(["deferred", "per-story", "disabled"]).default("deferred"),
+  maxRectificationAttempts: z.number().int().min(1).default(2),
 });
 const SmartTestRunnerConfigSchema = z.object({

package/src/config/types.ts CHANGED Viewed

@@ -78,6 +78,10 @@ export interface RegressionGateConfig {
   timeoutSeconds: number;
   /** Accept timeout as pass instead of failing (BUG-026, default: true) */
   acceptOnTimeout?: boolean;
+  /** Regression gate mode: 'deferred' (run once after all stories), 'per-story' (run after each story), or 'disabled'. Default: 'deferred'. */
+  mode?: "deferred" | "per-story" | "disabled";
+  /** Max rectification attempts for deferred regression gate (default: 2) */
+  maxRectificationAttempts?: number;
 }
 /** Smart test runner configuration (STR-007) */

package/src/execution/lifecycle/index.ts CHANGED Viewed

@@ -9,3 +9,4 @@ export { handleParallelCompletion, type ParallelCompletionOptions } from "./para
 export { handleRunCompletion, type RunCompletionOptions, type RunCompletionResult } from "./run-completion";
 export { cleanupRun, type RunCleanupOptions } from "./run-cleanup";
 export { setupRun, type RunSetupOptions, type RunSetupResult } from "./run-setup";
+export { runDeferredRegression, type DeferredRegressionOptions, type DeferredRegressionResult } from "./run-regression";

package/src/execution/lifecycle/run-completion.ts CHANGED Viewed

@@ -2,17 +2,28 @@
  * Run Completion — Final Metrics and Status Updates
  *
  * Handles the final steps after sequential execution completes:
+ * - Run deferred regression gate (if configured)
  * - Save run metrics
  * - Log completion summary with per-story metrics
  * - Update final status
  */
+import type { NaxConfig } from "../../config";
 import { getSafeLogger } from "../../logger";
 import type { StoryMetrics } from "../../metrics";
 import { saveRunMetrics } from "../../metrics";
 import { countStories, isComplete, isStalled } from "../../prd";
 import type { PRD } from "../../prd";
 import type { StatusWriter } from "../status-writer";
+import { runDeferredRegression } from "./run-regression";
+/**
+ * Injectable dependencies for testing (avoids mock.module() which leaks in Bun 1.x).
+ * @internal - test use only.
+ */
+export const _runCompletionDeps = {
+  runDeferredRegression,
+};
 export interface RunCompletionOptions {
   runId: string;
@@ -26,6 +37,7 @@ export interface RunCompletionOptions {
   startTime: number;
   workdir: string;
   statusWriter: StatusWriter;
+  config: NaxConfig;
 }
 export interface RunCompletionResult {
@@ -57,8 +69,25 @@ export async function handleRunCompletion(options: RunCompletionOptions): Promis
     startTime,
     workdir,
     statusWriter,
+    config,
   } = options;
+  // Run deferred regression gate before final metrics
+  const regressionMode = config.execution.regressionGate?.mode;
+  if (regressionMode === "deferred" && config.quality.commands.test) {
+    const regressionResult = await _runCompletionDeps.runDeferredRegression({
+      config,
+      prd,
+      workdir,
+    });
+    logger?.info("regression", "Deferred regression gate completed", {
+      success: regressionResult.success,
+      failedTests: regressionResult.failedTests,
+      affectedStories: regressionResult.affectedStories,
+    });
+  }
   const durationMs = Date.now() - startTime;
   const runCompletedAt = new Date().toISOString();

package/src/execution/lifecycle/run-regression.ts ADDED Viewed

@@ -0,0 +1,301 @@
+/**
+ * Deferred Regression Gate
+ *
+ * Runs full test suite once after all stories complete, then attempts
+ * targeted rectification per responsible story. Handles edge cases:
+ * - Partial completion: only check stories marked passed
+ * - Overlapping file changes: try the last passed story first
+ * - Unmapped tests: warn and mark all passed stories for re-verification
+ */
+import type { NaxConfig } from "../../config";
+import { getSafeLogger } from "../../logger";
+import type { PRD, UserStory } from "../../prd";
+import { countStories } from "../../prd";
+import { hasCommitsForStory } from "../../utils/git";
+import { parseBunTestOutput } from "../../verification";
+import { reverseMapTestToSource } from "../../verification/smart-runner";
+import { runRectificationLoop } from "../post-verify-rectification";
+import { runVerification } from "../verification";
+/**
+ * Injectable dependencies for testing (avoids mock.module() which leaks in Bun 1.x).
+ * @internal - test use only.
+ */
+export const _regressionDeps = {
+  runVerification,
+  runRectificationLoop,
+  parseBunTestOutput,
+  reverseMapTestToSource,
+};
+export interface DeferredRegressionOptions {
+  config: NaxConfig;
+  prd: PRD;
+  workdir: string;
+}
+export interface DeferredRegressionResult {
+  success: boolean;
+  failedTests: number;
+  passedTests: number;
+  rectificationAttempts: number;
+  affectedStories: string[];
+}
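+/**
+ * Pick the passed story to hold responsible for a failing test file.
+ *
+ * Walks passedStories from last to first and returns the first story for which
+ * hasCommitsForStory(workdir, story.id, 50) reports commits, or undefined if none does.
+ * Note: testFile is only included in the log entry; it does not influence which story is chosen.
+ */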
+async function findResponsibleStory(
+  testFile: string,
+  workdir: string,
+  passedStories: UserStory[],
+): Promise<UserStory | undefined> {
+  const logger = getSafeLogger();
+  // Try each passed story in reverse order (most recent first)
+  for (let i = passedStories.length - 1; i >= 0; i--) {
+    const story = passedStories[i];
+    const hasCommits = await hasCommitsForStory(workdir, story.id, 50);
+    if (hasCommits) {
+      logger?.info("regression", `Mapped test to story ${story.id}`, { testFile });
+      return story;
+    }
+  }
+  return undefined;
+}
+/**
+ * Run deferred regression gate after all stories complete.
+ *
+ * Steps:
+ * 1. Run full test suite
+ * 2. If failures, reverse-map test files to source files to stories
+ * 3. For each affected story, attempt targeted rectification
+ * 4. Re-run full suite to confirm fixes
+ * 5. Return results with affected story list
+ */
+export async function runDeferredRegression(options: DeferredRegressionOptions): Promise<DeferredRegressionResult> {
+  const logger = getSafeLogger();
+  const { config, prd, workdir } = options;
+  // Check if regression gate is deferred
+  const regressionMode = config.execution.regressionGate?.mode ?? "deferred";
+  if (regressionMode === "disabled") {
+    logger?.info("regression", "Deferred regression gate disabled");
+    return {
+      success: true,
+      failedTests: 0,
+      passedTests: 0,
+      rectificationAttempts: 0,
+      affectedStories: [],
+    };
+  }
+  if (regressionMode !== "deferred") {
+    logger?.info("regression", "Regression gate mode is not deferred, skipping");
+    return {
+      success: true,
+      failedTests: 0,
+      passedTests: 0,
+      rectificationAttempts: 0,
+      affectedStories: [],
+    };
+  }
+  const testCommand = config.quality.commands.test ?? "bun test";
+  const timeoutSeconds = config.execution.regressionGate?.timeoutSeconds ?? 120;
+  const maxRectificationAttempts = config.execution.regressionGate?.maxRectificationAttempts ?? 2;
+  // Only check stories that have been marked as passed
+  const counts = countStories(prd);
+  const passedStories = prd.userStories.filter((s) => s.status === "passed");
+  if (passedStories.length === 0) {
+    logger?.info("regression", "No passed stories to verify (partial completion)");
+    return {
+      success: true,
+      failedTests: 0,
+      passedTests: 0,
+      rectificationAttempts: 0,
+      affectedStories: [],
+    };
+  }
+  logger?.info("regression", "Running deferred full-suite regression gate", {
+    totalStories: counts.total,
+    passedStories: passedStories.length,
+  });
+  // Step 1: Run full test suite
+  const fullSuiteResult = await _regressionDeps.runVerification({
+    workingDirectory: workdir,
+    command: testCommand,
+    timeoutSeconds,
+    forceExit: config.quality.forceExit,
+    detectOpenHandles: config.quality.detectOpenHandles,
+    detectOpenHandlesRetries: config.quality.detectOpenHandlesRetries,
+    timeoutRetryCount: 0,
+    gracePeriodMs: config.quality.gracePeriodMs,
+    drainTimeoutMs: config.quality.drainTimeoutMs,
+    shell: config.quality.shell,
+    stripEnvVars: config.quality.stripEnvVars,
+  });
+  if (fullSuiteResult.success) {
+    logger?.info("regression", "Full suite passed");
+    return {
+      success: true,
+      failedTests: 0,
+      passedTests: fullSuiteResult.passCount ?? 0,
+      rectificationAttempts: 0,
+      affectedStories: [],
+    };
+  }
+  // Handle timeout
+  const acceptOnTimeout = config.execution.regressionGate?.acceptOnTimeout ?? true;
+  if (fullSuiteResult.status === "TIMEOUT" && acceptOnTimeout) {
+    logger?.warn("regression", "Full-suite regression gate timed out (accepted as pass)");
+    return {
+      success: true,
+      failedTests: 0,
+      passedTests: 0,
+      rectificationAttempts: 0,
+      affectedStories: [],
+    };
+  }
+  if (!fullSuiteResult.output) {
+    logger?.error("regression", "Full suite failed with no output");
+    return {
+      success: false,
+      failedTests: fullSuiteResult.failCount ?? 0,
+      passedTests: fullSuiteResult.passCount ?? 0,
+      rectificationAttempts: 0,
+      affectedStories: [],
+    };
+  }
+  // Step 2: Parse failures and map to source files to stories
+  const testSummary = _regressionDeps.parseBunTestOutput(fullSuiteResult.output);
+  const affectedStories = new Set<string>();
+  const affectedStoriesObjs = new Map<string, UserStory>();
+  logger?.warn("regression", "Regression detected", {
+    failedTests: testSummary.failed,
+    passedTests: testSummary.passed,
+  });
+  // Extract test file paths from failures
+  const testFilesInFailures = new Set<string>();
+  for (const failure of testSummary.failures) {
+    if (failure.file) {
+      testFilesInFailures.add(failure.file);
+    }
+  }
+  if (testFilesInFailures.size === 0) {
+    logger?.warn("regression", "No test files found in failures (unmapped)");
+    // Mark all passed stories for re-verification
+    for (const story of passedStories) {
+      affectedStories.add(story.id);
+      affectedStoriesObjs.set(story.id, story);
+    }
+  } else {
+    // Map test files to source files to stories
+    const testFilesArray = Array.from(testFilesInFailures);
+    const sourceFilesArray = _regressionDeps.reverseMapTestToSource(testFilesArray, workdir);
+    logger?.info("regression", "Mapped test files to source files", {
+      testFiles: testFilesArray.length,
+      sourceFiles: sourceFilesArray.length,
+    });
+    for (const testFile of testFilesArray) {
+      const responsibleStory = await findResponsibleStory(testFile, workdir, passedStories);
+      if (responsibleStory) {
+        affectedStories.add(responsibleStory.id);
+        affectedStoriesObjs.set(responsibleStory.id, responsibleStory);
+      } else {
+        logger?.warn("regression", "Could not map test file to story", { testFile });
+      }
+    }
+  }
+  if (affectedStories.size === 0) {
+    logger?.warn("regression", "No stories could be mapped to failures");
+    return {
+      success: false,
+      failedTests: testSummary.failed,
+      passedTests: testSummary.passed,
+      rectificationAttempts: 0,
+      affectedStories: Array.from(affectedStories),
+    };
+  }
+  // Step 3: Attempt rectification per story
+  let rectificationAttempts = 0;
+  const affectedStoriesList = Array.from(affectedStoriesObjs.values());
+  for (const story of affectedStoriesList) {
+    for (let attempt = 0; attempt < maxRectificationAttempts; attempt++) {
+      rectificationAttempts++;
+      logger?.info("regression", `Rectifying story ${story.id} (attempt ${attempt + 1}/${maxRectificationAttempts})`);
+      const fixed = await _regressionDeps.runRectificationLoop({
+        config,
+        workdir,
+        story,
+        testCommand,
+        timeoutSeconds,
+        testOutput: fullSuiteResult.output,
+        promptPrefix: `# DEFERRED REGRESSION: Full-Suite Failures\n\nYour story ${story.id} broke tests in the full suite. Fix these regressions.`,
+      });
+      if (fixed) {
+        logger?.info("regression", `Story ${story.id} rectified successfully`);
+        break; // Move to next story
+      }
+    }
+  }
+  // Step 4: Re-run full suite to confirm
+  logger?.info("regression", "Re-running full suite after rectification");
+  const retryResult = await _regressionDeps.runVerification({
+    workingDirectory: workdir,
+    command: testCommand,
+    timeoutSeconds,
+    forceExit: config.quality.forceExit,
+    detectOpenHandles: config.quality.detectOpenHandles,
+    detectOpenHandlesRetries: config.quality.detectOpenHandlesRetries,
+    timeoutRetryCount: 0,
+    gracePeriodMs: config.quality.gracePeriodMs,
+    drainTimeoutMs: config.quality.drainTimeoutMs,
+    shell: config.quality.shell,
+    stripEnvVars: config.quality.stripEnvVars,
+  });
+  const success = retryResult.success || (retryResult.status === "TIMEOUT" && acceptOnTimeout);
+  if (success) {
+    logger?.info("regression", "Deferred regression gate passed after rectification");
+  } else {
+    logger?.warn("regression", "Deferred regression gate still failing after rectification", {
+      remainingFailures: retryResult.failCount,
+    });
+  }
+  return {
+    success,
+    failedTests: retryResult.failCount ?? 0,
+    passedTests: retryResult.passCount ?? 0,
+    rectificationAttempts,
+    affectedStories: Array.from(affectedStories),
+  };
+}

package/src/execution/pipeline-result-handler.ts CHANGED Viewed

@@ -83,7 +83,6 @@ export async function handlePipelineSuccess(
     storiesToExecute: ctx.storiesToExecute,
     allStoryMetrics: ctx.allStoryMetrics,
     timeoutRetryCountMap: ctx.timeoutRetryCountMap,
-    storyGitRef: ctx.storyGitRef ?? undefined,
   });
   const verificationPassed = verifyResult.passed;
   prd = verifyResult.prd;