npm - @nathapp/nax - Versions diffs - 0.18.1 → 0.18.3 - Mend

@nathapp/nax 0.18.1 → 0.18.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50)

package/.gitlab-ci.yml +12 -6
package/bun.lock +1 -1
package/bunfig.toml +2 -1
package/docker-compose.test.yml +17 -0
package/docs/ROADMAP.md +121 -36
package/docs/specs/verification-architecture-v2.md +343 -0
package/nax/config.json +13 -10
package/nax/features/smart-test-runner/plan.md +7 -0
package/nax/features/smart-test-runner/prd.json +203 -0
package/nax/features/smart-test-runner/progress.txt +13 -0
package/nax/features/smart-test-runner/spec.md +7 -0
package/nax/features/smart-test-runner/tasks.md +8 -0
package/nax/features/v0.18.3-execution-reliability/prd.json +80 -0
package/nax/features/v0.18.3-execution-reliability/progress.txt +3 -0
package/package.json +2 -2
package/src/config/defaults.ts +2 -0
package/src/config/schema.ts +1 -0
package/src/config/schemas.ts +24 -0
package/src/config/types.ts +16 -1
package/src/context/builder.ts +11 -0
package/src/context/elements.ts +38 -1
package/src/execution/escalation/tier-escalation.ts +28 -3
package/src/execution/post-verify-rectification.ts +4 -2
package/src/execution/post-verify.ts +73 -9
package/src/execution/progress.ts +2 -0
package/src/pipeline/stages/review.ts +5 -3
package/src/pipeline/stages/routing.ts +14 -9
package/src/pipeline/stages/verify.ts +54 -1
package/src/prd/index.ts +16 -1
package/src/prd/types.ts +33 -0
package/src/precheck/index.ts +9 -4
package/src/routing/strategies/llm.ts +5 -0
package/src/verification/gate.ts +2 -1
package/src/verification/smart-runner.ts +214 -0
package/src/verification/types.ts +2 -0
package/test/US-002-orchestrator.test.ts +5 -5
package/test/context/prior-failures.test.ts +462 -0
package/test/execution/post-verify-bug026.test.ts +443 -0
package/test/execution/post-verify.test.ts +32 -0
package/test/execution/structured-failure.test.ts +414 -0
package/test/integration/logger.test.ts +1 -1
package/test/integration/review-plugin-integration.test.ts +2 -1
package/test/integration/story-id-in-events.test.ts +1 -1
package/test/unit/config/smart-runner-flag.test.ts +249 -0
package/test/unit/pipeline/routing-partial-override.test.ts +141 -0
package/test/unit/pipeline/verify-smart-runner.test.ts +344 -0
package/test/unit/prd-get-next-story.test.ts +28 -0
package/test/unit/routing.test.ts +102 -0
package/test/unit/smart-test-runner.test.ts +512 -0
package/test/unit/verification/smart-runner.test.ts +246 -0

package/src/execution/post-verify-rectification.ts CHANGED Viewed

@@ -10,7 +10,7 @@ import type { NaxConfig } from "../config";
 import { resolveModel } from "../config";
 import { getSafeLogger } from "../logger";
 import type { StoryMetrics } from "../metrics";
-import type { PRD, UserStory } from "../prd";
+import type { PRD, StructuredFailure, UserStory } from "../prd";
 import { getExpectedFiles, savePRD } from "../prd";
 import { appendProgress } from "./progress";
 import { type RectificationState, createRectificationPrompt, shouldRetryRectification } from "./rectification";
@@ -147,6 +147,7 @@ export interface RevertStoriesOptions {
   featureDir?: string;
   diagnosticContext: string;
   countsTowardEscalation: boolean;
+  priorFailure?: StructuredFailure;
 }
 /** Revert stories to pending on verification failure and save PRD. */
@@ -160,12 +161,13 @@ export async function revertStoriesOnFailure(opts: RevertStoriesOptions): Promis
     }
   }
-  // Revert stories to pending with diagnostic context
+  // Revert stories to pending with diagnostic context and priorFailures
   opts.prd.userStories = opts.prd.userStories.map((s) =>
     storyIds.has(s.id)
       ? {
           ...s,
           priorErrors: [...(s.priorErrors || []), opts.diagnosticContext],
+          priorFailures: opts.priorFailure ? [...(s.priorFailures || []), opts.priorFailure] : s.priorFailures,
           status: "pending" as const,
           passes: false,
         }

package/src/execution/post-verify.ts CHANGED Viewed

@@ -8,13 +8,42 @@ import { spawn } from "bun";
 import type { NaxConfig } from "../config";
 import { getSafeLogger } from "../logger";
 import type { StoryMetrics } from "../metrics";
-import type { PRD, UserStory } from "../prd";
+import type { PRD, StructuredFailure, UserStory, VerificationStage } from "../prd";
 import { getExpectedFiles, savePRD } from "../prd";
+import type { TestFailure, VerificationResult } from "../verification";
+import { parseBunTestOutput } from "../verification";
 import { getTierConfig } from "./escalation";
 import { revertStoriesOnFailure, runRectificationLoop } from "./post-verify-rectification";
 import { appendProgress } from "./progress";
 import { getEnvironmentalEscalationThreshold, parseTestOutput, runVerification } from "./verification";
+/** Build a StructuredFailure from verification result and test output. */
+function buildStructuredFailure(
+  story: UserStory,
+  stage: VerificationStage,
+  verificationResult: VerificationResult,
+  summary: string,
+): StructuredFailure {
+  const testFailures =
+    verificationResult.status === "TEST_FAILURE" && verificationResult.output
+      ? parseBunTestOutput(verificationResult.output).failures.map((f) => ({
+          file: f.file,
+          testName: f.testName,
+          error: f.error,
+          stackTrace: f.stackTrace,
+        }))
+      : undefined;
+  return {
+    attempt: (story.attempts ?? 0) + 1,
+    modelTier: story.routing?.modelTier ?? "unknown",
+    stage,
+    summary,
+    testFailures: testFailures && testFailures.length > 0 ? testFailures : undefined,
+    timestamp: new Date().toISOString(),
+  };
+}
 /** Get test files changed since a git ref. Returns empty array if detection fails. */
 async function getChangedTestFiles(workdir: string, gitRef?: string): Promise<string[]> {
   if (!gitRef) return [];
@@ -122,12 +151,30 @@ export async function runPostAgentVerification(opts: PostVerifyOptions): Promise
     }
     // Regression Gate (BUG-009): run full suite after scoped tests pass
-    const regressionResult = await runRegressionGate(config, workdir, story, changedTestFiles, rectificationEnabled);
-    if (regressionResult === "passed" || regressionResult === "skipped") {
+    const regressionGateResult = await runRegressionGate(
+      config,
+      workdir,
+      story,
+      changedTestFiles,
+      rectificationEnabled,
+    );
+    if (regressionGateResult.status === "passed" || regressionGateResult.status === "skipped") {
       return { passed: true, prd };
     }
-    // Regression failed -- revert stories
+    // Regression failed -- build StructuredFailure and revert stories
+    // verificationResult is always set when status === "failed" (see RegressionGateResult)
+    const regressionVerificationResult = regressionGateResult.verificationResult ?? {
+      status: "TEST_FAILURE" as const,
+      success: false,
+      countsTowardEscalation: true,
+    };
+    const regressionFailure = buildStructuredFailure(
+      story,
+      "regression",
+      regressionVerificationResult,
+      "Full-suite regression detected",
+    );
     const updatedPrd = await revertStoriesOnFailure({
       prd,
       prdPath,
@@ -137,6 +184,7 @@ export async function runPostAgentVerification(opts: PostVerifyOptions): Promise
       featureDir,
       diagnosticContext: "REGRESSION: full-suite regression detected",
       countsTowardEscalation: true,
+      priorFailure: regressionFailure,
     });
     return { passed: false, prd: updatedPrd };
   }
@@ -173,6 +221,7 @@ export async function runPostAgentVerification(opts: PostVerifyOptions): Promise
   // Revert stories and save
   const diagnosticContext = verificationResult.error || `Verification failed: ${verificationResult.status}`;
+  const verifyFailure = buildStructuredFailure(story, "verify", verificationResult, diagnosticContext);
   const updatedPrd = await revertStoriesOnFailure({
     prd,
     prdPath,
@@ -182,11 +231,17 @@ export async function runPostAgentVerification(opts: PostVerifyOptions): Promise
     featureDir,
     diagnosticContext,
     countsTowardEscalation: verificationResult.countsTowardEscalation ?? false,
+    priorFailure: verifyFailure,
   });
   return { passed: false, prd: updatedPrd };
 }
+interface RegressionGateResult {
+  status: "passed" | "skipped" | "failed";
+  verificationResult?: VerificationResult;
+}
 /** Run regression gate (full suite) after scoped tests pass. */
 async function runRegressionGate(
   config: NaxConfig,
@@ -194,7 +249,7 @@ async function runRegressionGate(
   story: UserStory,
   changedTestFiles: string[],
   rectificationEnabled: boolean,
-): Promise<"passed" | "skipped" | "failed"> {
+): Promise<RegressionGateResult> {
   const logger = getSafeLogger();
   const regressionGateEnabled = config.execution.regressionGate?.enabled ?? true;
   const scopedTestsWereRun = changedTestFiles.length > 0;
@@ -203,7 +258,7 @@ async function runRegressionGate(
     if (regressionGateEnabled && !scopedTestsWereRun) {
       logger?.debug("regression-gate", "Skipping regression gate (full suite already run in scoped verification)");
     }
-    return "skipped";
+    return { status: "skipped" };
   }
   logger?.info("regression-gate", "Running full-suite regression gate");
@@ -225,7 +280,16 @@ async function runRegressionGate(
   if (regressionResult.success) {
     logger?.info("regression-gate", "Full-suite regression gate passed");
-    return "passed";
+    return { status: "passed" };
+  }
+  // Handle timeout: accept as pass if configured (BUG-026)
+  const acceptOnTimeout = config.execution.regressionGate?.acceptOnTimeout ?? true;
+  if (regressionResult.status === "TIMEOUT" && acceptOnTimeout) {
+    logger?.warn("regression-gate", "[BUG-026] Full-suite regression gate timed out (accepted as pass)", {
+      reason: "Timeout is not evidence of regression — scoped verification already passed",
+    });
+    return { status: "passed" };
   }
   logger?.warn("regression-gate", "Full-suite regression detected", { status: regressionResult.status });
@@ -243,10 +307,10 @@ async function runRegressionGate(
       promptPrefix:
         "# REGRESSION: Cross-Story Test Failures\n\nYour changes passed scoped tests but broke unrelated tests. Fix these regressions.",
     });
-    if (fixed) return "passed";
+    if (fixed) return { status: "passed" };
   }
-  return "failed";
+  return { status: "failed", verificationResult: regressionResult };
 }
 /** Check if environmental failure should trigger early escalation. */

package/src/execution/progress.ts CHANGED Viewed

@@ -4,6 +4,7 @@
  * Append timestamped entries to progress.txt after story completion.
  */
+import { mkdirSync } from "node:fs";
 import { join } from "node:path";
 import type { StoryStatus } from "../prd";
@@ -14,6 +15,7 @@ export async function appendProgress(
   status: StoryStatus,
   message: string,
 ): Promise<void> {
+  mkdirSync(featureDir, { recursive: true });
   const progressPath = join(featureDir, "progress.txt");
   const timestamp = new Date().toISOString();
   const entry = `[${timestamp}] ${storyId} — ${status.toUpperCase()} — ${message}\n`;

package/src/pipeline/stages/review.ts CHANGED Viewed

@@ -83,13 +83,15 @@ export const reviewStage: PipelineStage = {
     const reviewResult = await runReview(ctx.config.review, ctx.workdir, ctx.config.execution);
     ctx.reviewResult = reviewResult;
-    // HARD FAILURE: Review failure means code quality gate not met
+    // BUG-030: Review failure (lint/typecheck) should escalate, not hard-fail.
+    // Lint/typecheck errors are auto-fixable — give the agent a retry with error context.
+    // Only plugin reviewer rejections are hard failures.
     if (!reviewResult.success) {
-      logger.error("review", "Review failed (built-in checks)", {
+      logger.warn("review", "Review failed (built-in checks) — escalating for retry", {
         reason: reviewResult.failureReason,
         storyId: ctx.story.id,
       });
-      return { action: "fail", reason: `Review failed: ${reviewResult.failureReason}` };
+      return { action: "escalate", reason: `Review failed: ${reviewResult.failureReason}` };
     }
     // Run plugin reviewers if any are registered

package/src/pipeline/stages/routing.ts CHANGED Viewed

@@ -2,8 +2,8 @@
  * Routing Stage
  *
  * Classifies story complexity and determines model tier + test strategy.
- * Uses cached complexity/testStrategy from story if available, but ALWAYS
- * derives modelTier from current config (never cached).
+ * Uses cached complexity/testStrategy/modelTier from story if available.
+ * modelTier: uses escalated tier if explicitly set (BUG-032), otherwise derives from config.
  *
  * @returns
  * - `continue`: Routing determined, proceed to next stage
@@ -30,17 +30,22 @@ export const routingStage: PipelineStage = {
   async execute(ctx: PipelineContext): Promise<StageResult> {
     const logger = getLogger();
-    // If story has cached routing, use it but re-derive modelTier from current config
+    // If story has cached routing, use cached values (escalated modelTier takes priority)
     // Otherwise, perform fresh classification
     let routing: { complexity: string; testStrategy: string; modelTier: string; reasoning?: string };
     if (ctx.story.routing) {
-      // Use cached complexity/testStrategy, but re-derive modelTier from current config
+      // Use cached complexity/testStrategy/modelTier
       routing = await routeStory(ctx.story, { config: ctx.config }, ctx.workdir, ctx.plugins);
-      // Override with cached complexity if available
-      routing.complexity = ctx.story.routing.complexity;
-      routing.testStrategy = ctx.story.routing.testStrategy;
-      // Re-derive modelTier from cached complexity and current config
-      routing.modelTier = complexityToModelTier(routing.complexity as import("../../config").Complexity, ctx.config);
+      // Override with cached values only when they are actually set
+      if (ctx.story.routing?.complexity) routing.complexity = ctx.story.routing.complexity;
+      if (ctx.story.routing?.testStrategy) routing.testStrategy = ctx.story.routing.testStrategy;
+      // BUG-032: Use escalated modelTier if explicitly set (by handleTierEscalation),
+      // otherwise derive from complexity + current config
+      if (ctx.story.routing?.modelTier) {
+        routing.modelTier = ctx.story.routing.modelTier;
+      } else {
+        routing.modelTier = complexityToModelTier(routing.complexity as import("../../config").Complexity, ctx.config);
+      }
     } else {
       // Fresh classification
       routing = await routeStory(ctx.story, { config: ctx.config }, ctx.workdir, ctx.plugins);

package/src/pipeline/stages/verify.ts CHANGED Viewed

@@ -20,10 +20,24 @@
  * ```
  */
+import type { SmartTestRunnerConfig } from "../../config/types";
 import { getLogger } from "../../logger";
 import { regression } from "../../verification/gate";
+import { _smartRunnerDeps } from "../../verification/smart-runner";
 import type { PipelineContext, PipelineStage, StageResult } from "../types";
+const DEFAULT_SMART_RUNNER_CONFIG: SmartTestRunnerConfig = {
+  enabled: true,
+  testFilePatterns: ["test/**/*.test.ts"],
+  fallback: "import-grep",
+};
+function coerceSmartTestRunner(val: boolean | SmartTestRunnerConfig | undefined): SmartTestRunnerConfig {
+  if (val === undefined || val === true) return DEFAULT_SMART_RUNNER_CONFIG;
+  if (val === false) return { ...DEFAULT_SMART_RUNNER_CONFIG, enabled: false };
+  return val;
+}
 export const verifyStage: PipelineStage = {
   name: "verify",
   enabled: () => true,
@@ -46,11 +60,50 @@ export const verifyStage: PipelineStage = {
     logger.info("verify", "Running verification", { storyId: ctx.story.id });
+    // Determine effective test command (smart runner or full suite)
+    let effectiveCommand = testCommand;
+    const smartRunnerConfig = coerceSmartTestRunner(ctx.config.execution.smartTestRunner);
+    if (smartRunnerConfig.enabled) {
+      const sourceFiles = await _smartRunnerDeps.getChangedSourceFiles(ctx.workdir);
+      // Pass 1: path convention mapping
+      const pass1Files = await _smartRunnerDeps.mapSourceToTests(sourceFiles, ctx.workdir);
+      if (pass1Files.length > 0) {
+        logger.info("verify", `[smart-runner] Pass 1: path convention matched ${pass1Files.length} test files`, {
+          storyId: ctx.story.id,
+        });
+        effectiveCommand = _smartRunnerDeps.buildSmartTestCommand(pass1Files, testCommand);
+      } else if (smartRunnerConfig.fallback === "import-grep") {
+        // Pass 2: import-grep fallback
+        const pass2Files = await _smartRunnerDeps.importGrepFallback(
+          sourceFiles,
+          ctx.workdir,
+          smartRunnerConfig.testFilePatterns,
+        );
+        if (pass2Files.length > 0) {
+          logger.info("verify", `[smart-runner] Pass 2: import-grep matched ${pass2Files.length} test files`, {
+            storyId: ctx.story.id,
+          });
+          effectiveCommand = _smartRunnerDeps.buildSmartTestCommand(pass2Files, testCommand);
+        } else {
+          logger.info("verify", "[smart-runner] No mapped tests — falling back to full suite", {
+            storyId: ctx.story.id,
+          });
+        }
+      } else {
+        logger.info("verify", "[smart-runner] No mapped tests — falling back to full suite", {
+          storyId: ctx.story.id,
+        });
+      }
+    }
     // Use unified regression gate (includes 2s wait for agent process cleanup)
     const result = await regression({
       workdir: ctx.workdir,
-      command: testCommand,
+      command: effectiveCommand,
       timeoutSeconds: ctx.config.execution.verificationTimeoutSeconds,
+      acceptOnTimeout: ctx.config.execution.regressionGate?.acceptOnTimeout ?? true,
     });
     // HARD FAILURE: Tests must pass for story to be marked complete

package/src/prd/index.ts CHANGED Viewed

@@ -6,7 +6,16 @@ import { existsSync, statSync } from "node:fs";
 import type { FailureCategory } from "../tdd/types";
 import type { PRD, UserStory } from "./types";
-export type { PRD, UserStory, StoryRouting, StoryStatus, EscalationAttempt } from "./types";
+export type {
+  PRD,
+  UserStory,
+  StoryRouting,
+  StoryStatus,
+  EscalationAttempt,
+  StructuredFailure,
+  TestFailureContext,
+  VerificationStage,
+} from "./types";
 export { isStalled, markStoryAsBlocked, generateHumanHaltSummary, getContextFiles, getExpectedFiles } from "./types";
 export type { FailureCategory } from "../tdd/types";
@@ -36,6 +45,7 @@ export async function loadPRD(path: string): Promise<PRD> {
   for (const story of prd.userStories) {
     story.attempts = story.attempts ?? 0;
     story.priorErrors = story.priorErrors ?? [];
+    story.priorFailures = story.priorFailures ?? [];
     story.escalations = story.escalations ?? [];
     story.dependencies = story.dependencies ?? [];
     story.tags = story.tags ?? [];
@@ -73,6 +83,11 @@ export function getNextStory(prd: PRD, currentStoryId?: string | null, maxRetrie
     if (currentStory && currentStory.status === "failed" && (currentStory.attempts ?? 0) <= maxRetries) {
       return currentStory;
     }
+    // BUG-029: After tier escalation, story is set to "pending" (not "failed").
+    // Prioritize current story if it was escalated (pending + has prior attempts).
+    if (currentStory && currentStory.status === "pending" && (currentStory.attempts ?? 0) > 0) {
+      return currentStory;
+    }
   }
   const completedIds = new Set(

package/src/prd/types.ts CHANGED Viewed

@@ -11,6 +11,37 @@ import type { FailureCategory } from "../tdd/types";
 /** User story status */
 export type StoryStatus = "pending" | "in-progress" | "passed" | "failed" | "skipped" | "blocked" | "paused";
+/** Verification stage where failure occurred */
+export type VerificationStage = "verify" | "review" | "regression" | "rectification" | "agent-session" | "escalation";
+/** Test failure context from parsed test output */
+export interface TestFailureContext {
+  /** Test file path */
+  file: string;
+  /** Full test name (including describe blocks) */
+  testName: string;
+  /** Error message */
+  error: string;
+  /** Stack trace lines */
+  stackTrace: string[];
+}
+/** Structured failure context for escalated tiers */
+export interface StructuredFailure {
+  /** Attempt number when failure occurred */
+  attempt: number;
+  /** Model tier that was running */
+  modelTier: string;
+  /** Stage where failure occurred */
+  stage: VerificationStage;
+  /** Summary of what failed */
+  summary: string;
+  /** Parsed test failures (if applicable) */
+  testFailures?: TestFailureContext[];
+  /** ISO timestamp when failure was recorded */
+  timestamp: string;
+}
 /** Routing metadata per story */
 export interface StoryRouting {
   complexity: Complexity;
@@ -71,6 +102,8 @@ export interface UserStory {
   expectedFiles?: string[];
   /** Prior error messages from failed attempts */
   priorErrors?: string[];
+  /** Structured failure context for escalated tiers */
+  priorFailures?: StructuredFailure[];
   /** Custom context strings */
   customContext?: string[];
   /** Category of the last failure (set when story is marked failed) */

package/src/precheck/index.ts CHANGED Viewed

@@ -62,6 +62,8 @@ export interface PrecheckOptions {
   format?: "human" | "json";
   /** Working directory */
   workdir: string;
+  /** Suppress console output (for programmatic use) */
+  silent?: boolean;
 }
 /** Extended result with exit code for CLI usage */
@@ -87,6 +89,7 @@ export async function runPrecheck(
 ): Promise<PrecheckResultWithCode> {
   const workdir = options?.workdir || process.cwd();
   const format = options?.format || "human";
+  const silent = options?.silent ?? false;
   const passed: Check[] = [];
   const blockers: Check[] = [];
@@ -196,10 +199,12 @@ export async function runPrecheck(
     exitCode = hasPRDError ? EXIT_CODES.INVALID_PRD : EXIT_CODES.BLOCKER;
   }
-  if (format === "json") {
-    console.log(JSON.stringify(output, null, 2));
-  } else {
-    printSummary(output);
+  if (!silent) {
+    if (format === "json") {
+      console.log(JSON.stringify(output, null, 2));
+    } else {
+      printSummary(output);
+    }
   }
   return {

package/src/routing/strategies/llm.ts CHANGED Viewed

@@ -36,6 +36,11 @@ export function getCacheSize(): number {
   return cachedDecisions.size;
 }
+/** Clear routing cache entry for a specific story (used on tier escalation) */
+export function clearCacheForStory(storyId: string): void {
+  cachedDecisions.delete(storyId);
+}
 /** Evict oldest entry when cache is full (LRU) */
 function evictOldest(): void {
   const firstKey = cachedDecisions.keys().next().value;

package/src/verification/gate.ts CHANGED Viewed

@@ -95,9 +95,10 @@ async function runVerificationCore(options: VerificationGateOptions): Promise<Ve
   });
   if (execution.timeout) {
+    const success = options.acceptOnTimeout ?? false;
     return {
       status: "TIMEOUT",
-      success: false,
+      success,
       countsTowardEscalation: false, // Timeout is environmental, not code failure
       error: execution.error,
       output: execution.output,