npm - @nathapp/nax - Versions diffs - 0.36.1 → 0.36.2 - Mend

@nathapp/nax 0.36.1 → 0.36.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

package/dist/nax.js +323 -51
package/package.json +1 -1
package/src/execution/dry-run.ts +1 -1
package/src/execution/escalation/escalation.ts +5 -3
package/src/execution/escalation/tier-escalation.ts +41 -4
package/src/execution/iteration-runner.ts +5 -0
package/src/execution/parallel-executor.ts +293 -9
package/src/execution/parallel.ts +40 -21
package/src/execution/pipeline-result-handler.ts +3 -2
package/src/execution/runner.ts +13 -3
package/src/metrics/tracker.ts +8 -4
package/src/metrics/types.ts +2 -0
package/src/pipeline/event-bus.ts +1 -1
package/src/pipeline/stages/completion.ts +1 -1
package/src/pipeline/stages/verify.ts +8 -1
package/src/pipeline/subscribers/reporters.ts +3 -3
package/src/pipeline/types.ts +4 -0
package/src/plugins/types.ts +1 -1
package/src/prd/types.ts +2 -0
package/src/tdd/types.ts +2 -1
package/src/verification/crash-detector.ts +34 -0
package/src/verification/orchestrator-types.ts +8 -1

package/src/execution/parallel.ts CHANGED Viewed

@@ -26,14 +26,18 @@ import { MergeEngine, type StoryDependencies } from "../worktree/merge";
  * Result from parallel execution of a batch of stories
  */
 export interface ParallelBatchResult {
-  /** Stories that completed successfully */
-  successfulStories: UserStory[];
-  /** Stories that failed */
-  failedStories: Array<{ story: UserStory; error: string }>;
+  /** Stories that passed the TDD pipeline (pre-merge) */
+  pipelinePassed: UserStory[];
+  /** Stories that were actually merged to the base branch */
+  merged: UserStory[];
+  /** Stories that failed the pipeline */
+  failed: Array<{ story: UserStory; error: string }>;
   /** Total cost accumulated */
   totalCost: number;
-  /** Stories with merge conflicts */
-  conflictedStories: Array<{ storyId: string; conflictFiles: string[] }>;
+  /** Stories with merge conflicts (includes per-story original cost for rectification) */
+  mergeConflicts: Array<{ storyId: string; conflictFiles: string[]; originalCost: number }>;
+  /** Per-story execution costs for successful stories */
+  storyCosts: Map<string, number>;
 }
 /**
@@ -148,10 +152,12 @@ async function executeParallelBatch(
   const logger = getSafeLogger();
   const worktreeManager = new WorktreeManager();
   const results: ParallelBatchResult = {
-    successfulStories: [],
-    failedStories: [],
+    pipelinePassed: [],
+    merged: [],
+    failed: [],
     totalCost: 0,
-    conflictedStories: [],
+    mergeConflicts: [],
+    storyCosts: new Map(),
   };
   // Create worktrees for all stories in batch
@@ -168,7 +174,7 @@ async function executeParallelBatch(
         worktreePath,
       });
     } catch (error) {
-      results.failedStories.push({
+      results.failed.push({
         story,
         error: `Failed to create worktree: ${error instanceof Error ? error.message : String(error)}`,
       });
@@ -188,15 +194,16 @@ async function executeParallelBatch(
     const executePromise = executeStoryInWorktree(story, worktreePath, context, routing as RoutingResult, eventEmitter)
       .then((result) => {
         results.totalCost += result.cost;
+        results.storyCosts.set(story.id, result.cost);
         if (result.success) {
-          results.successfulStories.push(story);
+          results.pipelinePassed.push(story);
           logger?.info("parallel", "Story execution succeeded", {
             storyId: story.id,
             cost: result.cost,
           });
         } else {
-          results.failedStories.push({ story, error: result.error || "Unknown error" });
+          results.failed.push({ story, error: result.error || "Unknown error" });
           logger?.error("parallel", "Story execution failed", {
             storyId: story.id,
             error: result.error,
@@ -257,7 +264,12 @@ export async function executeParallel(
   featureDir: string | undefined,
   parallel: number,
   eventEmitter?: PipelineEventEmitter,
-): Promise<{ storiesCompleted: number; totalCost: number; updatedPrd: PRD }> {
+): Promise<{
+  storiesCompleted: number;
+  totalCost: number;
+  updatedPrd: PRD;
+  mergeConflicts: Array<{ storyId: string; conflictFiles: string[]; originalCost: number }>;
+}> {
   const logger = getSafeLogger();
   const maxConcurrency = resolveMaxConcurrency(parallel);
   const worktreeManager = new WorktreeManager();
@@ -278,6 +290,7 @@ export async function executeParallel(
   let storiesCompleted = 0;
   let totalCost = 0;
   const currentPrd = prd;
+  const allMergeConflicts: Array<{ storyId: string; conflictFiles: string[]; originalCost: number }> = [];
   // Execute each batch sequentially (stories within each batch run in parallel)
   for (let batchIndex = 0; batchIndex < batches.length; batchIndex++) {
@@ -311,8 +324,8 @@ export async function executeParallel(
     totalCost += batchResult.totalCost;
     // Merge successful stories in topological order
-    if (batchResult.successfulStories.length > 0) {
-      const successfulIds = batchResult.successfulStories.map((s) => s.id);
+    if (batchResult.pipelinePassed.length > 0) {
+      const successfulIds = batchResult.pipelinePassed.map((s) => s.id);
       const deps = buildDependencyMap(batch);
       logger?.info("parallel", "Merging successful stories", {
@@ -327,6 +340,8 @@ export async function executeParallel(
           // Update PRD: mark story as passed
           markStoryPassed(currentPrd, mergeResult.storyId);
           storiesCompleted++;
+          const mergedStory = batchResult.pipelinePassed.find((s) => s.id === mergeResult.storyId);
+          if (mergedStory) batchResult.merged.push(mergedStory);
           logger?.info("parallel", "Story merged successfully", {
             storyId: mergeResult.storyId,
@@ -335,9 +350,10 @@ export async function executeParallel(
         } else {
           // Merge conflict — mark story as failed
           markStoryFailed(currentPrd, mergeResult.storyId);
-          batchResult.conflictedStories.push({
+          batchResult.mergeConflicts.push({
             storyId: mergeResult.storyId,
             conflictFiles: mergeResult.conflictFiles || [],
+            originalCost: batchResult.storyCosts.get(mergeResult.storyId) ?? 0,
           });
           logger?.error("parallel", "Merge conflict", {
@@ -355,7 +371,7 @@ export async function executeParallel(
     }
     // Mark failed stories in PRD and clean up their worktrees
-    for (const { story, error } of batchResult.failedStories) {
+    for (const { story, error } of batchResult.failed) {
       markStoryFailed(currentPrd, story.id);
       logger?.error("parallel", "Cleaning up failed story worktree", {
@@ -376,10 +392,13 @@ export async function executeParallel(
     // Save PRD after each batch
     await savePRD(currentPrd, prdPath);
+    allMergeConflicts.push(...batchResult.mergeConflicts);
     logger?.info("parallel", `Batch ${batchIndex + 1} complete`, {
-      successful: batchResult.successfulStories.length,
-      failed: batchResult.failedStories.length,
-      conflicts: batchResult.conflictedStories.length,
+      pipelinePassed: batchResult.pipelinePassed.length,
+      merged: batchResult.merged.length,
+      failed: batchResult.failed.length,
+      mergeConflicts: batchResult.mergeConflicts.length,
       batchCost: batchResult.totalCost,
     });
   }
@@ -389,5 +408,5 @@ export async function executeParallel(
     totalCost,
   });
-  return { storiesCompleted, totalCost, updatedPrd: currentPrd };
+  return { storiesCompleted, totalCost, updatedPrd: currentPrd, mergeConflicts: allMergeConflicts };
 }

package/src/execution/pipeline-result-handler.ts CHANGED Viewed

@@ -68,7 +68,7 @@ export async function handlePipelineSuccess(
       storyId: completedStory.id,
       storyTitle: completedStory.title,
       totalCost: ctx.totalCost + costDelta,
-      durationMs: now - ctx.startTime,
+      runElapsedMs: now - ctx.startTime,
       storyDurationMs: ctx.storyStartTime ? now - ctx.storyStartTime : undefined,
     });
@@ -77,7 +77,7 @@ export async function handlePipelineSuccess(
       storyId: completedStory.id,
       story: completedStory,
       passed: true,
-      durationMs: Date.now() - ctx.startTime,
+      runElapsedMs: Date.now() - ctx.startTime,
       cost: costDelta,
       modelTier: ctx.routing.modelTier,
       testStrategy: ctx.routing.testStrategy,
@@ -177,6 +177,7 @@ export async function handlePipelineFailure(
         feature: ctx.feature,
         totalCost: ctx.totalCost,
         workdir: ctx.workdir,
+        attemptCost: pipelineResult.context.agentResult?.estimatedCost || 0,
       });
       prd = escalationResult.prd;
       prdDirty = escalationResult.prdDirty;

package/src/execution/runner.ts CHANGED Viewed

@@ -21,6 +21,7 @@ import { clearCache as clearLlmCache, routeBatch as llmRouteBatch } from "../rou
 import { precomputeBatchPlan } from "./batching";
 import { stopHeartbeat, writeExitSummary } from "./crash-recovery";
 import { getAllReadyStories } from "./helpers";
+import type { ParallelExecutorOptions, ParallelExecutorResult } from "./parallel-executor";
 import { hookCtx } from "./story-context";
 /**
@@ -29,6 +30,10 @@ import { hookCtx } from "./story-context";
  */
 export const _runnerDeps = {
   fireHook,
+  // Injectable for tests — avoids dynamic-import module-cache issues in bun test (bun 1.3.9+)
+  runParallelExecution: null as
+    | null
+    | ((options: ParallelExecutorOptions, prd: import("../prd").PRD) => Promise<ParallelExecutorResult>),
 };
 // Re-export for backward compatibility
@@ -202,7 +207,8 @@ export async function run(options: RunOptions): Promise<RunResult> {
     // ── Parallel Execution Path (when --parallel is set) ──────────────────────
     if (options.parallel !== undefined) {
-      const { runParallelExecution } = await import("./parallel-executor");
+      const runParallelExecution =
+        _runnerDeps.runParallelExecution ?? (await import("./parallel-executor")).runParallelExecution;
       const parallelResult = await runParallelExecution(
         {
           prdPath,
@@ -231,6 +237,8 @@ export async function run(options: RunOptions): Promise<RunResult> {
       prd = parallelResult.prd;
       totalCost = parallelResult.totalCost;
       storiesCompleted = parallelResult.storiesCompleted;
+      // BUG-066: merge parallel story metrics into the running accumulator
+      allStoryMetrics.push(...parallelResult.storyMetrics);
       // If parallel execution completed everything, return early
       if (parallelResult.completed && parallelResult.durationMs !== undefined) {
@@ -269,8 +277,10 @@ export async function run(options: RunOptions): Promise<RunResult> {
     prd = sequentialResult.prd;
     iterations = sequentialResult.iterations;
-    storiesCompleted = sequentialResult.storiesCompleted;
-    totalCost = sequentialResult.totalCost;
+    // BUG-064: accumulate (not overwrite) totalCost from sequential path
+    totalCost += sequentialResult.totalCost;
+    // BUG-065: accumulate (not overwrite) storiesCompleted from sequential path
+    storiesCompleted += sequentialResult.storiesCompleted;
     allStoryMetrics.push(...sequentialResult.allStoryMetrics);
     // After main loop: Check if we need acceptance retry loop

package/src/metrics/tracker.ts CHANGED Viewed

@@ -44,14 +44,16 @@ export function collectStoryMetrics(ctx: PipelineContext, storyStartTime: string
   const agentResult = ctx.agentResult;
   // Calculate attempts (initial + escalations)
+  // BUG-067: priorFailures captures cross-tier attempts that story.escalations never records
   const escalationCount = story.escalations?.length || 0;
-  const attempts = Math.max(1, story.attempts || 1);
+  const priorFailureCount = story.priorFailures?.length || 0;
+  const attempts = priorFailureCount + Math.max(1, story.attempts || 1);
   // Determine final tier (from last escalation or initial routing)
   const finalTier = escalationCount > 0 ? story.escalations[escalationCount - 1].toTier : routing.modelTier;
-  // First pass success = succeeded with no escalations
-  const firstPassSuccess = agentResult?.success === true && escalationCount === 0;
+  // First pass success = succeeded with no prior failures and no escalations (BUG-067)
+  const firstPassSuccess = agentResult?.success === true && escalationCount === 0 && priorFailureCount === 0;
   // Extract model name from config
   const modelEntry = ctx.config.models[routing.modelTier];
@@ -76,12 +78,13 @@ export function collectStoryMetrics(ctx: PipelineContext, storyStartTime: string
     attempts,
     finalTier,
     success: agentResult?.success || false,
-    cost: agentResult?.estimatedCost || 0,
+    cost: (ctx.accumulatedAttemptCost ?? 0) + (agentResult?.estimatedCost || 0),
     durationMs: agentResult?.durationMs || 0,
     firstPassSuccess,
     startedAt: storyStartTime,
     completedAt: new Date().toISOString(),
     fullSuiteGatePassed,
+    runtimeCrashes: ctx.storyRuntimeCrashes ?? 0,
   };
 }
@@ -139,6 +142,7 @@ export function collectBatchMetrics(ctx: PipelineContext, storyStartTime: string
       startedAt: storyStartTime,
       completedAt: new Date().toISOString(),
       fullSuiteGatePassed: false, // batches are not TDD-gated
+      runtimeCrashes: 0, // batch stories don't have individual crash tracking
     };
   });
 }

package/src/metrics/types.ts CHANGED Viewed

@@ -34,6 +34,8 @@ export interface StoryMetrics {
   startedAt: string;
   /** Timestamp when completed */
   completedAt: string;
+  /** Number of runtime crashes (RUNTIME_CRASH verify status) encountered for this story (BUG-070) */
+  runtimeCrashes?: number;
   /** Whether TDD full-suite gate passed (only true for TDD strategies when gate passes) */
   fullSuiteGatePassed?: boolean;
 }

package/src/pipeline/event-bus.ts CHANGED Viewed

@@ -41,7 +41,7 @@ export interface StoryCompletedEvent {
   storyId: string;
   story: UserStory;
   passed: boolean;
-  durationMs: number;
+  runElapsedMs: number;
   /** Optional: passed by executor/stage for hook/reporter subscribers */
   cost?: number;
   modelTier?: string;

package/src/pipeline/stages/completion.ts CHANGED Viewed

@@ -67,7 +67,7 @@ export const completionStage: PipelineStage = {
         storyId: completedStory.id,
         story: completedStory,
         passed: true,
-        durationMs: storyMetric?.durationMs ?? 0,
+        runElapsedMs: storyMetric?.durationMs ?? 0,
         // Extra fields picked up by subscribers via `as any`
         cost: costPerStory,
         modelTier: ctx.routing?.modelTier,

package/src/pipeline/stages/verify.ts CHANGED Viewed

@@ -11,6 +11,7 @@
 import type { SmartTestRunnerConfig } from "../../config/types";
 import { getLogger } from "../../logger";
+import { detectRuntimeCrash } from "../../verification/crash-detector";
 import type { VerifyStatus } from "../../verification/orchestrator-types";
 import { regression } from "../../verification/runners";
 import { _smartRunnerDeps } from "../../verification/smart-runner";
@@ -133,7 +134,13 @@ export const verifyStage: PipelineStage = {
     // Store result on context for rectify stage
     ctx.verifyResult = {
       success: result.success,
-      status: (result.status === "TIMEOUT" ? "TIMEOUT" : result.success ? "PASS" : "TEST_FAILURE") as VerifyStatus,
+      status: (result.status === "TIMEOUT"
+        ? "TIMEOUT"
+        : result.success
+          ? "PASS"
+          : detectRuntimeCrash(result.output)
+            ? "RUNTIME_CRASH"
+            : "TEST_FAILURE") as VerifyStatus,
       storyId: ctx.story.id,
       strategy: "scoped",
       passCount: result.passCount ?? 0,

package/src/pipeline/subscribers/reporters.ts CHANGED Viewed

@@ -74,7 +74,7 @@ export function wireReporters(
                 runId,
                 storyId: ev.storyId,
                 status: "completed",
-                durationMs: ev.durationMs,
+                runElapsedMs: ev.runElapsedMs,
                 cost: ev.cost ?? 0,
                 tier: ev.modelTier ?? "balanced",
                 testStrategy: ev.testStrategy ?? "test-after",
@@ -100,7 +100,7 @@ export function wireReporters(
                 runId,
                 storyId: ev.storyId,
                 status: "failed",
-                durationMs: Date.now() - startTime,
+                runElapsedMs: Date.now() - startTime,
                 cost: 0,
                 tier: "balanced",
                 testStrategy: "test-after",
@@ -126,7 +126,7 @@ export function wireReporters(
                 runId,
                 storyId: ev.storyId,
                 status: "paused",
-                durationMs: Date.now() - startTime,
+                runElapsedMs: Date.now() - startTime,
                 cost: 0,
                 tier: "balanced",
                 testStrategy: "test-after",

package/src/pipeline/types.ts CHANGED Viewed

@@ -110,8 +110,12 @@ export interface PipelineContext {
   tddFailureCategory?: FailureCategory;
   /** Set to true when TDD full-suite gate already passed — verify stage skips to avoid redundant run (BUG-054) */
   fullSuiteGatePassed?: boolean;
+  /** Number of runtime crashes (RUNTIME_CRASH verify status) encountered for this story (BUG-070) */
+  storyRuntimeCrashes?: number;
   /** Structured review findings from plugin reviewers — passed to escalation for retry context */
   reviewFindings?: import("../plugins/types").ReviewFinding[];
+  /** Accumulated cost across all prior escalation attempts (BUG-067) */
+  accumulatedAttemptCost?: number;
 }
 /**

package/src/plugins/types.ts CHANGED Viewed

@@ -274,7 +274,7 @@ export interface StoryCompleteEvent {
   runId: string;
   storyId: string;
   status: "completed" | "failed" | "skipped" | "paused";
-  durationMs: number;
+  runElapsedMs: number;
   cost: number;
   tier: string;
   testStrategy: string;

package/src/prd/types.ts CHANGED Viewed

@@ -49,6 +49,8 @@ export interface StructuredFailure {
   testFailures?: TestFailureContext[];
   /** Structured review findings from plugin reviewers (e.g., semgrep, eslint) */
   reviewFindings?: import("../plugins/types").ReviewFinding[];
+  /** Estimated cost of this attempt (BUG-067: accumulated across escalations) */
+  cost?: number;
   /** ISO timestamp when failure was recorded */
   timestamp: string;
 }

package/src/tdd/types.ts CHANGED Viewed

@@ -12,7 +12,8 @@ export type FailureCategory =
   /** Verifier explicitly rejected the implementation */
   | "verifier-rejected"
   /** Greenfield project with no test files — TDD not applicable (BUG-010) */
-  | "greenfield-no-tests";
+  | "greenfield-no-tests"
+  | "runtime-crash";
 /** Isolation verification result */
 export interface IsolationCheck {

package/src/verification/crash-detector.ts ADDED Viewed

@@ -0,0 +1,34 @@
+/**
+ * Runtime Crash Detector — BUG-070
+ *
+ * Detects Bun runtime crashes in test output so they can be classified as
+ * RUNTIME_CRASH rather than TEST_FAILURE, preventing spurious tier escalation.
+ *
+ * Detection is a plain substring match of the test output against CRASH_PATTERNS;
+ * empty or missing output is reported as no crash.
+ */
+/**
+ * Known patterns emitted by the Bun runtime before any test results when a crash occurs
+ * (segfault, panic, etc.). Note: "Bun has crashed" already matches the last entry too.
+ */
+export const CRASH_PATTERNS = [
+  "panic(main thread)",
+  "Segmentation fault",
+  "Bun has crashed",
+  "oh no: Bun has crashed",
+] as const;
+/**
+ * Detect whether the given test runner output contains a Bun runtime crash.
+ *
+ * Returns true if any known crash pattern is found in the output.
+ * These patterns are emitted by Bun itself before any test result lines.
+ *
+ * @param output - Raw stdout/stderr from the test runner; null, undefined, or empty yields false
+ */
+export function detectRuntimeCrash(output: string | undefined | null): boolean {
+  // Nothing to scan — treat missing or empty output as "no crash".
+  if (!output) return false;
+  return CRASH_PATTERNS.some((pattern) => output.includes(pattern));
+}

package/src/verification/orchestrator-types.ts CHANGED Viewed

@@ -50,7 +50,14 @@ export interface StructuredTestFailure {
 // Result
 // ---------------------------------------------------------------------------
-export type VerifyStatus = "PASS" | "TEST_FAILURE" | "TIMEOUT" | "BUILD_ERROR" | "SKIPPED" | "ASSET_CHECK_FAILED";
+export type VerifyStatus =
+  | "PASS"
+  | "TEST_FAILURE"
+  | "TIMEOUT"
+  | "BUILD_ERROR"
+  | "SKIPPED"
+  | "ASSET_CHECK_FAILED"
+  | "RUNTIME_CRASH";
 export interface VerifyResult {
   success: boolean;