@nathapp/nax 0.36.1 → 0.36.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -26,14 +26,18 @@ import { MergeEngine, type StoryDependencies } from "../worktree/merge";
26
26
  * Result from parallel execution of a batch of stories
27
27
  */
28
28
  export interface ParallelBatchResult {
29
- /** Stories that completed successfully */
30
- successfulStories: UserStory[];
31
- /** Stories that failed */
32
- failedStories: Array<{ story: UserStory; error: string }>;
29
+ /** Stories that passed the TDD pipeline (pre-merge) */
30
+ pipelinePassed: UserStory[];
31
+ /** Stories that were actually merged to the base branch */
32
+ merged: UserStory[];
33
+ /** Stories that failed the pipeline */
34
+ failed: Array<{ story: UserStory; error: string }>;
33
35
  /** Total cost accumulated */
34
36
  totalCost: number;
35
- /** Stories with merge conflicts */
36
- conflictedStories: Array<{ storyId: string; conflictFiles: string[] }>;
37
+ /** Stories with merge conflicts (includes per-story original cost for rectification) */
38
+ mergeConflicts: Array<{ storyId: string; conflictFiles: string[]; originalCost: number }>;
39
+ /** Per-story execution costs for successful stories */
40
+ storyCosts: Map<string, number>;
37
41
  }
38
42
 
39
43
  /**
@@ -148,10 +152,12 @@ async function executeParallelBatch(
148
152
  const logger = getSafeLogger();
149
153
  const worktreeManager = new WorktreeManager();
150
154
  const results: ParallelBatchResult = {
151
- successfulStories: [],
152
- failedStories: [],
155
+ pipelinePassed: [],
156
+ merged: [],
157
+ failed: [],
153
158
  totalCost: 0,
154
- conflictedStories: [],
159
+ mergeConflicts: [],
160
+ storyCosts: new Map(),
155
161
  };
156
162
 
157
163
  // Create worktrees for all stories in batch
@@ -168,7 +174,7 @@ async function executeParallelBatch(
168
174
  worktreePath,
169
175
  });
170
176
  } catch (error) {
171
- results.failedStories.push({
177
+ results.failed.push({
172
178
  story,
173
179
  error: `Failed to create worktree: ${error instanceof Error ? error.message : String(error)}`,
174
180
  });
@@ -188,15 +194,16 @@ async function executeParallelBatch(
188
194
  const executePromise = executeStoryInWorktree(story, worktreePath, context, routing as RoutingResult, eventEmitter)
189
195
  .then((result) => {
190
196
  results.totalCost += result.cost;
197
+ results.storyCosts.set(story.id, result.cost);
191
198
 
192
199
  if (result.success) {
193
- results.successfulStories.push(story);
200
+ results.pipelinePassed.push(story);
194
201
  logger?.info("parallel", "Story execution succeeded", {
195
202
  storyId: story.id,
196
203
  cost: result.cost,
197
204
  });
198
205
  } else {
199
- results.failedStories.push({ story, error: result.error || "Unknown error" });
206
+ results.failed.push({ story, error: result.error || "Unknown error" });
200
207
  logger?.error("parallel", "Story execution failed", {
201
208
  storyId: story.id,
202
209
  error: result.error,
@@ -257,7 +264,12 @@ export async function executeParallel(
257
264
  featureDir: string | undefined,
258
265
  parallel: number,
259
266
  eventEmitter?: PipelineEventEmitter,
260
- ): Promise<{ storiesCompleted: number; totalCost: number; updatedPrd: PRD }> {
267
+ ): Promise<{
268
+ storiesCompleted: number;
269
+ totalCost: number;
270
+ updatedPrd: PRD;
271
+ mergeConflicts: Array<{ storyId: string; conflictFiles: string[]; originalCost: number }>;
272
+ }> {
261
273
  const logger = getSafeLogger();
262
274
  const maxConcurrency = resolveMaxConcurrency(parallel);
263
275
  const worktreeManager = new WorktreeManager();
@@ -278,6 +290,7 @@ export async function executeParallel(
278
290
  let storiesCompleted = 0;
279
291
  let totalCost = 0;
280
292
  const currentPrd = prd;
293
+ const allMergeConflicts: Array<{ storyId: string; conflictFiles: string[]; originalCost: number }> = [];
281
294
 
282
295
  // Execute each batch sequentially (stories within each batch run in parallel)
283
296
  for (let batchIndex = 0; batchIndex < batches.length; batchIndex++) {
@@ -311,8 +324,8 @@ export async function executeParallel(
311
324
  totalCost += batchResult.totalCost;
312
325
 
313
326
  // Merge successful stories in topological order
314
- if (batchResult.successfulStories.length > 0) {
315
- const successfulIds = batchResult.successfulStories.map((s) => s.id);
327
+ if (batchResult.pipelinePassed.length > 0) {
328
+ const successfulIds = batchResult.pipelinePassed.map((s) => s.id);
316
329
  const deps = buildDependencyMap(batch);
317
330
 
318
331
  logger?.info("parallel", "Merging successful stories", {
@@ -327,6 +340,8 @@ export async function executeParallel(
327
340
  // Update PRD: mark story as passed
328
341
  markStoryPassed(currentPrd, mergeResult.storyId);
329
342
  storiesCompleted++;
343
+ const mergedStory = batchResult.pipelinePassed.find((s) => s.id === mergeResult.storyId);
344
+ if (mergedStory) batchResult.merged.push(mergedStory);
330
345
 
331
346
  logger?.info("parallel", "Story merged successfully", {
332
347
  storyId: mergeResult.storyId,
@@ -335,9 +350,10 @@ export async function executeParallel(
335
350
  } else {
336
351
  // Merge conflict — mark story as failed
337
352
  markStoryFailed(currentPrd, mergeResult.storyId);
338
- batchResult.conflictedStories.push({
353
+ batchResult.mergeConflicts.push({
339
354
  storyId: mergeResult.storyId,
340
355
  conflictFiles: mergeResult.conflictFiles || [],
356
+ originalCost: batchResult.storyCosts.get(mergeResult.storyId) ?? 0,
341
357
  });
342
358
 
343
359
  logger?.error("parallel", "Merge conflict", {
@@ -355,7 +371,7 @@ export async function executeParallel(
355
371
  }
356
372
 
357
373
  // Mark failed stories in PRD and clean up their worktrees
358
- for (const { story, error } of batchResult.failedStories) {
374
+ for (const { story, error } of batchResult.failed) {
359
375
  markStoryFailed(currentPrd, story.id);
360
376
 
361
377
  logger?.error("parallel", "Cleaning up failed story worktree", {
@@ -376,10 +392,13 @@ export async function executeParallel(
376
392
  // Save PRD after each batch
377
393
  await savePRD(currentPrd, prdPath);
378
394
 
395
+ allMergeConflicts.push(...batchResult.mergeConflicts);
396
+
379
397
  logger?.info("parallel", `Batch ${batchIndex + 1} complete`, {
380
- successful: batchResult.successfulStories.length,
381
- failed: batchResult.failedStories.length,
382
- conflicts: batchResult.conflictedStories.length,
398
+ pipelinePassed: batchResult.pipelinePassed.length,
399
+ merged: batchResult.merged.length,
400
+ failed: batchResult.failed.length,
401
+ mergeConflicts: batchResult.mergeConflicts.length,
383
402
  batchCost: batchResult.totalCost,
384
403
  });
385
404
  }
@@ -389,5 +408,5 @@ export async function executeParallel(
389
408
  totalCost,
390
409
  });
391
410
 
392
- return { storiesCompleted, totalCost, updatedPrd: currentPrd };
411
+ return { storiesCompleted, totalCost, updatedPrd: currentPrd, mergeConflicts: allMergeConflicts };
393
412
  }
@@ -68,7 +68,7 @@ export async function handlePipelineSuccess(
68
68
  storyId: completedStory.id,
69
69
  storyTitle: completedStory.title,
70
70
  totalCost: ctx.totalCost + costDelta,
71
- durationMs: now - ctx.startTime,
71
+ runElapsedMs: now - ctx.startTime,
72
72
  storyDurationMs: ctx.storyStartTime ? now - ctx.storyStartTime : undefined,
73
73
  });
74
74
 
@@ -77,7 +77,7 @@ export async function handlePipelineSuccess(
77
77
  storyId: completedStory.id,
78
78
  story: completedStory,
79
79
  passed: true,
80
- durationMs: Date.now() - ctx.startTime,
80
+ runElapsedMs: Date.now() - ctx.startTime,
81
81
  cost: costDelta,
82
82
  modelTier: ctx.routing.modelTier,
83
83
  testStrategy: ctx.routing.testStrategy,
@@ -177,6 +177,7 @@ export async function handlePipelineFailure(
177
177
  feature: ctx.feature,
178
178
  totalCost: ctx.totalCost,
179
179
  workdir: ctx.workdir,
180
+ attemptCost: pipelineResult.context.agentResult?.estimatedCost || 0,
180
181
  });
181
182
  prd = escalationResult.prd;
182
183
  prdDirty = escalationResult.prdDirty;
@@ -21,6 +21,7 @@ import { clearCache as clearLlmCache, routeBatch as llmRouteBatch } from "../rou
21
21
  import { precomputeBatchPlan } from "./batching";
22
22
  import { stopHeartbeat, writeExitSummary } from "./crash-recovery";
23
23
  import { getAllReadyStories } from "./helpers";
24
+ import type { ParallelExecutorOptions, ParallelExecutorResult } from "./parallel-executor";
24
25
  import { hookCtx } from "./story-context";
25
26
 
26
27
  /**
@@ -29,6 +30,10 @@ import { hookCtx } from "./story-context";
29
30
  */
30
31
  export const _runnerDeps = {
31
32
  fireHook,
33
+ // Injectable for tests — avoids dynamic-import module-cache issues in bun test (bun 1.3.9+)
34
+ runParallelExecution: null as
35
+ | null
36
+ | ((options: ParallelExecutorOptions, prd: import("../prd").PRD) => Promise<ParallelExecutorResult>),
32
37
  };
33
38
 
34
39
  // Re-export for backward compatibility
@@ -202,7 +207,8 @@ export async function run(options: RunOptions): Promise<RunResult> {
202
207
 
203
208
  // ── Parallel Execution Path (when --parallel is set) ──────────────────────
204
209
  if (options.parallel !== undefined) {
205
- const { runParallelExecution } = await import("./parallel-executor");
210
+ const runParallelExecution =
211
+ _runnerDeps.runParallelExecution ?? (await import("./parallel-executor")).runParallelExecution;
206
212
  const parallelResult = await runParallelExecution(
207
213
  {
208
214
  prdPath,
@@ -231,6 +237,8 @@ export async function run(options: RunOptions): Promise<RunResult> {
231
237
  prd = parallelResult.prd;
232
238
  totalCost = parallelResult.totalCost;
233
239
  storiesCompleted = parallelResult.storiesCompleted;
240
+ // BUG-066: merge parallel story metrics into the running accumulator
241
+ allStoryMetrics.push(...parallelResult.storyMetrics);
234
242
 
235
243
  // If parallel execution completed everything, return early
236
244
  if (parallelResult.completed && parallelResult.durationMs !== undefined) {
@@ -269,8 +277,10 @@ export async function run(options: RunOptions): Promise<RunResult> {
269
277
 
270
278
  prd = sequentialResult.prd;
271
279
  iterations = sequentialResult.iterations;
272
- storiesCompleted = sequentialResult.storiesCompleted;
273
- totalCost = sequentialResult.totalCost;
280
+ // BUG-064: accumulate (not overwrite) totalCost from sequential path
281
+ totalCost += sequentialResult.totalCost;
282
+ // BUG-065: accumulate (not overwrite) storiesCompleted from sequential path
283
+ storiesCompleted += sequentialResult.storiesCompleted;
274
284
  allStoryMetrics.push(...sequentialResult.allStoryMetrics);
275
285
 
276
286
  // After main loop: Check if we need acceptance retry loop
@@ -44,14 +44,16 @@ export function collectStoryMetrics(ctx: PipelineContext, storyStartTime: string
44
44
  const agentResult = ctx.agentResult;
45
45
 
46
46
  // Calculate attempts (initial + escalations)
47
+ // BUG-067: priorFailures captures cross-tier attempts that story.escalations never records
47
48
  const escalationCount = story.escalations?.length || 0;
48
- const attempts = Math.max(1, story.attempts || 1);
49
+ const priorFailureCount = story.priorFailures?.length || 0;
50
+ const attempts = priorFailureCount + Math.max(1, story.attempts || 1);
49
51
 
50
52
  // Determine final tier (from last escalation or initial routing)
51
53
  const finalTier = escalationCount > 0 ? story.escalations[escalationCount - 1].toTier : routing.modelTier;
52
54
 
53
- // First pass success = succeeded with no escalations
54
- const firstPassSuccess = agentResult?.success === true && escalationCount === 0;
55
+ // First pass success = succeeded with no prior failures and no escalations (BUG-067)
56
+ const firstPassSuccess = agentResult?.success === true && escalationCount === 0 && priorFailureCount === 0;
55
57
 
56
58
  // Extract model name from config
57
59
  const modelEntry = ctx.config.models[routing.modelTier];
@@ -76,12 +78,13 @@ export function collectStoryMetrics(ctx: PipelineContext, storyStartTime: string
76
78
  attempts,
77
79
  finalTier,
78
80
  success: agentResult?.success || false,
79
- cost: agentResult?.estimatedCost || 0,
81
+ cost: (ctx.accumulatedAttemptCost ?? 0) + (agentResult?.estimatedCost || 0),
80
82
  durationMs: agentResult?.durationMs || 0,
81
83
  firstPassSuccess,
82
84
  startedAt: storyStartTime,
83
85
  completedAt: new Date().toISOString(),
84
86
  fullSuiteGatePassed,
87
+ runtimeCrashes: ctx.storyRuntimeCrashes ?? 0,
85
88
  };
86
89
  }
87
90
 
@@ -139,6 +142,7 @@ export function collectBatchMetrics(ctx: PipelineContext, storyStartTime: string
139
142
  startedAt: storyStartTime,
140
143
  completedAt: new Date().toISOString(),
141
144
  fullSuiteGatePassed: false, // batches are not TDD-gated
145
+ runtimeCrashes: 0, // batch stories don't have individual crash tracking
142
146
  };
143
147
  });
144
148
  }
@@ -34,6 +34,8 @@ export interface StoryMetrics {
34
34
  startedAt: string;
35
35
  /** Timestamp when completed */
36
36
  completedAt: string;
37
+ /** Number of runtime crashes (RUNTIME_CRASH verify status) encountered for this story (BUG-070) */
38
+ runtimeCrashes?: number;
37
39
  /** Whether TDD full-suite gate passed (only true for TDD strategies when gate passes) */
38
40
  fullSuiteGatePassed?: boolean;
39
41
  }
@@ -41,7 +41,7 @@ export interface StoryCompletedEvent {
41
41
  storyId: string;
42
42
  story: UserStory;
43
43
  passed: boolean;
44
- durationMs: number;
44
+ runElapsedMs: number;
45
45
  /** Optional: passed by executor/stage for hook/reporter subscribers */
46
46
  cost?: number;
47
47
  modelTier?: string;
@@ -67,7 +67,7 @@ export const completionStage: PipelineStage = {
67
67
  storyId: completedStory.id,
68
68
  story: completedStory,
69
69
  passed: true,
70
- durationMs: storyMetric?.durationMs ?? 0,
70
+ runElapsedMs: storyMetric?.durationMs ?? 0,
71
71
  // Extra fields picked up by subscribers via `as any`
72
72
  cost: costPerStory,
73
73
  modelTier: ctx.routing?.modelTier,
@@ -11,6 +11,7 @@
11
11
 
12
12
  import type { SmartTestRunnerConfig } from "../../config/types";
13
13
  import { getLogger } from "../../logger";
14
+ import { detectRuntimeCrash } from "../../verification/crash-detector";
14
15
  import type { VerifyStatus } from "../../verification/orchestrator-types";
15
16
  import { regression } from "../../verification/runners";
16
17
  import { _smartRunnerDeps } from "../../verification/smart-runner";
@@ -133,7 +134,13 @@ export const verifyStage: PipelineStage = {
133
134
  // Store result on context for rectify stage
134
135
  ctx.verifyResult = {
135
136
  success: result.success,
136
- status: (result.status === "TIMEOUT" ? "TIMEOUT" : result.success ? "PASS" : "TEST_FAILURE") as VerifyStatus,
137
+ status: (result.status === "TIMEOUT"
138
+ ? "TIMEOUT"
139
+ : result.success
140
+ ? "PASS"
141
+ : detectRuntimeCrash(result.output)
142
+ ? "RUNTIME_CRASH"
143
+ : "TEST_FAILURE") as VerifyStatus,
137
144
  storyId: ctx.story.id,
138
145
  strategy: "scoped",
139
146
  passCount: result.passCount ?? 0,
@@ -74,7 +74,7 @@ export function wireReporters(
74
74
  runId,
75
75
  storyId: ev.storyId,
76
76
  status: "completed",
77
- durationMs: ev.durationMs,
77
+ runElapsedMs: ev.runElapsedMs,
78
78
  cost: ev.cost ?? 0,
79
79
  tier: ev.modelTier ?? "balanced",
80
80
  testStrategy: ev.testStrategy ?? "test-after",
@@ -100,7 +100,7 @@ export function wireReporters(
100
100
  runId,
101
101
  storyId: ev.storyId,
102
102
  status: "failed",
103
- durationMs: Date.now() - startTime,
103
+ runElapsedMs: Date.now() - startTime,
104
104
  cost: 0,
105
105
  tier: "balanced",
106
106
  testStrategy: "test-after",
@@ -126,7 +126,7 @@ export function wireReporters(
126
126
  runId,
127
127
  storyId: ev.storyId,
128
128
  status: "paused",
129
- durationMs: Date.now() - startTime,
129
+ runElapsedMs: Date.now() - startTime,
130
130
  cost: 0,
131
131
  tier: "balanced",
132
132
  testStrategy: "test-after",
@@ -110,8 +110,12 @@ export interface PipelineContext {
110
110
  tddFailureCategory?: FailureCategory;
111
111
  /** Set to true when TDD full-suite gate already passed — verify stage skips to avoid redundant run (BUG-054) */
112
112
  fullSuiteGatePassed?: boolean;
113
+ /** Number of runtime crashes (RUNTIME_CRASH verify status) encountered for this story (BUG-070) */
114
+ storyRuntimeCrashes?: number;
113
115
  /** Structured review findings from plugin reviewers — passed to escalation for retry context */
114
116
  reviewFindings?: import("../plugins/types").ReviewFinding[];
117
+ /** Accumulated cost across all prior escalation attempts (BUG-067) */
118
+ accumulatedAttemptCost?: number;
115
119
  }
116
120
 
117
121
  /**
@@ -274,7 +274,7 @@ export interface StoryCompleteEvent {
274
274
  runId: string;
275
275
  storyId: string;
276
276
  status: "completed" | "failed" | "skipped" | "paused";
277
- durationMs: number;
277
+ runElapsedMs: number;
278
278
  cost: number;
279
279
  tier: string;
280
280
  testStrategy: string;
package/src/prd/types.ts CHANGED
@@ -49,6 +49,8 @@ export interface StructuredFailure {
49
49
  testFailures?: TestFailureContext[];
50
50
  /** Structured review findings from plugin reviewers (e.g., semgrep, eslint) */
51
51
  reviewFindings?: import("../plugins/types").ReviewFinding[];
52
+ /** Estimated cost of this attempt (BUG-067: accumulated across escalations) */
53
+ cost?: number;
52
54
  /** ISO timestamp when failure was recorded */
53
55
  timestamp: string;
54
56
  }
package/src/tdd/types.ts CHANGED
@@ -12,7 +12,8 @@ export type FailureCategory =
12
12
  /** Verifier explicitly rejected the implementation */
13
13
  | "verifier-rejected"
14
14
  /** Greenfield project with no test files — TDD not applicable (BUG-010) */
15
- | "greenfield-no-tests";
15
+ | "greenfield-no-tests"
16
+ | "runtime-crash";
16
17
 
17
18
  /** Isolation verification result */
18
19
  export interface IsolationCheck {
@@ -0,0 +1,34 @@
1
+ /**
2
+ * Runtime Crash Detector — BUG-070
3
+ *
4
+ * Detects Bun runtime crashes in test output so they can be classified as
5
+ * RUNTIME_CRASH rather than TEST_FAILURE, preventing spurious tier escalation.
6
+ *
7
+ * Detection is a plain substring scan of the raw output against
8
+ * CRASH_PATTERNS; missing or empty output is treated as "no crash".
9
+ */
10
+
11
+ /**
12
+ * Known patterns emitted by the Bun runtime before any test results
13
+ * when a crash occurs (segfault, panic, etc.).
14
+ */
15
+ export const CRASH_PATTERNS = [
16
+ "panic(main thread)",
17
+ "Segmentation fault",
18
+ "Bun has crashed",
19
+ "oh no: Bun has crashed",
20
+ ] as const;
21
+
22
+ /**
23
+ * Detect whether the given test runner output contains a Bun runtime crash.
24
+ *
25
+ * Returns true if any known crash pattern is found in the output.
26
+ * These patterns are emitted by Bun itself before any test result lines.
27
+ *
28
+ * @param output - Raw stdout/stderr from the test runner
29
+ */
30
+ export function detectRuntimeCrash(output: string | undefined | null): boolean {
31
+ // Missing or empty output (undefined, null, "") cannot contain a crash pattern.
32
+ if (!output) return false;
33
+ return CRASH_PATTERNS.some((pattern) => output.includes(pattern));
34
+ }
@@ -50,7 +50,14 @@ export interface StructuredTestFailure {
50
50
  // Result
51
51
  // ---------------------------------------------------------------------------
52
52
 
53
- export type VerifyStatus = "PASS" | "TEST_FAILURE" | "TIMEOUT" | "BUILD_ERROR" | "SKIPPED" | "ASSET_CHECK_FAILED";
53
+ export type VerifyStatus =
54
+ | "PASS"
55
+ | "TEST_FAILURE"
56
+ | "TIMEOUT"
57
+ | "BUILD_ERROR"
58
+ | "SKIPPED"
59
+ | "ASSET_CHECK_FAILED"
60
+ | "RUNTIME_CRASH";
54
61
 
55
62
  export interface VerifyResult {
56
63
  success: boolean;