@nathapp/nax 0.63.0 → 0.63.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/nax.js +285 -65
  2. package/package.json +1 -1
package/dist/nax.js CHANGED
@@ -7598,6 +7598,12 @@ ${SEMANTIC_OUTPUT_SCHEMA}`;
7598
7598
  ` + `Output ONLY the JSON object from your review \u2014 no markdown fences, no explanation.
7599
7599
  ` + "The object must start with { and end with }.";
7600
7600
  }
7601
+ static jsonRetryCondensed(maxFindings = 3) {
7602
+ return `Your previous response was truncated and could not be parsed as valid JSON.
7603
+ Respond with a condensed summary: at most ${maxFindings} findings, highest severity first.
7604
+ Output ONLY a complete, valid JSON object. It must start with { and end with }.
7605
+ Schema: {"passed": boolean, "findings": [{"severity": string, "category": string, "file": string, "line": number, "issue": string, "suggestion": string}]}`;
7606
+ }
7601
7607
  }
7602
7608
  function buildAttemptContextBlock(priorFailures) {
7603
7609
  if (!priorFailures || priorFailures.length === 0)
@@ -23835,7 +23841,7 @@ var init_schemas3 = __esm(() => {
23835
23841
  });
23836
23842
  SemanticReviewConfigSchema = exports_external.object({
23837
23843
  modelTier: ModelTierSchema.default("balanced"),
23838
- diffMode: exports_external.enum(["embedded", "ref"]).default("embedded"),
23844
+ diffMode: exports_external.enum(["embedded", "ref"]).default("ref"),
23839
23845
  resetRefOnRerun: exports_external.boolean().default(false),
23840
23846
  rules: exports_external.array(exports_external.string()).default([]),
23841
23847
  timeoutMs: exports_external.number().int().positive().default(600000),
@@ -24270,7 +24276,7 @@ var init_schemas3 = __esm(() => {
24270
24276
  blockingThreshold: "error",
24271
24277
  semantic: {
24272
24278
  modelTier: "balanced",
24273
- diffMode: "embedded",
24279
+ diffMode: "ref",
24274
24280
  resetRefOnRerun: false,
24275
24281
  rules: [],
24276
24282
  timeoutMs: 600000,
@@ -24469,6 +24475,7 @@ __export(exports_adapter, {
24469
24475
  closeAcpSession: () => closeAcpSession,
24470
24476
  _fallbackDeps: () => _fallbackDeps,
24471
24477
  _acpAdapterDeps: () => _acpAdapterDeps,
24478
+ MAX_AGENT_OUTPUT_CHARS: () => MAX_AGENT_OUTPUT_CHARS,
24472
24479
  AcpAgentAdapter: () => AcpAgentAdapter
24473
24480
  });
24474
24481
  import { createHash } from "crypto";
@@ -30123,7 +30130,9 @@ async function collectStoryMetrics(ctx, storyStartTime) {
30123
30130
  const priorFailureCount = story.priorFailures?.length || 0;
30124
30131
  const attempts = priorFailureCount + Math.max(1, story.attempts || 1);
30125
30132
  const finalTier = escalationCount > 0 ? story.escalations[escalationCount - 1].toTier : routing.modelTier;
30126
- const firstPassSuccess = agentResult?.success === true && escalationCount === 0 && priorFailureCount === 0;
30133
+ const autofixAttemptCount = ctx.autofixAttempt ?? 0;
30134
+ const rectifyAttemptCount = ctx.rectifyAttempt ?? 0;
30135
+ const firstPassSuccess = agentResult?.success === true && escalationCount === 0 && priorFailureCount === 0 && autofixAttemptCount === 0 && rectifyAttemptCount === 0;
30127
30136
  const agentUsed = routing.agent ?? ctx.agentManager?.getDefault() ?? resolveDefaultAgent(ctx.config);
30128
30137
  let modelUsed = routing.modelTier;
30129
30138
  try {
@@ -33060,8 +33069,27 @@ var init_event_bus = __esm(() => {
33060
33069
  });
33061
33070
 
33062
33071
  // src/pipeline/stages/autofix-adversarial.ts
33063
- function splitAdversarialFindingsByScope(check2, testFilePatterns) {
33064
- if (check2.check !== "adversarial" || !check2.findings?.length) {
33072
+ function extractFilesFromLintOutput(output) {
33073
+ if (!output.trim())
33074
+ return [];
33075
+ const files = new Set;
33076
+ const PATH_RE = /^[ \t]*((?:\/[\w./-]+|\.\.?\/[\w./-]+|[\w][\w-]*(?:\/[\w./-]+)+))(?::\d+)?(?::\d+)?(?:\s|:|$)/gm;
33077
+ let startIndex = 0;
33078
+ while (startIndex <= output.length) {
33079
+ PATH_RE.lastIndex = startIndex;
33080
+ const m = PATH_RE.exec(output);
33081
+ if (m === null)
33082
+ break;
33083
+ const candidate = m[1];
33084
+ if (SOURCE_EXT_RE.test(candidate)) {
33085
+ files.add(candidate);
33086
+ }
33087
+ startIndex = m.index + 1;
33088
+ }
33089
+ return Array.from(files);
33090
+ }
33091
+ function splitByStructuredFindings(check2, testFilePatterns) {
33092
+ if (!check2.findings?.length) {
33065
33093
  return { testFindings: null, sourceFindings: null };
33066
33094
  }
33067
33095
  const testFs = check2.findings.filter((f) => isTestFile(f.file ?? "", testFilePatterns));
@@ -33073,12 +33101,36 @@ function splitAdversarialFindingsByScope(check2, testFilePatterns) {
33073
33101
  };
33074
33102
  return { testFindings: toCheck(testFs), sourceFindings: toCheck(sourceFs) };
33075
33103
  }
33104
+ function splitByOutputParsing(check2, testFilePatterns) {
33105
+ const files = extractFilesFromLintOutput(check2.output);
33106
+ if (files.length === 0) {
33107
+ if (check2.output.trim()) {
33108
+ return { testFindings: null, sourceFindings: check2 };
33109
+ }
33110
+ return { testFindings: null, sourceFindings: null };
33111
+ }
33112
+ const hasTest = files.some((f) => isTestFile(f, testFilePatterns));
33113
+ const hasSource = files.some((f) => !isTestFile(f, testFilePatterns));
33114
+ return {
33115
+ testFindings: hasTest ? check2 : null,
33116
+ sourceFindings: hasSource ? check2 : null
33117
+ };
33118
+ }
33119
+ function splitFindingsByScope(check2, testFilePatterns) {
33120
+ if (check2.check === "adversarial") {
33121
+ return splitByStructuredFindings(check2, testFilePatterns);
33122
+ }
33123
+ if (check2.check === "lint") {
33124
+ return splitByOutputParsing(check2, testFilePatterns);
33125
+ }
33126
+ return { testFindings: null, sourceFindings: null };
33127
+ }
33076
33128
  async function runTestWriterRectification(ctx, testWriterChecks, story, agentManager, keepOpen = true) {
33077
33129
  const logger = getLogger();
33078
33130
  const twPrompt = RectifierPromptBuilder.testWriterRectification(testWriterChecks, story);
33079
33131
  const defaultAgent = agentManager.getDefault();
33080
33132
  if (!defaultAgent) {
33081
- logger.warn("autofix", "Test-writer rectification skipped \u2014 no default agent", { storyId: ctx.story.id });
33133
+ logger.warn("autofix", "Test-writer rectification skipped -- no default agent", { storyId: ctx.story.id });
33082
33134
  return 0;
33083
33135
  }
33084
33136
  const modelTier = ctx.rootConfig.tdd?.sessionTiers?.testWriter ?? "balanced";
@@ -33104,17 +33156,19 @@ async function runTestWriterRectification(ctx, testWriterChecks, story, agentMan
33104
33156
  });
33105
33157
  return twResult.estimatedCost ?? 0;
33106
33158
  } catch {
33107
- logger.warn("autofix", "Test-writer rectification failed \u2014 proceeding with implementer", {
33159
+ logger.warn("autofix", "Test-writer rectification failed -- proceeding with implementer", {
33108
33160
  storyId: ctx.story.id
33109
33161
  });
33110
33162
  return 0;
33111
33163
  }
33112
33164
  }
33165
+ var SOURCE_EXT_RE;
33113
33166
  var init_autofix_adversarial = __esm(() => {
33114
33167
  init_config();
33115
33168
  init_logger2();
33116
33169
  init_prompts();
33117
33170
  init_test_runners();
33171
+ SOURCE_EXT_RE = /\.(ts|tsx|js|jsx|mjs|cjs|go|py|rs|rb|java|cs|cpp|c|h|swift|kt)$/;
33118
33172
  });
33119
33173
 
33120
33174
  // src/review/dialogue.ts
@@ -33520,6 +33574,17 @@ var init_agent_profiles = __esm(() => {
33520
33574
  toolSchemaDialect: "openai"
33521
33575
  }
33522
33576
  },
33577
+ opencode: {
33578
+ caps: {
33579
+ maxContextTokens: 128000,
33580
+ preferredPromptTokens: 12000,
33581
+ supportsToolCalls: true,
33582
+ supportsSystemPrompt: true,
33583
+ supportsMarkdown: true,
33584
+ systemPromptStyle: "markdown-sections",
33585
+ toolSchemaDialect: "openai"
33586
+ }
33587
+ },
33523
33588
  local: {
33524
33589
  caps: {
33525
33590
  maxContextTokens: 32000,
@@ -36787,6 +36852,14 @@ var init_review_audit = __esm(() => {
36787
36852
  };
36788
36853
  });
36789
36854
 
36855
+ // src/review/truncation.ts
36856
+ function looksLikeTruncatedJson(raw) {
36857
+ return raw.trimEnd().length >= MAX_AGENT_OUTPUT_CHARS - 100;
36858
+ }
36859
+ var init_truncation = __esm(() => {
36860
+ init_adapter();
36861
+ });
36862
+
36790
36863
  // src/review/adversarial.ts
36791
36864
  function validateAdversarialShape(parsed) {
36792
36865
  if (typeof parsed !== "object" || parsed === null)
@@ -36988,22 +37061,26 @@ async function runAdversarialReview(workdir, storyGitRef, story, adversarialConf
36988
37061
  return {
36989
37062
  check: "adversarial",
36990
37063
  success: true,
37064
+ failOpen: true,
36991
37065
  command: "",
36992
37066
  exitCode: 0,
36993
37067
  output: `skipped: LLM call failed \u2014 ${String(err)}`,
36994
37068
  durationMs: Date.now() - startTime
36995
37069
  };
36996
37070
  }
36997
- if (!parseAdversarialResponse(rawResponse)) {
37071
+ const isTruncated = looksLikeTruncatedJson(rawResponse);
37072
+ if (isTruncated || !parseAdversarialResponse(rawResponse)) {
36998
37073
  retryAttempted = true;
37074
+ const retryPrompt = isTruncated ? ReviewPromptBuilder.jsonRetryCondensed() : ReviewPromptBuilder.jsonRetry();
36999
37075
  logger?.info("adversarial", "JSON parse failed, retrying (1/1)", {
37000
37076
  storyId: story.id,
37001
37077
  rawHead: rawResponse.slice(0, 200),
37002
- responseLen: rawResponse.length
37078
+ responseLen: rawResponse.length,
37079
+ isTruncated
37003
37080
  });
37004
37081
  try {
37005
37082
  const retryResult = await agentManager.run({
37006
- runOptions: { prompt: ReviewPromptBuilder.jsonRetry(), ...runOpts, keepOpen: false }
37083
+ runOptions: { prompt: retryPrompt, ...runOpts, keepOpen: false }
37007
37084
  });
37008
37085
  rawResponse = retryResult.output;
37009
37086
  llmCost += retryResult.estimatedCost ?? 0;
@@ -37058,6 +37135,7 @@ async function runAdversarialReview(workdir, storyGitRef, story, adversarialConf
37058
37135
  return {
37059
37136
  check: "adversarial",
37060
37137
  success: true,
37138
+ failOpen: true,
37061
37139
  command: "",
37062
37140
  exitCode: 0,
37063
37141
  output: "adversarial review: could not parse LLM response (fail-open)",
@@ -37164,6 +37242,7 @@ var init_adversarial = __esm(() => {
37164
37242
  init_review_builder();
37165
37243
  init_diff_utils();
37166
37244
  init_review_audit();
37245
+ init_truncation();
37167
37246
  _adversarialDeps = {
37168
37247
  writeReviewAudit
37169
37248
  };
@@ -37274,7 +37353,7 @@ async function runSemanticReview(workdir, storyGitRef, story, semanticConfig, ag
37274
37353
  durationMs: Date.now() - startTime
37275
37354
  };
37276
37355
  }
37277
- const diffMode = semanticConfig.diffMode ?? "embedded";
37356
+ const diffMode = semanticConfig.diffMode ?? "ref";
37278
37357
  logger?.info("review", "Running semantic check", {
37279
37358
  storyId: story.id,
37280
37359
  modelTier: semanticConfig.modelTier,
@@ -37546,22 +37625,26 @@ ${formatFindings2(debateBlocking)}`,
37546
37625
  return {
37547
37626
  check: "semantic",
37548
37627
  success: true,
37628
+ failOpen: true,
37549
37629
  command: "",
37550
37630
  exitCode: 0,
37551
37631
  output: `skipped: LLM call failed \u2014 ${String(err)}`,
37552
37632
  durationMs: Date.now() - startTime
37553
37633
  };
37554
37634
  }
37555
- if (!parseLLMResponse(rawResponse)) {
37635
+ const isTruncated = looksLikeTruncatedJson(rawResponse);
37636
+ if (isTruncated || !parseLLMResponse(rawResponse)) {
37556
37637
  retryAttempted = true;
37638
+ const retryPrompt = isTruncated ? ReviewPromptBuilder.jsonRetryCondensed() : ReviewPromptBuilder.jsonRetry();
37557
37639
  logger?.info("semantic", "JSON parse failed, retrying (1/1)", {
37558
37640
  storyId: story.id,
37559
37641
  rawHead: rawResponse.slice(0, 200),
37560
- responseLen: rawResponse.length
37642
+ responseLen: rawResponse.length,
37643
+ isTruncated
37561
37644
  });
37562
37645
  try {
37563
37646
  const retryResult = await agentManager.run({
37564
- runOptions: { prompt: ReviewPromptBuilder.jsonRetry(), ...runOpts, keepOpen: false }
37647
+ runOptions: { prompt: retryPrompt, ...runOpts, keepOpen: false }
37565
37648
  });
37566
37649
  rawResponse = retryResult.output;
37567
37650
  llmCost += retryResult.estimatedCost ?? 0;
@@ -37616,6 +37699,7 @@ ${formatFindings2(debateBlocking)}`,
37616
37699
  return {
37617
37700
  check: "semantic",
37618
37701
  success: true,
37702
+ failOpen: true,
37619
37703
  command: "",
37620
37704
  exitCode: 0,
37621
37705
  output: "semantic review: could not parse LLM response (fail-open)",
@@ -37719,6 +37803,7 @@ var init_semantic = __esm(() => {
37719
37803
  init_test_runners();
37720
37804
  init_diff_utils();
37721
37805
  init_review_audit();
37806
+ init_truncation();
37722
37807
  _semanticDeps = {
37723
37808
  createDebateSession: (opts) => new DebateSession(opts),
37724
37809
  writeReviewAudit
@@ -37873,7 +37958,7 @@ Stage and commit these files before running review.`
37873
37958
  };
37874
37959
  const semanticCfg = config2.semantic ?? {
37875
37960
  modelTier: "balanced",
37876
- diffMode: "embedded",
37961
+ diffMode: "ref",
37877
37962
  resetRefOnRerun: false,
37878
37963
  rules: [],
37879
37964
  timeoutMs: 600000
@@ -38043,6 +38128,15 @@ function buildReviewSummary(checks3) {
38043
38128
  }
38044
38129
  return summary;
38045
38130
  }
38131
+ function formatFailureReason(check2) {
38132
+ return check2.check === "semantic" || check2.check === "adversarial" ? `${check2.check} failed` : `${check2.check} failed (exit code ${check2.exitCode})`;
38133
+ }
38134
+ function buildFailureReason(checks3) {
38135
+ const failedChecks = checks3.filter((check2) => !check2.success);
38136
+ if (failedChecks.length === 0)
38137
+ return;
38138
+ return failedChecks.map(formatFailureReason).join(", ");
38139
+ }
38046
38140
 
38047
38141
  class ReviewOrchestrator {
38048
38142
  async review(reviewConfig, workdir, executionConfig, plugins, storyGitRef, scopePrefix, qualityCommands, storyId, story, agentManager, naxConfig, retrySkipChecks, featureName, resolverSession, priorFailures, featureContextMarkdown, contextBundles, projectDir, env2, naxIgnoreIndex) {
@@ -38116,8 +38210,7 @@ class ReviewOrchestrator {
38116
38210
  const allChecks = [...mechanicalResult.checks, ...llmCheckResults];
38117
38211
  const mechanicalPassed = mechanicalResult.success;
38118
38212
  const llmPassed = llmCheckResults.every((c) => c.success);
38119
- const firstFailure = allChecks.find((c) => !c.success);
38120
- const failureReason = firstFailure ? firstFailure.check === "semantic" || firstFailure.check === "adversarial" ? `${firstFailure.check} failed` : `${firstFailure.check} failed (exit code ${firstFailure.exitCode})` : undefined;
38213
+ const failureReason = buildFailureReason(allChecks);
38121
38214
  const reviewSummary = buildReviewSummary(llmCheckResults);
38122
38215
  builtIn = {
38123
38216
  success: mechanicalPassed && llmPassed,
@@ -38368,6 +38461,20 @@ var init_review = __esm(() => {
38368
38461
  ctx.reviewResult = result.builtIn;
38369
38462
  ctx.mechanicalFailedOnly = result.mechanicalFailedOnly;
38370
38463
  const reviewCost = (result.builtIn.checks ?? []).reduce((sum, c) => sum + (c.cost ?? 0), 0) || undefined;
38464
+ const failOpenChecks = result.builtIn.success ? (result.builtIn.checks ?? []).filter((c) => c.failOpen).map((c) => c.check) : [];
38465
+ if (failOpenChecks.length > 0 && (ctx.autofixAttempt ?? 0) > 0) {
38466
+ logger.warn("review", "Fail-open on partial-progress retry \u2014 treating as failure (fail-closed on ambiguity)", {
38467
+ storyId: ctx.story.id,
38468
+ failOpenChecks,
38469
+ autofixAttempt: ctx.autofixAttempt
38470
+ });
38471
+ ctx.reviewResult = {
38472
+ ...result.builtIn,
38473
+ success: false,
38474
+ failureReason: `fail-open on retry: ${failOpenChecks.join(", ")}`
38475
+ };
38476
+ return { action: "continue", cost: reviewCost };
38477
+ }
38371
38478
  if (!result.success) {
38372
38479
  const pluginFindings = result.builtIn.pluginReviewers?.flatMap((pr) => pr.findings ?? []) ?? [];
38373
38480
  const semanticFindings = (result.builtIn.checks ?? []).filter((c) => c.check === "semantic" && !c.success && c.findings?.length).flatMap((c) => c.findings ?? []);
@@ -38413,6 +38520,9 @@ async function recheckReview(ctx) {
38413
38520
  if (!reviewStage2.enabled(ctx))
38414
38521
  return true;
38415
38522
  await reviewStage2.execute(ctx);
38523
+ const hasFailOpen = (ctx.reviewResult?.checks ?? []).some((c) => c.failOpen);
38524
+ if (hasFailOpen)
38525
+ return false;
38416
38526
  return ctx.reviewResult?.success === true;
38417
38527
  }
38418
38528
  function collectFailedChecks(ctx) {
@@ -38477,14 +38587,16 @@ async function runAgentRectification(ctx, lintFixCmd, formatFixCmd, effectiveWor
38477
38587
  let testWriterChecks = [];
38478
38588
  const stageTestFilePatterns = typeof ctx.rootConfig.execution?.smartTestRunner === "object" ? ctx.rootConfig.execution.smartTestRunner?.testFilePatterns : undefined;
38479
38589
  for (const check2 of failedChecks) {
38480
- if (check2.check === "adversarial" && check2.findings?.length) {
38481
- const { testFindings, sourceFindings } = splitAdversarialFindingsByScope(check2, stageTestFilePatterns);
38482
- if (testFindings)
38483
- testWriterChecks = [...testWriterChecks, testFindings];
38484
- if (sourceFindings) {
38485
- implementerChecks = implementerChecks.map((c) => c === check2 ? sourceFindings : c);
38486
- } else {
38487
- implementerChecks = implementerChecks.filter((c) => c !== check2);
38590
+ if (check2.check === "adversarial" || check2.check === "lint") {
38591
+ const { testFindings, sourceFindings } = splitFindingsByScope(check2, stageTestFilePatterns);
38592
+ if (testFindings || sourceFindings) {
38593
+ if (testFindings)
38594
+ testWriterChecks = [...testWriterChecks, testFindings];
38595
+ if (sourceFindings) {
38596
+ implementerChecks = implementerChecks.map((c) => c === check2 ? sourceFindings : c);
38597
+ } else {
38598
+ implementerChecks = implementerChecks.filter((c) => c !== check2);
38599
+ }
38488
38600
  }
38489
38601
  }
38490
38602
  }
@@ -38493,18 +38605,18 @@ async function runAgentRectification(ctx, lintFixCmd, formatFixCmd, effectiveWor
38493
38605
  if (ctx.routing.testStrategy === "no-test") {
38494
38606
  logger.warn("autofix", "Skipping test-writer rectification (no-test strategy)", {
38495
38607
  storyId: ctx.story.id,
38496
- skippedFindingCount: testWriterChecks.flatMap((c) => c.findings ?? []).length
38608
+ checks: testWriterChecks.map((c) => c.check)
38497
38609
  });
38498
38610
  } else {
38499
- logger.info("autofix", "Routing test-file adversarial findings to test-writer session", {
38611
+ logger.info("autofix", "Routing test-file findings to test-writer session", {
38500
38612
  storyId: ctx.story.id,
38501
- findingCount: testWriterChecks.flatMap((c) => c.findings ?? []).length
38613
+ checks: testWriterChecks.map((c) => c.check)
38502
38614
  });
38503
38615
  autofixCostAccum += await _autofixDeps.runTestWriterRectification(ctx, testWriterChecks, ctx.story, agentManager);
38504
38616
  }
38505
38617
  }
38506
38618
  if (implementerChecks.length === 0) {
38507
- logger.info("autofix", "All adversarial findings routed to test-writer \u2014 skipping implementer loop", {
38619
+ logger.info("autofix", "All findings routed to test-writer \u2014 skipping implementer loop", {
38508
38620
  storyId: ctx.story.id
38509
38621
  });
38510
38622
  return { succeeded: false, cost: autofixCostAccum };
@@ -38845,13 +38957,11 @@ var init_autofix = __esm(() => {
38845
38957
  if (ctx.routing.testStrategy === "no-test") {
38846
38958
  const failedChecks = (reviewResult.checks ?? []).filter((c) => !c.success);
38847
38959
  if (failedChecks.length > 0 && failedChecks.every((c) => {
38848
- if (c.check !== "adversarial")
38849
- return false;
38850
- const { testFindings, sourceFindings } = splitAdversarialFindingsByScope(c, testFilePatterns);
38960
+ const { testFindings, sourceFindings } = splitFindingsByScope(c, testFilePatterns);
38851
38961
  return testFindings !== null && sourceFindings === null;
38852
38962
  })) {
38853
38963
  const skippedFindingCount = failedChecks.flatMap((c) => c.findings ?? []).length;
38854
- logger.warn("autofix", "Adversarial review found test-file issues \u2014 skipped (no-test strategy)", {
38964
+ logger.warn("autofix", "Review found test-file issues only \u2014 skipped (no-test strategy)", {
38855
38965
  storyId: ctx.story.id,
38856
38966
  skippedFindingCount
38857
38967
  });
@@ -40100,6 +40210,40 @@ var init_verification = __esm(() => {
40100
40210
  init_runners();
40101
40211
  });
40102
40212
 
40213
+ // src/verification/failure-records.ts
40214
+ function truncateUnmappedFailureOutput(output) {
40215
+ const tailLines = output.split(`
40216
+ `).slice(-UNMAPPED_FAILURE_OUTPUT_MAX_LINES).join(`
40217
+ `);
40218
+ if (tailLines.length <= UNMAPPED_FAILURE_OUTPUT_MAX_CHARS) {
40219
+ return tailLines;
40220
+ }
40221
+ return `... (truncated)
40222
+ ${tailLines.slice(-UNMAPPED_FAILURE_OUTPUT_MAX_CHARS)}`;
40223
+ }
40224
+ function buildFailureRecords(testSummary, rawOutput) {
40225
+ if (testSummary.failures.length > 0) {
40226
+ return testSummary.failures.map((failure) => ({
40227
+ test: failure.testName,
40228
+ file: failure.file,
40229
+ message: failure.error,
40230
+ output: failure.stackTrace.length > 0 ? failure.stackTrace.join(`
40231
+ `) : undefined
40232
+ }));
40233
+ }
40234
+ if (testSummary.failed === 0) {
40235
+ return [];
40236
+ }
40237
+ return [
40238
+ {
40239
+ test: `Unmapped test failures (${testSummary.failed} detected)`,
40240
+ message: "Structured test failure parsing returned no failure records. Diagnose the regression from the raw test output.",
40241
+ output: rawOutput?.trim() ? truncateUnmappedFailureOutput(rawOutput.trim()) : undefined
40242
+ }
40243
+ ];
40244
+ }
40245
+ var UNMAPPED_FAILURE_OUTPUT_MAX_LINES = 200, UNMAPPED_FAILURE_OUTPUT_MAX_CHARS = 8000;
40246
+
40103
40247
  // src/tdd/cleanup.ts
40104
40248
  async function getPgid(pid) {
40105
40249
  try {
@@ -40275,7 +40419,7 @@ async function runFullSuiteGate(story, config2, workdir, agentManager, implement
40275
40419
  failures: filteredFailures,
40276
40420
  failed: wasFiltered ? filteredFailures.length : testSummary.failed
40277
40421
  };
40278
- return await runRectificationLoop(story, config2, workdir, agentManager, implementerTier, lite, logger, filteredSummary, rectificationConfig, effectiveTestCmd, fullSuiteTimeout, featureName, projectDir);
40422
+ return await runRectificationLoop(story, config2, workdir, agentManager, implementerTier, lite, logger, filteredSummary, rectificationConfig, effectiveTestCmd, fullSuiteTimeout, fullSuiteResult.output, featureName, projectDir);
40279
40423
  }
40280
40424
  if (testSummary.passed > 0) {
40281
40425
  logger.info("tdd", "Full suite gate passed (non-zero exit, 0 failures, tests detected)", {
@@ -40303,7 +40447,7 @@ async function runFullSuiteGate(story, config2, workdir, agentManager, implement
40303
40447
  });
40304
40448
  return { passed: false, cost: 0 };
40305
40449
  }
40306
- async function runRectificationLoop(story, config2, workdir, agentManager, implementerTier, lite, logger, testSummary, rectificationConfig, testCmd, fullSuiteTimeout, featureName, projectDir) {
40450
+ async function runRectificationLoop(story, config2, workdir, agentManager, implementerTier, lite, logger, testSummary, rectificationConfig, testCmd, fullSuiteTimeout, testOutput, featureName, projectDir) {
40307
40451
  const rectificationState = {
40308
40452
  attempt: 0,
40309
40453
  initialFailures: testSummary.failed,
@@ -40324,6 +40468,7 @@ async function runRectificationLoop(story, config2, workdir, agentManager, imple
40324
40468
  isolationPassed: true
40325
40469
  };
40326
40470
  let gateCostAccum = 0;
40471
+ let currentTestOutput = testOutput;
40327
40472
  const fixed = await runSharedRectificationLoop({
40328
40473
  stage: "tdd",
40329
40474
  storyId: story.id,
@@ -40343,13 +40488,7 @@ async function runRectificationLoop(story, config2, workdir, agentManager, imple
40343
40488
  }),
40344
40489
  canContinue: (state) => state.isolationPassed && _rectificationGateDeps.shouldRetryRectification(state, rectificationConfig),
40345
40490
  buildPrompt: async () => {
40346
- const failureRecords = testSummary.failures.map((f) => ({
40347
- test: f.testName,
40348
- file: f.file,
40349
- message: f.error,
40350
- output: f.stackTrace.length > 0 ? f.stackTrace.join(`
40351
- `) : undefined
40352
- }));
40491
+ const failureRecords = buildFailureRecords(testSummary, currentTestOutput);
40353
40492
  return RectifierPromptBuilder.for("tdd-suite-failure").story(story).priorFailures(failureRecords).testCommand(testCmd).conventions().task().build();
40354
40493
  },
40355
40494
  runAttempt: async (attempt, rectificationPrompt) => {
@@ -40420,6 +40559,7 @@ async function runRectificationLoop(story, config2, workdir, agentManager, imple
40420
40559
  }
40421
40560
  if (retryFullSuite.output) {
40422
40561
  const newTestSummary = _rectificationGateDeps.parseTestOutput(retryFullSuite.output);
40562
+ currentTestOutput = retryFullSuite.output;
40423
40563
  state.currentFailures = newTestSummary.failed;
40424
40564
  testSummary.failures = newTestSummary.failures;
40425
40565
  testSummary.failed = newTestSummary.failed;
@@ -42213,6 +42353,7 @@ async function _defaultRunDebate(storyId, stageConfig, prompt, config2, agentMan
42213
42353
  return { output, totalCostUsd };
42214
42354
  }
42215
42355
  async function runRectificationLoop2(opts) {
42356
+ const loopStartMs = Date.now();
42216
42357
  const {
42217
42358
  config: config2,
42218
42359
  workdir,
@@ -42231,6 +42372,7 @@ async function runRectificationLoop2(opts) {
42231
42372
  const agentManager = opts.agentManager ?? _rectificationDeps.createManager(config2);
42232
42373
  const rectificationConfig = config2.execution.rectification;
42233
42374
  const testSummary = parseTestOutput(testOutput);
42375
+ let currentTestOutput = testOutput;
42234
42376
  const label = promptPrefix ? "regression rectification" : "rectification";
42235
42377
  const rectificationState = {
42236
42378
  attempt: 0,
@@ -42290,13 +42432,7 @@ ${debateResult.output}`;
42290
42432
  });
42291
42433
  }
42292
42434
  }
42293
- const failureRecords = testSummary.failures.map((f) => ({
42294
- test: f.testName,
42295
- file: f.file,
42296
- message: f.error,
42297
- output: f.stackTrace.length > 0 ? f.stackTrace.join(`
42298
- `) : undefined
42299
- }));
42435
+ const failureRecords = buildFailureRecords(testSummary, currentTestOutput);
42300
42436
  let rectificationPrompt = await RectifierPromptBuilder.for("verify-failure").story(story).priorFailures(failureRecords).testCommand(testCommand).conventions().task().build();
42301
42437
  if (diagnosisPrefix) {
42302
42438
  rectificationPrompt = `${diagnosisPrefix}
@@ -42379,6 +42515,7 @@ ${rectificationPrompt}`;
42379
42515
  }
42380
42516
  if (retryVerification.output) {
42381
42517
  const newTestSummary = parseTestOutput(retryVerification.output);
42518
+ currentTestOutput = retryVerification.output;
42382
42519
  state.currentFailures = newTestSummary.failed;
42383
42520
  state.lastExitCode = retryVerification.status === "SUCCESS" ? 0 : 1;
42384
42521
  testSummary.failures = newTestSummary.failures;
@@ -42504,7 +42641,7 @@ ${escalationPrompt}`;
42504
42641
  }
42505
42642
  throw error48;
42506
42643
  });
42507
- return { succeeded, cost: costAccum };
42644
+ return { succeeded, cost: costAccum, durationMs: Date.now() - loopStartMs };
42508
42645
  }
42509
42646
  function runRectificationLoopFromCtx(ctx, opts) {
42510
42647
  return runRectificationLoop2({
@@ -44853,7 +44990,7 @@ var package_default;
44853
44990
  var init_package = __esm(() => {
44854
44991
  package_default = {
44855
44992
  name: "@nathapp/nax",
44856
- version: "0.63.0",
44993
+ version: "0.63.1",
44857
44994
  description: "AI Coding Agent Orchestrator \u2014 loops until done",
44858
44995
  type: "module",
44859
44996
  bin: {
@@ -44934,8 +45071,8 @@ var init_version = __esm(() => {
44934
45071
  NAX_VERSION = package_default.version;
44935
45072
  NAX_COMMIT = (() => {
44936
45073
  try {
44937
- if (/^[0-9a-f]{6,10}$/.test("e6619c9f"))
44938
- return "e6619c9f";
45074
+ if (/^[0-9a-f]{6,10}$/.test("579e7fb0"))
45075
+ return "579e7fb0";
44939
45076
  } catch {}
44940
45077
  try {
44941
45078
  const result = Bun.spawnSync(["git", "rev-parse", "--short", "HEAD"], {
@@ -45976,7 +46113,10 @@ async function runDeferredRegression(options) {
45976
46113
  failedTestFiles: [],
45977
46114
  passedTests: 0,
45978
46115
  rectificationAttempts: 0,
45979
- affectedStories: []
46116
+ affectedStories: [],
46117
+ storyCosts: {},
46118
+ storyDurations: {},
46119
+ storyOutcomes: {}
45980
46120
  };
45981
46121
  }
45982
46122
  if (regressionMode !== "deferred") {
@@ -45987,7 +46127,10 @@ async function runDeferredRegression(options) {
45987
46127
  failedTestFiles: [],
45988
46128
  passedTests: 0,
45989
46129
  rectificationAttempts: 0,
45990
- affectedStories: []
46130
+ affectedStories: [],
46131
+ storyCosts: {},
46132
+ storyDurations: {},
46133
+ storyOutcomes: {}
45991
46134
  };
45992
46135
  }
45993
46136
  const testCommand = config2.quality.commands.test ?? "bun test";
@@ -46017,7 +46160,10 @@ async function runDeferredRegression(options) {
46017
46160
  failedTestFiles: [],
46018
46161
  passedTests: 0,
46019
46162
  rectificationAttempts: 0,
46020
- affectedStories: []
46163
+ affectedStories: [],
46164
+ storyCosts: {},
46165
+ storyDurations: {},
46166
+ storyOutcomes: {}
46021
46167
  };
46022
46168
  }
46023
46169
  logger?.info("regression", "Running deferred full-suite regression gate", {
@@ -46033,7 +46179,10 @@ async function runDeferredRegression(options) {
46033
46179
  failedTestFiles: [],
46034
46180
  passedTests: fullSuiteResult.passCount ?? 0,
46035
46181
  rectificationAttempts: 0,
46036
- affectedStories: []
46182
+ affectedStories: [],
46183
+ storyCosts: {},
46184
+ storyDurations: {},
46185
+ storyOutcomes: {}
46037
46186
  };
46038
46187
  }
46039
46188
  if (fullSuiteResult.status === "TIMEOUT" && acceptOnTimeout) {
@@ -46044,7 +46193,10 @@ async function runDeferredRegression(options) {
46044
46193
  failedTestFiles: [],
46045
46194
  passedTests: 0,
46046
46195
  rectificationAttempts: 0,
46047
- affectedStories: []
46196
+ affectedStories: [],
46197
+ storyCosts: {},
46198
+ storyDurations: {},
46199
+ storyOutcomes: {}
46048
46200
  };
46049
46201
  }
46050
46202
  if (!fullSuiteResult.output) {
@@ -46055,7 +46207,10 @@ async function runDeferredRegression(options) {
46055
46207
  failedTestFiles: [],
46056
46208
  passedTests: fullSuiteResult.passCount ?? 0,
46057
46209
  rectificationAttempts: 0,
46058
- affectedStories: []
46210
+ affectedStories: [],
46211
+ storyCosts: {},
46212
+ storyDurations: {},
46213
+ storyOutcomes: {}
46059
46214
  };
46060
46215
  }
46061
46216
  const testSummary = _regressionDeps.parseTestOutput(fullSuiteResult.output);
@@ -46067,7 +46222,10 @@ async function runDeferredRegression(options) {
46067
46222
  failedTestFiles: [],
46068
46223
  passedTests: 0,
46069
46224
  rectificationAttempts: 0,
46070
- affectedStories: []
46225
+ affectedStories: [],
46226
+ storyCosts: {},
46227
+ storyDurations: {},
46228
+ storyOutcomes: {}
46071
46229
  };
46072
46230
  }
46073
46231
  const affectedStories = new Set;
@@ -46108,18 +46266,24 @@ async function runDeferredRegression(options) {
46108
46266
  failedTestFiles: Array.from(testFilesInFailures),
46109
46267
  passedTests: testSummary.passed,
46110
46268
  rectificationAttempts: 0,
46111
- affectedStories: Array.from(affectedStories)
46269
+ affectedStories: Array.from(affectedStories),
46270
+ storyCosts: {},
46271
+ storyDurations: {},
46272
+ storyOutcomes: {}
46112
46273
  };
46113
46274
  }
46114
46275
  let rectificationAttempts = 0;
46115
46276
  let storiesRectified = 0;
46116
46277
  let currentTestOutput = fullSuiteResult.output;
46117
46278
  const affectedStoriesList = Array.from(affectedStoriesObjs.values());
46279
+ const storyCostAccum = {};
46280
+ const storyDurationAccum = {};
46281
+ const storyOutcomeAccum = {};
46118
46282
  for (const story of affectedStoriesList) {
46119
46283
  for (let attempt = 0;attempt < maxRectificationAttempts; attempt++) {
46120
46284
  rectificationAttempts++;
46121
46285
  logger?.info("regression", `Rectifying story ${story.id} (attempt ${attempt + 1}/${maxRectificationAttempts})`);
46122
- const fixed = await _regressionDeps.runRectificationLoop({
46286
+ const rectResult = await _regressionDeps.runRectificationLoop({
46123
46287
  config: config2,
46124
46288
  workdir,
46125
46289
  story,
@@ -46132,7 +46296,12 @@ Your story ${story.id} broke tests in the full suite. Fix these regressions.`,
46132
46296
  agentManager,
46133
46297
  featureName: prd.feature
46134
46298
  });
46135
- if (fixed) {
46299
+ storyCostAccum[story.id] = (storyCostAccum[story.id] ?? 0) + rectResult.cost;
46300
+ storyDurationAccum[story.id] = (storyDurationAccum[story.id] ?? 0) + rectResult.durationMs;
46301
+ if (!storyOutcomeAccum[story.id]) {
46302
+ storyOutcomeAccum[story.id] = rectResult.succeeded;
46303
+ }
46304
+ if (rectResult.succeeded) {
46136
46305
  storiesRectified++;
46137
46306
  logger?.info("regression", `Story ${story.id} rectified successfully`);
46138
46307
  logger?.info("regression", "Re-running full suite after story rectification", {
@@ -46155,7 +46324,10 @@ Your story ${story.id} broke tests in the full suite. Fix these regressions.`,
46155
46324
  failedTestFiles: Array.from(testFilesInFailures),
46156
46325
  passedTests: midResult.passCount ?? 0,
46157
46326
  rectificationAttempts,
46158
- affectedStories: Array.from(affectedStories)
46327
+ affectedStories: Array.from(affectedStories),
46328
+ storyCosts: storyCostAccum,
46329
+ storyDurations: storyDurationAccum,
46330
+ storyOutcomes: storyOutcomeAccum
46159
46331
  };
46160
46332
  }
46161
46333
  logger?.warn("regression", "Full suite still failing after story rectification \u2014 continuing", {
@@ -46185,7 +46357,10 @@ Your story ${story.id} broke tests in the full suite. Fix these regressions.`,
46185
46357
  failedTestFiles: Array.from(testFilesInFailures),
46186
46358
  passedTests: retryResult.passCount ?? 0,
46187
46359
  rectificationAttempts,
46188
- affectedStories: Array.from(affectedStories)
46360
+ affectedStories: Array.from(affectedStories),
46361
+ storyCosts: storyCostAccum,
46362
+ storyDurations: storyDurationAccum,
46363
+ storyOutcomes: storyOutcomeAccum
46189
46364
  };
46190
46365
  }
46191
46366
  var _regressionDeps;
@@ -46286,6 +46461,50 @@ async function handleRunCompletion(options) {
46286
46461
  }, workdir);
46287
46462
  }
46288
46463
  }
46464
+ const regressionStoryCosts = regressionResult.storyCosts ?? {};
46465
+ const regressionStoryDurations = regressionResult.storyDurations ?? {};
46466
+ const regressionStoryOutcomes = regressionResult.storyOutcomes ?? {};
46467
+ if (Object.keys(regressionStoryCosts).length > 0) {
46468
+ const existingIndex = new Map(allStoryMetrics.map((m, i) => [m.storyId, i]));
46469
+ const rectCompletedAt = new Date().toISOString();
46470
+ const defaultAgent = options.agentManager?.getDefault() ?? resolveDefaultAgent(config2);
46471
+ for (const [storyId, storyCost] of Object.entries(regressionStoryCosts)) {
46472
+ const storyDuration = regressionStoryDurations[storyId] ?? 0;
46473
+ const storySuccess = regressionStoryOutcomes[storyId] ?? regressionResult.success;
46474
+ const existingIdx = existingIndex.get(storyId);
46475
+ if (existingIdx === undefined) {
46476
+ const regrStory = prd.userStories.find((s) => s.id === storyId);
46477
+ allStoryMetrics.push({
46478
+ storyId,
46479
+ complexity: regrStory?.routing?.complexity ?? "medium",
46480
+ modelTier: "balanced",
46481
+ modelUsed: defaultAgent,
46482
+ attempts: 1,
46483
+ finalTier: "balanced",
46484
+ success: storySuccess,
46485
+ cost: storyCost,
46486
+ durationMs: storyDuration,
46487
+ firstPassSuccess: false,
46488
+ startedAt: rectCompletedAt,
46489
+ completedAt: rectCompletedAt,
46490
+ source: "rectification",
46491
+ rectificationCost: storyCost,
46492
+ fullSuiteGatePassed: false,
46493
+ runtimeCrashes: 0
46494
+ });
46495
+ } else {
46496
+ const existing = allStoryMetrics[existingIdx];
46497
+ allStoryMetrics[existingIdx] = {
46498
+ ...existing,
46499
+ cost: existing.cost + storyCost,
46500
+ durationMs: existing.durationMs + storyDuration,
46501
+ rectificationCost: (existing.rectificationCost ?? 0) + storyCost,
46502
+ firstPassSuccess: false,
46503
+ success: existing.success && storySuccess
46504
+ };
46505
+ }
46506
+ }
46507
+ }
46289
46508
  }
46290
46509
  }
46291
46510
  const durationMs = Date.now() - startTime;
@@ -46392,6 +46611,7 @@ async function handleRunCompletion(options) {
46392
46611
  }
46393
46612
  var _runCompletionDeps;
46394
46613
  var init_run_completion = __esm(() => {
46614
+ init_agents();
46395
46615
  init_runner4();
46396
46616
  init_logger2();
46397
46617
  init_metrics();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@nathapp/nax",
3
- "version": "0.63.0",
3
+ "version": "0.63.1",
4
4
  "description": "AI Coding Agent Orchestrator — loops until done",
5
5
  "type": "module",
6
6
  "bin": {