@nathapp/nax 0.67.17 → 0.67.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/nax.js +157 -47
  2. package/package.json +1 -1
package/dist/nax.js CHANGED
@@ -29750,11 +29750,14 @@ function buildStorySection(story) {
29750
29750
  function buildVerdictSection(story) {
29751
29751
  return `# Verdict Instructions
29752
29752
 
29753
- ## Write Verdict File
29753
+ ## Write Verdict File and Emit JSON in Final Reply
29754
29754
 
29755
- After completing your verification, you **MUST** write a verdict file at the **project root**:
29755
+ After completing your verification, you **MUST** do BOTH of the following:
29756
29756
 
29757
- **File:** \`.nax-verifier-verdict.json\`
29757
+ 1. Write the verdict file at the **project root**: \`.nax-verifier-verdict.json\`
29758
+ 2. Emit the same verdict JSON as the FINAL content of your reply \u2014 no prose
29759
+ before or after, no markdown fences. Your reply must end with a closing
29760
+ brace \`}\` on its own line. The orchestrator parses your reply as JSON.
29758
29761
 
29759
29762
  Set \`approved: true\` when ALL of these conditions are met:
29760
29763
  - All story-scoped tests pass (the orchestrator already attempted the full-suite gate \u2014 you only need to verify the story's own tests)
@@ -29778,7 +29781,7 @@ Set \`approved: false\` when ANY of these conditions are true:
29778
29781
  - \`fixes\` \u2014 keep this empty; the verifier must not apply code or test fixes
29779
29782
  - \`reasoning\` \u2014 brief summary of your overall assessment
29780
29783
 
29781
- When done, do not commit code changes. Only write the verdict file.`;
29784
+ When done, do not commit code changes. Write the verdict file, then end your reply with the JSON object.`;
29782
29785
  }
29783
29786
 
29784
29787
  // src/prompts/sections/conventions.ts
@@ -30164,6 +30167,24 @@ class TddPromptBuilder {
30164
30167
  const isolation = role === "test-writer" ? opts.lite ? "lite" : "strict" : undefined;
30165
30168
  return TddPromptBuilder.for(role, { variant, isolation }).withLoader(workdir, config2).story(story).context(opts.contextMarkdown).v2FeatureContext(opts.contextBundle?.pushMarkdown).featureContext(opts.contextBundle ? undefined : opts.featureContextMarkdown).constitution(opts.constitution).testCommand(config2.quality?.commands?.test).hermeticConfig(config2.quality?.testing).build();
30166
30169
  }
30170
+ static verdictRetry() {
30171
+ return `Your previous reply could not be parsed as a valid VerifierVerdict JSON object.
30172
+ ` + `Re-emit the verdict as the FINAL content of your reply.
30173
+ ` + `Output ONLY the JSON object \u2014 no markdown fences, no explanation, no prose.
30174
+ ` + `The reply must start with { and end with } on its own line.
30175
+ ` + "Required top-level fields: version, approved, tests, testModifications, acceptanceCriteria, quality, fixes, reasoning.";
30176
+ }
30177
+ static verdictRetryCondensed() {
30178
+ return `Your previous reply was truncated and could not be parsed as valid JSON.
30179
+ ` + `Re-emit a CONDENSED verdict that omits the acceptanceCriteria.criteria[] entries:
30180
+ ` + `- Keep acceptanceCriteria.allMet (boolean) but use criteria=[] (empty array).
30181
+ ` + `- Keep quality.issues=[] and fixes=[] empty.
30182
+ ` + `- Set testModifications.reasoning to a single sentence.
30183
+ ` + `- Set reasoning to a single sentence.
30184
+ ` + `Output ONLY the JSON object \u2014 no markdown fences, no prose.
30185
+ ` + `Schema (minimal):
30186
+ ` + `{"version":1,"approved":boolean,"tests":{"allPassing":boolean,"passCount":number,"failCount":number},"testModifications":{"detected":boolean,"files":[],"legitimate":boolean,"reasoning":"..."},"acceptanceCriteria":{"allMet":boolean,"criteria":[]},"quality":{"rating":"good"|"acceptable"|"poor","issues":[]},"fixes":[],"reasoning":"..."}`;
30187
+ }
30167
30188
  s(id, content) {
30168
30189
  return { id, content, overridable: false };
30169
30190
  }
@@ -31013,6 +31034,7 @@ Severity guide:
31013
31034
  - If you cannot quote an exact excerpt that proves your point, downgrade the finding to \`"unverifiable"\` rather than fabricating a quote.
31014
31035
 
31015
31036
  **AC-grounding rule \u2014 required for every "error" finding:**
31037
+ - Do NOT write an \`acQuote\` that does not appear verbatim in the listed AC text. If you cannot find an exact verbatim match, set severity to \`warning\` \u2014 never approximate, paraphrase, or synthesise a quote. A finding dropped for a fabricated quote wastes a review cycle and is worse than a correctly classified \`warning\`.
31016
31038
  - \`acQuote\` must be a verbatim substring of one AC bullet (from the Acceptance Criteria above) that names or constrains the exact **symbol** you are flagging \u2014 not merely the file the symbol lives in.
31017
31039
  - \`acIndex\` is the 1-based position of that AC bullet in the list.
31018
31040
  - Copy \`acQuote\` **exactly** from the AC text, including any backticks, asterisks, or punctuation. Do not paraphrase, strip formatting, or rewrite.
@@ -32776,6 +32798,7 @@ function recordAdversarialAudit(opts) {
32776
32798
  looksLikeFail: opts.looksLikeFail,
32777
32799
  failOpen: opts.failOpen,
32778
32800
  passed: opts.passed,
32801
+ passReason: opts.passReason,
32779
32802
  blockingThreshold: opts.blockingThreshold,
32780
32803
  result: opts.result,
32781
32804
  advisoryFindings: opts.advisoryFindings,
@@ -33109,6 +33132,46 @@ ${formatFindings(blockingFindings)}` : "Adversarial review failed (no findings)"
33109
33132
  };
33110
33133
  }
33111
33134
  if (!opResult.passed && acDropped.length > 0) {
33135
+ const allHallucinated = acDropped.every((d) => d.code === "ac_quote_not_substring");
33136
+ if (allHallucinated) {
33137
+ const demotedFindings = toAdversarialReviewFindings(acDropped.map((d) => ({ ...d.finding, severity: "warning", acQuote: undefined, acIndex: undefined })));
33138
+ const existingAdvisory = advisoryFindings.length > 0 ? toAdversarialReviewFindings(advisoryFindings) : [];
33139
+ const allAdvisory = [...existingAdvisory, ...demotedFindings];
33140
+ logger?.warn("review", "Adversarial review passed: all blocking findings discarded as hallucinated AC quotes", {
33141
+ storyId: story.id,
33142
+ durationMs,
33143
+ droppedCount: acDropped.length,
33144
+ drops: acDropped.map((d) => ({ file: d.finding.file, issue: d.finding.issue }))
33145
+ });
33146
+ recordAdversarialAudit({
33147
+ runtime,
33148
+ workdir,
33149
+ projectDir,
33150
+ storyId: story.id,
33151
+ featureName,
33152
+ parsed: true,
33153
+ failOpen: false,
33154
+ passed: true,
33155
+ passReason: "ac_quote_not_substring_demoted",
33156
+ blockingThreshold: threshold,
33157
+ result: { passed: true, findings: [] },
33158
+ advisoryFindings: allAdvisory.length > 0 ? allAdvisory : undefined,
33159
+ diffAvailable,
33160
+ adversarialDropAnalysis,
33161
+ adversarialAcceptAnalysis: []
33162
+ });
33163
+ return {
33164
+ check: "adversarial",
33165
+ success: true,
33166
+ passReason: "ac_quote_not_substring_demoted",
33167
+ command: "",
33168
+ exitCode: 0,
33169
+ output: `Adversarial review passed: ${acDropped.length} blocking finding(s) demoted to advisory \u2014 all cited AC quotes were fabricated and could not be validated.`,
33170
+ durationMs,
33171
+ advisoryFindings: allAdvisory.length > 0 ? allAdvisory : undefined,
33172
+ cost: llmCost
33173
+ };
33174
+ }
33112
33175
  logger?.warn("review", "Adversarial review fail-closed: blocking findings dropped as ungrounded", {
33113
33176
  storyId: story.id,
33114
33177
  durationMs,
@@ -36874,6 +36937,29 @@ var init_verdict = __esm(() => {
36874
36937
  });
36875
36938
 
36876
36939
  // src/operations/verify.ts
36940
+ function parseVerdictFromStdout(output, _input, _ctx) {
36941
+ if (!output || !output.trim()) {
36942
+ throw new ParseValidationError("verifier produced no stdout");
36943
+ }
36944
+ const raw = tryParseLLMJson(output);
36945
+ if (!raw || typeof raw !== "object") {
36946
+ throw new ParseValidationError("verifier stdout is not a JSON object");
36947
+ }
36948
+ const verdict = isValidVerdict(raw) ? raw : coerceVerdict(raw);
36949
+ if (!verdict) {
36950
+ throw new ParseValidationError("verifier stdout JSON missing required VerifierVerdict fields");
36951
+ }
36952
+ const categorization = categorizeVerdict(verdict, verdict.tests.allPassing === true);
36953
+ return {
36954
+ success: categorization.success,
36955
+ filesChanged: [],
36956
+ estimatedCostUsd: 0,
36957
+ durationMs: 0,
36958
+ output,
36959
+ ...categorization.failureCategory && { failureCategory: categorization.failureCategory },
36960
+ ...categorization.reviewReason && { reviewReason: categorization.reviewReason }
36961
+ };
36962
+ }
36877
36963
  async function runVerifierIsolation(beforeRef, ctx) {
36878
36964
  if (!beforeRef)
36879
36965
  return;
@@ -36882,16 +36968,31 @@ async function runVerifierIsolation(beforeRef, ctx) {
36882
36968
  }
36883
36969
  var verifierOp;
36884
36970
  var init_verify = __esm(() => {
36971
+ init_retry();
36885
36972
  init_config();
36973
+ init_tdd_builder();
36886
36974
  init_isolation();
36887
36975
  init_verdict();
36888
- init__session_output();
36889
36976
  verifierOp = {
36890
36977
  kind: "run",
36891
36978
  name: "verifier",
36892
36979
  stage: "verify",
36893
36980
  session: { role: "verifier", lifetime: "fresh" },
36894
36981
  config: tddConfigSelector,
36982
+ retry: makeParseRetryStrategy({
36983
+ validate: (parsed) => {
36984
+ if (!parsed || typeof parsed !== "object")
36985
+ return false;
36986
+ const r = parsed;
36987
+ return isValidVerdict(r) || coerceVerdict(r) !== null;
36988
+ },
36989
+ reviewerKind: "verifier",
36990
+ maxAttempts: 2,
36991
+ prompts: {
36992
+ invalid: () => TddPromptBuilder.verdictRetry(),
36993
+ truncated: () => TddPromptBuilder.verdictRetryCondensed()
36994
+ }
36995
+ }),
36895
36996
  build(input, _ctx) {
36896
36997
  if (input.promptMarkdown?.trim()) {
36897
36998
  return {
@@ -36908,13 +37009,8 @@ var init_verify = __esm(() => {
36908
37009
  }
36909
37010
  };
36910
37011
  },
36911
- parse(output, _input, _ctx) {
36912
- const envelope = parseSessionJsonOutput(output);
36913
- return { ...envelope, estimatedCostUsd: 0, durationMs: 0 };
36914
- },
37012
+ parse: parseVerdictFromStdout,
36915
37013
  async verify(parsed, input, ctx) {
36916
- if (!parsed.success)
36917
- return null;
36918
37014
  const isolation = await runVerifierIsolation(input.beforeRef, ctx);
36919
37015
  return isolation ? { ...parsed, isolation } : parsed;
36920
37016
  },
@@ -36922,20 +37018,28 @@ var init_verify = __esm(() => {
36922
37018
  const packageDir = verifyCtx.packageView.packageDir;
36923
37019
  try {
36924
37020
  const verdict = await readVerdict(packageDir);
36925
- if (!verdict)
36926
- return null;
36927
- const testsAllPassing = verdict.tests.allPassing === true;
36928
- const categorization = categorizeVerdict(verdict, testsAllPassing);
36929
- const isolation = await runVerifierIsolation(input.beforeRef, verifyCtx);
37021
+ if (verdict) {
37022
+ const testsAllPassing = verdict.tests.allPassing === true;
37023
+ const categorization = categorizeVerdict(verdict, testsAllPassing);
37024
+ const isolation = await runVerifierIsolation(input.beforeRef, verifyCtx);
37025
+ return {
37026
+ success: categorization.success,
37027
+ filesChanged: [],
37028
+ estimatedCostUsd: 0,
37029
+ durationMs: 0,
37030
+ output: "",
37031
+ ...categorization.failureCategory && { failureCategory: categorization.failureCategory },
37032
+ ...categorization.reviewReason && { reviewReason: categorization.reviewReason },
37033
+ ...isolation && { isolation }
37034
+ };
37035
+ }
36930
37036
  return {
36931
- success: categorization.success,
37037
+ success: false,
36932
37038
  filesChanged: [],
36933
37039
  estimatedCostUsd: 0,
36934
37040
  durationMs: 0,
36935
37041
  output: "",
36936
- ...categorization.failureCategory && { failureCategory: categorization.failureCategory },
36937
- ...categorization.reviewReason && { reviewReason: categorization.reviewReason },
36938
- ...isolation && { isolation }
37042
+ reviewReason: "verifier produced unparseable verdict in stdout after retries and no usable verdict file on disk"
36939
37043
  };
36940
37044
  } finally {
36941
37045
  await cleanupVerdict(packageDir);
@@ -52779,10 +52883,13 @@ async function refreshReviewInputForDispatch(opName, input) {
52779
52883
  return fallback;
52780
52884
  }
52781
52885
  }
52782
- function formatPhaseResultMessage(opName, success2) {
52886
+ function formatPhaseResultMessage(opName, success2, stage) {
52783
52887
  if (opName === "greenfield-gate") {
52784
52888
  return success2 ? "Greenfield-gate: pre-existing tests detected (not greenfield) \u2014 proceeding with normal TDD" : "Greenfield-gate: no pre-existing tests \u2014 greenfield run, pausing TDD test-writer";
52785
52889
  }
52890
+ if (stage === "rectification") {
52891
+ return `Rectification strategy completed: ${opName}`;
52892
+ }
52786
52893
  return success2 ? `Phase passed: ${opName}` : `Phase failed: ${opName}`;
52787
52894
  }
52788
52895
  function isSlot(value) {
@@ -52975,7 +53082,7 @@ function logUnifiedReviewPhaseStart(storyId, opName) {
52975
53082
  logger?.info("review", "Running adversarial check", { storyId });
52976
53083
  }
52977
53084
  }
52978
- function logDeterministicPhaseOutcome(storyId, opName, output, durationMs, isTddPhase) {
53085
+ function logDeterministicPhaseOutcome(storyId, opName, output, durationMs, isTddPhase, stage) {
52979
53086
  if (isTddPhase)
52980
53087
  return;
52981
53088
  if (opName === "semantic-review" || opName === "adversarial-review")
@@ -52992,7 +53099,11 @@ function logDeterministicPhaseOutcome(storyId, opName, output, durationMs, isTdd
52992
53099
  data.findingsCount = findingsCount;
52993
53100
  if (status !== undefined)
52994
53101
  data.status = status;
52995
- const message = formatPhaseResultMessage(opName, success2);
53102
+ const message = formatPhaseResultMessage(opName, success2, stage);
53103
+ if (stage === "rectification") {
53104
+ logger?.info("story-orchestrator", message, data);
53105
+ return;
53106
+ }
52996
53107
  if (success2) {
52997
53108
  logger?.info("story-orchestrator", message, data);
52998
53109
  } else {
@@ -53069,7 +53180,7 @@ async function runPhase(ctx, slot, phaseCosts, phaseOutputs, isThreeSession = fa
53069
53180
  phaseOutputs[opName] = output;
53070
53181
  emitReviewDecision(ctx, opName, output);
53071
53182
  logUnifiedReviewPhaseResult(ctx.storyId, opName, output);
53072
- logDeterministicPhaseOutcome(ctx.storyId, opName, output, Date.now() - phaseStartedAt, isTddPhase);
53183
+ logDeterministicPhaseOutcome(ctx.storyId, opName, output, Date.now() - phaseStartedAt, isTddPhase, slot.op.stage);
53073
53184
  if (isTddPhase) {
53074
53185
  const durationMs = Date.now() - phaseStartedAt;
53075
53186
  logger?.info("tdd", `Session complete: ${opName}`, {
@@ -53453,23 +53564,8 @@ var init_story_orchestrator = __esm(() => {
53453
53564
  STRATEGY_TO_REVALIDATION_PHASES = {
53454
53565
  "mechanical-lintfix": ["lint-check"],
53455
53566
  "mechanical-formatfix": ["lint-check"],
53456
- "autofix-implementer": [
53457
- "lint-check",
53458
- "typecheck-check",
53459
- "full-suite-gate",
53460
- "verifier",
53461
- "verify-scoped",
53462
- "semantic-review",
53463
- "adversarial-review"
53464
- ],
53465
- "autofix-test-writer": [
53466
- "lint-check",
53467
- "typecheck-check",
53468
- "full-suite-gate",
53469
- "verifier",
53470
- "verify-scoped",
53471
- "adversarial-review"
53472
- ],
53567
+ "autofix-implementer": ["lint-check", "typecheck-check", "full-suite-gate", "semantic-review", "adversarial-review"],
53568
+ "autofix-test-writer": ["lint-check", "typecheck-check", "full-suite-gate", "adversarial-review"],
53473
53569
  "full-suite-rectify": [
53474
53570
  "lint-check",
53475
53571
  "typecheck-check",
@@ -53817,7 +53913,7 @@ function routeTddFailure(failureCategory, isLiteMode, ctx, reviewReason, failure
53817
53913
  }
53818
53914
  return { action: "escalate", reason: buildReason("isolation-violation") };
53819
53915
  }
53820
- if (failureCategory === "session-failure" || failureCategory === "tests-failing" || failureCategory === "full-suite-gate-exhausted" || failureCategory === "verifier-rejected") {
53916
+ if (failureCategory === "session-failure" || failureCategory === "tests-failing" || failureCategory === "full-suite-gate-exhausted" || failureCategory === "verifier-rejected" || failureCategory === "runtime-crash") {
53821
53917
  return { action: "escalate", reason: buildReason(failureCategory) };
53822
53918
  }
53823
53919
  if (failureCategory === "greenfield-no-tests") {
@@ -53989,6 +54085,12 @@ function deriveTddFailureCategory(phaseOutputs, unfixedFindings) {
53989
54085
  return "full-suite-gate-exhausted";
53990
54086
  }
53991
54087
  }
54088
+ if (!verifierPassed) {
54089
+ const rectOutputCrash = phaseOutputs.rectification;
54090
+ if (rectOutputCrash?.exitReason === "validator-error") {
54091
+ return "runtime-crash";
54092
+ }
54093
+ }
53992
54094
  if (!verifierPassed) {
53993
54095
  const gateOutput = phaseOutputs[fullSuiteGateOp.name];
53994
54096
  if (gateOutput && (gateOutput.success === false || gateOutput.passed === false)) {
@@ -54321,9 +54423,10 @@ var init_post_run = __esm(() => {
54321
54423
  });
54322
54424
 
54323
54425
  // src/pipeline/stages/execution.ts
54324
- var executionStage, _executionDeps;
54426
+ var RUNTIME_CRASH_CODES, executionStage, _executionDeps;
54325
54427
  var init_execution = __esm(() => {
54326
54428
  init_agents();
54429
+ init_errors();
54327
54430
  init_build_plan_for_strategy();
54328
54431
  init_plan_inputs();
54329
54432
  init_post_run();
@@ -54331,6 +54434,7 @@ var init_execution = __esm(() => {
54331
54434
  init_logger2();
54332
54435
  init_git();
54333
54436
  init_execution_helpers();
54437
+ RUNTIME_CRASH_CODES = new Set(["CALL_OP_NO_OUTPUT", "CALL_OP_MAX_RETRIES"]);
54334
54438
  executionStage = {
54335
54439
  name: "execution",
54336
54440
  enabled: () => true,
@@ -54389,10 +54493,15 @@ var init_execution = __esm(() => {
54389
54493
  } : null;
54390
54494
  const initialRef = tddMode ? await _executionDeps.captureGitRef(ctx.workdir) ?? "HEAD" : null;
54391
54495
  const inputs = await _executionDeps.assemblePlanInputsFromCtx(ctx);
54392
- const plan = await buildPlanForStrategy(callCtx, ctx.story, ctx.config, ctx.routing.testStrategy, inputs);
54496
+ const plan = await _executionDeps.buildPlanForStrategy(callCtx, ctx.story, ctx.config, ctx.routing.testStrategy, inputs);
54393
54497
  let planResult;
54394
54498
  try {
54395
54499
  planResult = await plan.run();
54500
+ } catch (err) {
54501
+ if (err instanceof NaxError && RUNTIME_CRASH_CODES.has(err.code)) {
54502
+ ctx.tddFailureCategory = "runtime-crash";
54503
+ }
54504
+ throw err;
54396
54505
  } finally {
54397
54506
  unsubscribe();
54398
54507
  }
@@ -54414,6 +54523,7 @@ var init_execution = __esm(() => {
54414
54523
  validateAgentForTier,
54415
54524
  captureGitRef,
54416
54525
  assemblePlanInputsFromCtx,
54526
+ buildPlanForStrategy,
54417
54527
  applyPostRunInspection,
54418
54528
  decideStageAction
54419
54529
  };
@@ -57833,7 +57943,7 @@ var package_default;
57833
57943
  var init_package = __esm(() => {
57834
57944
  package_default = {
57835
57945
  name: "@nathapp/nax",
57836
- version: "0.67.17",
57946
+ version: "0.67.18",
57837
57947
  description: "AI Coding Agent Orchestrator \u2014 loops until done",
57838
57948
  type: "module",
57839
57949
  bin: {
@@ -57928,8 +58038,8 @@ var init_version = __esm(() => {
57928
58038
  NAX_VERSION = package_default.version;
57929
58039
  NAX_COMMIT = (() => {
57930
58040
  try {
57931
- if (/^[0-9a-f]{6,10}$/.test("74621ad5"))
57932
- return "74621ad5";
58041
+ if (/^[0-9a-f]{6,10}$/.test("cc7adcea"))
58042
+ return "cc7adcea";
57933
58043
  } catch {}
57934
58044
  try {
57935
58045
  const result = Bun.spawnSync(["git", "rev-parse", "--short", "HEAD"], {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@nathapp/nax",
3
- "version": "0.67.17",
3
+ "version": "0.67.18",
4
4
  "description": "AI Coding Agent Orchestrator — loops until done",
5
5
  "type": "module",
6
6
  "bin": {