@nathapp/nax 0.67.16 → 0.67.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/nax.js +194 -60
- package/package.json +1 -1
package/dist/nax.js
CHANGED
|
@@ -29750,11 +29750,14 @@ function buildStorySection(story) {
|
|
|
29750
29750
|
function buildVerdictSection(story) {
|
|
29751
29751
|
return `# Verdict Instructions
|
|
29752
29752
|
|
|
29753
|
-
## Write Verdict File
|
|
29753
|
+
## Write Verdict File and Emit JSON in Final Reply
|
|
29754
29754
|
|
|
29755
|
-
After completing your verification, you **MUST**
|
|
29755
|
+
After completing your verification, you **MUST** do BOTH of the following:
|
|
29756
29756
|
|
|
29757
|
-
**
|
|
29757
|
+
1. Write the verdict file at the **project root**: \`.nax-verifier-verdict.json\`
|
|
29758
|
+
2. Emit the same verdict JSON as the FINAL content of your reply \u2014 no prose
|
|
29759
|
+
before or after, no markdown fences. Your reply must end with a closing
|
|
29760
|
+
brace \`}\` on its own line. The orchestrator parses your reply as JSON.
|
|
29758
29761
|
|
|
29759
29762
|
Set \`approved: true\` when ALL of these conditions are met:
|
|
29760
29763
|
- All story-scoped tests pass (the orchestrator already attempted the full-suite gate \u2014 you only need to verify the story's own tests)
|
|
@@ -29778,7 +29781,7 @@ Set \`approved: false\` when ANY of these conditions are true:
|
|
|
29778
29781
|
- \`fixes\` \u2014 keep this empty; the verifier must not apply code or test fixes
|
|
29779
29782
|
- \`reasoning\` \u2014 brief summary of your overall assessment
|
|
29780
29783
|
|
|
29781
|
-
When done, do not commit code changes.
|
|
29784
|
+
When done, do not commit code changes. Write the verdict file, then end your reply with the JSON object.`;
|
|
29782
29785
|
}
|
|
29783
29786
|
|
|
29784
29787
|
// src/prompts/sections/conventions.ts
|
|
@@ -30164,6 +30167,24 @@ class TddPromptBuilder {
|
|
|
30164
30167
|
const isolation = role === "test-writer" ? opts.lite ? "lite" : "strict" : undefined;
|
|
30165
30168
|
return TddPromptBuilder.for(role, { variant, isolation }).withLoader(workdir, config2).story(story).context(opts.contextMarkdown).v2FeatureContext(opts.contextBundle?.pushMarkdown).featureContext(opts.contextBundle ? undefined : opts.featureContextMarkdown).constitution(opts.constitution).testCommand(config2.quality?.commands?.test).hermeticConfig(config2.quality?.testing).build();
|
|
30166
30169
|
}
|
|
30170
|
+
static verdictRetry() {
|
|
30171
|
+
return `Your previous reply could not be parsed as a valid VerifierVerdict JSON object.
|
|
30172
|
+
` + `Re-emit the verdict as the FINAL content of your reply.
|
|
30173
|
+
` + `Output ONLY the JSON object \u2014 no markdown fences, no explanation, no prose.
|
|
30174
|
+
` + `The reply must start with { and end with } on its own line.
|
|
30175
|
+
` + "Required top-level fields: version, approved, tests, testModifications, acceptanceCriteria, quality, fixes, reasoning.";
|
|
30176
|
+
}
|
|
30177
|
+
static verdictRetryCondensed() {
|
|
30178
|
+
return `Your previous reply was truncated and could not be parsed as valid JSON.
|
|
30179
|
+
` + `Re-emit a CONDENSED verdict that omits the acceptanceCriteria.criteria[] entries:
|
|
30180
|
+
` + `- Keep acceptanceCriteria.allMet (boolean) but use criteria=[] (empty array).
|
|
30181
|
+
` + `- Keep quality.issues=[] and fixes=[] empty.
|
|
30182
|
+
` + `- Set testModifications.reasoning to a single sentence.
|
|
30183
|
+
` + `- Set reasoning to a single sentence.
|
|
30184
|
+
` + `Output ONLY the JSON object \u2014 no markdown fences, no prose.
|
|
30185
|
+
` + `Schema (minimal):
|
|
30186
|
+
` + `{"version":1,"approved":boolean,"tests":{"allPassing":boolean,"passCount":number,"failCount":number},"testModifications":{"detected":boolean,"files":[],"legitimate":boolean,"reasoning":"..."},"acceptanceCriteria":{"allMet":boolean,"criteria":[]},"quality":{"rating":"good"|"acceptable"|"poor","issues":[]},"fixes":[],"reasoning":"..."}`;
|
|
30187
|
+
}
|
|
30167
30188
|
s(id, content) {
|
|
30168
30189
|
return { id, content, overridable: false };
|
|
30169
30190
|
}
|
|
@@ -31013,6 +31034,7 @@ Severity guide:
|
|
|
31013
31034
|
- If you cannot quote an exact excerpt that proves your point, downgrade the finding to \`"unverifiable"\` rather than fabricating a quote.
|
|
31014
31035
|
|
|
31015
31036
|
**AC-grounding rule \u2014 required for every "error" finding:**
|
|
31037
|
+
- Do NOT write an \`acQuote\` that does not appear verbatim in the listed AC text. If you cannot find an exact verbatim match, set severity to \`warning\` \u2014 never approximate, paraphrase, or synthesise a quote. A finding dropped for a fabricated quote wastes a review cycle and is worse than a correctly classified \`warning\`.
|
|
31016
31038
|
- \`acQuote\` must be a verbatim substring of one AC bullet (from the Acceptance Criteria above) that names or constrains the exact **symbol** you are flagging \u2014 not merely the file the symbol lives in.
|
|
31017
31039
|
- \`acIndex\` is the 1-based position of that AC bullet in the list.
|
|
31018
31040
|
- Copy \`acQuote\` **exactly** from the AC text, including any backticks, asterisks, or punctuation. Do not paraphrase, strip formatting, or rewrite.
|
|
@@ -32776,6 +32798,7 @@ function recordAdversarialAudit(opts) {
|
|
|
32776
32798
|
looksLikeFail: opts.looksLikeFail,
|
|
32777
32799
|
failOpen: opts.failOpen,
|
|
32778
32800
|
passed: opts.passed,
|
|
32801
|
+
passReason: opts.passReason,
|
|
32779
32802
|
blockingThreshold: opts.blockingThreshold,
|
|
32780
32803
|
result: opts.result,
|
|
32781
32804
|
advisoryFindings: opts.advisoryFindings,
|
|
@@ -33109,6 +33132,46 @@ ${formatFindings(blockingFindings)}` : "Adversarial review failed (no findings)"
|
|
|
33109
33132
|
};
|
|
33110
33133
|
}
|
|
33111
33134
|
if (!opResult.passed && acDropped.length > 0) {
|
|
33135
|
+
const allHallucinated = acDropped.every((d) => d.code === "ac_quote_not_substring");
|
|
33136
|
+
if (allHallucinated) {
|
|
33137
|
+
const demotedFindings = toAdversarialReviewFindings(acDropped.map((d) => ({ ...d.finding, severity: "warning", acQuote: undefined, acIndex: undefined })));
|
|
33138
|
+
const existingAdvisory = advisoryFindings.length > 0 ? toAdversarialReviewFindings(advisoryFindings) : [];
|
|
33139
|
+
const allAdvisory = [...existingAdvisory, ...demotedFindings];
|
|
33140
|
+
logger?.warn("review", "Adversarial review passed: all blocking findings discarded as hallucinated AC quotes", {
|
|
33141
|
+
storyId: story.id,
|
|
33142
|
+
durationMs,
|
|
33143
|
+
droppedCount: acDropped.length,
|
|
33144
|
+
drops: acDropped.map((d) => ({ file: d.finding.file, issue: d.finding.issue }))
|
|
33145
|
+
});
|
|
33146
|
+
recordAdversarialAudit({
|
|
33147
|
+
runtime,
|
|
33148
|
+
workdir,
|
|
33149
|
+
projectDir,
|
|
33150
|
+
storyId: story.id,
|
|
33151
|
+
featureName,
|
|
33152
|
+
parsed: true,
|
|
33153
|
+
failOpen: false,
|
|
33154
|
+
passed: true,
|
|
33155
|
+
passReason: "ac_quote_not_substring_demoted",
|
|
33156
|
+
blockingThreshold: threshold,
|
|
33157
|
+
result: { passed: true, findings: [] },
|
|
33158
|
+
advisoryFindings: allAdvisory.length > 0 ? allAdvisory : undefined,
|
|
33159
|
+
diffAvailable,
|
|
33160
|
+
adversarialDropAnalysis,
|
|
33161
|
+
adversarialAcceptAnalysis: []
|
|
33162
|
+
});
|
|
33163
|
+
return {
|
|
33164
|
+
check: "adversarial",
|
|
33165
|
+
success: true,
|
|
33166
|
+
passReason: "ac_quote_not_substring_demoted",
|
|
33167
|
+
command: "",
|
|
33168
|
+
exitCode: 0,
|
|
33169
|
+
output: `Adversarial review passed: ${acDropped.length} blocking finding(s) demoted to advisory \u2014 all cited AC quotes were fabricated and could not be validated.`,
|
|
33170
|
+
durationMs,
|
|
33171
|
+
advisoryFindings: allAdvisory.length > 0 ? allAdvisory : undefined,
|
|
33172
|
+
cost: llmCost
|
|
33173
|
+
};
|
|
33174
|
+
}
|
|
33112
33175
|
logger?.warn("review", "Adversarial review fail-closed: blocking findings dropped as ungrounded", {
|
|
33113
33176
|
storyId: story.id,
|
|
33114
33177
|
durationMs,
|
|
@@ -36874,6 +36937,29 @@ var init_verdict = __esm(() => {
|
|
|
36874
36937
|
});
|
|
36875
36938
|
|
|
36876
36939
|
// src/operations/verify.ts
|
|
36940
|
+
function parseVerdictFromStdout(output, _input, _ctx) {
|
|
36941
|
+
if (!output || !output.trim()) {
|
|
36942
|
+
throw new ParseValidationError("verifier produced no stdout");
|
|
36943
|
+
}
|
|
36944
|
+
const raw = tryParseLLMJson(output);
|
|
36945
|
+
if (!raw || typeof raw !== "object") {
|
|
36946
|
+
throw new ParseValidationError("verifier stdout is not a JSON object");
|
|
36947
|
+
}
|
|
36948
|
+
const verdict = isValidVerdict(raw) ? raw : coerceVerdict(raw);
|
|
36949
|
+
if (!verdict) {
|
|
36950
|
+
throw new ParseValidationError("verifier stdout JSON missing required VerifierVerdict fields");
|
|
36951
|
+
}
|
|
36952
|
+
const categorization = categorizeVerdict(verdict, verdict.tests.allPassing === true);
|
|
36953
|
+
return {
|
|
36954
|
+
success: categorization.success,
|
|
36955
|
+
filesChanged: [],
|
|
36956
|
+
estimatedCostUsd: 0,
|
|
36957
|
+
durationMs: 0,
|
|
36958
|
+
output,
|
|
36959
|
+
...categorization.failureCategory && { failureCategory: categorization.failureCategory },
|
|
36960
|
+
...categorization.reviewReason && { reviewReason: categorization.reviewReason }
|
|
36961
|
+
};
|
|
36962
|
+
}
|
|
36877
36963
|
async function runVerifierIsolation(beforeRef, ctx) {
|
|
36878
36964
|
if (!beforeRef)
|
|
36879
36965
|
return;
|
|
@@ -36882,16 +36968,31 @@ async function runVerifierIsolation(beforeRef, ctx) {
|
|
|
36882
36968
|
}
|
|
36883
36969
|
var verifierOp;
|
|
36884
36970
|
var init_verify = __esm(() => {
|
|
36971
|
+
init_retry();
|
|
36885
36972
|
init_config();
|
|
36973
|
+
init_tdd_builder();
|
|
36886
36974
|
init_isolation();
|
|
36887
36975
|
init_verdict();
|
|
36888
|
-
init__session_output();
|
|
36889
36976
|
verifierOp = {
|
|
36890
36977
|
kind: "run",
|
|
36891
36978
|
name: "verifier",
|
|
36892
36979
|
stage: "verify",
|
|
36893
36980
|
session: { role: "verifier", lifetime: "fresh" },
|
|
36894
36981
|
config: tddConfigSelector,
|
|
36982
|
+
retry: makeParseRetryStrategy({
|
|
36983
|
+
validate: (parsed) => {
|
|
36984
|
+
if (!parsed || typeof parsed !== "object")
|
|
36985
|
+
return false;
|
|
36986
|
+
const r = parsed;
|
|
36987
|
+
return isValidVerdict(r) || coerceVerdict(r) !== null;
|
|
36988
|
+
},
|
|
36989
|
+
reviewerKind: "verifier",
|
|
36990
|
+
maxAttempts: 2,
|
|
36991
|
+
prompts: {
|
|
36992
|
+
invalid: () => TddPromptBuilder.verdictRetry(),
|
|
36993
|
+
truncated: () => TddPromptBuilder.verdictRetryCondensed()
|
|
36994
|
+
}
|
|
36995
|
+
}),
|
|
36895
36996
|
build(input, _ctx) {
|
|
36896
36997
|
if (input.promptMarkdown?.trim()) {
|
|
36897
36998
|
return {
|
|
@@ -36908,13 +37009,8 @@ var init_verify = __esm(() => {
|
|
|
36908
37009
|
}
|
|
36909
37010
|
};
|
|
36910
37011
|
},
|
|
36911
|
-
parse
|
|
36912
|
-
const envelope = parseSessionJsonOutput(output);
|
|
36913
|
-
return { ...envelope, estimatedCostUsd: 0, durationMs: 0 };
|
|
36914
|
-
},
|
|
37012
|
+
parse: parseVerdictFromStdout,
|
|
36915
37013
|
async verify(parsed, input, ctx) {
|
|
36916
|
-
if (!parsed.success)
|
|
36917
|
-
return null;
|
|
36918
37014
|
const isolation = await runVerifierIsolation(input.beforeRef, ctx);
|
|
36919
37015
|
return isolation ? { ...parsed, isolation } : parsed;
|
|
36920
37016
|
},
|
|
@@ -36922,20 +37018,28 @@ var init_verify = __esm(() => {
|
|
|
36922
37018
|
const packageDir = verifyCtx.packageView.packageDir;
|
|
36923
37019
|
try {
|
|
36924
37020
|
const verdict = await readVerdict(packageDir);
|
|
36925
|
-
if (
|
|
36926
|
-
|
|
36927
|
-
|
|
36928
|
-
|
|
36929
|
-
|
|
37021
|
+
if (verdict) {
|
|
37022
|
+
const testsAllPassing = verdict.tests.allPassing === true;
|
|
37023
|
+
const categorization = categorizeVerdict(verdict, testsAllPassing);
|
|
37024
|
+
const isolation = await runVerifierIsolation(input.beforeRef, verifyCtx);
|
|
37025
|
+
return {
|
|
37026
|
+
success: categorization.success,
|
|
37027
|
+
filesChanged: [],
|
|
37028
|
+
estimatedCostUsd: 0,
|
|
37029
|
+
durationMs: 0,
|
|
37030
|
+
output: "",
|
|
37031
|
+
...categorization.failureCategory && { failureCategory: categorization.failureCategory },
|
|
37032
|
+
...categorization.reviewReason && { reviewReason: categorization.reviewReason },
|
|
37033
|
+
...isolation && { isolation }
|
|
37034
|
+
};
|
|
37035
|
+
}
|
|
36930
37036
|
return {
|
|
36931
|
-
success:
|
|
37037
|
+
success: false,
|
|
36932
37038
|
filesChanged: [],
|
|
36933
37039
|
estimatedCostUsd: 0,
|
|
36934
37040
|
durationMs: 0,
|
|
36935
37041
|
output: "",
|
|
36936
|
-
|
|
36937
|
-
...categorization.reviewReason && { reviewReason: categorization.reviewReason },
|
|
36938
|
-
...isolation && { isolation }
|
|
37042
|
+
reviewReason: "verifier produced unparseable verdict in stdout after retries and no usable verdict file on disk"
|
|
36939
37043
|
};
|
|
36940
37044
|
} finally {
|
|
36941
37045
|
await cleanupVerdict(packageDir);
|
|
@@ -52779,10 +52883,13 @@ async function refreshReviewInputForDispatch(opName, input) {
|
|
|
52779
52883
|
return fallback;
|
|
52780
52884
|
}
|
|
52781
52885
|
}
|
|
52782
|
-
function formatPhaseResultMessage(opName, success2) {
|
|
52886
|
+
function formatPhaseResultMessage(opName, success2, stage) {
|
|
52783
52887
|
if (opName === "greenfield-gate") {
|
|
52784
52888
|
return success2 ? "Greenfield-gate: pre-existing tests detected (not greenfield) \u2014 proceeding with normal TDD" : "Greenfield-gate: no pre-existing tests \u2014 greenfield run, pausing TDD test-writer";
|
|
52785
52889
|
}
|
|
52890
|
+
if (stage === "rectification") {
|
|
52891
|
+
return `Rectification strategy completed: ${opName}`;
|
|
52892
|
+
}
|
|
52786
52893
|
return success2 ? `Phase passed: ${opName}` : `Phase failed: ${opName}`;
|
|
52787
52894
|
}
|
|
52788
52895
|
function isSlot(value) {
|
|
@@ -52975,7 +53082,7 @@ function logUnifiedReviewPhaseStart(storyId, opName) {
|
|
|
52975
53082
|
logger?.info("review", "Running adversarial check", { storyId });
|
|
52976
53083
|
}
|
|
52977
53084
|
}
|
|
52978
|
-
function logDeterministicPhaseOutcome(storyId, opName, output, durationMs, isTddPhase) {
|
|
53085
|
+
function logDeterministicPhaseOutcome(storyId, opName, output, durationMs, isTddPhase, stage) {
|
|
52979
53086
|
if (isTddPhase)
|
|
52980
53087
|
return;
|
|
52981
53088
|
if (opName === "semantic-review" || opName === "adversarial-review")
|
|
@@ -52992,7 +53099,11 @@ function logDeterministicPhaseOutcome(storyId, opName, output, durationMs, isTdd
|
|
|
52992
53099
|
data.findingsCount = findingsCount;
|
|
52993
53100
|
if (status !== undefined)
|
|
52994
53101
|
data.status = status;
|
|
52995
|
-
const message = formatPhaseResultMessage(opName, success2);
|
|
53102
|
+
const message = formatPhaseResultMessage(opName, success2, stage);
|
|
53103
|
+
if (stage === "rectification") {
|
|
53104
|
+
logger?.info("story-orchestrator", message, data);
|
|
53105
|
+
return;
|
|
53106
|
+
}
|
|
52996
53107
|
if (success2) {
|
|
52997
53108
|
logger?.info("story-orchestrator", message, data);
|
|
52998
53109
|
} else {
|
|
@@ -53069,7 +53180,7 @@ async function runPhase(ctx, slot, phaseCosts, phaseOutputs, isThreeSession = fa
|
|
|
53069
53180
|
phaseOutputs[opName] = output;
|
|
53070
53181
|
emitReviewDecision(ctx, opName, output);
|
|
53071
53182
|
logUnifiedReviewPhaseResult(ctx.storyId, opName, output);
|
|
53072
|
-
logDeterministicPhaseOutcome(ctx.storyId, opName, output, Date.now() - phaseStartedAt, isTddPhase);
|
|
53183
|
+
logDeterministicPhaseOutcome(ctx.storyId, opName, output, Date.now() - phaseStartedAt, isTddPhase, slot.op.stage);
|
|
53073
53184
|
if (isTddPhase) {
|
|
53074
53185
|
const durationMs = Date.now() - phaseStartedAt;
|
|
53075
53186
|
logger?.info("tdd", `Session complete: ${opName}`, {
|
|
@@ -53205,14 +53316,7 @@ async function runRectification(ctx, state, phaseCosts, phaseOutputs) {
|
|
|
53205
53316
|
storyId: ctx.storyId
|
|
53206
53317
|
});
|
|
53207
53318
|
}
|
|
53208
|
-
|
|
53209
|
-
"max-attempts-total",
|
|
53210
|
-
"max-attempts-per-strategy",
|
|
53211
|
-
"bail-when",
|
|
53212
|
-
"no-strategy",
|
|
53213
|
-
"agent-gave-up"
|
|
53214
|
-
]);
|
|
53215
|
-
if (exhaustedReasons.has(cycleResult.exitReason) && cycleResult.finalFindings.length > 0) {
|
|
53319
|
+
if (EXHAUSTED_EXIT_REASONS.has(cycleResult.exitReason) && cycleResult.finalFindings.length > 0) {
|
|
53216
53320
|
return { rectificationExhausted: true, unfixedFindings: cycleResult.finalFindings };
|
|
53217
53321
|
}
|
|
53218
53322
|
return {};
|
|
@@ -53260,6 +53364,7 @@ class ExecutionPlan {
|
|
|
53260
53364
|
}
|
|
53261
53365
|
const rectResult = await runRectification(this.ctx, this.state, phaseCosts, phaseOutputs);
|
|
53262
53366
|
if (this.state.rectification && !rectResult.rectificationExhausted) {
|
|
53367
|
+
let resumeRectifyUsed = false;
|
|
53263
53368
|
for (const phase of collectOrderedPhases(this.state)) {
|
|
53264
53369
|
const name = phase.slot.op.name;
|
|
53265
53370
|
if (name in phaseOutputs && phasePassed(name, phaseOutputs[name], this.ctx.storyId)) {
|
|
@@ -53276,10 +53381,27 @@ class ExecutionPlan {
|
|
|
53276
53381
|
throw error48;
|
|
53277
53382
|
}
|
|
53278
53383
|
if (!phasePassed(name, phaseOutputs[name], this.ctx.storyId)) {
|
|
53384
|
+
if (!resumeRectifyUsed) {
|
|
53385
|
+
resumeRectifyUsed = true;
|
|
53386
|
+
logger?.info("story-orchestrator", "Phase failed in post-rectification resume \u2014 invoking second rectification pass", { storyId: this.ctx.storyId, phase: name, source: "post-rectification-resume" });
|
|
53387
|
+
const secondRect = await runRectification(this.ctx, this.state, phaseCosts, phaseOutputs);
|
|
53388
|
+
if (secondRect.rectificationExhausted) {
|
|
53389
|
+
logger?.warn("story-orchestrator", "Second rectification pass exhausted \u2014 terminal failure", {
|
|
53390
|
+
storyId: this.ctx.storyId,
|
|
53391
|
+
phase: name,
|
|
53392
|
+
source: "post-rectification-resume"
|
|
53393
|
+
});
|
|
53394
|
+
break;
|
|
53395
|
+
}
|
|
53396
|
+
if (phasePassed(name, phaseOutputs[name], this.ctx.storyId)) {
|
|
53397
|
+
continue;
|
|
53398
|
+
}
|
|
53399
|
+
}
|
|
53279
53400
|
logger?.warn("story-orchestrator", "Terminal phase failure (post-rectification resume \u2014 bypasses rectification)", {
|
|
53280
53401
|
storyId: this.ctx.storyId,
|
|
53281
53402
|
phase: name,
|
|
53282
|
-
source: "post-rectification-resume"
|
|
53403
|
+
source: "post-rectification-resume",
|
|
53404
|
+
secondRectifyUsed: resumeRectifyUsed
|
|
53283
53405
|
});
|
|
53284
53406
|
break;
|
|
53285
53407
|
}
|
|
@@ -53384,7 +53506,7 @@ class StoryOrchestratorBuilder {
|
|
|
53384
53506
|
return new ExecutionPlan(ctx, { ...this.state }, opts.isThreeSession ?? false);
|
|
53385
53507
|
}
|
|
53386
53508
|
}
|
|
53387
|
-
var _storyOrchestratorDeps, TDD_OP_NAMES, STRICT_VERDICT_PHASE_NAMES, CANONICAL_ORDER, PHASE_KIND_TO_STATE_KEY, STRATEGY_TO_REVALIDATION_PHASES;
|
|
53509
|
+
var _storyOrchestratorDeps, EXHAUSTED_EXIT_REASONS, TDD_OP_NAMES, STRICT_VERDICT_PHASE_NAMES, CANONICAL_ORDER, PHASE_KIND_TO_STATE_KEY, STRATEGY_TO_REVALIDATION_PHASES;
|
|
53388
53510
|
var init_story_orchestrator = __esm(() => {
|
|
53389
53511
|
init_errors();
|
|
53390
53512
|
init_findings();
|
|
@@ -53400,6 +53522,13 @@ var init_story_orchestrator = __esm(() => {
|
|
|
53400
53522
|
prepareSemanticReviewInput,
|
|
53401
53523
|
prepareAdversarialReviewInput
|
|
53402
53524
|
};
|
|
53525
|
+
EXHAUSTED_EXIT_REASONS = new Set([
|
|
53526
|
+
"max-attempts-total",
|
|
53527
|
+
"max-attempts-per-strategy",
|
|
53528
|
+
"bail-when",
|
|
53529
|
+
"no-strategy",
|
|
53530
|
+
"agent-gave-up"
|
|
53531
|
+
]);
|
|
53403
53532
|
TDD_OP_NAMES = new Set(["test-writer", "implementer", "verifier"]);
|
|
53404
53533
|
STRICT_VERDICT_PHASE_NAMES = new Set([
|
|
53405
53534
|
fullSuiteGateOp.name,
|
|
@@ -53435,23 +53564,8 @@ var init_story_orchestrator = __esm(() => {
|
|
|
53435
53564
|
STRATEGY_TO_REVALIDATION_PHASES = {
|
|
53436
53565
|
"mechanical-lintfix": ["lint-check"],
|
|
53437
53566
|
"mechanical-formatfix": ["lint-check"],
|
|
53438
|
-
"autofix-implementer": [
|
|
53439
|
-
|
|
53440
|
-
"typecheck-check",
|
|
53441
|
-
"full-suite-gate",
|
|
53442
|
-
"verifier",
|
|
53443
|
-
"verify-scoped",
|
|
53444
|
-
"semantic-review",
|
|
53445
|
-
"adversarial-review"
|
|
53446
|
-
],
|
|
53447
|
-
"autofix-test-writer": [
|
|
53448
|
-
"lint-check",
|
|
53449
|
-
"typecheck-check",
|
|
53450
|
-
"full-suite-gate",
|
|
53451
|
-
"verifier",
|
|
53452
|
-
"verify-scoped",
|
|
53453
|
-
"adversarial-review"
|
|
53454
|
-
],
|
|
53567
|
+
"autofix-implementer": ["lint-check", "typecheck-check", "full-suite-gate", "semantic-review", "adversarial-review"],
|
|
53568
|
+
"autofix-test-writer": ["lint-check", "typecheck-check", "full-suite-gate", "adversarial-review"],
|
|
53455
53569
|
"full-suite-rectify": [
|
|
53456
53570
|
"lint-check",
|
|
53457
53571
|
"typecheck-check",
|
|
@@ -53799,7 +53913,7 @@ function routeTddFailure(failureCategory, isLiteMode, ctx, reviewReason, failure
|
|
|
53799
53913
|
}
|
|
53800
53914
|
return { action: "escalate", reason: buildReason("isolation-violation") };
|
|
53801
53915
|
}
|
|
53802
|
-
if (failureCategory === "session-failure" || failureCategory === "tests-failing" || failureCategory === "full-suite-gate-exhausted" || failureCategory === "verifier-rejected") {
|
|
53916
|
+
if (failureCategory === "session-failure" || failureCategory === "tests-failing" || failureCategory === "full-suite-gate-exhausted" || failureCategory === "verifier-rejected" || failureCategory === "runtime-crash") {
|
|
53803
53917
|
return { action: "escalate", reason: buildReason(failureCategory) };
|
|
53804
53918
|
}
|
|
53805
53919
|
if (failureCategory === "greenfield-no-tests") {
|
|
@@ -53948,7 +54062,7 @@ function extractPauseReason(phaseOutputs) {
|
|
|
53948
54062
|
}
|
|
53949
54063
|
return;
|
|
53950
54064
|
}
|
|
53951
|
-
function deriveTddFailureCategory(phaseOutputs) {
|
|
54065
|
+
function deriveTddFailureCategory(phaseOutputs, unfixedFindings) {
|
|
53952
54066
|
const testWriterOutput = phaseOutputs[testWriterOp.name];
|
|
53953
54067
|
if (testWriterOutput?.success === false) {
|
|
53954
54068
|
return "session-failure";
|
|
@@ -53965,6 +54079,18 @@ function deriveTddFailureCategory(phaseOutputs) {
|
|
|
53965
54079
|
return "tests-failing";
|
|
53966
54080
|
}
|
|
53967
54081
|
const verifierPassed = verifierOutput?.success === true;
|
|
54082
|
+
if (!verifierPassed && unfixedFindings && unfixedFindings.length > 0) {
|
|
54083
|
+
const rectOutput = phaseOutputs.rectification;
|
|
54084
|
+
if (rectOutput?.exitReason && EXHAUSTED_EXIT_REASONS.has(rectOutput.exitReason) && unfixedFindings.some((f) => f.source === "test-runner")) {
|
|
54085
|
+
return "full-suite-gate-exhausted";
|
|
54086
|
+
}
|
|
54087
|
+
}
|
|
54088
|
+
if (!verifierPassed) {
|
|
54089
|
+
const rectOutputCrash = phaseOutputs.rectification;
|
|
54090
|
+
if (rectOutputCrash?.exitReason === "validator-error") {
|
|
54091
|
+
return "runtime-crash";
|
|
54092
|
+
}
|
|
54093
|
+
}
|
|
53968
54094
|
if (!verifierPassed) {
|
|
53969
54095
|
const gateOutput = phaseOutputs[fullSuiteGateOp.name];
|
|
53970
54096
|
if (gateOutput && (gateOutput.success === false || gateOutput.passed === false)) {
|
|
@@ -53997,7 +54123,6 @@ async function applyPostRunInspection(ctx, planResult, opts) {
|
|
|
53997
54123
|
...capturedTokenUsage ? { tokenUsage: capturedTokenUsage } : {}
|
|
53998
54124
|
};
|
|
53999
54125
|
ctx.agentResult = agentResult;
|
|
54000
|
-
ctx.agentSwapCount = 0;
|
|
54001
54126
|
const fullSuiteGateOutput = planResult.phaseOutputs[fullSuiteGateOp.name];
|
|
54002
54127
|
if (fullSuiteGateOutput?.passed) {
|
|
54003
54128
|
ctx.fullSuiteGatePassed = true;
|
|
@@ -54068,7 +54193,7 @@ async function applyPostRunInspection(ctx, planResult, opts) {
|
|
|
54068
54193
|
}
|
|
54069
54194
|
}
|
|
54070
54195
|
const pauseReason = extractPauseReason(planResult.phaseOutputs);
|
|
54071
|
-
const failureCategory = isTdd && !planResult.success ? deriveTddFailureCategory(planResult.phaseOutputs) : undefined;
|
|
54196
|
+
const failureCategory = isTdd && !planResult.success ? deriveTddFailureCategory(planResult.phaseOutputs, planResult.unfixedFindings) : undefined;
|
|
54072
54197
|
if (isTdd && !planResult.success && !failureCategory) {
|
|
54073
54198
|
const phaseSignals = {};
|
|
54074
54199
|
for (const [name, output] of Object.entries(planResult.phaseOutputs)) {
|
|
@@ -54287,6 +54412,7 @@ var init_post_run = __esm(() => {
|
|
|
54287
54412
|
init_scratch_writer();
|
|
54288
54413
|
init_rollback();
|
|
54289
54414
|
init_git();
|
|
54415
|
+
init_story_orchestrator();
|
|
54290
54416
|
_postRunDeps = {
|
|
54291
54417
|
detectMergeConflict,
|
|
54292
54418
|
checkMergeConflict,
|
|
@@ -54297,9 +54423,10 @@ var init_post_run = __esm(() => {
|
|
|
54297
54423
|
});
|
|
54298
54424
|
|
|
54299
54425
|
// src/pipeline/stages/execution.ts
|
|
54300
|
-
var executionStage, _executionDeps;
|
|
54426
|
+
var RUNTIME_CRASH_CODES, executionStage, _executionDeps;
|
|
54301
54427
|
var init_execution = __esm(() => {
|
|
54302
54428
|
init_agents();
|
|
54429
|
+
init_errors();
|
|
54303
54430
|
init_build_plan_for_strategy();
|
|
54304
54431
|
init_plan_inputs();
|
|
54305
54432
|
init_post_run();
|
|
@@ -54307,6 +54434,7 @@ var init_execution = __esm(() => {
|
|
|
54307
54434
|
init_logger2();
|
|
54308
54435
|
init_git();
|
|
54309
54436
|
init_execution_helpers();
|
|
54437
|
+
RUNTIME_CRASH_CODES = new Set(["CALL_OP_NO_OUTPUT", "CALL_OP_MAX_RETRIES"]);
|
|
54310
54438
|
executionStage = {
|
|
54311
54439
|
name: "execution",
|
|
54312
54440
|
enabled: () => true,
|
|
@@ -54365,10 +54493,15 @@ var init_execution = __esm(() => {
|
|
|
54365
54493
|
} : null;
|
|
54366
54494
|
const initialRef = tddMode ? await _executionDeps.captureGitRef(ctx.workdir) ?? "HEAD" : null;
|
|
54367
54495
|
const inputs = await _executionDeps.assemblePlanInputsFromCtx(ctx);
|
|
54368
|
-
const plan = await buildPlanForStrategy(callCtx, ctx.story, ctx.config, ctx.routing.testStrategy, inputs);
|
|
54496
|
+
const plan = await _executionDeps.buildPlanForStrategy(callCtx, ctx.story, ctx.config, ctx.routing.testStrategy, inputs);
|
|
54369
54497
|
let planResult;
|
|
54370
54498
|
try {
|
|
54371
54499
|
planResult = await plan.run();
|
|
54500
|
+
} catch (err) {
|
|
54501
|
+
if (err instanceof NaxError && RUNTIME_CRASH_CODES.has(err.code)) {
|
|
54502
|
+
ctx.tddFailureCategory = "runtime-crash";
|
|
54503
|
+
}
|
|
54504
|
+
throw err;
|
|
54372
54505
|
} finally {
|
|
54373
54506
|
unsubscribe();
|
|
54374
54507
|
}
|
|
@@ -54390,6 +54523,7 @@ var init_execution = __esm(() => {
|
|
|
54390
54523
|
validateAgentForTier,
|
|
54391
54524
|
captureGitRef,
|
|
54392
54525
|
assemblePlanInputsFromCtx,
|
|
54526
|
+
buildPlanForStrategy,
|
|
54393
54527
|
applyPostRunInspection,
|
|
54394
54528
|
decideStageAction
|
|
54395
54529
|
};
|
|
@@ -57809,7 +57943,7 @@ var package_default;
|
|
|
57809
57943
|
var init_package = __esm(() => {
|
|
57810
57944
|
package_default = {
|
|
57811
57945
|
name: "@nathapp/nax",
|
|
57812
|
-
version: "0.67.16",
|
|
57946
|
+
version: "0.67.18",
|
|
57813
57947
|
description: "AI Coding Agent Orchestrator \u2014 loops until done",
|
|
57814
57948
|
type: "module",
|
|
57815
57949
|
bin: {
|
|
@@ -57904,8 +58038,8 @@ var init_version = __esm(() => {
|
|
|
57904
58038
|
NAX_VERSION = package_default.version;
|
|
57905
58039
|
NAX_COMMIT = (() => {
|
|
57906
58040
|
try {
|
|
57907
|
-
if (/^[0-9a-f]{6,10}$/.test("
|
|
57908
|
-
return "
|
|
58041
|
+
if (/^[0-9a-f]{6,10}$/.test("cc7adcea"))
|
|
58042
|
+
return "cc7adcea";
|
|
57909
58043
|
} catch {}
|
|
57910
58044
|
try {
|
|
57911
58045
|
const result = Bun.spawnSync(["git", "rev-parse", "--short", "HEAD"], {
|