@nathapp/nax 0.67.17 → 0.67.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/nax.js +241 -57
- package/package.json +1 -1
package/dist/nax.js
CHANGED
|
@@ -29750,11 +29750,14 @@ function buildStorySection(story) {
|
|
|
29750
29750
|
function buildVerdictSection(story) {
|
|
29751
29751
|
return `# Verdict Instructions
|
|
29752
29752
|
|
|
29753
|
-
## Write Verdict File
|
|
29753
|
+
## Write Verdict File and Emit JSON in Final Reply
|
|
29754
29754
|
|
|
29755
|
-
After completing your verification, you **MUST**
|
|
29755
|
+
After completing your verification, you **MUST** do BOTH of the following:
|
|
29756
29756
|
|
|
29757
|
-
**
|
|
29757
|
+
1. Write the verdict file at the **project root**: \`.nax-verifier-verdict.json\`
|
|
29758
|
+
2. Emit the same verdict JSON as the FINAL content of your reply \u2014 no prose
|
|
29759
|
+
before or after, no markdown fences. Your reply must end with a closing
|
|
29760
|
+
brace \`}\` on its own line. The orchestrator parses your reply as JSON.
|
|
29758
29761
|
|
|
29759
29762
|
Set \`approved: true\` when ALL of these conditions are met:
|
|
29760
29763
|
- All story-scoped tests pass (the orchestrator already attempted the full-suite gate \u2014 you only need to verify the story's own tests)
|
|
@@ -29778,7 +29781,7 @@ Set \`approved: false\` when ANY of these conditions are true:
|
|
|
29778
29781
|
- \`fixes\` \u2014 keep this empty; the verifier must not apply code or test fixes
|
|
29779
29782
|
- \`reasoning\` \u2014 brief summary of your overall assessment
|
|
29780
29783
|
|
|
29781
|
-
When done, do not commit code changes.
|
|
29784
|
+
When done, do not commit code changes. Write the verdict file, then end your reply with the JSON object.`;
|
|
29782
29785
|
}
|
|
29783
29786
|
|
|
29784
29787
|
// src/prompts/sections/conventions.ts
|
|
@@ -30164,6 +30167,24 @@ class TddPromptBuilder {
|
|
|
30164
30167
|
const isolation = role === "test-writer" ? opts.lite ? "lite" : "strict" : undefined;
|
|
30165
30168
|
return TddPromptBuilder.for(role, { variant, isolation }).withLoader(workdir, config2).story(story).context(opts.contextMarkdown).v2FeatureContext(opts.contextBundle?.pushMarkdown).featureContext(opts.contextBundle ? undefined : opts.featureContextMarkdown).constitution(opts.constitution).testCommand(config2.quality?.commands?.test).hermeticConfig(config2.quality?.testing).build();
|
|
30166
30169
|
}
|
|
30170
|
+
static verdictRetry() {
|
|
30171
|
+
return `Your previous reply could not be parsed as a valid VerifierVerdict JSON object.
|
|
30172
|
+
` + `Re-emit the verdict as the FINAL content of your reply.
|
|
30173
|
+
` + `Output ONLY the JSON object \u2014 no markdown fences, no explanation, no prose.
|
|
30174
|
+
` + `The reply must start with { and end with } on its own line.
|
|
30175
|
+
` + "Required top-level fields: version, approved, tests, testModifications, acceptanceCriteria, quality, fixes, reasoning.";
|
|
30176
|
+
}
|
|
30177
|
+
static verdictRetryCondensed() {
|
|
30178
|
+
return `Your previous reply was truncated and could not be parsed as valid JSON.
|
|
30179
|
+
` + `Re-emit a CONDENSED verdict that omits the acceptanceCriteria.criteria[] entries:
|
|
30180
|
+
` + `- Keep acceptanceCriteria.allMet (boolean) but use criteria=[] (empty array).
|
|
30181
|
+
` + `- Keep quality.issues=[] and fixes=[] empty.
|
|
30182
|
+
` + `- Set testModifications.reasoning to a single sentence.
|
|
30183
|
+
` + `- Set reasoning to a single sentence.
|
|
30184
|
+
` + `Output ONLY the JSON object \u2014 no markdown fences, no prose.
|
|
30185
|
+
` + `Schema (minimal):
|
|
30186
|
+
` + `{"version":1,"approved":boolean,"tests":{"allPassing":boolean,"passCount":number,"failCount":number},"testModifications":{"detected":boolean,"files":[],"legitimate":boolean,"reasoning":"..."},"acceptanceCriteria":{"allMet":boolean,"criteria":[]},"quality":{"rating":"good"|"acceptable"|"poor","issues":[]},"fixes":[],"reasoning":"..."}`;
|
|
30187
|
+
}
|
|
30167
30188
|
s(id, content) {
|
|
30168
30189
|
return { id, content, overridable: false };
|
|
30169
30190
|
}
|
|
@@ -31013,6 +31034,7 @@ Severity guide:
|
|
|
31013
31034
|
- If you cannot quote an exact excerpt that proves your point, downgrade the finding to \`"unverifiable"\` rather than fabricating a quote.
|
|
31014
31035
|
|
|
31015
31036
|
**AC-grounding rule \u2014 required for every "error" finding:**
|
|
31037
|
+
- Do NOT write an \`acQuote\` that does not appear verbatim in the listed AC text. If you cannot find an exact verbatim match, set severity to \`warning\` \u2014 never approximate, paraphrase, or synthesise a quote. A finding dropped for a fabricated quote wastes a review cycle and is worse than a correctly classified \`warning\`.
|
|
31016
31038
|
- \`acQuote\` must be a verbatim substring of one AC bullet (from the Acceptance Criteria above) that names or constrains the exact **symbol** you are flagging \u2014 not merely the file the symbol lives in.
|
|
31017
31039
|
- \`acIndex\` is the 1-based position of that AC bullet in the list.
|
|
31018
31040
|
- Copy \`acQuote\` **exactly** from the AC text, including any backticks, asterisks, or punctuation. Do not paraphrase, strip formatting, or rewrite.
|
|
@@ -31220,7 +31242,7 @@ ${STEP2}${frameworkLine}
|
|
|
31220
31242
|
${STEP3_HEADER}
|
|
31221
31243
|
${STEP3_SHARED_RULES}
|
|
31222
31244
|
- **File output (REQUIRED)**: Write the acceptance test file DIRECTLY to the path shown below. Do NOT output the test code in your response. After writing the file, reply with a brief confirmation.
|
|
31223
|
-
- **Path anchor (CRITICAL)**: Write the test file to this exact path: \`${p.targetTestFilePath}\`.
|
|
31245
|
+
- **Path anchor (CRITICAL \u2014 do NOT deviate)**: Write the test file to this exact path: \`${p.targetTestFilePath}\`. This path is intentional and computed by the orchestrator \u2014 do not change it based on what you observe in the project. In particular: if you see a \`.nax/features/\` directory at the repo root, that is for stories scoped to the repo root. When a story belongs to a specific package (e.g. \`packages/core\`), its acceptance test lives inside that package's \`.nax/features/\` directory so the test runner can resolve the package's imports correctly. The package root is 3 levels above the test file (\`../../../\` relative to the test file).
|
|
31224
31246
|
- **Process cwd**: When spawning child processes to invoke a CLI or binary, set the working directory to the **package root** (\`join(import.meta.dir, "../../..")\`) as your default \u2014 unless your Step 2 exploration reveals the CLI uses a different working directory convention (e.g. reads config from \`~/.config/\`, or resolves paths relative to a flag value). Always check how the CLI resolves file paths before assuming.${implSection}`;
|
|
31225
31247
|
}
|
|
31226
31248
|
buildGeneratorFromSpecPrompt(p) {
|
|
@@ -32776,6 +32798,7 @@ function recordAdversarialAudit(opts) {
|
|
|
32776
32798
|
looksLikeFail: opts.looksLikeFail,
|
|
32777
32799
|
failOpen: opts.failOpen,
|
|
32778
32800
|
passed: opts.passed,
|
|
32801
|
+
passReason: opts.passReason,
|
|
32779
32802
|
blockingThreshold: opts.blockingThreshold,
|
|
32780
32803
|
result: opts.result,
|
|
32781
32804
|
advisoryFindings: opts.advisoryFindings,
|
|
@@ -33109,6 +33132,46 @@ ${formatFindings(blockingFindings)}` : "Adversarial review failed (no findings)"
|
|
|
33109
33132
|
};
|
|
33110
33133
|
}
|
|
33111
33134
|
if (!opResult.passed && acDropped.length > 0) {
|
|
33135
|
+
const allHallucinated = acDropped.every((d) => d.code === "ac_quote_not_substring");
|
|
33136
|
+
if (allHallucinated) {
|
|
33137
|
+
const demotedFindings = toAdversarialReviewFindings(acDropped.map((d) => ({ ...d.finding, severity: "warning", acQuote: undefined, acIndex: undefined })));
|
|
33138
|
+
const existingAdvisory = advisoryFindings.length > 0 ? toAdversarialReviewFindings(advisoryFindings) : [];
|
|
33139
|
+
const allAdvisory = [...existingAdvisory, ...demotedFindings];
|
|
33140
|
+
logger?.warn("review", "Adversarial review passed: all blocking findings discarded as hallucinated AC quotes", {
|
|
33141
|
+
storyId: story.id,
|
|
33142
|
+
durationMs,
|
|
33143
|
+
droppedCount: acDropped.length,
|
|
33144
|
+
drops: acDropped.map((d) => ({ file: d.finding.file, issue: d.finding.issue }))
|
|
33145
|
+
});
|
|
33146
|
+
recordAdversarialAudit({
|
|
33147
|
+
runtime,
|
|
33148
|
+
workdir,
|
|
33149
|
+
projectDir,
|
|
33150
|
+
storyId: story.id,
|
|
33151
|
+
featureName,
|
|
33152
|
+
parsed: true,
|
|
33153
|
+
failOpen: false,
|
|
33154
|
+
passed: true,
|
|
33155
|
+
passReason: "ac_quote_not_substring_demoted",
|
|
33156
|
+
blockingThreshold: threshold,
|
|
33157
|
+
result: { passed: true, findings: [] },
|
|
33158
|
+
advisoryFindings: allAdvisory.length > 0 ? allAdvisory : undefined,
|
|
33159
|
+
diffAvailable,
|
|
33160
|
+
adversarialDropAnalysis,
|
|
33161
|
+
adversarialAcceptAnalysis: []
|
|
33162
|
+
});
|
|
33163
|
+
return {
|
|
33164
|
+
check: "adversarial",
|
|
33165
|
+
success: true,
|
|
33166
|
+
passReason: "ac_quote_not_substring_demoted",
|
|
33167
|
+
command: "",
|
|
33168
|
+
exitCode: 0,
|
|
33169
|
+
output: `Adversarial review passed: ${acDropped.length} blocking finding(s) demoted to advisory \u2014 all cited AC quotes were fabricated and could not be validated.`,
|
|
33170
|
+
durationMs,
|
|
33171
|
+
advisoryFindings: allAdvisory.length > 0 ? allAdvisory : undefined,
|
|
33172
|
+
cost: llmCost
|
|
33173
|
+
};
|
|
33174
|
+
}
|
|
33112
33175
|
logger?.warn("review", "Adversarial review fail-closed: blocking findings dropped as ungrounded", {
|
|
33113
33176
|
storyId: story.id,
|
|
33114
33177
|
durationMs,
|
|
@@ -33653,6 +33716,35 @@ function acFailureToFinding(acId, output) {
|
|
|
33653
33716
|
fixTarget: "source"
|
|
33654
33717
|
};
|
|
33655
33718
|
}
|
|
33719
|
+
/**
 * Builds a synthetic "execution-failed" finding for a test run that exited
 * non-zero without producing structured failures.
 *
 * @param {object} params
 * @param {string} params.command - Command that was run; quoted in the message and copied to `meta`.
 * @param {number|null|undefined} params.exitCode - Exit code when known; a nullish value omits the "(exit N)" suffix.
 * @param {string|null|undefined} params.output - Raw runner output; only its tail is embedded in the message.
 * @param {*} [params.packageDir] - Copied to `meta` unchanged.
 * @param {*} [params.cwd] - Copied to `meta` unchanged.
 * @returns {object} Finding with `source: "test-runner"`, `severity: "error"`,
 *   `category: "execution-failed"`, `fixTarget: "source"`, the message and `meta`.
 */
function executionFailureToFinding(params) {
  // Single source of truth for the window size: used for the slice and the label.
  const tailLineCount = 40;
  const tail = tailLines(params.output, tailLineCount);
  // `!= null` covers both null and undefined, so an unknown exit code never
  // renders as "(exit null)". An exit code of 0 is still reported.
  const exitStr = params.exitCode != null ? ` (exit ${params.exitCode})` : "";
  const message = [
    `Test runner exited non-zero without structured failures${exitStr}. Command: \`${params.command}\``,
    "",
    `--- runner output (last ${tailLineCount} lines) ---`,
    tail
  ].join("\n");
  return {
    source: "test-runner",
    severity: "error",
    category: "execution-failed",
    message,
    fixTarget: "source",
    meta: {
      command: params.command,
      exitCode: params.exitCode,
      packageDir: params.packageDir,
      cwd: params.cwd
    }
  };
}
|
|
33740
|
+
/**
 * Returns the last `n` lines of `s`, joined with "\n".
 *
 * One trailing line terminator ("\n" or "\r\n") is dropped before splitting.
 * Without that, output ending in a newline yields an empty final element that
 * takes one of the `n` slots, so the caller gets `n - 1` real lines.
 *
 * @param {string|null|undefined} s - Text to take the tail of.
 * @param {number} n - Maximum number of lines to keep; a value <= 0 yields "".
 * @returns {string} The tail, or "(no output)" when `s` is empty, nullish, or
 *   only a single line terminator.
 */
function tailLines(s, n) {
  if (!s)
    return "(no output)";
  // `$` without the `m` flag anchors at end of input only, so interior blank
  // lines are preserved.
  const text = s.replace(/\r?\n$/, "");
  if (text === "")
    return "(no output)";
  const lines = text.split("\n");
  // Math.max (rather than slice(-n)) keeps n = 0 returning nothing instead of everything.
  return lines.slice(Math.max(0, lines.length - n)).join("\n");
}
|
|
33656
33748
|
function acSentinelToFinding(sentinel, _output) {
|
|
33657
33749
|
if (sentinel === "AC-HOOK") {
|
|
33658
33750
|
return {
|
|
@@ -36874,6 +36966,29 @@ var init_verdict = __esm(() => {
|
|
|
36874
36966
|
});
|
|
36875
36967
|
|
|
36876
36968
|
// src/operations/verify.ts
|
|
36969
|
+
/**
 * Turns the verifier's reply text into a run result.
 *
 * The reply is parsed with `tryParseLLMJson`, accepted as-is when
 * `isValidVerdict` approves it (otherwise passed through `coerceVerdict`), and
 * then classified by `categorizeVerdict`.
 *
 * @param {string} output - Raw reply text from the verifier.
 * @param {*} _input - Unused.
 * @param {*} _ctx - Unused.
 * @returns {object} `{ success, filesChanged: [], estimatedCostUsd: 0, durationMs: 0, output }`
 *   plus `failureCategory` / `reviewReason` when the categorization supplies truthy values.
 * @throws {ParseValidationError} When the reply is empty or blank, does not parse
 *   to an object, or cannot be validated or coerced into a verdict.
 */
function parseVerdictFromStdout(output, _input, _ctx) {
  const hasText = Boolean(output) && output.trim() !== "";
  if (!hasText) {
    throw new ParseValidationError("verifier produced no stdout");
  }

  const candidate = tryParseLLMJson(output);
  const isObjectLike = Boolean(candidate) && typeof candidate === "object";
  if (!isObjectLike) {
    throw new ParseValidationError("verifier stdout is not a JSON object");
  }

  // Coercion is attempted only when strict validation rejects the payload.
  let verdict = candidate;
  if (!isValidVerdict(candidate)) {
    verdict = coerceVerdict(candidate);
  }
  if (!verdict) {
    throw new ParseValidationError("verifier stdout JSON missing required VerifierVerdict fields");
  }

  const testsAllPassing = verdict.tests.allPassing === true;
  const { success, failureCategory, reviewReason } = categorizeVerdict(verdict, testsAllPassing);

  const result = {
    success,
    filesChanged: [],
    estimatedCostUsd: 0,
    durationMs: 0,
    output
  };
  // Optional keys are attached only when truthy, so absent values leave no key behind.
  if (failureCategory) {
    result.failureCategory = failureCategory;
  }
  if (reviewReason) {
    result.reviewReason = reviewReason;
  }
  return result;
}
|
|
36877
36992
|
async function runVerifierIsolation(beforeRef, ctx) {
|
|
36878
36993
|
if (!beforeRef)
|
|
36879
36994
|
return;
|
|
@@ -36882,16 +36997,31 @@ async function runVerifierIsolation(beforeRef, ctx) {
|
|
|
36882
36997
|
}
|
|
36883
36998
|
var verifierOp;
|
|
36884
36999
|
var init_verify = __esm(() => {
|
|
37000
|
+
init_retry();
|
|
36885
37001
|
init_config();
|
|
37002
|
+
init_tdd_builder();
|
|
36886
37003
|
init_isolation();
|
|
36887
37004
|
init_verdict();
|
|
36888
|
-
init__session_output();
|
|
36889
37005
|
verifierOp = {
|
|
36890
37006
|
kind: "run",
|
|
36891
37007
|
name: "verifier",
|
|
36892
37008
|
stage: "verify",
|
|
36893
37009
|
session: { role: "verifier", lifetime: "fresh" },
|
|
36894
37010
|
config: tddConfigSelector,
|
|
37011
|
+
retry: makeParseRetryStrategy({
|
|
37012
|
+
validate: (parsed) => {
|
|
37013
|
+
if (!parsed || typeof parsed !== "object")
|
|
37014
|
+
return false;
|
|
37015
|
+
const r = parsed;
|
|
37016
|
+
return isValidVerdict(r) || coerceVerdict(r) !== null;
|
|
37017
|
+
},
|
|
37018
|
+
reviewerKind: "verifier",
|
|
37019
|
+
maxAttempts: 2,
|
|
37020
|
+
prompts: {
|
|
37021
|
+
invalid: () => TddPromptBuilder.verdictRetry(),
|
|
37022
|
+
truncated: () => TddPromptBuilder.verdictRetryCondensed()
|
|
37023
|
+
}
|
|
37024
|
+
}),
|
|
36895
37025
|
build(input, _ctx) {
|
|
36896
37026
|
if (input.promptMarkdown?.trim()) {
|
|
36897
37027
|
return {
|
|
@@ -36908,13 +37038,8 @@ var init_verify = __esm(() => {
|
|
|
36908
37038
|
}
|
|
36909
37039
|
};
|
|
36910
37040
|
},
|
|
36911
|
-
parse
|
|
36912
|
-
const envelope = parseSessionJsonOutput(output);
|
|
36913
|
-
return { ...envelope, estimatedCostUsd: 0, durationMs: 0 };
|
|
36914
|
-
},
|
|
37041
|
+
parse: parseVerdictFromStdout,
|
|
36915
37042
|
async verify(parsed, input, ctx) {
|
|
36916
|
-
if (!parsed.success)
|
|
36917
|
-
return null;
|
|
36918
37043
|
const isolation = await runVerifierIsolation(input.beforeRef, ctx);
|
|
36919
37044
|
return isolation ? { ...parsed, isolation } : parsed;
|
|
36920
37045
|
},
|
|
@@ -36922,20 +37047,28 @@ var init_verify = __esm(() => {
|
|
|
36922
37047
|
const packageDir = verifyCtx.packageView.packageDir;
|
|
36923
37048
|
try {
|
|
36924
37049
|
const verdict = await readVerdict(packageDir);
|
|
36925
|
-
if (
|
|
36926
|
-
|
|
36927
|
-
|
|
36928
|
-
|
|
36929
|
-
|
|
37050
|
+
if (verdict) {
|
|
37051
|
+
const testsAllPassing = verdict.tests.allPassing === true;
|
|
37052
|
+
const categorization = categorizeVerdict(verdict, testsAllPassing);
|
|
37053
|
+
const isolation = await runVerifierIsolation(input.beforeRef, verifyCtx);
|
|
37054
|
+
return {
|
|
37055
|
+
success: categorization.success,
|
|
37056
|
+
filesChanged: [],
|
|
37057
|
+
estimatedCostUsd: 0,
|
|
37058
|
+
durationMs: 0,
|
|
37059
|
+
output: "",
|
|
37060
|
+
...categorization.failureCategory && { failureCategory: categorization.failureCategory },
|
|
37061
|
+
...categorization.reviewReason && { reviewReason: categorization.reviewReason },
|
|
37062
|
+
...isolation && { isolation }
|
|
37063
|
+
};
|
|
37064
|
+
}
|
|
36930
37065
|
return {
|
|
36931
|
-
success:
|
|
37066
|
+
success: false,
|
|
36932
37067
|
filesChanged: [],
|
|
36933
37068
|
estimatedCostUsd: 0,
|
|
36934
37069
|
durationMs: 0,
|
|
36935
37070
|
output: "",
|
|
36936
|
-
|
|
36937
|
-
...categorization.reviewReason && { reviewReason: categorization.reviewReason },
|
|
36938
|
-
...isolation && { isolation }
|
|
37071
|
+
reviewReason: "verifier produced unparseable verdict in stdout after retries and no usable verdict file on disk"
|
|
36939
37072
|
};
|
|
36940
37073
|
} finally {
|
|
36941
37074
|
await cleanupVerdict(packageDir);
|
|
@@ -37688,7 +37821,9 @@ async function runVerificationCore(options) {
|
|
|
37688
37821
|
success: options.acceptOnTimeout ?? false,
|
|
37689
37822
|
countsTowardEscalation: false,
|
|
37690
37823
|
error: execution.error,
|
|
37691
|
-
output: execution.output
|
|
37824
|
+
output: execution.output,
|
|
37825
|
+
exitCode: execution.exitCode,
|
|
37826
|
+
command: finalCommand
|
|
37692
37827
|
};
|
|
37693
37828
|
}
|
|
37694
37829
|
const exitCode = execution.exitCode ?? 1;
|
|
@@ -37702,7 +37837,9 @@ async function runVerificationCore(options) {
|
|
|
37702
37837
|
error: analysis.error,
|
|
37703
37838
|
output: execution.output,
|
|
37704
37839
|
passCount: analysis.passCount,
|
|
37705
|
-
failCount: analysis.failCount
|
|
37840
|
+
failCount: analysis.failCount,
|
|
37841
|
+
exitCode,
|
|
37842
|
+
command: finalCommand
|
|
37706
37843
|
};
|
|
37707
37844
|
}
|
|
37708
37845
|
return {
|
|
@@ -37711,10 +37848,19 @@ async function runVerificationCore(options) {
|
|
|
37711
37848
|
countsTowardEscalation: true,
|
|
37712
37849
|
output: execution.output,
|
|
37713
37850
|
passCount: analysis.passCount,
|
|
37714
|
-
failCount: analysis.failCount
|
|
37851
|
+
failCount: analysis.failCount,
|
|
37852
|
+
exitCode,
|
|
37853
|
+
command: finalCommand
|
|
37715
37854
|
};
|
|
37716
37855
|
}
|
|
37717
|
-
return {
|
|
37856
|
+
return {
|
|
37857
|
+
status: "SUCCESS",
|
|
37858
|
+
success: true,
|
|
37859
|
+
countsTowardEscalation: true,
|
|
37860
|
+
output: execution.output,
|
|
37861
|
+
exitCode,
|
|
37862
|
+
command: finalCommand
|
|
37863
|
+
};
|
|
37718
37864
|
}
|
|
37719
37865
|
async function fullSuite(options) {
|
|
37720
37866
|
return runVerificationCore(options);
|
|
@@ -37791,7 +37937,9 @@ var init_full_suite_gate = __esm(() => {
|
|
|
37791
37937
|
failed: parsedSummary.failed ?? 0,
|
|
37792
37938
|
output: result.output ?? "",
|
|
37793
37939
|
parsedSummary,
|
|
37794
|
-
timedOut: result.status === "TIMEOUT"
|
|
37940
|
+
timedOut: result.status === "TIMEOUT",
|
|
37941
|
+
exitCode: result.exitCode,
|
|
37942
|
+
command: result.command ?? gateCtx.testCmd
|
|
37795
37943
|
};
|
|
37796
37944
|
}
|
|
37797
37945
|
};
|
|
@@ -37818,6 +37966,13 @@ var init_full_suite_gate = __esm(() => {
|
|
|
37818
37966
|
};
|
|
37819
37967
|
}
|
|
37820
37968
|
const gateCtx = await deps.resolveGateContext(input, ctx);
|
|
37969
|
+
logger.info("verify[regression]", "Running full-suite gate", {
|
|
37970
|
+
storyId: input.story.id,
|
|
37971
|
+
packageDir: input.story.workdir,
|
|
37972
|
+
cwd: input.workdir,
|
|
37973
|
+
command: gateCtx.testCmd,
|
|
37974
|
+
timeoutSeconds: gateCtx.fullSuiteTimeout
|
|
37975
|
+
});
|
|
37821
37976
|
const testResult = await deps.runTests(input, gateCtx);
|
|
37822
37977
|
if (testResult.passed) {
|
|
37823
37978
|
return { success: true, passed: true, status: "passed", estimatedCostUsd: 0, attempts: 0, findings: [] };
|
|
@@ -37851,13 +38006,27 @@ var init_full_suite_gate = __esm(() => {
|
|
|
37851
38006
|
}
|
|
37852
38007
|
const findings = testSummaryToFindings(testResult.parsedSummary);
|
|
37853
38008
|
if (findings.length === 0) {
|
|
38009
|
+
const cmd = testResult.command ?? gateCtx.testCmd;
|
|
38010
|
+
const synth = executionFailureToFinding({
|
|
38011
|
+
command: cmd,
|
|
38012
|
+
exitCode: testResult.exitCode,
|
|
38013
|
+
output: testResult.output,
|
|
38014
|
+
packageDir: input.story.workdir,
|
|
38015
|
+
cwd: input.workdir
|
|
38016
|
+
});
|
|
38017
|
+
logger.warn("verify[regression]", "Full-suite gate execution-failed \u2014 emitting synth finding", {
|
|
38018
|
+
storyId: input.story.id,
|
|
38019
|
+
command: cmd,
|
|
38020
|
+
exitCode: testResult.exitCode,
|
|
38021
|
+
packageDir: input.story.workdir
|
|
38022
|
+
});
|
|
37854
38023
|
return {
|
|
37855
38024
|
success: false,
|
|
37856
38025
|
passed: false,
|
|
37857
38026
|
status: "execution-failed",
|
|
37858
38027
|
estimatedCostUsd: 0,
|
|
37859
38028
|
attempts: 0,
|
|
37860
|
-
findings: []
|
|
38029
|
+
findings: [synth]
|
|
37861
38030
|
};
|
|
37862
38031
|
}
|
|
37863
38032
|
return { success: false, passed: false, status: "failed", estimatedCostUsd: 0, attempts: 0, findings };
|
|
@@ -37869,7 +38038,7 @@ var init_full_suite_gate = __esm(() => {
|
|
|
37869
38038
|
function makeFullSuiteRectifyStrategy(story, config2) {
|
|
37870
38039
|
return {
|
|
37871
38040
|
name: "full-suite-rectify",
|
|
37872
|
-
appliesTo: (finding) => finding.source === "test-runner" && finding.category === "failed-test",
|
|
38041
|
+
appliesTo: (finding) => finding.source === "test-runner" && (finding.category === "failed-test" || finding.category === "execution-failed"),
|
|
37873
38042
|
fixOp: implementerOp,
|
|
37874
38043
|
buildInput: (findings) => ({
|
|
37875
38044
|
story,
|
|
@@ -38561,6 +38730,15 @@ var init_verify_scoped = __esm(() => {
|
|
|
38561
38730
|
command: selection.effectiveCommand
|
|
38562
38731
|
});
|
|
38563
38732
|
}
|
|
38733
|
+
const scopedTimeout = ctxConfig.execution?.regressionGate?.timeoutSeconds ?? 600;
|
|
38734
|
+
logger.info("verify[scoped]", "Running scoped tests", {
|
|
38735
|
+
storyId: input.storyId,
|
|
38736
|
+
packageDir: input.packageDir,
|
|
38737
|
+
cwd: input.workdir,
|
|
38738
|
+
command: selection.effectiveCommand,
|
|
38739
|
+
timeoutSeconds: scopedTimeout,
|
|
38740
|
+
isFullSuite: selection.isFullSuite
|
|
38741
|
+
});
|
|
38564
38742
|
const start = Date.now();
|
|
38565
38743
|
const result = await deps.regression({
|
|
38566
38744
|
workdir: input.workdir,
|
|
@@ -51387,9 +51565,9 @@ var init_acceptance2 = __esm(() => {
|
|
|
51387
51565
|
function logTestOutput(logger, stage, output, opts = {}) {
|
|
51388
51566
|
if (!logger || !output)
|
|
51389
51567
|
return;
|
|
51390
|
-
const
|
|
51568
|
+
const tailLines2 = opts.tailLines ?? 20;
|
|
51391
51569
|
const lines = output.split(`
|
|
51392
|
-
`).slice(-
|
|
51570
|
+
`).slice(-tailLines2).join(`
|
|
51393
51571
|
`);
|
|
51394
51572
|
logger.debug(stage, "Test output (tail)", {
|
|
51395
51573
|
...opts.storyId !== undefined && { storyId: opts.storyId },
|
|
@@ -52779,10 +52957,13 @@ async function refreshReviewInputForDispatch(opName, input) {
|
|
|
52779
52957
|
return fallback;
|
|
52780
52958
|
}
|
|
52781
52959
|
}
|
|
52782
|
-
/**
 * Produces the log message for a finished phase.
 *
 * Precedence: the "greenfield-gate" operation has its own wording; otherwise a
 * "rectification" stage gets a neutral "completed" message regardless of
 * outcome; everything else is reported as passed or failed.
 *
 * @param {string} opName - Name of the operation that ran.
 * @param {boolean} success2 - Whether the operation succeeded.
 * @param {string} [stage] - Stage of the operation; only "rectification" changes the wording.
 * @returns {string} Message text.
 */
function formatPhaseResultMessage(opName, success2, stage) {
  if (opName === "greenfield-gate") {
    if (success2) {
      return "Greenfield-gate: pre-existing tests detected (not greenfield) \u2014 proceeding with normal TDD";
    }
    return "Greenfield-gate: no pre-existing tests \u2014 greenfield run, pausing TDD test-writer";
  }

  if (stage === "rectification") {
    return `Rectification strategy completed: ${opName}`;
  }

  const outcome = success2 ? "passed" : "failed";
  return `Phase ${outcome}: ${opName}`;
}
|
|
52788
52969
|
function isSlot(value) {
|
|
@@ -52975,7 +53156,7 @@ function logUnifiedReviewPhaseStart(storyId, opName) {
|
|
|
52975
53156
|
logger?.info("review", "Running adversarial check", { storyId });
|
|
52976
53157
|
}
|
|
52977
53158
|
}
|
|
52978
|
-
function logDeterministicPhaseOutcome(storyId, opName, output, durationMs, isTddPhase) {
|
|
53159
|
+
function logDeterministicPhaseOutcome(storyId, opName, output, durationMs, isTddPhase, stage) {
|
|
52979
53160
|
if (isTddPhase)
|
|
52980
53161
|
return;
|
|
52981
53162
|
if (opName === "semantic-review" || opName === "adversarial-review")
|
|
@@ -52992,7 +53173,11 @@ function logDeterministicPhaseOutcome(storyId, opName, output, durationMs, isTdd
|
|
|
52992
53173
|
data.findingsCount = findingsCount;
|
|
52993
53174
|
if (status !== undefined)
|
|
52994
53175
|
data.status = status;
|
|
52995
|
-
const message = formatPhaseResultMessage(opName, success2);
|
|
53176
|
+
const message = formatPhaseResultMessage(opName, success2, stage);
|
|
53177
|
+
if (stage === "rectification") {
|
|
53178
|
+
logger?.info("story-orchestrator", message, data);
|
|
53179
|
+
return;
|
|
53180
|
+
}
|
|
52996
53181
|
if (success2) {
|
|
52997
53182
|
logger?.info("story-orchestrator", message, data);
|
|
52998
53183
|
} else {
|
|
@@ -53069,7 +53254,7 @@ async function runPhase(ctx, slot, phaseCosts, phaseOutputs, isThreeSession = fa
|
|
|
53069
53254
|
phaseOutputs[opName] = output;
|
|
53070
53255
|
emitReviewDecision(ctx, opName, output);
|
|
53071
53256
|
logUnifiedReviewPhaseResult(ctx.storyId, opName, output);
|
|
53072
|
-
logDeterministicPhaseOutcome(ctx.storyId, opName, output, Date.now() - phaseStartedAt, isTddPhase);
|
|
53257
|
+
logDeterministicPhaseOutcome(ctx.storyId, opName, output, Date.now() - phaseStartedAt, isTddPhase, slot.op.stage);
|
|
53073
53258
|
if (isTddPhase) {
|
|
53074
53259
|
const durationMs = Date.now() - phaseStartedAt;
|
|
53075
53260
|
logger?.info("tdd", `Session complete: ${opName}`, {
|
|
@@ -53453,23 +53638,8 @@ var init_story_orchestrator = __esm(() => {
|
|
|
53453
53638
|
STRATEGY_TO_REVALIDATION_PHASES = {
|
|
53454
53639
|
"mechanical-lintfix": ["lint-check"],
|
|
53455
53640
|
"mechanical-formatfix": ["lint-check"],
|
|
53456
|
-
"autofix-implementer": [
|
|
53457
|
-
|
|
53458
|
-
"typecheck-check",
|
|
53459
|
-
"full-suite-gate",
|
|
53460
|
-
"verifier",
|
|
53461
|
-
"verify-scoped",
|
|
53462
|
-
"semantic-review",
|
|
53463
|
-
"adversarial-review"
|
|
53464
|
-
],
|
|
53465
|
-
"autofix-test-writer": [
|
|
53466
|
-
"lint-check",
|
|
53467
|
-
"typecheck-check",
|
|
53468
|
-
"full-suite-gate",
|
|
53469
|
-
"verifier",
|
|
53470
|
-
"verify-scoped",
|
|
53471
|
-
"adversarial-review"
|
|
53472
|
-
],
|
|
53641
|
+
"autofix-implementer": ["lint-check", "typecheck-check", "full-suite-gate", "semantic-review", "adversarial-review"],
|
|
53642
|
+
"autofix-test-writer": ["lint-check", "typecheck-check", "full-suite-gate", "adversarial-review"],
|
|
53473
53643
|
"full-suite-rectify": [
|
|
53474
53644
|
"lint-check",
|
|
53475
53645
|
"typecheck-check",
|
|
@@ -53817,7 +53987,7 @@ function routeTddFailure(failureCategory, isLiteMode, ctx, reviewReason, failure
|
|
|
53817
53987
|
}
|
|
53818
53988
|
return { action: "escalate", reason: buildReason("isolation-violation") };
|
|
53819
53989
|
}
|
|
53820
|
-
if (failureCategory === "session-failure" || failureCategory === "tests-failing" || failureCategory === "full-suite-gate-exhausted" || failureCategory === "verifier-rejected") {
|
|
53990
|
+
if (failureCategory === "session-failure" || failureCategory === "tests-failing" || failureCategory === "full-suite-gate-exhausted" || failureCategory === "verifier-rejected" || failureCategory === "runtime-crash") {
|
|
53821
53991
|
return { action: "escalate", reason: buildReason(failureCategory) };
|
|
53822
53992
|
}
|
|
53823
53993
|
if (failureCategory === "greenfield-no-tests") {
|
|
@@ -53989,6 +54159,12 @@ function deriveTddFailureCategory(phaseOutputs, unfixedFindings) {
|
|
|
53989
54159
|
return "full-suite-gate-exhausted";
|
|
53990
54160
|
}
|
|
53991
54161
|
}
|
|
54162
|
+
if (!verifierPassed) {
|
|
54163
|
+
const rectOutputCrash = phaseOutputs.rectification;
|
|
54164
|
+
if (rectOutputCrash?.exitReason === "validator-error") {
|
|
54165
|
+
return "runtime-crash";
|
|
54166
|
+
}
|
|
54167
|
+
}
|
|
53992
54168
|
if (!verifierPassed) {
|
|
53993
54169
|
const gateOutput = phaseOutputs[fullSuiteGateOp.name];
|
|
53994
54170
|
if (gateOutput && (gateOutput.success === false || gateOutput.passed === false)) {
|
|
@@ -54321,9 +54497,10 @@ var init_post_run = __esm(() => {
|
|
|
54321
54497
|
});
|
|
54322
54498
|
|
|
54323
54499
|
// src/pipeline/stages/execution.ts
|
|
54324
|
-
var executionStage, _executionDeps;
|
|
54500
|
+
var RUNTIME_CRASH_CODES, executionStage, _executionDeps;
|
|
54325
54501
|
var init_execution = __esm(() => {
|
|
54326
54502
|
init_agents();
|
|
54503
|
+
init_errors();
|
|
54327
54504
|
init_build_plan_for_strategy();
|
|
54328
54505
|
init_plan_inputs();
|
|
54329
54506
|
init_post_run();
|
|
@@ -54331,6 +54508,7 @@ var init_execution = __esm(() => {
|
|
|
54331
54508
|
init_logger2();
|
|
54332
54509
|
init_git();
|
|
54333
54510
|
init_execution_helpers();
|
|
54511
|
+
RUNTIME_CRASH_CODES = new Set(["CALL_OP_NO_OUTPUT", "CALL_OP_MAX_RETRIES"]);
|
|
54334
54512
|
executionStage = {
|
|
54335
54513
|
name: "execution",
|
|
54336
54514
|
enabled: () => true,
|
|
@@ -54389,10 +54567,15 @@ var init_execution = __esm(() => {
|
|
|
54389
54567
|
} : null;
|
|
54390
54568
|
const initialRef = tddMode ? await _executionDeps.captureGitRef(ctx.workdir) ?? "HEAD" : null;
|
|
54391
54569
|
const inputs = await _executionDeps.assemblePlanInputsFromCtx(ctx);
|
|
54392
|
-
const plan = await buildPlanForStrategy(callCtx, ctx.story, ctx.config, ctx.routing.testStrategy, inputs);
|
|
54570
|
+
const plan = await _executionDeps.buildPlanForStrategy(callCtx, ctx.story, ctx.config, ctx.routing.testStrategy, inputs);
|
|
54393
54571
|
let planResult;
|
|
54394
54572
|
try {
|
|
54395
54573
|
planResult = await plan.run();
|
|
54574
|
+
} catch (err) {
|
|
54575
|
+
if (err instanceof NaxError && RUNTIME_CRASH_CODES.has(err.code)) {
|
|
54576
|
+
ctx.tddFailureCategory = "runtime-crash";
|
|
54577
|
+
}
|
|
54578
|
+
throw err;
|
|
54396
54579
|
} finally {
|
|
54397
54580
|
unsubscribe();
|
|
54398
54581
|
}
|
|
@@ -54414,6 +54597,7 @@ var init_execution = __esm(() => {
|
|
|
54414
54597
|
validateAgentForTier,
|
|
54415
54598
|
captureGitRef,
|
|
54416
54599
|
assemblePlanInputsFromCtx,
|
|
54600
|
+
buildPlanForStrategy,
|
|
54417
54601
|
applyPostRunInspection,
|
|
54418
54602
|
decideStageAction
|
|
54419
54603
|
};
|
|
@@ -57833,7 +58017,7 @@ var package_default;
|
|
|
57833
58017
|
var init_package = __esm(() => {
|
|
57834
58018
|
package_default = {
|
|
57835
58019
|
name: "@nathapp/nax",
|
|
57836
|
-
version: "0.67.
|
|
58020
|
+
version: "0.67.19",
|
|
57837
58021
|
description: "AI Coding Agent Orchestrator \u2014 loops until done",
|
|
57838
58022
|
type: "module",
|
|
57839
58023
|
bin: {
|
|
@@ -57928,8 +58112,8 @@ var init_version = __esm(() => {
|
|
|
57928
58112
|
NAX_VERSION = package_default.version;
|
|
57929
58113
|
NAX_COMMIT = (() => {
|
|
57930
58114
|
try {
|
|
57931
|
-
if (/^[0-9a-f]{6,10}$/.test("
|
|
57932
|
-
return "
|
|
58115
|
+
if (/^[0-9a-f]{6,10}$/.test("e80ba4d6"))
|
|
58116
|
+
return "e80ba4d6";
|
|
57933
58117
|
} catch {}
|
|
57934
58118
|
try {
|
|
57935
58119
|
const result = Bun.spawnSync(["git", "rev-parse", "--short", "HEAD"], {
|