@nathapp/nax 0.59.3 → 0.60.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/nax.js +534 -309
- package/package.json +1 -1
package/dist/nax.js
CHANGED
|
@@ -18334,7 +18334,11 @@ var init_schemas3 = __esm(() => {
|
|
|
18334
18334
|
fixModel: "balanced",
|
|
18335
18335
|
strategy: "diagnose-first",
|
|
18336
18336
|
maxRetries: 2
|
|
18337
|
-
})
|
|
18337
|
+
}),
|
|
18338
|
+
suggestedTestPath: exports_external.string().min(1).optional(),
|
|
18339
|
+
hardening: exports_external.object({
|
|
18340
|
+
enabled: exports_external.boolean().default(true)
|
|
18341
|
+
}).optional().default({ enabled: true })
|
|
18338
18342
|
});
|
|
18339
18343
|
TestCoverageConfigSchema = exports_external.object({
|
|
18340
18344
|
enabled: exports_external.boolean().default(true),
|
|
@@ -18663,7 +18667,8 @@ var init_schemas3 = __esm(() => {
|
|
|
18663
18667
|
fixModel: "balanced",
|
|
18664
18668
|
strategy: "diagnose-first",
|
|
18665
18669
|
maxRetries: 2
|
|
18666
|
-
}
|
|
18670
|
+
},
|
|
18671
|
+
hardening: { enabled: true }
|
|
18667
18672
|
}),
|
|
18668
18673
|
context: ContextConfigSchema.default({
|
|
18669
18674
|
fileInjection: "disabled",
|
|
@@ -21444,14 +21449,20 @@ function tryParseLLMJson(text) {
|
|
|
21444
21449
|
}
|
|
21445
21450
|
|
|
21446
21451
|
// src/debate/resolvers.ts
|
|
21447
|
-
function
|
|
21448
|
-
return
|
|
21449
|
-
|
|
21452
|
+
function buildDebaterLabel(debater) {
|
|
21453
|
+
return debater.persona ? `${debater.agent} (${debater.persona})` : debater.agent;
|
|
21454
|
+
}
|
|
21455
|
+
function buildProposalsSection(proposals, debaters) {
|
|
21456
|
+
return proposals.map((p, i) => {
|
|
21457
|
+
const label = debaters?.[i] ? buildDebaterLabel(debaters[i]) : String(i + 1);
|
|
21458
|
+
return `### Proposal ${label}
|
|
21459
|
+
${p}`;
|
|
21460
|
+
}).join(`
|
|
21450
21461
|
|
|
21451
21462
|
`);
|
|
21452
21463
|
}
|
|
21453
|
-
function buildSynthesisPrompt(proposals, critiques) {
|
|
21454
|
-
const proposalsSection = buildProposalsSection(proposals);
|
|
21464
|
+
function buildSynthesisPrompt(proposals, critiques, debaters) {
|
|
21465
|
+
const proposalsSection = buildProposalsSection(proposals, debaters);
|
|
21455
21466
|
const critiquesSection = critiques.length > 0 ? `
|
|
21456
21467
|
|
|
21457
21468
|
## Critiques
|
|
@@ -21466,8 +21477,8 @@ ${proposalsSection}${critiquesSection}
|
|
|
21466
21477
|
|
|
21467
21478
|
Please synthesize these into the best possible unified response, incorporating the strongest elements from each proposal.`;
|
|
21468
21479
|
}
|
|
21469
|
-
function buildJudgePrompt(proposals, critiques) {
|
|
21470
|
-
const proposalsSection = buildProposalsSection(proposals);
|
|
21480
|
+
function buildJudgePrompt(proposals, critiques, debaters) {
|
|
21481
|
+
const proposalsSection = buildProposalsSection(proposals, debaters);
|
|
21471
21482
|
const critiquesSection = critiques.length > 0 ? `
|
|
21472
21483
|
|
|
21473
21484
|
## Critiques
|
|
@@ -21515,7 +21526,7 @@ function majorityResolver(proposals, failOpen) {
|
|
|
21515
21526
|
return passCount > failCount ? "passed" : "failed";
|
|
21516
21527
|
}
|
|
21517
21528
|
async function synthesisResolver(proposals, critiques, opts) {
|
|
21518
|
-
const base = buildSynthesisPrompt(proposals, critiques);
|
|
21529
|
+
const base = buildSynthesisPrompt(proposals, critiques, opts.debaters);
|
|
21519
21530
|
const prompt = opts.promptSuffix ? `${base}
|
|
21520
21531
|
|
|
21521
21532
|
${opts.promptSuffix}` : base;
|
|
@@ -21527,7 +21538,7 @@ async function judgeResolver(proposals, critiques, resolverConfig, opts) {
|
|
|
21527
21538
|
if (!adapter) {
|
|
21528
21539
|
throw new Error(`[debate] Judge agent '${agentName}' not found`);
|
|
21529
21540
|
}
|
|
21530
|
-
const prompt = buildJudgePrompt(proposals, critiques);
|
|
21541
|
+
const prompt = buildJudgePrompt(proposals, critiques, opts.debaters);
|
|
21531
21542
|
return adapter.complete(prompt, opts.completeOptions);
|
|
21532
21543
|
}
|
|
21533
21544
|
var DEFAULT_FALLBACK_AGENT = "claude";
|
|
@@ -21606,7 +21617,7 @@ function resolveModelDefForDebater(debater, tier, config2) {
|
|
|
21606
21617
|
return resolveModelForAgent(configModels, debater.agent, "fast", configDefaultAgent);
|
|
21607
21618
|
}
|
|
21608
21619
|
}
|
|
21609
|
-
async function resolveOutcome(proposalOutputs, critiqueOutputs, stageConfig, config2, storyId, timeoutMs, workdir, featureName, reviewerSession, resolverContext, promptSuffix) {
|
|
21620
|
+
async function resolveOutcome(proposalOutputs, critiqueOutputs, stageConfig, config2, storyId, timeoutMs, workdir, featureName, reviewerSession, resolverContext, promptSuffix, debaters) {
|
|
21610
21621
|
const resolverConfig = stageConfig.resolver;
|
|
21611
21622
|
const logger = _debateSessionDeps.getSafeLogger();
|
|
21612
21623
|
if (reviewerSession && resolverContext) {
|
|
@@ -21675,6 +21686,7 @@ async function resolveOutcome(proposalOutputs, critiqueOutputs, stageConfig, con
|
|
|
21675
21686
|
const resolverResult = await synthesisResolver(proposalOutputs, critiqueOutputs, {
|
|
21676
21687
|
adapter,
|
|
21677
21688
|
promptSuffix,
|
|
21689
|
+
debaters,
|
|
21678
21690
|
completeOptions: {
|
|
21679
21691
|
model: resolveDebaterModel({ agent: agentName }, config2),
|
|
21680
21692
|
config: config2,
|
|
@@ -21700,6 +21712,7 @@ async function resolveOutcome(proposalOutputs, critiqueOutputs, stageConfig, con
|
|
|
21700
21712
|
const resolverResult = await judgeResolver(proposalOutputs, critiqueOutputs, resolverConfig, {
|
|
21701
21713
|
getAgent: (name) => _debateSessionDeps.getAgent(name, config2),
|
|
21702
21714
|
defaultAgentName: RESOLVER_FALLBACK_AGENT,
|
|
21715
|
+
debaters,
|
|
21703
21716
|
completeOptions: {
|
|
21704
21717
|
model: resolveDebaterModel({ agent: agentName }, config2),
|
|
21705
21718
|
config: config2,
|
|
@@ -21772,6 +21785,9 @@ function resolvePersonas(debaters, stage, autoPersona) {
|
|
|
21772
21785
|
return { ...d, persona: assigned };
|
|
21773
21786
|
});
|
|
21774
21787
|
}
|
|
21788
|
+
function buildDebaterLabel2(debater) {
|
|
21789
|
+
return debater.persona ? `${debater.agent} (${debater.persona})` : debater.agent;
|
|
21790
|
+
}
|
|
21775
21791
|
var PERSONA_FRAGMENTS, PLAN_ROTATION, REVIEW_ROTATION;
|
|
21776
21792
|
var init_personas = __esm(() => {
|
|
21777
21793
|
PERSONA_FRAGMENTS = {
|
|
@@ -21918,6 +21934,130 @@ ${c.output}`).join(`
|
|
|
21918
21934
|
buildDebaterLabel(debater) {
|
|
21919
21935
|
return debater.persona ? `${debater.agent} (${debater.persona})` : debater.agent;
|
|
21920
21936
|
}
|
|
21937
|
+
buildReviewPrompt(diff, story) {
|
|
21938
|
+
const criteria = story.acceptanceCriteria.map((c) => `- ${c}`).join(`
|
|
21939
|
+
`);
|
|
21940
|
+
return [
|
|
21941
|
+
`Review the following code diff for story ${story.id}: ${story.title}`,
|
|
21942
|
+
"",
|
|
21943
|
+
"## Acceptance Criteria",
|
|
21944
|
+
criteria,
|
|
21945
|
+
"",
|
|
21946
|
+
"## Diff",
|
|
21947
|
+
diff,
|
|
21948
|
+
"",
|
|
21949
|
+
"Also flag any changes in the diff not required by the acceptance criteria above as out-of-scope findings.",
|
|
21950
|
+
"Respond with JSON: { passed: boolean, findings: [...], findingReasoning: { [id]: string } }"
|
|
21951
|
+
].join(`
|
|
21952
|
+
`);
|
|
21953
|
+
}
|
|
21954
|
+
buildReReviewPrompt(updatedDiff, previousFindings) {
|
|
21955
|
+
const findingsList = previousFindings.length > 0 ? previousFindings.map((f) => `- ${f.ruleId}: ${f.message}`).join(`
|
|
21956
|
+
`) : "(none)";
|
|
21957
|
+
return [
|
|
21958
|
+
"This is a follow-up re-review. Please review the updated diff below.",
|
|
21959
|
+
"",
|
|
21960
|
+
"## Previous Findings",
|
|
21961
|
+
findingsList,
|
|
21962
|
+
"",
|
|
21963
|
+
"## Updated Diff",
|
|
21964
|
+
updatedDiff,
|
|
21965
|
+
"",
|
|
21966
|
+
"Respond with JSON: { passed: boolean, findings: [...], findingReasoning: { [id]: string }, deltaSummary: string }",
|
|
21967
|
+
"deltaSummary should describe which previous findings are resolved vs still present."
|
|
21968
|
+
].join(`
|
|
21969
|
+
`);
|
|
21970
|
+
}
|
|
21971
|
+
buildResolverPrompt(proposals, critiques, diff, story, resolverContext) {
|
|
21972
|
+
const criteria = story.acceptanceCriteria.map((c) => `- ${c}`).join(`
|
|
21973
|
+
`);
|
|
21974
|
+
const framing = this.buildResolverFraming(resolverContext);
|
|
21975
|
+
const voteTally = this.buildVoteTallyLine(resolverContext);
|
|
21976
|
+
const proposalsSection = this.buildLabeledProposalsSection(proposals);
|
|
21977
|
+
const critiquesSection = this.buildLabeledCritiquesSection(critiques);
|
|
21978
|
+
return [
|
|
21979
|
+
framing,
|
|
21980
|
+
"",
|
|
21981
|
+
`## Story ${story.id}: ${story.title}`,
|
|
21982
|
+
"",
|
|
21983
|
+
"## Acceptance Criteria",
|
|
21984
|
+
criteria,
|
|
21985
|
+
"",
|
|
21986
|
+
"## Debater Proposals",
|
|
21987
|
+
proposalsSection,
|
|
21988
|
+
critiquesSection,
|
|
21989
|
+
"",
|
|
21990
|
+
"## Diff",
|
|
21991
|
+
diff,
|
|
21992
|
+
voteTally,
|
|
21993
|
+
"",
|
|
21994
|
+
"Respond with JSON: { passed: boolean, findings: [...], findingReasoning: { [id]: string } }"
|
|
21995
|
+
].filter((line) => line !== undefined).join(`
|
|
21996
|
+
`);
|
|
21997
|
+
}
|
|
21998
|
+
buildReResolverPrompt(proposals, critiques, updatedDiff, previousFindings, resolverContext) {
|
|
21999
|
+
const framing = this.buildResolverFraming(resolverContext);
|
|
22000
|
+
const findingsList = previousFindings.length > 0 ? previousFindings.map((f) => `- ${f.ruleId}: ${f.message}`).join(`
|
|
22001
|
+
`) : "(none)";
|
|
22002
|
+
const proposalsSection = this.buildLabeledProposalsSection(proposals);
|
|
22003
|
+
const critiquesSection = this.buildLabeledCritiquesSection(critiques);
|
|
22004
|
+
return [
|
|
22005
|
+
`${framing} This is a re-review after implementer changes.`,
|
|
22006
|
+
"",
|
|
22007
|
+
"## Previous Findings",
|
|
22008
|
+
findingsList,
|
|
22009
|
+
"",
|
|
22010
|
+
"## Updated Debater Proposals",
|
|
22011
|
+
proposalsSection,
|
|
22012
|
+
critiquesSection,
|
|
22013
|
+
"",
|
|
22014
|
+
"## Updated Diff",
|
|
22015
|
+
updatedDiff,
|
|
22016
|
+
"",
|
|
22017
|
+
"Respond with JSON: { passed: boolean, findings: [...], findingReasoning: { [id]: string }, deltaSummary: string }",
|
|
22018
|
+
"deltaSummary should describe which previous findings are resolved vs still present."
|
|
22019
|
+
].filter((line) => line !== undefined).join(`
|
|
22020
|
+
`);
|
|
22021
|
+
}
|
|
22022
|
+
buildResolverFraming(ctx) {
|
|
22023
|
+
switch (ctx.resolverType) {
|
|
22024
|
+
case "majority-fail-closed":
|
|
22025
|
+
case "majority-fail-open":
|
|
22026
|
+
return "You are the authoritative reviewer resolving a debate. A preliminary vote was taken \u2014 see tally below. Verify disputed findings using tools (READ files, GREP for usage) and give your final verdict.";
|
|
22027
|
+
case "synthesis":
|
|
22028
|
+
return "You are a synthesis reviewer. Synthesize the debater proposals into a single, coherent, tool-verified verdict. Use READ and GREP to verify claims before ruling.";
|
|
22029
|
+
case "custom":
|
|
22030
|
+
return "You are the judge. Evaluate the debater proposals independently. Verify claims with tools (READ, GREP) and give your final authoritative verdict.";
|
|
22031
|
+
default:
|
|
22032
|
+
return "You are the reviewer. Evaluate the debater proposals and give your final authoritative verdict.";
|
|
22033
|
+
}
|
|
22034
|
+
}
|
|
22035
|
+
buildVoteTallyLine(ctx) {
|
|
22036
|
+
if (!ctx.majorityVote)
|
|
22037
|
+
return "";
|
|
22038
|
+
const { passCount, failCount } = ctx.majorityVote;
|
|
22039
|
+
const failOpenNote = ctx.resolverType === "majority-fail-open" ? " (unparseable proposals count as pass)" : " (unparseable proposals count as fail)";
|
|
22040
|
+
return `
|
|
22041
|
+
|
|
22042
|
+
The preliminary majority vote is: **${passCount} passed, ${failCount} failed**${failOpenNote}. Verify the failing findings with tools before giving your authoritative verdict.`;
|
|
22043
|
+
}
|
|
22044
|
+
buildLabeledProposalsSection(proposals) {
|
|
22045
|
+
return proposals.map((p) => `### ${p.debater}
|
|
22046
|
+
${p.output}`).join(`
|
|
22047
|
+
|
|
22048
|
+
`);
|
|
22049
|
+
}
|
|
22050
|
+
buildLabeledCritiquesSection(critiques) {
|
|
22051
|
+
if (critiques.length === 0)
|
|
22052
|
+
return "";
|
|
22053
|
+
return `
|
|
22054
|
+
|
|
22055
|
+
## Critiques
|
|
22056
|
+
${critiques.map((c, i) => `### Critique ${i + 1}
|
|
22057
|
+
${c}`).join(`
|
|
22058
|
+
|
|
22059
|
+
`)}`;
|
|
22060
|
+
}
|
|
21921
22061
|
}
|
|
21922
22062
|
var init_prompt_builder = __esm(() => {
|
|
21923
22063
|
init_personas();
|
|
@@ -21998,7 +22138,8 @@ async function runStateful(ctx, prompt) {
|
|
|
21998
22138
|
});
|
|
21999
22139
|
const debate = ctx.config?.debate;
|
|
22000
22140
|
const concurrencyLimit = debate?.maxConcurrentDebaters ?? 2;
|
|
22001
|
-
const
|
|
22141
|
+
const proposalBuilder = new DebatePromptBuilder({ taskContext: prompt, outputFormat: "", stage: ctx.stage }, { debaters: resolved.map((r) => r.debater), sessionMode: "stateful" });
|
|
22142
|
+
const proposalSettled = await allSettledBounded(resolved.map(({ debater, adapter }, debaterIdx) => () => runStatefulTurn(ctx, adapter, debater, proposalBuilder.buildProposalPrompt(debaterIdx), `debate-${ctx.stage}-${debaterIdx}`, config2.rounds > 1)), concurrencyLimit);
|
|
22002
22143
|
const successfulProposals = proposalSettled.filter((r) => r.status === "fulfilled").map((r) => r.value);
|
|
22003
22144
|
for (const r of proposalSettled) {
|
|
22004
22145
|
if (r.status === "fulfilled") {
|
|
@@ -22090,9 +22231,9 @@ async function runStateful(ctx, prompt) {
|
|
|
22090
22231
|
const proposalOutputs = successfulProposals.map((s) => s.output);
|
|
22091
22232
|
const fullResolverContext = ctx.resolverContextInput ? {
|
|
22092
22233
|
...ctx.resolverContextInput,
|
|
22093
|
-
labeledProposals: successfulProposals.map((s) => ({ debater: s.debater
|
|
22234
|
+
labeledProposals: successfulProposals.map((s) => ({ debater: buildDebaterLabel2(s.debater), output: s.output }))
|
|
22094
22235
|
} : undefined;
|
|
22095
|
-
const outcome = await resolveOutcome(proposalOutputs, critiqueOutputs, ctx.stageConfig, ctx.config, ctx.storyId, ctx.timeoutSeconds * 1000, ctx.workdir, ctx.featureName, ctx.reviewerSession, fullResolverContext);
|
|
22236
|
+
const outcome = await resolveOutcome(proposalOutputs, critiqueOutputs, ctx.stageConfig, ctx.config, ctx.storyId, ctx.timeoutSeconds * 1000, ctx.workdir, ctx.featureName, ctx.reviewerSession, fullResolverContext, undefined, successfulProposals.map((s) => s.debater));
|
|
22096
22237
|
totalCostUsd += outcome.resolverCostUsd;
|
|
22097
22238
|
const proposals = successfulProposals.map((s) => ({
|
|
22098
22239
|
debater: s.debater,
|
|
@@ -22241,9 +22382,9 @@ async function runHybrid(ctx, prompt) {
|
|
|
22241
22382
|
const critiqueOutputs = rebuttals.map((r) => r.output);
|
|
22242
22383
|
const fullResolverContext = ctx.resolverContextInput ? {
|
|
22243
22384
|
...ctx.resolverContextInput,
|
|
22244
|
-
labeledProposals: successfulProposals.map((s) => ({ debater: s.debater
|
|
22385
|
+
labeledProposals: successfulProposals.map((s) => ({ debater: buildDebaterLabel2(s.debater), output: s.output }))
|
|
22245
22386
|
} : undefined;
|
|
22246
|
-
const resolveResult = await resolveOutcome(proposalOutputs, critiqueOutputs, ctx.stageConfig, ctx.config, ctx.storyId, ctx.timeoutSeconds * 1000, ctx.workdir, ctx.featureName, ctx.reviewerSession, fullResolverContext);
|
|
22387
|
+
const resolveResult = await resolveOutcome(proposalOutputs, critiqueOutputs, ctx.stageConfig, ctx.config, ctx.storyId, ctx.timeoutSeconds * 1000, ctx.workdir, ctx.featureName, ctx.reviewerSession, fullResolverContext, undefined, successfulProposals.map((s) => s.debater));
|
|
22247
22388
|
totalCostUsd += resolveResult.resolverCostUsd;
|
|
22248
22389
|
return {
|
|
22249
22390
|
storyId: ctx.storyId,
|
|
@@ -22288,7 +22429,8 @@ async function runOneShot(ctx, prompt) {
|
|
|
22288
22429
|
});
|
|
22289
22430
|
const debate = ctx.config?.debate;
|
|
22290
22431
|
const concurrencyLimit = debate?.maxConcurrentDebaters ?? 2;
|
|
22291
|
-
const
|
|
22432
|
+
const proposalBuilder = new DebatePromptBuilder({ taskContext: prompt, outputFormat: "", stage: ctx.stage }, { debaters: resolved.map((r) => r.debater), sessionMode: "one-shot" });
|
|
22433
|
+
const proposalSettled = await allSettledBounded(resolved.map(({ debater, adapter }, i) => () => runComplete(adapter, proposalBuilder.buildProposalPrompt(i), {
|
|
22292
22434
|
model: resolveDebaterModel(debater, ctx.config),
|
|
22293
22435
|
featureName: ctx.stage,
|
|
22294
22436
|
config: ctx.config,
|
|
@@ -22393,9 +22535,9 @@ async function runOneShot(ctx, prompt) {
|
|
|
22393
22535
|
const proposalOutputs = successful.map((p) => p.output);
|
|
22394
22536
|
const fullResolverContext = ctx.resolverContextInput ? {
|
|
22395
22537
|
...ctx.resolverContextInput,
|
|
22396
|
-
labeledProposals: successful.map((p) => ({ debater: p.debater
|
|
22538
|
+
labeledProposals: successful.map((p) => ({ debater: buildDebaterLabel2(p.debater), output: p.output }))
|
|
22397
22539
|
} : undefined;
|
|
22398
|
-
const outcome = await resolveOutcome(proposalOutputs, critiqueOutputs, ctx.stageConfig, ctx.config, ctx.storyId, ctx.timeoutMs, ctx.workdir, ctx.featureName, ctx.reviewerSession, fullResolverContext);
|
|
22540
|
+
const outcome = await resolveOutcome(proposalOutputs, critiqueOutputs, ctx.stageConfig, ctx.config, ctx.storyId, ctx.timeoutMs, ctx.workdir, ctx.featureName, ctx.reviewerSession, fullResolverContext, undefined, successful.map((p) => p.debater));
|
|
22399
22541
|
totalCostUsd += outcome.resolverCostUsd;
|
|
22400
22542
|
const proposals = successful.map((p) => ({
|
|
22401
22543
|
debater: p.debater,
|
|
@@ -22553,8 +22695,21 @@ Do NOT output the JSON to the conversation. Write the file, then reply with a br
|
|
|
22553
22695
|
logger?.warn("debate", "hybrid mode requires sessionMode: stateful for plan \u2014 running as panel");
|
|
22554
22696
|
}
|
|
22555
22697
|
const resolverTimeoutMs = (ctx.stageConfig.timeoutSeconds ?? 600) * 1000;
|
|
22556
|
-
const
|
|
22557
|
-
|
|
22698
|
+
const specAnchor = opts.specContent ? `
|
|
22699
|
+
|
|
22700
|
+
## Original Spec
|
|
22701
|
+
|
|
22702
|
+
${opts.specContent}
|
|
22703
|
+
|
|
22704
|
+
## Synthesis Rules \u2014 Acceptance Criteria
|
|
22705
|
+
|
|
22706
|
+
The spec above is the authoritative source for acceptance criteria.
|
|
22707
|
+
- Each story's \`acceptanceCriteria\` array MUST contain only criteria that are explicitly stated or directly implied by the spec.
|
|
22708
|
+
- If a debater proposed criteria beyond the spec (edge cases, error handling, implementation details), place those in a separate \`suggestedCriteria\` array on the same story object.
|
|
22709
|
+
- Never silently merge debater-invented criteria into \`acceptanceCriteria\`. The distinction matters: \`acceptanceCriteria\` drives automated testing; \`suggestedCriteria\` is logged for human review.
|
|
22710
|
+
- Preserve the spec's AC wording. You may refine for clarity but must not change semantics.` : "";
|
|
22711
|
+
const planSynthesisSuffix = `IMPORTANT: Your response must be a single valid JSON object in PRD format (with project, feature, branchName, userStories array, etc.). Do NOT wrap it in markdown fences. Output raw JSON only.${specAnchor}`;
|
|
22712
|
+
const outcome = await resolveOutcome(proposalOutputs, critiqueOutputs, ctx.stageConfig, ctx.config, ctx.storyId, resolverTimeoutMs, opts.workdir, opts.feature, undefined, undefined, planSynthesisSuffix, successful.map((p) => p.debater));
|
|
22558
22713
|
const winningOutput = outcome.output ?? successful[0].output;
|
|
22559
22714
|
const proposals = successful.map((p) => ({ debater: p.debater, output: p.output }));
|
|
22560
22715
|
logger?.info("debate", "debate:result", {
|
|
@@ -24916,6 +25071,24 @@ function resolveAcceptanceTestCandidates(options) {
|
|
|
24916
25071
|
return [];
|
|
24917
25072
|
return [resolveAcceptanceFeatureTestPath(options.featureDir, options.testPathConfig, options.language)];
|
|
24918
25073
|
}
|
|
25074
|
+
function suggestedTestFilename(language) {
|
|
25075
|
+
switch (language?.toLowerCase()) {
|
|
25076
|
+
case "go":
|
|
25077
|
+
return ".nax-suggested_test.go";
|
|
25078
|
+
case "python":
|
|
25079
|
+
return ".nax-suggested.test.py";
|
|
25080
|
+
case "rust":
|
|
25081
|
+
return ".nax-suggested.rs";
|
|
25082
|
+
default:
|
|
25083
|
+
return ".nax-suggested.test.ts";
|
|
25084
|
+
}
|
|
25085
|
+
}
|
|
25086
|
+
function resolveSuggestedTestFile(language, testPathConfig) {
|
|
25087
|
+
return testPathConfig ?? suggestedTestFilename(language);
|
|
25088
|
+
}
|
|
25089
|
+
function resolveSuggestedPackageFeatureTestPath(packageDir, featureName, testPathConfig, language) {
|
|
25090
|
+
return path.join(packageDir, ".nax", "features", featureName, resolveSuggestedTestFile(language, testPathConfig));
|
|
25091
|
+
}
|
|
24919
25092
|
async function findExistingAcceptanceTestPath(options) {
|
|
24920
25093
|
const candidates = resolveAcceptanceTestCandidates(options);
|
|
24921
25094
|
for (const testPath of candidates) {
|
|
@@ -25782,7 +25955,8 @@ Rules:
|
|
|
25782
25955
|
- Every test MUST have real assertions that PASS when the feature is correctly implemented and FAIL when it is broken
|
|
25783
25956
|
- **Prefer behavioral tests** \u2014 import functions and call them rather than reading source files. For example, to verify "getPostRunActions() returns empty array", import PluginRegistry and call getPostRunActions(), don't grep the source file for the method name.
|
|
25784
25957
|
- **File output (REQUIRED)**: Write the acceptance test file DIRECTLY to the path shown below. Do NOT output the test code in your response. After writing the file, reply with a brief confirmation.
|
|
25785
|
-
- **Path anchor (CRITICAL)**: Write the test file to this exact path: \`${join16(options.workdir, ".nax", "features", options.featureName, resolveAcceptanceTestFile2(options.language, options.config?.acceptance?.testPath))}\`. Import from package sources using relative paths like \`../../../src/...\` (3 levels up from \`.nax/features/<name>/\` to the package root)
|
|
25958
|
+
- **Path anchor (CRITICAL)**: Write the test file to this exact path: \`${options.targetTestFile ?? join16(options.workdir, ".nax", "features", options.featureName, resolveAcceptanceTestFile2(options.language, options.config?.acceptance?.testPath))}\`. Import from package sources using relative paths like \`../../../src/...\` (3 levels up from \`.nax/features/<name>/\` to the package root).
|
|
25959
|
+
- **Process cwd**: When spawning child processes to invoke a CLI or binary, set the working directory to the **package root** (\`join(import.meta.dir, "../../..")\`) as your default \u2014 unless your Step 2 exploration reveals the CLI uses a different working directory convention (e.g. reads config from \`~/.config/\`, or resolves paths relative to a flag value). Always check how the CLI resolves file paths before assuming.`;
|
|
25786
25960
|
const implementationSection = options.implementationContext && options.implementationContext.length > 0 ? `
|
|
25787
25961
|
|
|
25788
25962
|
## Implementation (already exists)
|
|
@@ -25814,7 +25988,7 @@ Previous test failed because: ${options.previousFailure}` : "";
|
|
|
25814
25988
|
outputPreview: rawOutput.slice(0, 300)
|
|
25815
25989
|
});
|
|
25816
25990
|
if (!testCode) {
|
|
25817
|
-
const targetPath = join16(options.workdir, ".nax", "features", options.featureName, resolveAcceptanceTestFile2(options.language, options.config?.acceptance?.testPath));
|
|
25991
|
+
const targetPath = options.targetTestFile ?? join16(options.workdir, ".nax", "features", options.featureName, resolveAcceptanceTestFile2(options.language, options.config?.acceptance?.testPath));
|
|
25818
25992
|
const backupPath = `${targetPath}.llm-recovery.bak`;
|
|
25819
25993
|
let recoveryFailed = false;
|
|
25820
25994
|
logger.debug("acceptance", "BUG-076 recovery: checking for agent-written file", {
|
|
@@ -26179,156 +26353,6 @@ function logTestOutput(logger, stage, output, opts = {}) {
|
|
|
26179
26353
|
});
|
|
26180
26354
|
}
|
|
26181
26355
|
|
|
26182
|
-
// src/pipeline/stages/acceptance.ts
|
|
26183
|
-
var exports_acceptance = {};
|
|
26184
|
-
__export(exports_acceptance, {
|
|
26185
|
-
acceptanceStage: () => acceptanceStage
|
|
26186
|
-
});
|
|
26187
|
-
function parseTestFailures(output) {
|
|
26188
|
-
const failedACs = [];
|
|
26189
|
-
const lines = output.split(`
|
|
26190
|
-
`);
|
|
26191
|
-
for (const line of lines) {
|
|
26192
|
-
if (line.includes("(fail)")) {
|
|
26193
|
-
const acMatch = line.match(/(AC-\d+):/i);
|
|
26194
|
-
if (acMatch) {
|
|
26195
|
-
const acId = acMatch[1].toUpperCase();
|
|
26196
|
-
if (!failedACs.includes(acId)) {
|
|
26197
|
-
failedACs.push(acId);
|
|
26198
|
-
}
|
|
26199
|
-
}
|
|
26200
|
-
}
|
|
26201
|
-
}
|
|
26202
|
-
return failedACs;
|
|
26203
|
-
}
|
|
26204
|
-
function areAllStoriesComplete(ctx) {
|
|
26205
|
-
const counts = countStories(ctx.prd);
|
|
26206
|
-
const totalComplete = counts.passed + counts.failed + counts.skipped;
|
|
26207
|
-
return totalComplete === counts.total;
|
|
26208
|
-
}
|
|
26209
|
-
var acceptanceStage;
|
|
26210
|
-
var init_acceptance = __esm(() => {
|
|
26211
|
-
init_generator();
|
|
26212
|
-
init_test_path();
|
|
26213
|
-
init_logger2();
|
|
26214
|
-
init_prd();
|
|
26215
|
-
acceptanceStage = {
|
|
26216
|
-
name: "acceptance",
|
|
26217
|
-
enabled(ctx) {
|
|
26218
|
-
if (!ctx.config.acceptance.enabled) {
|
|
26219
|
-
return false;
|
|
26220
|
-
}
|
|
26221
|
-
if (!areAllStoriesComplete(ctx)) {
|
|
26222
|
-
return false;
|
|
26223
|
-
}
|
|
26224
|
-
return true;
|
|
26225
|
-
},
|
|
26226
|
-
async execute(ctx) {
|
|
26227
|
-
const logger = getLogger();
|
|
26228
|
-
logger.info("acceptance", "Running acceptance tests", { storyId: ctx.story.id });
|
|
26229
|
-
if (!ctx.featureDir) {
|
|
26230
|
-
logger.warn("acceptance", "No feature directory \u2014 skipping acceptance tests", { storyId: ctx.story.id });
|
|
26231
|
-
return { action: "continue" };
|
|
26232
|
-
}
|
|
26233
|
-
const testGroups = ctx.acceptanceTestPaths ?? [
|
|
26234
|
-
{
|
|
26235
|
-
testPath: resolveAcceptanceFeatureTestPath(ctx.featureDir, ctx.config.acceptance.testPath, ctx.config.project?.language),
|
|
26236
|
-
packageDir: ctx.workdir
|
|
26237
|
-
}
|
|
26238
|
-
];
|
|
26239
|
-
const allFailedACs = [];
|
|
26240
|
-
const allOutputParts = [];
|
|
26241
|
-
let anyError = false;
|
|
26242
|
-
let errorExitCode = 0;
|
|
26243
|
-
for (const { testPath, packageDir } of testGroups) {
|
|
26244
|
-
const testFile = Bun.file(testPath);
|
|
26245
|
-
const exists = await testFile.exists();
|
|
26246
|
-
if (!exists) {
|
|
26247
|
-
logger.warn("acceptance", "Acceptance test file not found \u2014 skipping", { storyId: ctx.story.id, testPath });
|
|
26248
|
-
continue;
|
|
26249
|
-
}
|
|
26250
|
-
const testCmdParts = buildAcceptanceRunCommand(testPath, ctx.config.project?.testFramework, ctx.config.acceptance.command);
|
|
26251
|
-
logger.info("acceptance", "Running acceptance command", {
|
|
26252
|
-
storyId: ctx.story.id,
|
|
26253
|
-
cmd: testCmdParts.join(" "),
|
|
26254
|
-
packageDir
|
|
26255
|
-
});
|
|
26256
|
-
const proc = Bun.spawn(testCmdParts, {
|
|
26257
|
-
cwd: packageDir,
|
|
26258
|
-
stdout: "pipe",
|
|
26259
|
-
stderr: "pipe"
|
|
26260
|
-
});
|
|
26261
|
-
const [exitCode, stdout, stderr] = await Promise.all([
|
|
26262
|
-
proc.exited,
|
|
26263
|
-
new Response(proc.stdout).text(),
|
|
26264
|
-
new Response(proc.stderr).text()
|
|
26265
|
-
]);
|
|
26266
|
-
const output = `${stdout}
|
|
26267
|
-
${stderr}`;
|
|
26268
|
-
allOutputParts.push(output);
|
|
26269
|
-
const failedACs = parseTestFailures(output);
|
|
26270
|
-
const overrides = ctx.prd.acceptanceOverrides ?? {};
|
|
26271
|
-
const actualFailures = failedACs.filter((acId) => !overrides[acId]);
|
|
26272
|
-
const overriddenFailures = failedACs.filter((acId) => overrides[acId]);
|
|
26273
|
-
if (overriddenFailures.length > 0) {
|
|
26274
|
-
logger.warn("acceptance", "Skipped failures (overridden)", {
|
|
26275
|
-
storyId: ctx.story.id,
|
|
26276
|
-
overriddenFailures,
|
|
26277
|
-
overrides: overriddenFailures.map((acId) => ({ acId, reason: overrides[acId] }))
|
|
26278
|
-
});
|
|
26279
|
-
}
|
|
26280
|
-
if (failedACs.length === 0 && exitCode !== 0) {
|
|
26281
|
-
logger.error("acceptance", "Tests errored with no AC failures parsed", {
|
|
26282
|
-
storyId: ctx.story.id,
|
|
26283
|
-
exitCode,
|
|
26284
|
-
packageDir
|
|
26285
|
-
});
|
|
26286
|
-
logTestOutput(logger, "acceptance", output);
|
|
26287
|
-
anyError = true;
|
|
26288
|
-
errorExitCode = exitCode;
|
|
26289
|
-
allFailedACs.push("AC-ERROR");
|
|
26290
|
-
continue;
|
|
26291
|
-
}
|
|
26292
|
-
for (const acId of actualFailures) {
|
|
26293
|
-
if (!allFailedACs.includes(acId)) {
|
|
26294
|
-
allFailedACs.push(acId);
|
|
26295
|
-
}
|
|
26296
|
-
}
|
|
26297
|
-
if (actualFailures.length > 0) {
|
|
26298
|
-
logger.error("acceptance", "Acceptance tests failed", {
|
|
26299
|
-
storyId: ctx.story.id,
|
|
26300
|
-
failedACs: actualFailures,
|
|
26301
|
-
packageDir
|
|
26302
|
-
});
|
|
26303
|
-
logTestOutput(logger, "acceptance", output);
|
|
26304
|
-
} else if (exitCode === 0) {
|
|
26305
|
-
logger.info("acceptance", "Package acceptance tests passed", { storyId: ctx.story.id, packageDir });
|
|
26306
|
-
}
|
|
26307
|
-
}
|
|
26308
|
-
const combinedOutput = allOutputParts.join(`
|
|
26309
|
-
`);
|
|
26310
|
-
if (allFailedACs.length === 0) {
|
|
26311
|
-
logger.info("acceptance", "All acceptance tests passed", { storyId: ctx.story.id });
|
|
26312
|
-
return { action: "continue" };
|
|
26313
|
-
}
|
|
26314
|
-
ctx.acceptanceFailures = {
|
|
26315
|
-
failedACs: allFailedACs,
|
|
26316
|
-
testOutput: combinedOutput
|
|
26317
|
-
};
|
|
26318
|
-
if (anyError) {
|
|
26319
|
-
return {
|
|
26320
|
-
action: "fail",
|
|
26321
|
-
reason: `Acceptance tests errored (exit code ${errorExitCode}): syntax error, import failure, or unhandled exception`
|
|
26322
|
-
};
|
|
26323
|
-
}
|
|
26324
|
-
return {
|
|
26325
|
-
action: "fail",
|
|
26326
|
-
reason: `Acceptance tests failed: ${allFailedACs.join(", ")}`
|
|
26327
|
-
};
|
|
26328
|
-
}
|
|
26329
|
-
};
|
|
26330
|
-
});
|
|
26331
|
-
|
|
26332
26356
|
// src/acceptance/refinement.ts
|
|
26333
26357
|
var exports_refinement = {};
|
|
26334
26358
|
__export(exports_refinement, {
|
|
@@ -26495,6 +26519,315 @@ var init_refinement = __esm(() => {
|
|
|
26495
26519
|
};
|
|
26496
26520
|
});
|
|
26497
26521
|
|
|
26522
|
+
// src/acceptance/hardening.ts
|
|
26523
|
+
var exports_hardening = {};
|
|
26524
|
+
__export(exports_hardening, {
|
|
26525
|
+
runHardeningPass: () => runHardeningPass,
|
|
26526
|
+
_hardeningDeps: () => _hardeningDeps
|
|
26527
|
+
});
|
|
26528
|
+
async function runHardeningPass(ctx) {
|
|
26529
|
+
const logger = getSafeLogger();
|
|
26530
|
+
const result = { promoted: [], discarded: [], costUsd: 0 };
|
|
26531
|
+
const storiesWithSuggested = ctx.prd.userStories.filter((s) => s.suggestedCriteria && s.suggestedCriteria.length > 0);
|
|
26532
|
+
if (storiesWithSuggested.length === 0)
|
|
26533
|
+
return result;
|
|
26534
|
+
logger?.info("acceptance", "Starting hardening pass", {
|
|
26535
|
+
storyId: storiesWithSuggested[0].id,
|
|
26536
|
+
storiesWithSuggested: storiesWithSuggested.length,
|
|
26537
|
+
totalSuggestedACs: storiesWithSuggested.reduce((n, s) => n + (s.suggestedCriteria?.length ?? 0), 0)
|
|
26538
|
+
});
|
|
26539
|
+
try {
|
|
26540
|
+
const allRefined = [];
|
|
26541
|
+
for (const story of storiesWithSuggested) {
|
|
26542
|
+
const criteria = story.suggestedCriteria ?? [];
|
|
26543
|
+
const refined = await _hardeningDeps.refine(criteria, {
|
|
26544
|
+
storyId: story.id,
|
|
26545
|
+
featureName: ctx.prd.feature,
|
|
26546
|
+
workdir: ctx.workdir,
|
|
26547
|
+
codebaseContext: "",
|
|
26548
|
+
config: ctx.config
|
|
26549
|
+
});
|
|
26550
|
+
allRefined.push(...refined);
|
|
26551
|
+
}
|
|
26552
|
+
const language = ctx.config.project?.language;
|
|
26553
|
+
const suggestedTestPath = resolveSuggestedPackageFeatureTestPath(ctx.workdir, ctx.prd.feature, ctx.config.acceptance?.suggestedTestPath, language);
|
|
26554
|
+
let modelDef;
|
|
26555
|
+
try {
|
|
26556
|
+
modelDef = resolveModelForAgent(ctx.config.models, ctx.config.autoMode?.defaultAgent ?? "claude", ctx.config.acceptance?.model ?? "fast", ctx.config.autoMode?.defaultAgent ?? "claude");
|
|
26557
|
+
} catch {
|
|
26558
|
+
modelDef = { provider: "anthropic", model: "claude-haiku-4-5-20251001" };
|
|
26559
|
+
}
|
|
26560
|
+
const genResult = await _hardeningDeps.generate(storiesWithSuggested, allRefined, {
|
|
26561
|
+
featureName: ctx.prd.feature,
|
|
26562
|
+
workdir: ctx.workdir,
|
|
26563
|
+
featureDir: ctx.featureDir,
|
|
26564
|
+
codebaseContext: "",
|
|
26565
|
+
modelTier: ctx.config.acceptance?.model ?? "fast",
|
|
26566
|
+
modelDef,
|
|
26567
|
+
config: ctx.config,
|
|
26568
|
+
language,
|
|
26569
|
+
targetTestFile: suggestedTestPath
|
|
26570
|
+
});
|
|
26571
|
+
if (genResult.testCode) {
|
|
26572
|
+
await _hardeningDeps.writeFile(suggestedTestPath, genResult.testCode);
|
|
26573
|
+
}
|
|
26574
|
+
const testCmd = buildAcceptanceRunCommand(suggestedTestPath, ctx.config.project?.testFramework, ctx.config.acceptance?.command);
|
|
26575
|
+
const proc = _hardeningDeps.spawn(testCmd, {
|
|
26576
|
+
cwd: ctx.workdir,
|
|
26577
|
+
stdout: "pipe",
|
|
26578
|
+
stderr: "pipe"
|
|
26579
|
+
});
|
|
26580
|
+
const [exitCode, stdout, stderr] = await Promise.all([
|
|
26581
|
+
proc.exited,
|
|
26582
|
+
new Response(proc.stdout).text(),
|
|
26583
|
+
new Response(proc.stderr).text()
|
|
26584
|
+
]);
|
|
26585
|
+
const output = `${stdout}
|
|
26586
|
+
${stderr}`;
|
|
26587
|
+
const failedACs = parseTestFailures(output);
|
|
26588
|
+
const failedSet = new Set(failedACs.map((ac) => ac.toUpperCase()));
|
|
26589
|
+
let acIndex = 0;
|
|
26590
|
+
for (const story of storiesWithSuggested) {
|
|
26591
|
+
const suggested = story.suggestedCriteria ?? [];
|
|
26592
|
+
const toPromote = [];
|
|
26593
|
+
const toDiscard = [];
|
|
26594
|
+
for (const criterion of suggested) {
|
|
26595
|
+
acIndex++;
|
|
26596
|
+
const acId = `AC-${acIndex}`;
|
|
26597
|
+
if (failedSet.has(acId) || exitCode !== 0 && failedACs.length === 0) {
|
|
26598
|
+
toDiscard.push(criterion);
|
|
26599
|
+
} else {
|
|
26600
|
+
toPromote.push(criterion);
|
|
26601
|
+
}
|
|
26602
|
+
}
|
|
26603
|
+
if (toPromote.length > 0) {
|
|
26604
|
+
story.acceptanceCriteria = [...story.acceptanceCriteria, ...toPromote];
|
|
26605
|
+
result.promoted.push(...toPromote);
|
|
26606
|
+
}
|
|
26607
|
+
result.discarded.push(...toDiscard);
|
|
26608
|
+
story.suggestedCriteria = toDiscard.length > 0 ? toDiscard : undefined;
|
|
26609
|
+
}
|
|
26610
|
+
if (result.promoted.length > 0) {
|
|
26611
|
+
await _hardeningDeps.savePRD(ctx.prd, ctx.prdPath);
|
|
26612
|
+
}
|
|
26613
|
+
logger?.info("acceptance", "Hardening pass complete", {
|
|
26614
|
+
storyId: storiesWithSuggested[0].id,
|
|
26615
|
+
promoted: result.promoted.length,
|
|
26616
|
+
discarded: result.discarded.length,
|
|
26617
|
+
costUsd: result.costUsd
|
|
26618
|
+
});
|
|
26619
|
+
} catch (err) {
|
|
26620
|
+
logger?.warn("acceptance", "Hardening pass failed (non-blocking)", {
|
|
26621
|
+
storyId: storiesWithSuggested[0].id,
|
|
26622
|
+
error: err instanceof Error ? err.message : String(err)
|
|
26623
|
+
});
|
|
26624
|
+
}
|
|
26625
|
+
return result;
|
|
26626
|
+
}
|
|
26627
|
+
var _hardeningDeps;
|
|
26628
|
+
var init_hardening = __esm(() => {
|
|
26629
|
+
init_config();
|
|
26630
|
+
init_logger2();
|
|
26631
|
+
init_acceptance();
|
|
26632
|
+
init_prd();
|
|
26633
|
+
init_generator();
|
|
26634
|
+
init_generator();
|
|
26635
|
+
init_refinement();
|
|
26636
|
+
init_test_path();
|
|
26637
|
+
_hardeningDeps = {
|
|
26638
|
+
refine: refineAcceptanceCriteria,
|
|
26639
|
+
generate: generateFromPRD,
|
|
26640
|
+
savePRD,
|
|
26641
|
+
spawn: Bun.spawn,
|
|
26642
|
+
writeFile: async (p, c) => {
|
|
26643
|
+
await Bun.write(p, c);
|
|
26644
|
+
}
|
|
26645
|
+
};
|
|
26646
|
+
});
|
|
26647
|
+
|
|
26648
|
+
// src/pipeline/stages/acceptance.ts
|
|
26649
|
+
var exports_acceptance = {};
|
|
26650
|
+
__export(exports_acceptance, {
|
|
26651
|
+
parseTestFailures: () => parseTestFailures,
|
|
26652
|
+
acceptanceStage: () => acceptanceStage,
|
|
26653
|
+
_acceptanceStageDeps: () => _acceptanceStageDeps
|
|
26654
|
+
});
|
|
26655
|
+
function parseTestFailures(output) {
|
|
26656
|
+
const failedACs = [];
|
|
26657
|
+
const lines = output.split(`
|
|
26658
|
+
`);
|
|
26659
|
+
for (const line of lines) {
|
|
26660
|
+
if (line.includes("(fail)")) {
|
|
26661
|
+
const acMatch = line.match(/(AC-\d+):/i);
|
|
26662
|
+
if (acMatch) {
|
|
26663
|
+
const acId = acMatch[1].toUpperCase();
|
|
26664
|
+
if (!failedACs.includes(acId)) {
|
|
26665
|
+
failedACs.push(acId);
|
|
26666
|
+
}
|
|
26667
|
+
}
|
|
26668
|
+
}
|
|
26669
|
+
}
|
|
26670
|
+
return failedACs;
|
|
26671
|
+
}
|
|
26672
|
+
function areAllStoriesComplete(ctx) {
|
|
26673
|
+
const counts = countStories(ctx.prd);
|
|
26674
|
+
const totalComplete = counts.passed + counts.failed + counts.skipped;
|
|
26675
|
+
return totalComplete === counts.total;
|
|
26676
|
+
}
|
|
26677
|
+
var _acceptanceStageDeps, acceptanceStage;
|
|
26678
|
+
var init_acceptance = __esm(() => {
|
|
26679
|
+
init_generator();
|
|
26680
|
+
init_test_path();
|
|
26681
|
+
init_logger2();
|
|
26682
|
+
init_prd();
|
|
26683
|
+
_acceptanceStageDeps = {
|
|
26684
|
+
runHardeningPass: async (ctx) => {
|
|
26685
|
+
const { runHardeningPass: runHardeningPass2 } = await Promise.resolve().then(() => (init_hardening(), exports_hardening));
|
|
26686
|
+
return runHardeningPass2(ctx);
|
|
26687
|
+
}
|
|
26688
|
+
};
|
|
26689
|
+
acceptanceStage = {
|
|
26690
|
+
name: "acceptance",
|
|
26691
|
+
enabled(ctx) {
|
|
26692
|
+
if (!ctx.config.acceptance.enabled) {
|
|
26693
|
+
return false;
|
|
26694
|
+
}
|
|
26695
|
+
if (!areAllStoriesComplete(ctx)) {
|
|
26696
|
+
return false;
|
|
26697
|
+
}
|
|
26698
|
+
return true;
|
|
26699
|
+
},
|
|
26700
|
+
async execute(ctx) {
|
|
26701
|
+
const logger = getLogger();
|
|
26702
|
+
logger.info("acceptance", "Running acceptance tests", { storyId: ctx.story.id });
|
|
26703
|
+
if (!ctx.featureDir) {
|
|
26704
|
+
logger.warn("acceptance", "No feature directory \u2014 skipping acceptance tests", { storyId: ctx.story.id });
|
|
26705
|
+
return { action: "continue" };
|
|
26706
|
+
}
|
|
26707
|
+
const testGroups = ctx.acceptanceTestPaths ?? [
|
|
26708
|
+
{
|
|
26709
|
+
testPath: resolveAcceptanceFeatureTestPath(ctx.featureDir, ctx.config.acceptance.testPath, ctx.config.project?.language),
|
|
26710
|
+
packageDir: ctx.workdir
|
|
26711
|
+
}
|
|
26712
|
+
];
|
|
26713
|
+
const allFailedACs = [];
|
|
26714
|
+
const allOutputParts = [];
|
|
26715
|
+
let anyError = false;
|
|
26716
|
+
let errorExitCode = 0;
|
|
26717
|
+
for (const { testPath, packageDir } of testGroups) {
|
|
26718
|
+
const testFile = Bun.file(testPath);
|
|
26719
|
+
const exists = await testFile.exists();
|
|
26720
|
+
if (!exists) {
|
|
26721
|
+
logger.warn("acceptance", "Acceptance test file not found \u2014 skipping", { storyId: ctx.story.id, testPath });
|
|
26722
|
+
continue;
|
|
26723
|
+
}
|
|
26724
|
+
const testCmdParts = buildAcceptanceRunCommand(testPath, ctx.config.project?.testFramework, ctx.config.acceptance.command);
|
|
26725
|
+
logger.info("acceptance", "Running acceptance command", {
|
|
26726
|
+
storyId: ctx.story.id,
|
|
26727
|
+
cmd: testCmdParts.join(" "),
|
|
26728
|
+
packageDir
|
|
26729
|
+
});
|
|
26730
|
+
const proc = Bun.spawn(testCmdParts, {
|
|
26731
|
+
cwd: packageDir,
|
|
26732
|
+
stdout: "pipe",
|
|
26733
|
+
stderr: "pipe"
|
|
26734
|
+
});
|
|
26735
|
+
const [exitCode, stdout, stderr] = await Promise.all([
|
|
26736
|
+
proc.exited,
|
|
26737
|
+
new Response(proc.stdout).text(),
|
|
26738
|
+
new Response(proc.stderr).text()
|
|
26739
|
+
]);
|
|
26740
|
+
const output = `${stdout}
|
|
26741
|
+
${stderr}`;
|
|
26742
|
+
allOutputParts.push(output);
|
|
26743
|
+
const failedACs = parseTestFailures(output);
|
|
26744
|
+
const overrides = ctx.prd.acceptanceOverrides ?? {};
|
|
26745
|
+
const actualFailures = failedACs.filter((acId) => !overrides[acId]);
|
|
26746
|
+
const overriddenFailures = failedACs.filter((acId) => overrides[acId]);
|
|
26747
|
+
if (overriddenFailures.length > 0) {
|
|
26748
|
+
logger.warn("acceptance", "Skipped failures (overridden)", {
|
|
26749
|
+
storyId: ctx.story.id,
|
|
26750
|
+
overriddenFailures,
|
|
26751
|
+
overrides: overriddenFailures.map((acId) => ({ acId, reason: overrides[acId] }))
|
|
26752
|
+
});
|
|
26753
|
+
}
|
|
26754
|
+
if (failedACs.length === 0 && exitCode !== 0) {
|
|
26755
|
+
logger.error("acceptance", "Tests errored with no AC failures parsed", {
|
|
26756
|
+
storyId: ctx.story.id,
|
|
26757
|
+
exitCode,
|
|
26758
|
+
packageDir
|
|
26759
|
+
});
|
|
26760
|
+
logTestOutput(logger, "acceptance", output);
|
|
26761
|
+
anyError = true;
|
|
26762
|
+
errorExitCode = exitCode;
|
|
26763
|
+
allFailedACs.push("AC-ERROR");
|
|
26764
|
+
continue;
|
|
26765
|
+
}
|
|
26766
|
+
for (const acId of actualFailures) {
|
|
26767
|
+
if (!allFailedACs.includes(acId)) {
|
|
26768
|
+
allFailedACs.push(acId);
|
|
26769
|
+
}
|
|
26770
|
+
}
|
|
26771
|
+
if (actualFailures.length > 0) {
|
|
26772
|
+
logger.error("acceptance", "Acceptance tests failed", {
|
|
26773
|
+
storyId: ctx.story.id,
|
|
26774
|
+
failedACs: actualFailures,
|
|
26775
|
+
packageDir
|
|
26776
|
+
});
|
|
26777
|
+
logTestOutput(logger, "acceptance", output);
|
|
26778
|
+
} else if (exitCode === 0) {
|
|
26779
|
+
logger.info("acceptance", "Package acceptance tests passed", { storyId: ctx.story.id, packageDir });
|
|
26780
|
+
}
|
|
26781
|
+
}
|
|
26782
|
+
const combinedOutput = allOutputParts.join(`
|
|
26783
|
+
`);
|
|
26784
|
+
if (allFailedACs.length === 0) {
|
|
26785
|
+
logger.info("acceptance", "All acceptance tests passed", { storyId: ctx.story.id });
|
|
26786
|
+
const hardeningEnabled = ctx.config.acceptance?.hardening?.enabled !== false;
|
|
26787
|
+
const hasAnySuggested = ctx.prd.userStories.some((s) => s.suggestedCriteria && s.suggestedCriteria.length > 0);
|
|
26788
|
+
if (hardeningEnabled && hasAnySuggested && ctx.featureDir) {
|
|
26789
|
+
try {
|
|
26790
|
+
const prdPath = ctx.prdPath ?? `${ctx.featureDir}/prd.json`;
|
|
26791
|
+
const result = await _acceptanceStageDeps.runHardeningPass({
|
|
26792
|
+
prd: ctx.prd,
|
|
26793
|
+
prdPath,
|
|
26794
|
+
featureDir: ctx.featureDir,
|
|
26795
|
+
workdir: ctx.workdir,
|
|
26796
|
+
config: ctx.config,
|
|
26797
|
+
agentGetFn: ctx.agentGetFn
|
|
26798
|
+
});
|
|
26799
|
+
logger.info("acceptance", "Hardening pass complete", {
|
|
26800
|
+
storyId: ctx.story.id,
|
|
26801
|
+
promoted: result.promoted.length,
|
|
26802
|
+
discarded: result.discarded.length
|
|
26803
|
+
});
|
|
26804
|
+
} catch (err) {
|
|
26805
|
+
logger.warn("acceptance", "Hardening pass failed (non-blocking)", {
|
|
26806
|
+
storyId: ctx.story.id,
|
|
26807
|
+
error: err instanceof Error ? err.message : String(err)
|
|
26808
|
+
});
|
|
26809
|
+
}
|
|
26810
|
+
}
|
|
26811
|
+
return { action: "continue" };
|
|
26812
|
+
}
|
|
26813
|
+
ctx.acceptanceFailures = {
|
|
26814
|
+
failedACs: allFailedACs,
|
|
26815
|
+
testOutput: combinedOutput
|
|
26816
|
+
};
|
|
26817
|
+
if (anyError) {
|
|
26818
|
+
return {
|
|
26819
|
+
action: "fail",
|
|
26820
|
+
reason: `Acceptance tests errored (exit code ${errorExitCode}): syntax error, import failure, or unhandled exception`
|
|
26821
|
+
};
|
|
26822
|
+
}
|
|
26823
|
+
return {
|
|
26824
|
+
action: "fail",
|
|
26825
|
+
reason: `Acceptance tests failed: ${allFailedACs.join(", ")}`
|
|
26826
|
+
};
|
|
26827
|
+
}
|
|
26828
|
+
};
|
|
26829
|
+
});
|
|
26830
|
+
|
|
26498
26831
|
// src/pipeline/stages/acceptance-setup.ts
|
|
26499
26832
|
var exports_acceptance_setup = {};
|
|
26500
26833
|
__export(exports_acceptance_setup, {
|
|
@@ -27118,132 +27451,6 @@ var init_agents = __esm(() => {
|
|
|
27118
27451
|
init_errors();
|
|
27119
27452
|
});
|
|
27120
27453
|
|
|
27121
|
-
// src/review/dialogue-prompts.ts
|
|
27122
|
-
function buildReviewPrompt(diff, story, _semanticConfig) {
|
|
27123
|
-
const criteria = story.acceptanceCriteria.map((c) => `- ${c}`).join(`
|
|
27124
|
-
`);
|
|
27125
|
-
return [
|
|
27126
|
-
`Review the following code diff for story ${story.id}: ${story.title}`,
|
|
27127
|
-
"",
|
|
27128
|
-
"## Acceptance Criteria",
|
|
27129
|
-
criteria,
|
|
27130
|
-
"",
|
|
27131
|
-
"## Diff",
|
|
27132
|
-
diff,
|
|
27133
|
-
"",
|
|
27134
|
-
"Also flag any changes in the diff not required by the acceptance criteria above as out-of-scope findings.",
|
|
27135
|
-
"Respond with JSON: { passed: boolean, findings: [...], findingReasoning: { [id]: string } }"
|
|
27136
|
-
].join(`
|
|
27137
|
-
`);
|
|
27138
|
-
}
|
|
27139
|
-
function buildReReviewPrompt(updatedDiff, previousFindings) {
|
|
27140
|
-
const findingsList = previousFindings.length > 0 ? previousFindings.map((f) => `- ${f.ruleId}: ${f.message}`).join(`
|
|
27141
|
-
`) : "(none)";
|
|
27142
|
-
return [
|
|
27143
|
-
"This is a follow-up re-review. Please review the updated diff below.",
|
|
27144
|
-
"",
|
|
27145
|
-
"## Previous Findings",
|
|
27146
|
-
findingsList,
|
|
27147
|
-
"",
|
|
27148
|
-
"## Updated Diff",
|
|
27149
|
-
updatedDiff,
|
|
27150
|
-
"",
|
|
27151
|
-
"Respond with JSON: { passed: boolean, findings: [...], findingReasoning: { [id]: string }, deltaSummary: string }",
|
|
27152
|
-
"deltaSummary should describe which previous findings are resolved vs still present."
|
|
27153
|
-
].join(`
|
|
27154
|
-
`);
|
|
27155
|
-
}
|
|
27156
|
-
function buildProposalsSection2(proposals) {
|
|
27157
|
-
return proposals.map((p) => `### ${p.debater}
|
|
27158
|
-
${p.output}`).join(`
|
|
27159
|
-
|
|
27160
|
-
`);
|
|
27161
|
-
}
|
|
27162
|
-
function buildCritiquesSection(critiques) {
|
|
27163
|
-
if (critiques.length === 0)
|
|
27164
|
-
return "";
|
|
27165
|
-
return `
|
|
27166
|
-
|
|
27167
|
-
## Critiques
|
|
27168
|
-
${critiques.map((c, i) => `### Critique ${i + 1}
|
|
27169
|
-
${c}`).join(`
|
|
27170
|
-
|
|
27171
|
-
`)}`;
|
|
27172
|
-
}
|
|
27173
|
-
function buildVoteTallyLine(ctx) {
|
|
27174
|
-
if (!ctx.majorityVote)
|
|
27175
|
-
return "";
|
|
27176
|
-
const { passCount, failCount } = ctx.majorityVote;
|
|
27177
|
-
const failOpenNote = ctx.resolverType === "majority-fail-open" ? " (unparseable proposals count as pass)" : " (unparseable proposals count as fail)";
|
|
27178
|
-
return `
|
|
27179
|
-
|
|
27180
|
-
The preliminary majority vote is: **${passCount} passed, ${failCount} failed**${failOpenNote}. Verify the failing findings with tools before giving your authoritative verdict.`;
|
|
27181
|
-
}
|
|
27182
|
-
function buildResolverFraming(ctx) {
|
|
27183
|
-
switch (ctx.resolverType) {
|
|
27184
|
-
case "majority-fail-closed":
|
|
27185
|
-
case "majority-fail-open":
|
|
27186
|
-
return "You are the authoritative reviewer resolving a debate. A preliminary vote was taken \u2014 see tally below. Verify disputed findings using tools (READ files, GREP for usage) and give your final verdict.";
|
|
27187
|
-
case "synthesis":
|
|
27188
|
-
return "You are a synthesis reviewer. Synthesize the debater proposals into a single, coherent, tool-verified verdict. Use READ and GREP to verify claims before ruling.";
|
|
27189
|
-
case "custom":
|
|
27190
|
-
return "You are the judge. Evaluate the debater proposals independently. Verify claims with tools (READ, GREP) and give your final authoritative verdict.";
|
|
27191
|
-
default:
|
|
27192
|
-
return "You are the reviewer. Evaluate the debater proposals and give your final authoritative verdict.";
|
|
27193
|
-
}
|
|
27194
|
-
}
|
|
27195
|
-
function buildDebateResolverPrompt(proposals, critiques, diff, story, _semanticConfig, resolverContext) {
|
|
27196
|
-
const criteria = story.acceptanceCriteria.map((c) => `- ${c}`).join(`
|
|
27197
|
-
`);
|
|
27198
|
-
const framing = buildResolverFraming(resolverContext);
|
|
27199
|
-
const voteTally = buildVoteTallyLine(resolverContext);
|
|
27200
|
-
const proposalsSection = buildProposalsSection2(proposals);
|
|
27201
|
-
const critiquesSection = buildCritiquesSection(critiques);
|
|
27202
|
-
return [
|
|
27203
|
-
framing,
|
|
27204
|
-
"",
|
|
27205
|
-
`## Story ${story.id}: ${story.title}`,
|
|
27206
|
-
"",
|
|
27207
|
-
"## Acceptance Criteria",
|
|
27208
|
-
criteria,
|
|
27209
|
-
"",
|
|
27210
|
-
"## Debater Proposals",
|
|
27211
|
-
proposalsSection,
|
|
27212
|
-
critiquesSection,
|
|
27213
|
-
"",
|
|
27214
|
-
"## Diff",
|
|
27215
|
-
diff,
|
|
27216
|
-
voteTally,
|
|
27217
|
-
"",
|
|
27218
|
-
"Respond with JSON: { passed: boolean, findings: [...], findingReasoning: { [id]: string } }"
|
|
27219
|
-
].filter((line) => line !== undefined).join(`
|
|
27220
|
-
`);
|
|
27221
|
-
}
|
|
27222
|
-
function buildDebateReReviewPrompt(proposals, critiques, updatedDiff, previousFindings, resolverContext) {
|
|
27223
|
-
const framing = buildResolverFraming(resolverContext);
|
|
27224
|
-
const findingsList = previousFindings.length > 0 ? previousFindings.map((f) => `- ${f.ruleId}: ${f.message}`).join(`
|
|
27225
|
-
`) : "(none)";
|
|
27226
|
-
const proposalsSection = buildProposalsSection2(proposals);
|
|
27227
|
-
const critiquesSection = buildCritiquesSection(critiques);
|
|
27228
|
-
return [
|
|
27229
|
-
`${framing} This is a re-review after implementer changes.`,
|
|
27230
|
-
"",
|
|
27231
|
-
"## Previous Findings",
|
|
27232
|
-
findingsList,
|
|
27233
|
-
"",
|
|
27234
|
-
"## Updated Debater Proposals",
|
|
27235
|
-
proposalsSection,
|
|
27236
|
-
critiquesSection,
|
|
27237
|
-
"",
|
|
27238
|
-
"## Updated Diff",
|
|
27239
|
-
updatedDiff,
|
|
27240
|
-
"",
|
|
27241
|
-
"Respond with JSON: { passed: boolean, findings: [...], findingReasoning: { [id]: string }, deltaSummary: string }",
|
|
27242
|
-
"deltaSummary should describe which previous findings are resolved vs still present."
|
|
27243
|
-
].filter((line) => line !== undefined).join(`
|
|
27244
|
-
`);
|
|
27245
|
-
}
|
|
27246
|
-
|
|
27247
27454
|
// src/review/dialogue.ts
|
|
27248
27455
|
function extractDeltaSummary(rawOutput, previousFindings, newFindings) {
|
|
27249
27456
|
const parsed = tryParseLLMJson(rawOutput);
|
|
@@ -27319,6 +27526,7 @@ function createReviewerSession(agent, storyId, workdir, featureName, _config) {
|
|
|
27319
27526
|
generation: 1,
|
|
27320
27527
|
pendingCompactionContext: null
|
|
27321
27528
|
};
|
|
27529
|
+
const promptBuilder = new DebatePromptBuilder({ taskContext: "", outputFormat: "", stage: "review" }, { debaters: [], sessionMode: "stateful" });
|
|
27322
27530
|
function resolveRunParams(semanticConfig) {
|
|
27323
27531
|
const modelTier = semanticConfig.modelTier;
|
|
27324
27532
|
const defaultAgent = _config.autoMode?.defaultAgent ?? "claude";
|
|
@@ -27353,7 +27561,7 @@ ${prompt}`,
|
|
|
27353
27561
|
if (!active) {
|
|
27354
27562
|
throw new NaxError(`[dialogue] ReviewerSession for story ${storyId} has been destroyed`, "REVIEWER_SESSION_DESTROYED", { stage: "review", storyId, featureName });
|
|
27355
27563
|
}
|
|
27356
|
-
const prompt = buildReviewPrompt(diff, story
|
|
27564
|
+
const prompt = promptBuilder.buildReviewPrompt(diff, story);
|
|
27357
27565
|
const { modelTier, modelDef, timeoutSeconds } = resolveRunParams(semanticConfig);
|
|
27358
27566
|
const { effectivePrompt, acpSessionName } = buildEffectiveRunArgs(prompt);
|
|
27359
27567
|
const result = await agent.run({
|
|
@@ -27391,7 +27599,7 @@ ${prompt}`,
|
|
|
27391
27599
|
});
|
|
27392
27600
|
}
|
|
27393
27601
|
const previousFindings = lastCheckResult.checkResult.findings;
|
|
27394
|
-
const prompt = buildReReviewPrompt(updatedDiff, previousFindings);
|
|
27602
|
+
const prompt = promptBuilder.buildReReviewPrompt(updatedDiff, previousFindings);
|
|
27395
27603
|
const { modelTier, modelDef, timeoutSeconds } = resolveRunParams(lastSemanticConfig);
|
|
27396
27604
|
const { effectivePrompt, acpSessionName } = buildEffectiveRunArgs(prompt);
|
|
27397
27605
|
const result = await agent.run({
|
|
@@ -27451,7 +27659,7 @@ ${prompt}`,
|
|
|
27451
27659
|
if (!active) {
|
|
27452
27660
|
throw new NaxError(`[dialogue] ReviewerSession for story ${storyId} has been destroyed`, "REVIEWER_SESSION_DESTROYED", { stage: "review", storyId, featureName });
|
|
27453
27661
|
}
|
|
27454
|
-
const prompt =
|
|
27662
|
+
const prompt = promptBuilder.buildResolverPrompt(proposals, critiques, diff, story, resolverContext);
|
|
27455
27663
|
const { modelTier, modelDef, timeoutSeconds } = resolveRunParams(semanticConfig);
|
|
27456
27664
|
const { effectivePrompt, acpSessionName } = buildEffectiveRunArgs(prompt);
|
|
27457
27665
|
const result = await agent.run({
|
|
@@ -27486,7 +27694,7 @@ ${prompt}`,
|
|
|
27486
27694
|
throw new NaxError(`[dialogue] reReviewDebate() called before any resolveDebate() on story ${storyId}`, "NO_REVIEW_RESULT", { stage: "review", storyId });
|
|
27487
27695
|
}
|
|
27488
27696
|
const previousFindings = lastCheckResult.checkResult.findings;
|
|
27489
|
-
const prompt =
|
|
27697
|
+
const prompt = promptBuilder.buildReResolverPrompt(proposals, critiques, updatedDiff, previousFindings, resolverContext);
|
|
27490
27698
|
const { modelTier, modelDef, timeoutSeconds } = resolveRunParams(lastSemanticConfig);
|
|
27491
27699
|
const { effectivePrompt, acpSessionName } = buildEffectiveRunArgs(prompt);
|
|
27492
27700
|
const result = await agent.run({
|
|
@@ -27538,6 +27746,7 @@ ${prompt}`,
|
|
|
27538
27746
|
};
|
|
27539
27747
|
}
|
|
27540
27748
|
var init_dialogue = __esm(() => {
|
|
27749
|
+
init_prompt_builder();
|
|
27541
27750
|
init_errors();
|
|
27542
27751
|
});
|
|
27543
27752
|
|
|
@@ -36362,7 +36571,7 @@ var package_default;
|
|
|
36362
36571
|
var init_package = __esm(() => {
|
|
36363
36572
|
package_default = {
|
|
36364
36573
|
name: "@nathapp/nax",
|
|
36365
|
-
version: "0.
|
|
36574
|
+
version: "0.60.0",
|
|
36366
36575
|
description: "AI Coding Agent Orchestrator \u2014 loops until done",
|
|
36367
36576
|
type: "module",
|
|
36368
36577
|
bin: {
|
|
@@ -36442,8 +36651,8 @@ var init_version = __esm(() => {
|
|
|
36442
36651
|
NAX_VERSION = package_default.version;
|
|
36443
36652
|
NAX_COMMIT = (() => {
|
|
36444
36653
|
try {
|
|
36445
|
-
if (/^[0-9a-f]{6,10}$/.test("
|
|
36446
|
-
return "
|
|
36654
|
+
if (/^[0-9a-f]{6,10}$/.test("73c9c082"))
|
|
36655
|
+
return "73c9c082";
|
|
36447
36656
|
} catch {}
|
|
36448
36657
|
try {
|
|
36449
36658
|
const result = Bun.spawnSync(["git", "rev-parse", "--short", "HEAD"], {
|
|
@@ -73015,6 +73224,20 @@ function validateStory(raw, index, allIds) {
|
|
|
73015
73224
|
throw new Error(`[schema] story[${index}].acceptanceCriteria[${i}] must be a string`);
|
|
73016
73225
|
}
|
|
73017
73226
|
}
|
|
73227
|
+
let suggestedCriteria;
|
|
73228
|
+
if (s.suggestedCriteria !== undefined && s.suggestedCriteria !== null) {
|
|
73229
|
+
if (!Array.isArray(s.suggestedCriteria)) {
|
|
73230
|
+
throw new Error(`[schema] story[${index}].suggestedCriteria must be an array when present`);
|
|
73231
|
+
}
|
|
73232
|
+
if (s.suggestedCriteria.length > 0) {
|
|
73233
|
+
for (let i = 0;i < s.suggestedCriteria.length; i++) {
|
|
73234
|
+
if (typeof s.suggestedCriteria[i] !== "string") {
|
|
73235
|
+
throw new Error(`[schema] story[${index}].suggestedCriteria[${i}] must be a string`);
|
|
73236
|
+
}
|
|
73237
|
+
}
|
|
73238
|
+
suggestedCriteria = s.suggestedCriteria;
|
|
73239
|
+
}
|
|
73240
|
+
}
|
|
73018
73241
|
const routing = typeof s.routing === "object" && s.routing !== null ? s.routing : {};
|
|
73019
73242
|
const rawComplexity = routing.complexity ?? s.complexity;
|
|
73020
73243
|
if (rawComplexity === undefined || rawComplexity === null) {
|
|
@@ -73082,7 +73305,8 @@ function validateStory(raw, index, allIds) {
|
|
|
73082
73305
|
...noTestJustification !== undefined ? { noTestJustification } : {}
|
|
73083
73306
|
},
|
|
73084
73307
|
...workdir !== undefined ? { workdir } : {},
|
|
73085
|
-
...contextFiles.length > 0 ? { contextFiles } : {}
|
|
73308
|
+
...contextFiles.length > 0 ? { contextFiles } : {},
|
|
73309
|
+
...suggestedCriteria !== undefined ? { suggestedCriteria } : {}
|
|
73086
73310
|
};
|
|
73087
73311
|
}
|
|
73088
73312
|
function sanitizeInvalidEscapes(text) {
|
|
@@ -73220,7 +73444,8 @@ async function planCommand(workdir, config2, options) {
|
|
|
73220
73444
|
outputDir,
|
|
73221
73445
|
timeoutSeconds,
|
|
73222
73446
|
dangerouslySkipPermissions: resolvedPerm.skipPermissions,
|
|
73223
|
-
maxInteractionTurns: config2?.agent?.maxInteractionTurns
|
|
73447
|
+
maxInteractionTurns: config2?.agent?.maxInteractionTurns,
|
|
73448
|
+
specContent
|
|
73224
73449
|
});
|
|
73225
73450
|
if (debateResult.outcome !== "failed" && debateResult.output) {
|
|
73226
73451
|
rawResponse = debateResult.output;
|