@nathapp/nax 0.67.10 → 0.67.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/nax.js +726 -230
- package/package.json +1 -1
package/dist/nax.js
CHANGED
|
@@ -16833,7 +16833,8 @@ var init_schemas_execution = __esm(() => {
|
|
|
16833
16833
|
});
|
|
16834
16834
|
RectificationConfigSchema = exports_external.object({
|
|
16835
16835
|
enabled: exports_external.boolean().default(true),
|
|
16836
|
-
|
|
16836
|
+
maxAttemptsTotal: exports_external.number().int().min(1).max(50).default(12),
|
|
16837
|
+
maxAttemptsPerStrategy: exports_external.number().int().min(1).max(20).default(3),
|
|
16837
16838
|
fullSuiteTimeoutSeconds: exports_external.number().int().min(10).max(600).default(120),
|
|
16838
16839
|
maxFailureSummaryChars: exports_external.number().int().min(500).max(1e4).default(2000),
|
|
16839
16840
|
abortOnIncreasingFailures: exports_external.boolean().default(true),
|
|
@@ -16845,8 +16846,7 @@ var init_schemas_execution = __esm(() => {
|
|
|
16845
16846
|
enabled: exports_external.boolean().default(true),
|
|
16846
16847
|
timeoutSeconds: exports_external.number().int().min(10).max(600).default(120),
|
|
16847
16848
|
acceptOnTimeout: exports_external.boolean().default(true),
|
|
16848
|
-
mode: exports_external.enum(["deferred", "per-story", "disabled"]).default("deferred")
|
|
16849
|
-
maxRectificationAttempts: exports_external.number().int().min(1).default(2)
|
|
16849
|
+
mode: exports_external.enum(["deferred", "per-story", "disabled"]).default("deferred")
|
|
16850
16850
|
});
|
|
16851
16851
|
SmartTestRunnerConfigSchema = exports_external.object({
|
|
16852
16852
|
enabled: exports_external.boolean().default(true),
|
|
@@ -16928,16 +16928,10 @@ var init_schemas_execution = __esm(() => {
|
|
|
16928
16928
|
autofix: exports_external.object({
|
|
16929
16929
|
enabled: exports_external.boolean().default(true),
|
|
16930
16930
|
maxAttempts: exports_external.number().int().min(1).default(3),
|
|
16931
|
-
maxTotalAttempts: exports_external.number().int().min(1).default(12),
|
|
16932
|
-
rethinkAtAttempt: exports_external.number().int().min(1).default(2),
|
|
16933
|
-
urgencyAtAttempt: exports_external.number().int().min(1).default(3),
|
|
16934
16931
|
enforceTestWriterIsolation: exports_external.boolean().default(true)
|
|
16935
16932
|
}).default({
|
|
16936
16933
|
enabled: true,
|
|
16937
16934
|
maxAttempts: 3,
|
|
16938
|
-
maxTotalAttempts: 12,
|
|
16939
|
-
rethinkAtAttempt: 2,
|
|
16940
|
-
urgencyAtAttempt: 3,
|
|
16941
16935
|
enforceTestWriterIsolation: true
|
|
16942
16936
|
}),
|
|
16943
16937
|
forceExit: exports_external.boolean().default(false),
|
|
@@ -17225,6 +17219,7 @@ var init_schemas_review = __esm(() => {
|
|
|
17225
17219
|
excludePatterns: exports_external.array(exports_external.string()).optional(),
|
|
17226
17220
|
parallel: exports_external.boolean().default(false),
|
|
17227
17221
|
maxConcurrentSessions: exports_external.number().int().min(1).max(4).default(2),
|
|
17222
|
+
acRegroundOnDrop: exports_external.boolean().default(true),
|
|
17228
17223
|
substantiation: exports_external.object({
|
|
17229
17224
|
requote: exports_external.boolean().default(true),
|
|
17230
17225
|
maxRequotes: exports_external.number().int().min(0).default(5)
|
|
@@ -17324,7 +17319,8 @@ var init_schemas3 = __esm(() => {
|
|
|
17324
17319
|
maxStoriesPerFeature: 500,
|
|
17325
17320
|
rectification: {
|
|
17326
17321
|
enabled: true,
|
|
17327
|
-
|
|
17322
|
+
maxAttemptsTotal: 12,
|
|
17323
|
+
maxAttemptsPerStrategy: 3,
|
|
17328
17324
|
fullSuiteTimeoutSeconds: 300,
|
|
17329
17325
|
maxFailureSummaryChars: 2000,
|
|
17330
17326
|
abortOnIncreasingFailures: true,
|
|
@@ -17336,8 +17332,7 @@ var init_schemas3 = __esm(() => {
|
|
|
17336
17332
|
enabled: true,
|
|
17337
17333
|
timeoutSeconds: 300,
|
|
17338
17334
|
acceptOnTimeout: true,
|
|
17339
|
-
mode: "deferred"
|
|
17340
|
-
maxRectificationAttempts: 3
|
|
17335
|
+
mode: "deferred"
|
|
17341
17336
|
},
|
|
17342
17337
|
contextProviderTokenBudget: 2000,
|
|
17343
17338
|
permissionProfile: "unrestricted",
|
|
@@ -17363,9 +17358,6 @@ var init_schemas3 = __esm(() => {
|
|
|
17363
17358
|
autofix: {
|
|
17364
17359
|
enabled: true,
|
|
17365
17360
|
maxAttempts: 3,
|
|
17366
|
-
maxTotalAttempts: 12,
|
|
17367
|
-
rethinkAtAttempt: 2,
|
|
17368
|
-
urgencyAtAttempt: 3,
|
|
17369
17361
|
enforceTestWriterIsolation: true
|
|
17370
17362
|
},
|
|
17371
17363
|
forceExit: false,
|
|
@@ -17462,6 +17454,7 @@ var init_schemas3 = __esm(() => {
|
|
|
17462
17454
|
timeoutMs: 600000,
|
|
17463
17455
|
parallel: false,
|
|
17464
17456
|
maxConcurrentSessions: 2,
|
|
17457
|
+
acRegroundOnDrop: true,
|
|
17465
17458
|
substantiation: {
|
|
17466
17459
|
requote: true,
|
|
17467
17460
|
maxRequotes: 5
|
|
@@ -18678,6 +18671,47 @@ function rejectLegacyAgentKeys(conf) {
|
|
|
18678
18671
|
`);
|
|
18679
18672
|
throw new NaxError(message, "CONFIG_LEGACY_AGENT_KEYS", { stage: "config", legacyKeys });
|
|
18680
18673
|
}
|
|
18674
|
+
/**
 * Reject raw configuration objects that still use the pre-consolidation
 * rectification-cap keys.
 *
 * Four legacy locations are probed, in this order:
 *   - `quality.autofix.maxTotalAttempts`
 *   - `quality.autofix.rethinkAtAttempt`
 *   - `quality.autofix.urgencyAtAttempt`
 *   - `execution.rectification.maxRetries`
 *   - `execution.regressionGate.maxRectificationAttempts`
 * A key counts as present when it exists on its section at all (`in` check),
 * regardless of its value.
 *
 * @param {object} conf - Raw configuration object; must not be null/undefined.
 * @returns {void} Returns normally when none of the legacy keys is present.
 * @throws {NaxError} With code `CONFIG_LEGACY_RECTIFICATION_KEYS` and a
 *   `{ stage: "config", legacyKeys }` context, listing one migration hint per
 *   detected key.
 */
function rejectLegacyRectificationKeys(conf) {
  const autofixSection = conf.quality?.autofix;
  const executionSection = conf.execution;
  const rectificationSection = executionSection?.rectification;
  const regressionGateSection = executionSection?.regressionGate;

  // [owning section, legacy property, dotted path reported to the user, migration hint]
  const probes = [
    [
      autofixSection,
      "maxTotalAttempts",
      "quality.autofix.maxTotalAttempts",
      "- Move `quality.autofix.maxTotalAttempts` \u2192 `execution.rectification.maxAttemptsTotal`"
    ],
    [
      autofixSection,
      "rethinkAtAttempt",
      "quality.autofix.rethinkAtAttempt",
      "- Move `quality.autofix.rethinkAtAttempt` \u2192 `execution.rectification.rethinkAtAttempt`"
    ],
    [
      autofixSection,
      "urgencyAtAttempt",
      "quality.autofix.urgencyAtAttempt",
      "- Move `quality.autofix.urgencyAtAttempt` \u2192 `execution.rectification.urgencyAtAttempt`"
    ],
    [
      rectificationSection,
      "maxRetries",
      "execution.rectification.maxRetries",
      "- Rename `execution.rectification.maxRetries` \u2192 `execution.rectification.maxAttemptsTotal` (default changed from 2 to 12)"
    ],
    [
      regressionGateSection,
      "maxRectificationAttempts",
      "execution.regressionGate.maxRectificationAttempts",
      "- Remove `execution.regressionGate.maxRectificationAttempts` \u2014 the regression cycle now shares `execution.rectification.maxAttemptsTotal`"
    ]
  ];

  // Only object-valued sections can carry keys; anything else is ignored.
  const offenders = probes.filter(([section, property]) => Boolean(section) && typeof section === "object" && property in section);
  if (offenders.length === 0) {
    return;
  }

  const legacyKeys = offenders.map(([, , path]) => path);
  const migrationHints = offenders.map(([, , , hint]) => hint);
  const lines = [
    `Invalid configuration \u2014 legacy rectification-cap keys detected: ${legacyKeys.join(", ")}.`,
    "These were consolidated under `execution.rectification.*` so one config controls the unified",
    "fix cycle (semantic + adversarial + mechanical + regression). Migrate as follows:"
  ].concat(migrationHints);

  throw new NaxError(lines.join("\n"), "CONFIG_LEGACY_RECTIFICATION_KEYS", { stage: "config", legacyKeys });
}
|
|
18681
18715
|
function applyBatchModeCompat(conf) {
|
|
18682
18716
|
const routing = conf.routing;
|
|
18683
18717
|
const llm = routing?.llm;
|
|
@@ -18784,6 +18818,7 @@ async function loadConfig(startDir, cliOverrides) {
|
|
|
18784
18818
|
return structuredClone(DEFAULT_CONFIG);
|
|
18785
18819
|
}
|
|
18786
18820
|
rejectLegacyAgentKeys(rawConfig);
|
|
18821
|
+
rejectLegacyRectificationKeys(rawConfig);
|
|
18787
18822
|
const result = NaxConfigSchema.safeParse(rawConfig);
|
|
18788
18823
|
if (!result.success) {
|
|
18789
18824
|
const errors3 = result.error.issues.map((err) => {
|
|
@@ -18836,6 +18871,7 @@ async function loadConfigForWorkdir(rootConfigPath, packageDir, cliOverrides) {
|
|
|
18836
18871
|
const rawMerged = deepMergeConfig(merged, profileData);
|
|
18837
18872
|
rawMerged.profile = packageProfile;
|
|
18838
18873
|
rejectLegacyAgentKeys(rawMerged);
|
|
18874
|
+
rejectLegacyRectificationKeys(rawMerged);
|
|
18839
18875
|
const result = NaxConfigSchema.safeParse(rawMerged);
|
|
18840
18876
|
if (result.success) {
|
|
18841
18877
|
merged = result.data;
|
|
@@ -21059,6 +21095,7 @@ class DispatchEventBus {
|
|
|
21059
21095
|
_completedListeners = new Set;
|
|
21060
21096
|
_errorListeners = new Set;
|
|
21061
21097
|
_reviewDecisionListeners = new Set;
|
|
21098
|
+
_reviewRepromptListeners = new Set;
|
|
21062
21099
|
onDispatch(l) {
|
|
21063
21100
|
this._dispatchListeners.add(l);
|
|
21064
21101
|
return () => this._dispatchListeners.delete(l);
|
|
@@ -21075,6 +21112,10 @@ class DispatchEventBus {
|
|
|
21075
21112
|
this._reviewDecisionListeners.add(l);
|
|
21076
21113
|
return () => this._reviewDecisionListeners.delete(l);
|
|
21077
21114
|
}
|
|
21115
|
+
onReviewReprompt(l) {
|
|
21116
|
+
this._reviewRepromptListeners.add(l);
|
|
21117
|
+
return () => this._reviewRepromptListeners.delete(l);
|
|
21118
|
+
}
|
|
21078
21119
|
emitDispatch(event) {
|
|
21079
21120
|
for (const l of this._dispatchListeners) {
|
|
21080
21121
|
try {
|
|
@@ -21111,6 +21152,15 @@ class DispatchEventBus {
|
|
|
21111
21152
|
}
|
|
21112
21153
|
}
|
|
21113
21154
|
}
|
|
21155
|
+
emitReviewReprompt(event) {
|
|
21156
|
+
for (const l of this._reviewRepromptListeners) {
|
|
21157
|
+
try {
|
|
21158
|
+
l(event);
|
|
21159
|
+
} catch (err) {
|
|
21160
|
+
getSafeLogger()?.warn("dispatch-bus", "review-reprompt-listener threw", { error: errorMessage(err) });
|
|
21161
|
+
}
|
|
21162
|
+
}
|
|
21163
|
+
}
|
|
21114
21164
|
}
|
|
21115
21165
|
var init_dispatch_events = __esm(() => {
|
|
21116
21166
|
init_logger2();
|
|
@@ -30342,78 +30392,6 @@ function truncate(s, max) {
|
|
|
30342
30392
|
var MAX_BLOCK_CHARS = 6000;
|
|
30343
30393
|
|
|
30344
30394
|
// src/prompts/builders/review-builder.ts
|
|
30345
|
-
class ReviewPromptBuilder {
|
|
30346
|
-
buildSemanticReviewPrompt(story, semanticConfig, options) {
|
|
30347
|
-
const acList = story.acceptanceCriteria.map((ac, i) => `${i + 1}. ${ac}`).join(`
|
|
30348
|
-
`);
|
|
30349
|
-
const customRulesBlock = semanticConfig.rules.length > 0 ? `
|
|
30350
|
-
## Additional Review Rules
|
|
30351
|
-
${semanticConfig.rules.map((r, i) => `${i + 1}. ${r}`).join(`
|
|
30352
|
-
`)}
|
|
30353
|
-
` : "";
|
|
30354
|
-
const priorIterationsBlock = buildPriorIterationsBlock(options.priorSemanticIterations ?? []);
|
|
30355
|
-
let diffSection;
|
|
30356
|
-
if (options.mode === "ref") {
|
|
30357
|
-
diffSection = buildRefDiffSection(options.storyGitRef ?? "", options.stat ?? "", options.excludePatterns ?? []);
|
|
30358
|
-
} else {
|
|
30359
|
-
diffSection = buildEmbeddedDiffSection(options.diff ?? "");
|
|
30360
|
-
}
|
|
30361
|
-
const core2 = `${SEMANTIC_ROLE}
|
|
30362
|
-
|
|
30363
|
-
## Story: ${story.title}
|
|
30364
|
-
|
|
30365
|
-
### Description
|
|
30366
|
-
${story.description}
|
|
30367
|
-
|
|
30368
|
-
### Acceptance Criteria
|
|
30369
|
-
${acList}
|
|
30370
|
-
${customRulesBlock}${priorIterationsBlock}
|
|
30371
|
-
${SEMANTIC_INSTRUCTIONS}
|
|
30372
|
-
${SEMANTIC_OUTPUT_SCHEMA}
|
|
30373
|
-
|
|
30374
|
-
${diffSection}`;
|
|
30375
|
-
return wrapJsonPrompt(core2);
|
|
30376
|
-
}
|
|
30377
|
-
static jsonRetry() {
|
|
30378
|
-
return `Your previous response could not be parsed as valid JSON.
|
|
30379
|
-
` + `Output ONLY the JSON object from your review \u2014 no markdown fences, no explanation.
|
|
30380
|
-
` + "The object must start with { and end with }.";
|
|
30381
|
-
}
|
|
30382
|
-
static jsonRetryCondensed(opts) {
|
|
30383
|
-
const threshold = opts?.blockingThreshold ?? "error";
|
|
30384
|
-
const advisoryCap = opts?.advisoryCap ?? 3;
|
|
30385
|
-
const blockingList = threshold === "error" ? '"error"' : threshold === "warning" ? '"error" and "warning"' : '"error", "warning", and "info"';
|
|
30386
|
-
const blockingClause = threshold === "info" ? "Include ALL findings \u2014 do not drop any by severity." : `Include ALL findings with severity ${blockingList} (these are blocking \u2014 do not drop them).`;
|
|
30387
|
-
const advisoryClause = threshold === "info" ? "If your response would still exceed limits, prioritize the highest-severity findings first." : `Below that, include at most ${advisoryCap} additional findings (highest severity first).`;
|
|
30388
|
-
return `Your previous response was truncated and could not be parsed as valid JSON.
|
|
30389
|
-
Respond with a condensed summary:
|
|
30390
|
-
- ${blockingClause}
|
|
30391
|
-
- ${advisoryClause}
|
|
30392
|
-
- Keep \`verifiedBy\` for every finding. If \`verifiedBy.observed\` is long, abbreviate it to one line \u2014 never drop the field.
|
|
30393
|
-
Output ONLY a complete, valid JSON object. It must start with { and end with }.
|
|
30394
|
-
Schema: {"passed": boolean, "findings": [{"severity": string, "category": string, "file": string, "line": number, "issue": string, "suggestion": string, "verifiedBy": {"command": string, "file": string, "line": number, "observed": string}}]}`;
|
|
30395
|
-
}
|
|
30396
|
-
static requoteVerbatim(opts) {
|
|
30397
|
-
const file3 = opts.finding.verifiedBy?.file ?? opts.finding.file;
|
|
30398
|
-
const line = opts.finding.verifiedBy?.line ?? opts.finding.line;
|
|
30399
|
-
return `Your previous verifiedBy.observed value did not match the referenced file on disk.
|
|
30400
|
-
|
|
30401
|
-
You MUST use your file-reading tool to open ${file3} and copy the actual bytes around line ${line}. Do NOT quote from memory or from the prior conversation \u2014 the previous quote was wrong precisely because it was not read from disk. If you reply without a file-read tool call, the quote will be rejected.
|
|
30402
|
-
|
|
30403
|
-
Return ONLY this JSON object:
|
|
30404
|
-
{"file":"${file3}","line":${line},"observed":"exact 1-3 line quote"}
|
|
30405
|
-
|
|
30406
|
-
Finding issue: ${opts.finding.issue}
|
|
30407
|
-
Referenced file: ${file3}
|
|
30408
|
-
Referenced line: ${line}
|
|
30409
|
-
|
|
30410
|
-
Rules:
|
|
30411
|
-
- Read ${file3} with your file tool first. Then copy observed verbatim from the read result.
|
|
30412
|
-
- observed must be a 1-3 line excerpt that proves the claim, taken from at or near line ${line}.
|
|
30413
|
-
- If after reading the file you cannot find anything that proves the claim, set observed to "".
|
|
30414
|
-
- Do not return a full review. Do not include markdown fences or explanation.`;
|
|
30415
|
-
}
|
|
30416
|
-
}
|
|
30417
30395
|
function buildEmbeddedDiffSection(diff) {
|
|
30418
30396
|
return `## Git Diff (production code only \u2014 test files excluded)
|
|
30419
30397
|
|
|
@@ -30493,9 +30471,114 @@ Notes:
|
|
|
30493
30471
|
- \`acIndex\` is required when severity is "error" (1-based, into the Acceptance Criteria list above).
|
|
30494
30472
|
- \`acQuote\` is optional advisory metadata for human auditors \u2014 not validated.
|
|
30495
30473
|
- Omit both for "warning", "info", "unverifiable".
|
|
30496
|
-
If all ACs are correctly implemented, respond with { "passed": true, "findings": [] }
|
|
30474
|
+
If all ACs are correctly implemented, respond with { "passed": true, "findings": [] }.`, ReviewPromptBuilder;
|
|
30497
30475
|
var init_review_builder = __esm(() => {
|
|
30498
30476
|
SEMANTIC_ROLE = "You are a semantic code reviewer with access to the repository files. " + "Your job is to walk each acceptance criterion (AC) and judge whether the production code fulfills it \u2014 fully, partially, or not at all. " + "Test coverage gaps and convention/lint issues are out of scope \u2014 adversarial review and lint/typecheck handle those.";
|
|
30477
|
+
/**
 * Builds the prompts used by the semantic review flow: the initial review
 * prompt plus the follow-up prompts for JSON retries, quote re-verification
 * and re-grounding of dropped findings.
 */
ReviewPromptBuilder = class ReviewPromptBuilder {
  /**
   * Assemble the full semantic-review prompt for one story.
   *
   * @param {object} story - Provides `title`, `description` and the
   *   `acceptanceCriteria` array (rendered as a 1-based numbered list).
   * @param {object} semanticConfig - Provides `rules`, an array of extra review
   *   rules; the "Additional Review Rules" section is omitted when it is empty.
   * @param {object} options - `mode === "ref"` selects the ref-based diff
   *   section (`storyGitRef`, `stat`, `excludePatterns`); any other mode embeds
   *   `options.diff`. `priorSemanticIterations` feeds the prior-iterations block.
   * @returns {*} Whatever `wrapJsonPrompt` returns for the assembled prompt text.
   */
  buildSemanticReviewPrompt(story, semanticConfig, options) {
    const acList = story.acceptanceCriteria.map((ac, i) => `${i + 1}. ${ac}`).join(`
`);
    const customRulesBlock = semanticConfig.rules.length > 0 ? `
## Additional Review Rules
${semanticConfig.rules.map((r, i) => `${i + 1}. ${r}`).join(`
`)}
` : "";
    const priorIterationsBlock = buildPriorIterationsBlock(options.priorSemanticIterations ?? []);
    let diffSection;
    if (options.mode === "ref") {
      diffSection = buildRefDiffSection(options.storyGitRef ?? "", options.stat ?? "", options.excludePatterns ?? []);
    } else {
      diffSection = buildEmbeddedDiffSection(options.diff ?? "");
    }
    const core2 = `${SEMANTIC_ROLE}

## Story: ${story.title}

### Description
${story.description}

### Acceptance Criteria
${acList}
${customRulesBlock}${priorIterationsBlock}
${SEMANTIC_INSTRUCTIONS}
${SEMANTIC_OUTPUT_SCHEMA}

${diffSection}`;
    return wrapJsonPrompt(core2);
  }

  /**
   * Follow-up prompt asking for the previous review again as bare JSON.
   *
   * @returns {string} The fixed three-line retry prompt.
   */
  static jsonRetry() {
    return `Your previous response could not be parsed as valid JSON.
` + `Output ONLY the JSON object from your review \u2014 no markdown fences, no explanation.
` + "The object must start with { and end with }.";
  }

  /**
   * Follow-up prompt asking for a condensed JSON review after a truncated reply.
   *
   * @param {object} [opts] - Optional settings.
   * @param {string} [opts.blockingThreshold="error"] - `"error"`, `"warning"` or
   *   `"info"`; decides which severities the prompt calls blocking. Any value
   *   other than `"error"`/`"warning"` is worded like `"info"` in the severity
   *   list, but only the exact value `"info"` switches to the "include ALL
   *   findings" wording.
   * @param {number} [opts.advisoryCap=3] - Maximum number of additional
   *   (non-blocking) findings the prompt allows.
   * @returns {string} The condensed-retry prompt.
   */
  static jsonRetryCondensed(opts) {
    const threshold = opts?.blockingThreshold ?? "error";
    const advisoryCap = opts?.advisoryCap ?? 3;
    const blockingList = threshold === "error" ? '"error"' : threshold === "warning" ? '"error" and "warning"' : '"error", "warning", and "info"';
    const blockingClause = threshold === "info" ? "Include ALL findings \u2014 do not drop any by severity." : `Include ALL findings with severity ${blockingList} (these are blocking \u2014 do not drop them).`;
    const advisoryClause = threshold === "info" ? "If your response would still exceed limits, prioritize the highest-severity findings first." : `Below that, include at most ${advisoryCap} additional findings (highest severity first).`;
    return `Your previous response was truncated and could not be parsed as valid JSON.
Respond with a condensed summary:
- ${blockingClause}
- ${advisoryClause}
- Keep \`verifiedBy\` for every finding. If \`verifiedBy.observed\` is long, abbreviate it to one line \u2014 never drop the field.
Output ONLY a complete, valid JSON object. It must start with { and end with }.
Schema: {"passed": boolean, "findings": [{"severity": string, "category": string, "file": string, "line": number, "issue": string, "suggestion": string, "verifiedBy": {"command": string, "file": string, "line": number, "observed": string}}]}`;
  }

  /**
   * Follow-up prompt asking the reviewer to re-read a file and return a
   * verbatim quote for one finding.
   *
   * @param {object} opts - Must carry `finding`; the file and line come from
   *   `finding.verifiedBy` when present, otherwise from the finding itself.
   * @returns {string} The requote prompt.
   */
  static requoteVerbatim(opts) {
    const file3 = opts.finding.verifiedBy?.file ?? opts.finding.file;
    const line = opts.finding.verifiedBy?.line ?? opts.finding.line;
    return `Your previous verifiedBy.observed value did not match the referenced file on disk.

You MUST use your file-reading tool to open ${file3} and copy the actual bytes around line ${line}. Do NOT quote from memory or from the prior conversation \u2014 the previous quote was wrong precisely because it was not read from disk. If you reply without a file-read tool call, the quote will be rejected.

Return ONLY this JSON object:
{"file":"${file3}","line":${line},"observed":"exact 1-3 line quote"}

Finding issue: ${opts.finding.issue}
Referenced file: ${file3}
Referenced line: ${line}

Rules:
- Read ${file3} with your file tool first. Then copy observed verbatim from the read result.
- observed must be a 1-3 line excerpt that proves the claim, taken from at or near line ${line}.
- If after reading the file you cannot find anything that proves the claim, set observed to "".
- Do not return a full review. Do not include markdown fences or explanation.`;
  }

  /** Human-readable explanation for each known drop code. */
  static DROP_CODE_MESSAGES_MINIMAL = {
    missing_ac_index: "no `acIndex` field was provided \u2014 every blocking finding must cite an AC by 1-based index",
    ac_index_out_of_range: "`acIndex` is 0 or larger than the AC list \u2014 ACs are 1-indexed; the lowest valid value is 1"
  };

  /**
   * Follow-up prompt asking the reviewer to re-issue findings that were dropped.
   *
   * The stated reason is taken from the first drop's code only.
   *
   * @param {object} opts
   * @param {Array<{code: string, finding: {severity: string, issue: string}}>} opts.drops
   *   Dropped findings with the code explaining why each was dropped.
   * @param {string[]} opts.acceptanceCriteria - AC bullets, rendered 1-based.
   * @returns {string} The re-grounding prompt, or `""` when `drops` is empty.
   */
  static regroundDroppedFindings(opts) {
    const { drops, acceptanceCriteria } = opts;
    if (drops.length === 0)
      return "";
    const firstDrop = drops[0];
    // Fall back to the raw code for codes missing from the table; without this
    // the prompt would contain the literal text "undefined" as the reason.
    const codeMessage = ReviewPromptBuilder.DROP_CODE_MESSAGES_MINIMAL[firstDrop.code] ?? `rejection code: ${firstDrop.code}`;
    const acList = acceptanceCriteria.map((ac, i) => `${i + 1}. ${ac}`).join(`
`);
    return `Your previous review produced ${drops.length} finding${drops.length > 1 ? "s" : ""} that ${drops.length > 1 ? "were" : "was"} dropped because:

${codeMessage}

The dropped finding${drops.length > 1 ? "s" : ""} ${drops.length > 1 ? "are" : "is"}:
${drops.map((d, i) => `${i + 1}. [${d.finding.severity}] ${d.finding.issue}`).join(`
`)}

Please re-review the code and re-issue any valid findings. For each finding you re-issue:
- You MUST include a valid \`acIndex\` (1-based index into the AC list below)
- You MUST include a \`verifiedBy\` field with verified evidence

## Acceptance Criteria
${acList}

## Rules
- If a finding's locus (file / symbol) is not named in any AC bullet, downgrade it to \`"info"\` or \`"warning"\`
- Only re-issue findings that are genuinely substantiated by the code and constrained by an AC
- Return ONLY a JSON object with the same shape as before:
{"passed":true|false,"findings":[...]}`;
  }
};
|
|
30499
30582
|
});
|
|
30500
30583
|
|
|
30501
30584
|
// src/prompts/builders/adversarial-review-builder.ts
|
|
@@ -30578,92 +30661,6 @@ ${diff}\`\`\`
|
|
|
30578
30661
|
|
|
30579
30662
|
`;
|
|
30580
30663
|
}
|
|
30581
|
-
|
|
30582
|
-
class AdversarialReviewPromptBuilder {
|
|
30583
|
-
buildAdversarialReviewPrompt(story, config2, options) {
|
|
30584
|
-
const {
|
|
30585
|
-
mode,
|
|
30586
|
-
diff,
|
|
30587
|
-
storyGitRef,
|
|
30588
|
-
stat,
|
|
30589
|
-
testInventory,
|
|
30590
|
-
excludePatterns,
|
|
30591
|
-
testGlobs,
|
|
30592
|
-
refExcludePatterns,
|
|
30593
|
-
priorAdversarialIterations,
|
|
30594
|
-
blockingThreshold
|
|
30595
|
-
} = options;
|
|
30596
|
-
const priorFindingsBlock = buildPriorIterationsBlock(priorAdversarialIterations ?? []);
|
|
30597
|
-
const storyBlock = `## Story Under Review
|
|
30598
|
-
|
|
30599
|
-
**ID:** ${story.id}
|
|
30600
|
-
**Title:** ${story.title}
|
|
30601
|
-
**Description:** ${story.description || "(none)"}
|
|
30602
|
-
|
|
30603
|
-
**Acceptance Criteria:**
|
|
30604
|
-
${story.acceptanceCriteria.map((ac, i) => `${i + 1}. ${ac}`).join(`
|
|
30605
|
-
`)}
|
|
30606
|
-
|
|
30607
|
-
`;
|
|
30608
|
-
const customRulesBlock = config2.rules.length > 0 ? `## Project-Specific Adversarial Rules
|
|
30609
|
-
|
|
30610
|
-
${config2.rules.map((r) => `- ${r}`).join(`
|
|
30611
|
-
`)}
|
|
30612
|
-
|
|
30613
|
-
` : "";
|
|
30614
|
-
let diffBlock;
|
|
30615
|
-
if (mode === "ref" && storyGitRef) {
|
|
30616
|
-
diffBlock = buildAdversarialRefDiffSection(storyGitRef, stat, excludePatterns ?? [], testGlobs ?? [], refExcludePatterns ?? []);
|
|
30617
|
-
} else if (mode === "embedded" && diff) {
|
|
30618
|
-
diffBlock = buildAdversarialEmbeddedDiffSection(diff, testInventory);
|
|
30619
|
-
} else {
|
|
30620
|
-
diffBlock = `## Diff
|
|
30621
|
-
|
|
30622
|
-
(No diff available \u2014 review based on story context only)
|
|
30623
|
-
|
|
30624
|
-
`;
|
|
30625
|
-
}
|
|
30626
|
-
return [
|
|
30627
|
-
ADVERSARIAL_ROLE,
|
|
30628
|
-
`
|
|
30629
|
-
|
|
30630
|
-
`,
|
|
30631
|
-
priorFindingsBlock,
|
|
30632
|
-
storyBlock,
|
|
30633
|
-
ADVERSARIAL_INSTRUCTIONS,
|
|
30634
|
-
`
|
|
30635
|
-
|
|
30636
|
-
`,
|
|
30637
|
-
customRulesBlock,
|
|
30638
|
-
buildBlockingThresholdBlock(blockingThreshold ?? "error"),
|
|
30639
|
-
OUTPUT_SCHEMA,
|
|
30640
|
-
`
|
|
30641
|
-
|
|
30642
|
-
`,
|
|
30643
|
-
diffBlock
|
|
30644
|
-
].join("");
|
|
30645
|
-
}
|
|
30646
|
-
static requoteVerbatim(opts) {
|
|
30647
|
-
const file3 = opts.finding.verifiedBy?.file ?? opts.finding.file;
|
|
30648
|
-
const line = opts.finding.verifiedBy?.line ?? opts.finding.line;
|
|
30649
|
-
return `Your previous verifiedBy.observed value did not match the referenced file on disk.
|
|
30650
|
-
|
|
30651
|
-
You MUST use your file-reading tool to open ${file3} and copy the actual bytes around line ${line}. Do NOT quote from memory or from the prior conversation \u2014 the previous quote was wrong precisely because it was not read from disk. If you reply without a file-read tool call, the quote will be rejected.
|
|
30652
|
-
|
|
30653
|
-
Return ONLY this JSON object:
|
|
30654
|
-
{"file":"${file3}","line":${line},"observed":"exact 1-3 line quote"}
|
|
30655
|
-
|
|
30656
|
-
Finding issue: ${opts.finding.issue}
|
|
30657
|
-
Referenced file: ${file3}
|
|
30658
|
-
Referenced line: ${line}
|
|
30659
|
-
|
|
30660
|
-
Rules:
|
|
30661
|
-
- Read ${file3} with your file tool first. Then copy observed verbatim from the read result.
|
|
30662
|
-
- observed must be a 1-3 line excerpt that proves the claim, taken from at or near line ${line}.
|
|
30663
|
-
- If after reading the file you cannot find anything that proves the claim, set observed to "".
|
|
30664
|
-
- Do not return a full review. Do not include markdown fences or explanation.`;
|
|
30665
|
-
}
|
|
30666
|
-
}
|
|
30667
30664
|
var ADVERSARIAL_ROLE = `You are an adversarial code reviewer with full access to the repository.
|
|
30668
30665
|
|
|
30669
30666
|
Your job is NOT to re-verify that the code satisfies the acceptance criteria \u2014 semantic review owns that question. Don't re-litigate AC correctness.
|
|
@@ -30772,8 +30769,130 @@ Worked example:
|
|
|
30772
30769
|
**Scope constraints are not Acceptance Criteria:**
|
|
30773
30770
|
The story description may contain a "Scope" section with "In:" and "Out:" bullets. These are implementation guidelines, not ACs. A finding about code changed outside the stated scope (e.g., a file listed under "Out:") cannot cite a scope constraint as its \`acQuote\`/\`acIndex\` because scope text is not in the numbered AC list. Emit scope-violation findings as \`"warning"\` \u2014 never \`"error"\`. Never use \`acIndex: 0\`; \`acIndex\` is 1-based (first AC bullet = 1).
|
|
30774
30771
|
|
|
30775
|
-
If you cannot find an AC that names the **specific symbol** in your finding, downgrade to \`"info"\` or \`"warning"\`. A finding dropped by the validator is worse than one correctly classified as advisory
|
|
30776
|
-
var init_adversarial_review_builder = () => {
|
|
30772
|
+
If you cannot find an AC that names the **specific symbol** in your finding, downgrade to \`"info"\` or \`"warning"\`. A finding dropped by the validator is worse than one correctly classified as advisory.`, AdversarialReviewPromptBuilder;
|
|
30773
|
+
var init_adversarial_review_builder = __esm(() => {
|
|
30774
|
+
AdversarialReviewPromptBuilder = class AdversarialReviewPromptBuilder {
|
|
30775
|
+
buildAdversarialReviewPrompt(story, config2, options) {
|
|
30776
|
+
const {
|
|
30777
|
+
mode,
|
|
30778
|
+
diff,
|
|
30779
|
+
storyGitRef,
|
|
30780
|
+
stat,
|
|
30781
|
+
testInventory,
|
|
30782
|
+
excludePatterns,
|
|
30783
|
+
testGlobs,
|
|
30784
|
+
refExcludePatterns,
|
|
30785
|
+
priorAdversarialIterations,
|
|
30786
|
+
blockingThreshold
|
|
30787
|
+
} = options;
|
|
30788
|
+
const priorFindingsBlock = buildPriorIterationsBlock(priorAdversarialIterations ?? []);
|
|
30789
|
+
const storyBlock = `## Story Under Review
|
|
30790
|
+
|
|
30791
|
+
**ID:** ${story.id}
|
|
30792
|
+
**Title:** ${story.title}
|
|
30793
|
+
**Description:** ${story.description || "(none)"}
|
|
30794
|
+
|
|
30795
|
+
**Acceptance Criteria:**
|
|
30796
|
+
${story.acceptanceCriteria.map((ac, i) => `${i + 1}. ${ac}`).join(`
|
|
30797
|
+
`)}
|
|
30798
|
+
|
|
30799
|
+
`;
|
|
30800
|
+
const customRulesBlock = config2.rules.length > 0 ? `## Project-Specific Adversarial Rules
|
|
30801
|
+
|
|
30802
|
+
${config2.rules.map((r) => `- ${r}`).join(`
|
|
30803
|
+
`)}
|
|
30804
|
+
|
|
30805
|
+
` : "";
|
|
30806
|
+
let diffBlock;
|
|
30807
|
+
if (mode === "ref" && storyGitRef) {
|
|
30808
|
+
diffBlock = buildAdversarialRefDiffSection(storyGitRef, stat, excludePatterns ?? [], testGlobs ?? [], refExcludePatterns ?? []);
|
|
30809
|
+
} else if (mode === "embedded" && diff) {
|
|
30810
|
+
diffBlock = buildAdversarialEmbeddedDiffSection(diff, testInventory);
|
|
30811
|
+
} else {
|
|
30812
|
+
diffBlock = `## Diff
|
|
30813
|
+
|
|
30814
|
+
(No diff available \u2014 review based on story context only)
|
|
30815
|
+
|
|
30816
|
+
`;
|
|
30817
|
+
}
|
|
30818
|
+
return [
|
|
30819
|
+
ADVERSARIAL_ROLE,
|
|
30820
|
+
`
|
|
30821
|
+
|
|
30822
|
+
`,
|
|
30823
|
+
priorFindingsBlock,
|
|
30824
|
+
storyBlock,
|
|
30825
|
+
ADVERSARIAL_INSTRUCTIONS,
|
|
30826
|
+
`
|
|
30827
|
+
|
|
30828
|
+
`,
|
|
30829
|
+
customRulesBlock,
|
|
30830
|
+
buildBlockingThresholdBlock(blockingThreshold ?? "error"),
|
|
30831
|
+
OUTPUT_SCHEMA,
|
|
30832
|
+
`
|
|
30833
|
+
|
|
30834
|
+
`,
|
|
30835
|
+
diffBlock
|
|
30836
|
+
].join("");
|
|
30837
|
+
}
|
|
30838
|
+
static requoteVerbatim(opts) {
|
|
30839
|
+
const file3 = opts.finding.verifiedBy?.file ?? opts.finding.file;
|
|
30840
|
+
const line = opts.finding.verifiedBy?.line ?? opts.finding.line;
|
|
30841
|
+
return `Your previous verifiedBy.observed value did not match the referenced file on disk.
|
|
30842
|
+
|
|
30843
|
+
You MUST use your file-reading tool to open ${file3} and copy the actual bytes around line ${line}. Do NOT quote from memory or from the prior conversation \u2014 the previous quote was wrong precisely because it was not read from disk. If you reply without a file-read tool call, the quote will be rejected.
|
|
30844
|
+
|
|
30845
|
+
Return ONLY this JSON object:
|
|
30846
|
+
{"file":"${file3}","line":${line},"observed":"exact 1-3 line quote"}
|
|
30847
|
+
|
|
30848
|
+
Finding issue: ${opts.finding.issue}
|
|
30849
|
+
Referenced file: ${file3}
|
|
30850
|
+
Referenced line: ${line}
|
|
30851
|
+
|
|
30852
|
+
Rules:
|
|
30853
|
+
- Read ${file3} with your file tool first. Then copy observed verbatim from the read result.
|
|
30854
|
+
- observed must be a 1-3 line excerpt that proves the claim, taken from at or near line ${line}.
|
|
30855
|
+
- If after reading the file you cannot find anything that proves the claim, set observed to "".
|
|
30856
|
+
- Do not return a full review. Do not include markdown fences or explanation.`;
|
|
30857
|
+
}
|
|
30858
|
+
// Human-readable explanation for each AC-grounding drop code. Looked up by a
// dropped finding's `code` in regroundDroppedFindings; codes missing from this
// table fall back to a generic "rejection code: <code>" message there.
static DROP_CODE_MESSAGES_QUOTE = {
|
|
30859
|
+
missing_ac_quote: "no `acQuote` field was provided \u2014 every blocking finding must cite an AC",
|
|
30860
|
+
ac_index_out_of_range: "`acIndex` is 0 or larger than the AC list \u2014 ACs are 1-indexed; the lowest valid value is 1",
|
|
30861
|
+
ac_quote_not_substring: "`acQuote` text does not appear verbatim in any AC bullet \u2014 copy the AC text character-for-character",
|
|
30862
|
+
ac_quote_does_not_constrain_locus: "the cited AC mentions the file but not the specific symbol your finding flags \u2014 pick a different AC, or downgrade to `info` / `warning`"
|
|
30863
|
+
};
|
|
30864
|
+
static regroundDroppedFindings(opts) {
|
|
30865
|
+
const { drops, acceptanceCriteria } = opts;
|
|
30866
|
+
if (drops.length === 0)
|
|
30867
|
+
return "";
|
|
30868
|
+
const firstDrop = drops[0];
|
|
30869
|
+
const codeMessage = AdversarialReviewPromptBuilder.DROP_CODE_MESSAGES_QUOTE[firstDrop.code] ?? `rejection code: ${firstDrop.code}`;
|
|
30870
|
+
const acList = acceptanceCriteria.map((ac, i) => `${i + 1}. ${ac}`).join(`
|
|
30871
|
+
`);
|
|
30872
|
+
return `Your previous review produced ${drops.length} finding${drops.length > 1 ? "s" : ""} that ${drops.length > 1 ? "were" : "was"} dropped because:
|
|
30873
|
+
|
|
30874
|
+
${codeMessage}
|
|
30875
|
+
|
|
30876
|
+
The dropped finding${drops.length > 1 ? "s" : ""} ${drops.length > 1 ? "are" : "is"}:
|
|
30877
|
+
${drops.map((d, i) => `${i + 1}. [${d.finding.severity}] ${d.finding.issue}`).join(`
|
|
30878
|
+
`)}
|
|
30879
|
+
|
|
30880
|
+
Please re-review the code and re-issue any valid findings. For each finding you re-issue:
|
|
30881
|
+
- You MUST include a valid \`acQuote\` that appears verbatim in one of the AC bullets below
|
|
30882
|
+
- You MUST include a valid \`acIndex\` (1-based index into the AC list)
|
|
30883
|
+
- The \`acQuote\` must cite the specific symbol you are flagging, not just the file
|
|
30884
|
+
|
|
30885
|
+
## Acceptance Criteria
|
|
30886
|
+
${acList}
|
|
30887
|
+
|
|
30888
|
+
## Rules
|
|
30889
|
+
- If a finding's locus (file / symbol) is not named in any AC bullet, downgrade it to \`"info"\` or \`"warning"\`
|
|
30890
|
+
- Only re-issue findings that are genuinely substantiated by the code and constrained by an AC
|
|
30891
|
+
- Return ONLY a JSON object with the same shape as before:
|
|
30892
|
+
{"passed":true|false,"findings":[...]}`;
|
|
30893
|
+
}
|
|
30894
|
+
};
|
|
30895
|
+
});
|
|
30777
30896
|
|
|
30778
30897
|
// src/prompts/builders/acceptance-builder-helpers.ts
|
|
30779
30898
|
function formatTestOutputForFix(rawOutput) {
|
|
@@ -31546,6 +31665,28 @@ function isRecord(value) {
|
|
|
31546
31665
|
var init_requote_response = () => {};
|
|
31547
31666
|
|
|
31548
31667
|
// src/operations/adversarial-review.ts
|
|
31668
|
+
/**
 * Attach reprompt telemetry to a reviewer's JSON output under `_repromptInfo`.
 *
 * @param {string} output - Raw reviewer output.
 * @param {{ dropCount: number, outcome: string, costUsd: number }} info - Telemetry to embed.
 * @returns {string} Re-serialized JSON with the marker, or `output` unchanged
 *   when it does not parse to a plain (non-array) object.
 */
function withRepromptMarker(output, info) {
  const parsed = tryParseLLMJson(output);
  // Arrays are typeof "object"; spreading one would silently turn it into an
  // index-keyed object, so leave array (and non-object) payloads untouched.
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed))
    return output;
  return JSON.stringify({ ...parsed, _repromptInfo: info });
}
|
|
31674
|
+
/**
 * Read the `_repromptInfo` marker back out of a parsed reviewer payload.
 *
 * The payload originates from model output, so the marker is validated before
 * use: `dropCount` and `costUsd` must be finite numbers (NaN/Infinity are
 * rejected, not just non-numbers) and `outcome` must be a string.
 *
 * @param {unknown} raw - Parsed reviewer JSON.
 * @returns {{ dropCount: number, costUsd: number, outcome: string } | undefined}
 *   The three validated fields only, or undefined when absent or malformed.
 */
function extractRepromptInfo(raw) {
  if (!raw || typeof raw !== "object")
    return;
  const info = raw._repromptInfo;
  if (!info || typeof info !== "object")
    return;
  const { dropCount, costUsd, outcome } = info;
  // Number.isFinite is false for every non-number as well as NaN/±Infinity.
  if (!Number.isFinite(dropCount) || !Number.isFinite(costUsd) || typeof outcome !== "string") {
    return;
  }
  return { dropCount, costUsd, outcome };
}
|
|
31549
31690
|
async function requoteBlockingAdversarialFindings(findings, ctx) {
|
|
31550
31691
|
const threshold = ctx.input.blockingThreshold ?? "error";
|
|
31551
31692
|
const maxRequotes = ctx.input.adversarialConfig.substantiation?.maxRequotes ?? DEFAULT_MAX_REQUOTES;
|
|
@@ -31614,6 +31755,69 @@ async function requoteBlockingAdversarialFindings(findings, ctx) {
|
|
|
31614
31755
|
}
|
|
31615
31756
|
return { findings: next, changed, extraCostUsd };
|
|
31616
31757
|
}
|
|
31758
|
+
/**
 * Decide whether the adversarial reviewer should be re-prompted.
 *
 * A re-prompt is requested only when re-grounding is not disabled in the
 * config, the review did not pass, no blocking finding survived the AC-quote
 * filter, and that filter dropped at least one finding.
 *
 * @param {{ passed: boolean, findings: object[] }} shape - First-turn review.
 * @param {object} input - Operation input (config, story, blockingThreshold).
 * @returns {{ shouldReprompt: boolean, acDropped?: object[] }}
 */
function evaluateRepromptTrigger(shape, input) {
  const regroundDisabled = input.adversarialConfig.acRegroundOnDrop === false;
  if (regroundDisabled || shape.passed) {
    return { shouldReprompt: false };
  }
  const grounding = filterByAcQuote(shape.findings, input.story.acceptanceCriteria);
  const blockingThreshold = input.blockingThreshold ?? "error";
  const groundedBlockers = grounding.accepted.filter((finding) => isBlockingSeverity(finding.severity, blockingThreshold));
  const nothingToRecover = groundedBlockers.length > 0 || grounding.dropped.length === 0;
  if (nothingToRecover) {
    return { shouldReprompt: false };
  }
  return { shouldReprompt: true, acDropped: grounding.dropped };
}
|
|
31772
|
+
/**
 * Re-prompt the adversarial reviewer once after its findings were dropped by
 * AC grounding, then fold the second response into the turn result.
 *
 * The outcome is recorded under `_repromptInfo.outcome` in the returned output:
 * - "parse-failed": the second response did not validate; first-turn output is kept.
 * - "recovered-blocking": the second response has an accepted blocking finding;
 *   its findings replace the first turn's and the review fails.
 * - "recovered-advisory-only": the second response passed; accepted
 *   non-blocking findings from both turns are merged and the review passes.
 * - "still-dropped": anything else; first-turn output is kept.
 *
 * Every outcome reports the combined cost of both turns in `estimatedCostUsd`.
 * Previously the two fallback outcomes returned only the first turn's cost,
 * although the second call had been made and paid for.
 *
 * @param {object} turn - First-turn result (`output`, optional `estimatedCostUsd`).
 * @param {{ passed: boolean, findings: object[] }} firstParsed - Validated first-turn review.
 * @param {object[]} drops - Findings dropped by AC grounding on the first turn.
 * @param {object} ctx - Hop context providing `input` and `send(prompt)`.
 * @returns {Promise<object>} The turn with updated `output` and `estimatedCostUsd`.
 */
async function performAdversarialReground(turn, firstParsed, drops, ctx) {
  const threshold = ctx.input.blockingThreshold ?? "error";
  const acceptanceCriteria = ctx.input.story.acceptanceCriteria;
  const { accepted: firstAccepted } = filterByAcQuote(firstParsed.findings, acceptanceCriteria);
  const firstAdvisory = firstAccepted.filter((f) => !isBlockingSeverity(f.severity, threshold));
  const repromptPrompt = AdversarialReviewPromptBuilder.regroundDroppedFindings({
    drops,
    acceptanceCriteria
  });
  const secondTurn = await ctx.send(repromptPrompt);
  const secondParsed = validateAdversarialShape(tryParseLLMJson(secondTurn.output));
  const costUsd = (turn.estimatedCostUsd ?? 0) + (secondTurn.estimatedCostUsd ?? 0);
  const dropCount = drops.length;
  // Fallback result: keep the first-turn output, tag it, and still charge for
  // the second call.
  const keepFirstTurn = (outcome) => ({
    ...turn,
    output: withRepromptMarker(turn.output, { dropCount, outcome, costUsd }),
    estimatedCostUsd: costUsd
  });
  if (!secondParsed) {
    return keepFirstTurn("parse-failed");
  }
  const { accepted: secondAccepted } = filterByAcQuote(secondParsed.findings, acceptanceCriteria);
  const secondBlocking = secondAccepted.filter((f) => isBlockingSeverity(f.severity, threshold));
  if (secondBlocking.length > 0) {
    return {
      ...turn,
      output: JSON.stringify({
        passed: false,
        findings: secondParsed.findings,
        _repromptInfo: { dropCount, outcome: "recovered-blocking", costUsd }
      }),
      estimatedCostUsd: costUsd
    };
  }
  if (secondParsed.passed) {
    const secondAdvisory = secondAccepted.filter((f) => !isBlockingSeverity(f.severity, threshold));
    return {
      ...turn,
      output: JSON.stringify({
        passed: true,
        findings: [...firstAdvisory, ...secondAdvisory],
        _repromptInfo: { dropCount, outcome: "recovered-advisory-only", costUsd }
      }),
      estimatedCostUsd: costUsd
    };
  }
  return keepFirstTurn("still-dropped");
}
|
|
31617
31821
|
var FAIL_OPEN, ADVERSARIAL_REQUOTE_RECOVERED_EVENT = "review.adversarial.finding.requote_recovered", ADVERSARIAL_REQUOTE_FAILED_EVENT = "review.adversarial.finding.requote_failed", DEFAULT_MAX_REQUOTES = 5, adversarialParseRetry = (input) => makeParseRetryStrategy({
|
|
31618
31822
|
validate: (parsed) => validateAdversarialShape(parsed) !== null,
|
|
31619
31823
|
reviewerKind: "adversarial",
|
|
@@ -31654,15 +31858,30 @@ var init_adversarial_review = __esm(() => {
|
|
|
31654
31858
|
const parsed = validateAdversarialShape(tryParseLLMJson(turn.output));
|
|
31655
31859
|
if (!parsed)
|
|
31656
31860
|
return turn;
|
|
31657
|
-
|
|
31658
|
-
if (!requoted.changed)
|
|
31861
|
+
if (ctx.input.mode !== "ref")
|
|
31659
31862
|
return turn;
|
|
31660
|
-
const
|
|
31661
|
-
|
|
31662
|
-
|
|
31663
|
-
|
|
31664
|
-
|
|
31665
|
-
|
|
31863
|
+
const regroundEnabled = ctx.input.adversarialConfig.acRegroundOnDrop !== false;
|
|
31864
|
+
if (regroundEnabled) {
|
|
31865
|
+
const firstShape = { passed: parsed.passed, findings: parsed.findings };
|
|
31866
|
+
const trigger = evaluateRepromptTrigger(firstShape, ctx.input);
|
|
31867
|
+
if (trigger.shouldReprompt) {
|
|
31868
|
+
return await performAdversarialReground(turn, parsed, trigger.acDropped, ctx);
|
|
31869
|
+
}
|
|
31870
|
+
}
|
|
31871
|
+
const requoteEnabled = ctx.input.adversarialConfig.substantiation?.requote ?? true;
|
|
31872
|
+
const maxRequotes = ctx.input.adversarialConfig.substantiation?.maxRequotes ?? DEFAULT_MAX_REQUOTES;
|
|
31873
|
+
if (!requoteEnabled || maxRequotes <= 0)
|
|
31874
|
+
return turn;
|
|
31875
|
+
const requoted = await requoteBlockingAdversarialFindings(parsed.findings, ctx);
|
|
31876
|
+
if (requoted.changed) {
|
|
31877
|
+
const passed = !requoted.findings.some((finding) => isBlockingSeverity(finding.severity, ctx.input.blockingThreshold ?? "error"));
|
|
31878
|
+
return {
|
|
31879
|
+
...turn,
|
|
31880
|
+
output: JSON.stringify({ passed, findings: requoted.findings }),
|
|
31881
|
+
estimatedCostUsd: (turn.estimatedCostUsd ?? 0) + requoted.extraCostUsd
|
|
31882
|
+
};
|
|
31883
|
+
}
|
|
31884
|
+
return turn;
|
|
31666
31885
|
},
|
|
31667
31886
|
build(input, _ctx) {
|
|
31668
31887
|
const base = new AdversarialReviewPromptBuilder().buildAdversarialReviewPrompt(input.story, input.adversarialConfig, {
|
|
@@ -31686,16 +31905,25 @@ var init_adversarial_review = __esm(() => {
|
|
|
31686
31905
|
parse(output, _input, _ctx) {
|
|
31687
31906
|
const raw = tryParseLLMJson(output);
|
|
31688
31907
|
const parsed = validateAdversarialShape(raw);
|
|
31908
|
+
const repromptEvent = extractRepromptInfo(raw);
|
|
31689
31909
|
if (parsed) {
|
|
31690
31910
|
return {
|
|
31691
31911
|
passed: parsed.passed,
|
|
31692
31912
|
findings: parsed.findings,
|
|
31693
31913
|
normalizedFindings: [],
|
|
31694
|
-
acDropped: []
|
|
31914
|
+
acDropped: [],
|
|
31915
|
+
repromptEvent
|
|
31695
31916
|
};
|
|
31696
31917
|
}
|
|
31697
31918
|
if (/"passed"\s*:\s*false/.test(output) && !/"findings"\s*:\s*\[\s*\{/.test(output)) {
|
|
31698
|
-
return {
|
|
31919
|
+
return {
|
|
31920
|
+
passed: false,
|
|
31921
|
+
findings: [],
|
|
31922
|
+
normalizedFindings: [],
|
|
31923
|
+
acDropped: [],
|
|
31924
|
+
looksLikeFail: true,
|
|
31925
|
+
repromptEvent
|
|
31926
|
+
};
|
|
31699
31927
|
}
|
|
31700
31928
|
throw new ParseValidationError("[adversarial-review] parse failed: invalid JSON shape");
|
|
31701
31929
|
},
|
|
@@ -32198,7 +32426,7 @@ async function runAdversarialReview(opts) {
|
|
|
32198
32426
|
} = opts;
|
|
32199
32427
|
const startTime = Date.now();
|
|
32200
32428
|
const logger = getSafeLogger();
|
|
32201
|
-
const effectiveRef = await resolveEffectiveRef(workdir, storyGitRef, story.id);
|
|
32429
|
+
const effectiveRef = await _adversarialDeps.resolveEffectiveRef(workdir, storyGitRef, story.id);
|
|
32202
32430
|
if (!effectiveRef) {
|
|
32203
32431
|
return {
|
|
32204
32432
|
check: "adversarial",
|
|
@@ -32217,7 +32445,7 @@ async function runAdversarialReview(opts) {
|
|
|
32217
32445
|
});
|
|
32218
32446
|
const repoRoot = projectDir ?? workdir;
|
|
32219
32447
|
const packageDir = workdir !== repoRoot ? workdir : undefined;
|
|
32220
|
-
const stat = await collectDiffStat(workdir, effectiveRef, { naxIgnoreIndex, packageDir });
|
|
32448
|
+
const stat = await _adversarialDeps.collectDiffStat(workdir, effectiveRef, { naxIgnoreIndex, packageDir });
|
|
32221
32449
|
if (!stat) {
|
|
32222
32450
|
return {
|
|
32223
32451
|
check: "adversarial",
|
|
@@ -32399,6 +32627,16 @@ async function runAdversarialReview(opts) {
|
|
|
32399
32627
|
durationMs: Date.now() - startTime
|
|
32400
32628
|
};
|
|
32401
32629
|
}
|
|
32630
|
+
if (opResult.repromptEvent) {
|
|
32631
|
+
runtime.dispatchEvents.emitReviewReprompt({
|
|
32632
|
+
kind: "review-reprompt-on-drop",
|
|
32633
|
+
storyId: story.id,
|
|
32634
|
+
reviewer: "adversarial",
|
|
32635
|
+
dropCount: opResult.repromptEvent.dropCount,
|
|
32636
|
+
repromptOutcome: opResult.repromptEvent.outcome,
|
|
32637
|
+
costUsd: opResult.repromptEvent.costUsd
|
|
32638
|
+
});
|
|
32639
|
+
}
|
|
32402
32640
|
const threshold = blockingThreshold ?? "error";
|
|
32403
32641
|
const allFindings = opResult.findings;
|
|
32404
32642
|
const blockingFindings = allFindings.filter((f) => isBlockingSeverity(f.severity, threshold));
|
|
@@ -32410,7 +32648,7 @@ async function runAdversarialReview(opts) {
|
|
|
32410
32648
|
diffFiles = extractDiffFiles(diff);
|
|
32411
32649
|
diffAvailable = true;
|
|
32412
32650
|
} else {
|
|
32413
|
-
const list = await collectDiffFileList(workdir, effectiveRef, { naxIgnoreIndex, packageDir });
|
|
32651
|
+
const list = await _adversarialDeps.collectDiffFileList(workdir, effectiveRef, { naxIgnoreIndex, packageDir });
|
|
32414
32652
|
if (list === undefined) {
|
|
32415
32653
|
diffFiles = new Set;
|
|
32416
32654
|
diffAvailable = false;
|
|
@@ -32586,7 +32824,10 @@ var init_adversarial = __esm(() => {
|
|
|
32586
32824
|
init_review_audit();
|
|
32587
32825
|
_adversarialDeps = {
|
|
32588
32826
|
writeReviewAudit,
|
|
32589
|
-
callOp
|
|
32827
|
+
callOp,
|
|
32828
|
+
resolveEffectiveRef,
|
|
32829
|
+
collectDiffStat,
|
|
32830
|
+
collectDiffFileList
|
|
32590
32831
|
};
|
|
32591
32832
|
});
|
|
32592
32833
|
|
|
@@ -33289,6 +33530,13 @@ class ScopedStrategy {
|
|
|
33289
33530
|
const durationMs = Date.now() - start;
|
|
33290
33531
|
if (result.success) {
|
|
33291
33532
|
const parsed2 = result.output ? parseTestOutput(result.output) : { passed: 0, failed: 0, failures: [] };
|
|
33533
|
+
logger.info("verify[scoped]", "Scoped tests passed", {
|
|
33534
|
+
storyId: ctx.storyId,
|
|
33535
|
+
passCount: parsed2.passed,
|
|
33536
|
+
durationMs,
|
|
33537
|
+
scopeTestFallback: scopeTestFallback ?? false,
|
|
33538
|
+
isFullSuite
|
|
33539
|
+
});
|
|
33292
33540
|
return makePassResult(ctx.storyId, "scoped", {
|
|
33293
33541
|
rawOutput: result.output,
|
|
33294
33542
|
passCount: parsed2.passed,
|
|
@@ -33297,6 +33545,12 @@ class ScopedStrategy {
|
|
|
33297
33545
|
});
|
|
33298
33546
|
}
|
|
33299
33547
|
if (result.status === "TIMEOUT") {
|
|
33548
|
+
logger.warn("verify[scoped]", "Scoped tests timed out", {
|
|
33549
|
+
storyId: ctx.storyId,
|
|
33550
|
+
durationMs,
|
|
33551
|
+
scopeTestFallback: scopeTestFallback ?? false,
|
|
33552
|
+
isFullSuite
|
|
33553
|
+
});
|
|
33300
33554
|
return makeFailResult(ctx.storyId, "scoped", "TIMEOUT", {
|
|
33301
33555
|
rawOutput: result.output,
|
|
33302
33556
|
durationMs,
|
|
@@ -33305,6 +33559,14 @@ class ScopedStrategy {
|
|
|
33305
33559
|
});
|
|
33306
33560
|
}
|
|
33307
33561
|
const parsed = result.output ? parseTestOutput(result.output) : { passed: 0, failed: 0, failures: [] };
|
|
33562
|
+
logger.warn("verify[scoped]", "Scoped tests failed", {
|
|
33563
|
+
storyId: ctx.storyId,
|
|
33564
|
+
passCount: parsed.passed,
|
|
33565
|
+
failCount: parsed.failed,
|
|
33566
|
+
durationMs,
|
|
33567
|
+
scopeTestFallback: scopeTestFallback ?? false,
|
|
33568
|
+
isFullSuite
|
|
33569
|
+
});
|
|
33308
33570
|
return makeFailResult(ctx.storyId, "scoped", "TEST_FAILURE", {
|
|
33309
33571
|
rawOutput: result.output,
|
|
33310
33572
|
passCount: parsed.passed,
|
|
@@ -35168,6 +35430,91 @@ var init_acceptance_fix = __esm(() => {
|
|
|
35168
35430
|
});
|
|
35169
35431
|
|
|
35170
35432
|
// src/operations/semantic-review.ts
|
|
35433
|
+
/**
 * Attach reprompt telemetry to a semantic reviewer's JSON output under
 * `_repromptInfo`.
 *
 * @param {string} output - Raw reviewer output.
 * @param {{ dropCount: number, outcome: string, costUsd: number }} info - Telemetry to embed.
 * @returns {string} Re-serialized JSON with the marker, or `output` unchanged
 *   when it does not parse to a plain (non-array) object.
 */
function withRepromptMarker2(output, info) {
  const parsed = tryParseLLMJson(output);
  // Arrays are typeof "object"; spreading one would silently turn it into an
  // index-keyed object, so leave array (and non-object) payloads untouched.
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed))
    return output;
  return JSON.stringify({ ...parsed, _repromptInfo: info });
}
|
|
35439
|
+
/**
 * Read the `_repromptInfo` marker back out of a parsed semantic-review payload.
 *
 * The payload originates from model output, so the marker is validated before
 * use: `dropCount` and `costUsd` must be finite numbers (NaN/Infinity are
 * rejected, not just non-numbers) and `outcome` must be a string.
 *
 * @param {unknown} raw - Parsed reviewer JSON.
 * @returns {{ dropCount: number, costUsd: number, outcome: string } | undefined}
 *   The three validated fields only, or undefined when absent or malformed.
 */
function extractRepromptInfo2(raw) {
  if (!raw || typeof raw !== "object")
    return;
  const info = raw._repromptInfo;
  if (!info || typeof info !== "object")
    return;
  const { dropCount, costUsd, outcome } = info;
  // Number.isFinite is false for every non-number as well as NaN/±Infinity.
  if (!Number.isFinite(dropCount) || !Number.isFinite(costUsd) || typeof outcome !== "string") {
    return;
  }
  return { dropCount, costUsd, outcome };
}
|
|
35455
|
+
/**
 * Decide whether the semantic reviewer should be re-prompted.
 *
 * A re-prompt is requested only when re-grounding is not disabled in the
 * config, the review did not pass, no blocking finding survived the
 * AC-grounding filter, and that filter dropped at least one finding.
 *
 * @param {{ passed: boolean, findings: object[] }} shape - First-turn review.
 * @param {object} input - Operation input (config, story, blockingThreshold).
 * @returns {{ shouldReprompt: boolean, acDropped?: object[] }}
 */
function evaluateRepromptTrigger2(shape, input) {
  const regroundDisabled = input.semanticConfig.acRegroundOnDrop === false;
  if (regroundDisabled || shape.passed) {
    return { shouldReprompt: false };
  }
  const grounding = filterByAcGroundingMinimal(shape.findings, input.story.acceptanceCriteria);
  const blockingThreshold = input.blockingThreshold ?? "error";
  const groundedBlockers = grounding.accepted.filter((finding) => isBlockingSeverity(finding.severity, blockingThreshold));
  const nothingToRecover = groundedBlockers.length > 0 || grounding.dropped.length === 0;
  if (nothingToRecover) {
    return { shouldReprompt: false };
  }
  return { shouldReprompt: true, acDropped: grounding.dropped };
}
|
|
35469
|
+
/**
 * Re-prompt the semantic reviewer once after its findings were dropped by AC
 * grounding, then fold the second response into the turn result.
 *
 * The outcome is recorded under `_repromptInfo.outcome` in the returned output:
 * - "parse-failed": the second response did not validate; first-turn output is kept.
 * - "recovered-blocking": the second response has an accepted blocking finding;
 *   its findings replace the first turn's and the review fails.
 * - "recovered-advisory-only": the second response passed; accepted
 *   non-blocking findings from both turns are merged and the review passes.
 * - "still-dropped": anything else; first-turn output is kept.
 *
 * Every outcome reports the combined cost of both turns in `estimatedCostUsd`.
 * Previously the two fallback outcomes returned only the first turn's cost,
 * although the second call had been made and paid for.
 *
 * @param {object} turn - First-turn result (`output`, optional `estimatedCostUsd`).
 * @param {{ passed: boolean, findings: object[] }} firstParsed - Validated first-turn review.
 * @param {object[]} drops - Findings dropped by AC grounding on the first turn.
 * @param {object} ctx - Hop context providing `input` and `send(prompt)`.
 * @returns {Promise<object>} The turn with updated `output` and `estimatedCostUsd`.
 */
async function performSemanticReground(turn, firstParsed, drops, ctx) {
  const threshold = ctx.input.blockingThreshold ?? "error";
  const acceptanceCriteria = ctx.input.story.acceptanceCriteria;
  const { accepted: firstAccepted } = filterByAcGroundingMinimal(firstParsed.findings, acceptanceCriteria);
  const firstAdvisory = firstAccepted.filter((f) => !isBlockingSeverity(f.severity, threshold));
  const repromptPrompt = ReviewPromptBuilder.regroundDroppedFindings({
    drops,
    acceptanceCriteria
  });
  const secondTurn = await ctx.send(repromptPrompt);
  const secondParsed = validateLLMShape(tryParseLLMJson(secondTurn.output));
  const costUsd = (turn.estimatedCostUsd ?? 0) + (secondTurn.estimatedCostUsd ?? 0);
  const dropCount = drops.length;
  // Fallback result: keep the first-turn output, tag it, and still charge for
  // the second call.
  const keepFirstTurn = (outcome) => ({
    ...turn,
    output: withRepromptMarker2(turn.output, { dropCount, outcome, costUsd }),
    estimatedCostUsd: costUsd
  });
  if (!secondParsed) {
    return keepFirstTurn("parse-failed");
  }
  const { accepted: secondAccepted } = filterByAcGroundingMinimal(secondParsed.findings, acceptanceCriteria);
  const secondBlocking = secondAccepted.filter((f) => isBlockingSeverity(f.severity, threshold));
  if (secondBlocking.length > 0) {
    return {
      ...turn,
      output: JSON.stringify({
        passed: false,
        findings: secondParsed.findings,
        _repromptInfo: { dropCount, outcome: "recovered-blocking", costUsd }
      }),
      estimatedCostUsd: costUsd
    };
  }
  if (secondParsed.passed) {
    const secondAdvisory = secondAccepted.filter((f) => !isBlockingSeverity(f.severity, threshold));
    return {
      ...turn,
      output: JSON.stringify({
        passed: true,
        findings: [...firstAdvisory, ...secondAdvisory],
        _repromptInfo: { dropCount, outcome: "recovered-advisory-only", costUsd }
      }),
      estimatedCostUsd: costUsd
    };
  }
  return keepFirstTurn("still-dropped");
}
|
|
35171
35518
|
async function requoteBlockingFindings(findings, ctx) {
|
|
35172
35519
|
const threshold = ctx.input.blockingThreshold ?? "error";
|
|
35173
35520
|
const maxRequotes = ctx.input.semanticConfig.substantiation?.maxRequotes ?? DEFAULT_MAX_REQUOTES2;
|
|
@@ -35243,14 +35590,24 @@ var FAIL_OPEN2, SEMANTIC_REQUOTE_RECOVERED_EVENT = "review.semantic.finding.requ
|
|
|
35243
35590
|
if (!parsed)
|
|
35244
35591
|
return turn;
|
|
35245
35592
|
const requoted = await requoteBlockingFindings(parsed.findings, ctx);
|
|
35246
|
-
if (
|
|
35593
|
+
if (requoted.changed) {
|
|
35594
|
+
const passed = !requoted.findings.some((finding) => isBlockingSeverity(finding.severity, ctx.input.blockingThreshold ?? "error"));
|
|
35595
|
+
return {
|
|
35596
|
+
...turn,
|
|
35597
|
+
output: JSON.stringify({ passed, findings: requoted.findings }),
|
|
35598
|
+
estimatedCostUsd: (turn.estimatedCostUsd ?? 0) + requoted.extraCostUsd
|
|
35599
|
+
};
|
|
35600
|
+
}
|
|
35601
|
+
if (ctx.input.mode !== "ref")
|
|
35247
35602
|
return turn;
|
|
35248
|
-
const
|
|
35249
|
-
|
|
35250
|
-
|
|
35251
|
-
|
|
35252
|
-
|
|
35253
|
-
|
|
35603
|
+
const regroundEnabled = ctx.input.semanticConfig.acRegroundOnDrop !== false;
|
|
35604
|
+
if (!regroundEnabled)
|
|
35605
|
+
return turn;
|
|
35606
|
+
const firstShape = { passed: parsed.passed, findings: requoted.findings };
|
|
35607
|
+
const trigger = evaluateRepromptTrigger2(firstShape, ctx.input);
|
|
35608
|
+
if (!trigger.shouldReprompt)
|
|
35609
|
+
return turn;
|
|
35610
|
+
return performSemanticReground(turn, firstShape, trigger.acDropped, ctx);
|
|
35254
35611
|
}, semanticReviewOp;
|
|
35255
35612
|
var init_semantic_review = __esm(() => {
|
|
35256
35613
|
init_retry();
|
|
@@ -35259,7 +35616,13 @@ var init_semantic_review = __esm(() => {
|
|
|
35259
35616
|
init_prompts();
|
|
35260
35617
|
init_finding_filters();
|
|
35261
35618
|
init_requote_response();
|
|
35262
|
-
FAIL_OPEN2 = {
|
|
35619
|
+
FAIL_OPEN2 = {
|
|
35620
|
+
passed: true,
|
|
35621
|
+
findings: [],
|
|
35622
|
+
normalizedFindings: [],
|
|
35623
|
+
acDropped: [],
|
|
35624
|
+
failOpen: true
|
|
35625
|
+
};
|
|
35263
35626
|
semanticReviewOp = {
|
|
35264
35627
|
kind: "run",
|
|
35265
35628
|
name: "semantic-review",
|
|
@@ -35276,7 +35639,7 @@ var init_semantic_review = __esm(() => {
|
|
|
35276
35639
|
invalid: () => ReviewPromptBuilder.jsonRetry(),
|
|
35277
35640
|
truncated: () => ReviewPromptBuilder.jsonRetryCondensed({ blockingThreshold: input.blockingThreshold })
|
|
35278
35641
|
},
|
|
35279
|
-
exhaustedFallback: (lastOutput) => /"passed"\s*:\s*false/.test(lastOutput) ? { passed: false, findings: [], normalizedFindings: [], looksLikeFail: true } : FAIL_OPEN2,
|
|
35642
|
+
exhaustedFallback: (lastOutput) => /"passed"\s*:\s*false/.test(lastOutput) ? { passed: false, findings: [], normalizedFindings: [], acDropped: [], looksLikeFail: true } : FAIL_OPEN2,
|
|
35280
35643
|
logContext: { blockingThreshold: input.blockingThreshold ?? "error" }
|
|
35281
35644
|
}),
|
|
35282
35645
|
hopBody: semanticReviewHopBody,
|
|
@@ -35298,15 +35661,25 @@ var init_semantic_review = __esm(() => {
|
|
|
35298
35661
|
parse(output, _input, _ctx) {
|
|
35299
35662
|
const raw = tryParseLLMJson(output);
|
|
35300
35663
|
const parsed = validateLLMShape(raw);
|
|
35664
|
+
const repromptEvent = extractRepromptInfo2(raw);
|
|
35301
35665
|
if (parsed) {
|
|
35302
35666
|
return {
|
|
35303
35667
|
passed: parsed.passed,
|
|
35304
35668
|
findings: parsed.findings,
|
|
35305
|
-
normalizedFindings: []
|
|
35669
|
+
normalizedFindings: [],
|
|
35670
|
+
acDropped: [],
|
|
35671
|
+
repromptEvent
|
|
35306
35672
|
};
|
|
35307
35673
|
}
|
|
35308
35674
|
if (/"passed"\s*:\s*false/.test(output)) {
|
|
35309
|
-
return {
|
|
35675
|
+
return {
|
|
35676
|
+
passed: false,
|
|
35677
|
+
findings: [],
|
|
35678
|
+
normalizedFindings: [],
|
|
35679
|
+
acDropped: [],
|
|
35680
|
+
looksLikeFail: true,
|
|
35681
|
+
repromptEvent
|
|
35682
|
+
};
|
|
35310
35683
|
}
|
|
35311
35684
|
return FAIL_OPEN2;
|
|
35312
35685
|
},
|
|
@@ -35319,14 +35692,15 @@ var init_semantic_review = __esm(() => {
|
|
|
35319
35692
|
const findings = parsed.findings;
|
|
35320
35693
|
const sanitized = sanitizeRefModeFindings(findings, input.mode, threshold);
|
|
35321
35694
|
const substantiated = await substantiateSemanticEvidence(sanitized, input.mode, input.workdir, input.story.id, threshold);
|
|
35322
|
-
const { accepted } = filterByAcGroundingMinimal(substantiated, input.story.acceptanceCriteria);
|
|
35695
|
+
const { accepted, dropped } = filterByAcGroundingMinimal(substantiated, input.story.acceptanceCriteria);
|
|
35323
35696
|
const blocking = accepted.filter((f) => isBlockingSeverity(f.severity, threshold));
|
|
35324
35697
|
const passed = parsed.passed && blocking.length === 0;
|
|
35325
35698
|
return {
|
|
35326
35699
|
...parsed,
|
|
35327
35700
|
passed,
|
|
35328
35701
|
findings: accepted,
|
|
35329
|
-
normalizedFindings: toReviewFindings(blocking)
|
|
35702
|
+
normalizedFindings: toReviewFindings(blocking),
|
|
35703
|
+
acDropped: dropped
|
|
35330
35704
|
};
|
|
35331
35705
|
}
|
|
35332
35706
|
};
|
|
@@ -37368,7 +37742,7 @@ var init_greenfield_gate = __esm(() => {
|
|
|
37368
37742
|
});
|
|
37369
37743
|
// src/verification/rectification.ts
|
|
37370
37744
|
function shouldRetryRectification(state, config2) {
|
|
37371
|
-
if (state.attempt >= config2.
|
|
37745
|
+
if (state.attempt >= config2.maxAttemptsTotal) {
|
|
37372
37746
|
return false;
|
|
37373
37747
|
}
|
|
37374
37748
|
if (state.lastExitCode !== undefined && state.lastExitCode !== 0 && state.currentFailures === 0) {
|
|
@@ -37471,7 +37845,7 @@ var init_full_suite_gate = __esm(() => {
|
|
|
37471
37845
|
});
|
|
37472
37846
|
|
|
37473
37847
|
// src/operations/full-suite-rectify.ts
|
|
37474
|
-
function makeFullSuiteRectifyStrategy(story) {
|
|
37848
|
+
function makeFullSuiteRectifyStrategy(story, config2) {
|
|
37475
37849
|
return {
|
|
37476
37850
|
name: "full-suite-rectify",
|
|
37477
37851
|
appliesTo: (finding) => finding.source === "test-runner" && finding.category === "failed-test",
|
|
@@ -37481,7 +37855,7 @@ function makeFullSuiteRectifyStrategy(story) {
|
|
|
37481
37855
|
contextMarkdown: RectifierPromptBuilder.failingTestContext(findings)
|
|
37482
37856
|
}),
|
|
37483
37857
|
extractApplied: () => ({ targetFiles: [], summary: "Fixed failing tests" }),
|
|
37484
|
-
maxAttempts:
|
|
37858
|
+
maxAttempts: config2.execution.rectification.maxAttemptsPerStrategy,
|
|
37485
37859
|
coRun: "exclusive"
|
|
37486
37860
|
};
|
|
37487
37861
|
}
|
|
@@ -37522,7 +37896,7 @@ var init__finding_to_check = __esm(() => {
|
|
|
37522
37896
|
});
|
|
37523
37897
|
|
|
37524
37898
|
// src/operations/autofix-implementer-strategy.ts
|
|
37525
|
-
function makeAutofixImplementerStrategy(story) {
|
|
37899
|
+
function makeAutofixImplementerStrategy(story, config2) {
|
|
37526
37900
|
return {
|
|
37527
37901
|
name: "autofix-implementer",
|
|
37528
37902
|
appliesTo: (f) => f.fixTarget === "source" && IMPLEMENTER_SOURCES.has(f.source),
|
|
@@ -37535,7 +37909,7 @@ function makeAutofixImplementerStrategy(story) {
|
|
|
37535
37909
|
summary: output.unresolvedReason ?? "",
|
|
37536
37910
|
unresolved: output.unresolvedReason
|
|
37537
37911
|
}),
|
|
37538
|
-
maxAttempts:
|
|
37912
|
+
maxAttempts: config2.execution.rectification.maxAttemptsPerStrategy,
|
|
37539
37913
|
coRun: "co-run-sequential"
|
|
37540
37914
|
};
|
|
37541
37915
|
}
|
|
@@ -37557,7 +37931,7 @@ function makeAutofixTestWriterStrategy(story, config2) {
|
|
|
37557
37931
|
story,
|
|
37558
37932
|
blockingThreshold: config2.review?.blockingThreshold
|
|
37559
37933
|
}),
|
|
37560
|
-
maxAttempts:
|
|
37934
|
+
maxAttempts: config2.execution.rectification.maxAttemptsPerStrategy,
|
|
37561
37935
|
coRun: "co-run-sequential"
|
|
37562
37936
|
};
|
|
37563
37937
|
}
|
|
@@ -39502,6 +39876,42 @@ async function runSemanticReview(opts) {
|
|
|
39502
39876
|
durationMs: Date.now() - startTime
|
|
39503
39877
|
};
|
|
39504
39878
|
}
|
|
39879
|
+
if (opResult.looksLikeFail) {
|
|
39880
|
+
logger?.warn("semantic", "LLM returned truncated JSON with passed:false \u2014 treating as failure", {
|
|
39881
|
+
storyId: story.id
|
|
39882
|
+
});
|
|
39883
|
+
recordSemanticAudit({
|
|
39884
|
+
runtime,
|
|
39885
|
+
workdir,
|
|
39886
|
+
projectDir,
|
|
39887
|
+
storyId: story.id,
|
|
39888
|
+
featureName,
|
|
39889
|
+
parsed: false,
|
|
39890
|
+
looksLikeFail: true,
|
|
39891
|
+
failOpen: false,
|
|
39892
|
+
passed: false,
|
|
39893
|
+
blockingThreshold,
|
|
39894
|
+
result: null
|
|
39895
|
+
});
|
|
39896
|
+
return {
|
|
39897
|
+
check: "semantic",
|
|
39898
|
+
success: false,
|
|
39899
|
+
command: "",
|
|
39900
|
+
exitCode: 1,
|
|
39901
|
+
output: "semantic review: LLM response truncated but indicated failure (passed:false found in partial response)",
|
|
39902
|
+
durationMs: Date.now() - startTime
|
|
39903
|
+
};
|
|
39904
|
+
}
|
|
39905
|
+
if (opResult.repromptEvent) {
|
|
39906
|
+
runtime.dispatchEvents.emitReviewReprompt({
|
|
39907
|
+
kind: "review-reprompt-on-drop",
|
|
39908
|
+
storyId: story.id,
|
|
39909
|
+
reviewer: "semantic",
|
|
39910
|
+
dropCount: opResult.repromptEvent.dropCount,
|
|
39911
|
+
repromptOutcome: opResult.repromptEvent.outcome,
|
|
39912
|
+
costUsd: opResult.repromptEvent.costUsd
|
|
39913
|
+
});
|
|
39914
|
+
}
|
|
39505
39915
|
const threshold = blockingThreshold ?? "error";
|
|
39506
39916
|
const allFindings = opResult.findings;
|
|
39507
39917
|
const blockingFindings = allFindings.filter((f) => isBlockingSeverity(f.severity, threshold));
|
|
@@ -39903,6 +40313,18 @@ async function runReview(opts) {
|
|
|
39903
40313
|
naxIgnoreIndex
|
|
39904
40314
|
}) : normalizeMechanicalFindings(checkName, await runCheck(checkName, command, workdir, storyId, env2), workdir);
|
|
39905
40315
|
checks3.push(result);
|
|
40316
|
+
if (result.success) {
|
|
40317
|
+
logger?.info("review", `${checkName} passed`, {
|
|
40318
|
+
storyId,
|
|
40319
|
+
durationMs: result.durationMs
|
|
40320
|
+
});
|
|
40321
|
+
} else {
|
|
40322
|
+
logger?.warn("review", `${checkName} failed`, {
|
|
40323
|
+
storyId,
|
|
40324
|
+
exitCode: result.exitCode,
|
|
40325
|
+
durationMs: result.durationMs
|
|
40326
|
+
});
|
|
40327
|
+
}
|
|
39906
40328
|
if (!result.success && !firstFailure) {
|
|
39907
40329
|
firstFailure = `${checkName} failed (exit code ${result.exitCode})`;
|
|
39908
40330
|
}
|
|
@@ -52209,11 +52631,24 @@ function toReviewDecisionPayload(opName, output) {
|
|
|
52209
52631
|
if (typeof record2.passed !== "boolean" || !Array.isArray(record2.findings)) {
|
|
52210
52632
|
return null;
|
|
52211
52633
|
}
|
|
52634
|
+
const acDropped = Array.isArray(record2.acDropped) ? record2.acDropped.map((d) => {
|
|
52635
|
+
const entry = d ?? {};
|
|
52636
|
+
const finding = entry.finding ?? {};
|
|
52637
|
+
return {
|
|
52638
|
+
code: typeof entry.code === "string" ? entry.code : undefined,
|
|
52639
|
+
severity: typeof finding.severity === "string" ? finding.severity : undefined,
|
|
52640
|
+
file: typeof finding.file === "string" ? finding.file : undefined,
|
|
52641
|
+
line: typeof finding.line === "number" ? finding.line : undefined,
|
|
52642
|
+
issue: typeof finding.issue === "string" ? finding.issue : undefined,
|
|
52643
|
+
acIndex: typeof finding.acIndex === "number" ? finding.acIndex : undefined
|
|
52644
|
+
};
|
|
52645
|
+
}) : undefined;
|
|
52212
52646
|
return {
|
|
52213
52647
|
reviewer,
|
|
52214
52648
|
parsed: true,
|
|
52215
52649
|
passed: record2.passed,
|
|
52216
|
-
result: { passed: record2.passed, findings: record2.findings }
|
|
52650
|
+
result: { passed: record2.passed, findings: record2.findings },
|
|
52651
|
+
acDropped
|
|
52217
52652
|
};
|
|
52218
52653
|
}
|
|
52219
52654
|
function emitReviewDecision(ctx, opName, output) {
|
|
@@ -52262,12 +52697,38 @@ function logUnifiedReviewPhaseResult(storyId, opName, output) {
|
|
|
52262
52697
|
const title = payload.reviewer === "semantic" ? "Semantic review" : "Adversarial review";
|
|
52263
52698
|
if (payload.passed) {
|
|
52264
52699
|
logger?.info("review", `${title} passed`, { storyId });
|
|
52265
|
-
|
|
52266
|
-
|
|
52700
|
+
return;
|
|
52701
|
+
}
|
|
52702
|
+
if (findingsCount === 0) {
|
|
52703
|
+
const dropped = payload.acDropped ?? [];
|
|
52704
|
+
const droppedSummary = dropped.slice(0, 5);
|
|
52705
|
+
logger?.warn("review", `${title} failed: 0 findings \u2014 ${dropped.length > 0 ? `${dropped.length} blocking finding(s) dropped as ungrounded by AC-grounding filter` : "model emitted passed:false but produced no findings (likely empty output)"}`, {
|
|
52267
52706
|
storyId,
|
|
52268
|
-
findingsCount
|
|
52707
|
+
findingsCount,
|
|
52708
|
+
reason: dropped.length > 0 ? "ac-grounding-drop" : "passed-false-no-findings",
|
|
52709
|
+
droppedCount: dropped.length || undefined,
|
|
52710
|
+
droppedFindings: droppedSummary.length > 0 ? droppedSummary : undefined,
|
|
52711
|
+
droppedTruncated: dropped.length > droppedSummary.length || undefined
|
|
52269
52712
|
});
|
|
52713
|
+
return;
|
|
52270
52714
|
}
|
|
52715
|
+
const findingsSummary = payload.result.findings.slice(0, 5).map((f) => {
|
|
52716
|
+
const r = f ?? {};
|
|
52717
|
+
return {
|
|
52718
|
+
severity: typeof r.severity === "string" ? r.severity : undefined,
|
|
52719
|
+
file: typeof r.file === "string" ? r.file : undefined,
|
|
52720
|
+
line: typeof r.line === "number" ? r.line : undefined,
|
|
52721
|
+
rule: typeof r.rule === "string" ? r.rule : undefined,
|
|
52722
|
+
issue: typeof r.issue === "string" ? r.issue : typeof r.message === "string" ? r.message : undefined,
|
|
52723
|
+
acIndex: typeof r.acIndex === "number" ? r.acIndex : undefined
|
|
52724
|
+
};
|
|
52725
|
+
});
|
|
52726
|
+
logger?.warn("review", `${title} failed: ${findingsCount} findings`, {
|
|
52727
|
+
storyId,
|
|
52728
|
+
findingsCount,
|
|
52729
|
+
findings: findingsSummary,
|
|
52730
|
+
truncated: findingsCount > findingsSummary.length
|
|
52731
|
+
});
|
|
52271
52732
|
}
|
|
52272
52733
|
async function runPhase(ctx, slot, phaseCosts, phaseOutputs, isThreeSession = false) {
|
|
52273
52734
|
const logger = getSafeLogger();
|
|
@@ -52639,10 +53100,10 @@ function buildPlanForStrategy(ctx, story, config2, testStrategy, inputs) {
|
|
|
52639
53100
|
strategies.push(makeMechanicalFormatFixStrategy());
|
|
52640
53101
|
}
|
|
52641
53102
|
if (isThreeSession && inputs.fullSuiteGate) {
|
|
52642
|
-
strategies.push(makeFullSuiteRectifyStrategy(story));
|
|
53103
|
+
strategies.push(makeFullSuiteRectifyStrategy(story, config2));
|
|
52643
53104
|
}
|
|
52644
53105
|
if (config2.quality.autofix?.enabled !== false) {
|
|
52645
|
-
strategies.push(makeAutofixImplementerStrategy(story));
|
|
53106
|
+
strategies.push(makeAutofixImplementerStrategy(story, config2));
|
|
52646
53107
|
strategies.push(makeAutofixTestWriterStrategy(story, config2));
|
|
52647
53108
|
}
|
|
52648
53109
|
const rectOpts = {
|
|
@@ -52783,9 +53244,9 @@ async function assemblePlanInputsFromCtx(ctx) {
|
|
|
52783
53244
|
blockingThreshold: ctx.config.review.blockingThreshold
|
|
52784
53245
|
} : undefined;
|
|
52785
53246
|
const rectificationInput = ctx.config.execution?.rectification?.enabled === true ? {
|
|
52786
|
-
maxAttempts: ctx.config.execution.rectification.
|
|
53247
|
+
maxAttempts: ctx.config.execution.rectification.maxAttemptsTotal,
|
|
52787
53248
|
strategies: [],
|
|
52788
|
-
abortOnIncreasingFailures: ctx.config.execution.rectification.abortOnIncreasingFailures
|
|
53249
|
+
abortOnIncreasingFailures: ctx.config.execution.rectification.abortOnIncreasingFailures
|
|
52789
53250
|
} : undefined;
|
|
52790
53251
|
return {
|
|
52791
53252
|
story,
|
|
@@ -53249,10 +53710,29 @@ Category: ${failureCategory ?? "unknown"}`,
|
|
|
53249
53710
|
}
|
|
53250
53711
|
}
|
|
53251
53712
|
if (!planResult.success) {
|
|
53713
|
+
const failedPhases = {};
|
|
53714
|
+
for (const [name, output] of Object.entries(planResult.phaseOutputs)) {
|
|
53715
|
+
if (!output || typeof output !== "object")
|
|
53716
|
+
continue;
|
|
53717
|
+
const r = output;
|
|
53718
|
+
const passed = typeof r.passed === "boolean" ? r.passed : undefined;
|
|
53719
|
+
const success2 = typeof r.success === "boolean" ? r.success : undefined;
|
|
53720
|
+
const explicitFail = passed === false || success2 === false;
|
|
53721
|
+
if (!explicitFail)
|
|
53722
|
+
continue;
|
|
53723
|
+
const findings = Array.isArray(r.findings) ? r.findings.length : undefined;
|
|
53724
|
+
failedPhases[name] = { passed, success: success2, findingsCount: findings };
|
|
53725
|
+
}
|
|
53726
|
+
const stderrTail = (agentResult.stderr ?? "").slice(-500);
|
|
53727
|
+
const outputTail = (agentResult.output ?? "").slice(-500);
|
|
53252
53728
|
logger.error("execution", "Agent session failed", {
|
|
53253
53729
|
storyId: ctx.story.id,
|
|
53254
53730
|
exitCode: agentResult.exitCode,
|
|
53255
|
-
rateLimited: agentResult.rateLimited
|
|
53731
|
+
rateLimited: agentResult.rateLimited,
|
|
53732
|
+
failureCategory: failureCategory ?? "unknown",
|
|
53733
|
+
failedPhases: Object.keys(failedPhases).length > 0 ? failedPhases : undefined,
|
|
53734
|
+
stderrTail: stderrTail || undefined,
|
|
53735
|
+
outputTail: outputTail || undefined
|
|
53256
53736
|
});
|
|
53257
53737
|
if (agentResult.rateLimited) {
|
|
53258
53738
|
logger.warn("execution", "Rate limited \u2014 will retry", { storyId: ctx.story.id });
|
|
@@ -53976,6 +54456,11 @@ class RegressionStrategy {
|
|
|
53976
54456
|
const durationMs = Date.now() - start;
|
|
53977
54457
|
if (result.success) {
|
|
53978
54458
|
const parsed2 = result.output ? parseTestOutput(result.output) : { passed: 0, failed: 0, failures: [] };
|
|
54459
|
+
logger?.info("verify[regression]", "Full-suite regression gate passed", {
|
|
54460
|
+
storyId: ctx.storyId,
|
|
54461
|
+
passCount: parsed2.passed,
|
|
54462
|
+
durationMs
|
|
54463
|
+
});
|
|
53979
54464
|
return makePassResult(ctx.storyId, "regression", {
|
|
53980
54465
|
rawOutput: result.output,
|
|
53981
54466
|
passCount: parsed2.passed,
|
|
@@ -53989,9 +54474,19 @@ class RegressionStrategy {
|
|
|
53989
54474
|
return makePassResult(ctx.storyId, "regression", { durationMs });
|
|
53990
54475
|
}
|
|
53991
54476
|
if (result.status === "TIMEOUT") {
|
|
54477
|
+
logger?.warn("verify[regression]", "Full-suite regression gate timed out", {
|
|
54478
|
+
storyId: ctx.storyId,
|
|
54479
|
+
durationMs
|
|
54480
|
+
});
|
|
53992
54481
|
return makeFailResult(ctx.storyId, "regression", "TIMEOUT", { rawOutput: result.output, durationMs });
|
|
53993
54482
|
}
|
|
53994
54483
|
const parsed = result.output ? parseTestOutput(result.output) : { passed: 0, failed: 0, failures: [] };
|
|
54484
|
+
logger?.warn("verify[regression]", "Full-suite regression gate failed", {
|
|
54485
|
+
storyId: ctx.storyId,
|
|
54486
|
+
passCount: parsed.passed,
|
|
54487
|
+
failCount: parsed.failed,
|
|
54488
|
+
durationMs
|
|
54489
|
+
});
|
|
53995
54490
|
return makeFailResult(ctx.storyId, "regression", "TEST_FAILURE", {
|
|
53996
54491
|
rawOutput: result.output,
|
|
53997
54492
|
passCount: parsed.passed,
|
|
@@ -57068,7 +57563,7 @@ var package_default;
|
|
|
57068
57563
|
var init_package = __esm(() => {
|
|
57069
57564
|
package_default = {
|
|
57070
57565
|
name: "@nathapp/nax",
|
|
57071
|
-
version: "0.67.10",
|
|
57566
|
+
version: "0.67.11",
|
|
57072
57567
|
description: "AI Coding Agent Orchestrator \u2014 loops until done",
|
|
57073
57568
|
type: "module",
|
|
57074
57569
|
bin: {
|
|
@@ -57163,8 +57658,8 @@ var init_version = __esm(() => {
|
|
|
57163
57658
|
NAX_VERSION = package_default.version;
|
|
57164
57659
|
NAX_COMMIT = (() => {
|
|
57165
57660
|
try {
|
|
57166
|
-
if (/^[0-9a-f]{6,10}$/.test("
|
|
57167
|
-
return "
|
|
57661
|
+
if (/^[0-9a-f]{6,10}$/.test("0db5c72e"))
|
|
57662
|
+
return "0db5c72e";
|
|
57168
57663
|
} catch {}
|
|
57169
57664
|
try {
|
|
57170
57665
|
const result = Bun.spawnSync(["git", "rev-parse", "--short", "HEAD"], {
|
|
@@ -58138,7 +58633,7 @@ async function runDeferredRegression(options) {
|
|
|
58138
58633
|
}
|
|
58139
58634
|
const testCommand = config2.quality.commands.test ?? "bun test";
|
|
58140
58635
|
const timeoutSeconds = config2.execution.regressionGate?.timeoutSeconds ?? 120;
|
|
58141
|
-
const maxRectificationAttempts = config2.execution.regressionGate?.maxRectificationAttempts ?? 2;
|
|
58636
|
+
const maxRectificationAttempts = config2.execution.rectification.maxAttemptsTotal;
|
|
58142
58637
|
const acceptOnTimeout = config2.execution.regressionGate?.acceptOnTimeout ?? true;
|
|
58143
58638
|
const verifyOpts = {
|
|
58144
58639
|
workdir,
|
|
@@ -58302,7 +58797,7 @@ async function runDeferredRegression(options) {
|
|
|
58302
58797
|
const cycle = {
|
|
58303
58798
|
findings: initialFindings,
|
|
58304
58799
|
iterations: [],
|
|
58305
|
-
strategies: [makeFullSuiteRectifyStrategy(story)],
|
|
58800
|
+
strategies: [makeFullSuiteRectifyStrategy(story, config2)],
|
|
58306
58801
|
config: { maxAttemptsTotal: maxRectificationAttempts, validatorRetries: 1 },
|
|
58307
58802
|
validate: async (_cycleCtx, _opts) => {
|
|
58308
58803
|
const verification = await _regressionDeps.runVerification(verifyOpts);
|
|
@@ -60348,7 +60843,7 @@ async function handlePipelineFailure(ctx, pipelineResult) {
|
|
|
60348
60843
|
feature: ctx.feature,
|
|
60349
60844
|
attempts: ctx.story.attempts
|
|
60350
60845
|
});
|
|
60351
|
-
if (ctx.story.attempts !== undefined && ctx.story.attempts >= ctx.config.execution.rectification.
|
|
60846
|
+
if (ctx.story.attempts !== undefined && ctx.story.attempts >= ctx.config.execution.rectification.maxAttemptsTotal) {
|
|
60352
60847
|
await pipelineEventBus.emitAsync({
|
|
60353
60848
|
type: "human-review:requested",
|
|
60354
60849
|
storyId: ctx.story.id,
|
|
@@ -60631,7 +61126,7 @@ function selectNextStories(prd, config2, batchPlan, currentBatchIndex, lastStory
|
|
|
60631
61126
|
nextBatchIndex: currentBatchIndex + 1
|
|
60632
61127
|
};
|
|
60633
61128
|
}
|
|
60634
|
-
const story = getNextStory(prd, lastStoryId, config2.execution.rectification?.
|
|
61129
|
+
const story = getNextStory(prd, lastStoryId, config2.execution.rectification?.maxAttemptsTotal ?? 12);
|
|
60635
61130
|
if (!story)
|
|
60636
61131
|
return null;
|
|
60637
61132
|
return {
|
|
@@ -95551,15 +96046,16 @@ var FIELD_DESCRIPTIONS = {
|
|
|
95551
96046
|
"execution.contextProviderTokenBudget": "Token budget for plugin context providers",
|
|
95552
96047
|
"execution.lintCommand": "Lint command override (null=disabled, undefined=auto-detect)",
|
|
95553
96048
|
"execution.typecheckCommand": "Typecheck command override (null=disabled, undefined=auto-detect)",
|
|
95554
|
-
"execution.rectification": "
|
|
96049
|
+
"execution.rectification": "Unified fix-cycle settings \u2014 shared by story-orchestrator (semantic + adversarial + mechanical) and post-run regression cycles",
|
|
95555
96050
|
"execution.rectification.enabled": "Enable rectification loop",
|
|
95556
|
-
"execution.rectification.
|
|
96051
|
+
"execution.rectification.maxAttemptsTotal": "Total iteration cap for the unified fix cycle (default: 12). Per-strategy caps are the granular bound.",
|
|
96052
|
+
"execution.rectification.maxAttemptsPerStrategy": "Default per-strategy cap for LLM-driven strategies \u2014 autofix-implementer / autofix-test-writer / full-suite-rectify (default: 3). Mechanical strategies stay at 1.",
|
|
95557
96053
|
"execution.rectification.fullSuiteTimeoutSeconds": "Timeout for full test suite run in seconds",
|
|
95558
96054
|
"execution.rectification.maxFailureSummaryChars": "Max characters in failure summary",
|
|
95559
96055
|
"execution.rectification.abortOnIncreasingFailures": "Abort if failure count increases",
|
|
95560
|
-
"execution.rectification.escalateOnExhaustion": "Enable model tier escalation when
|
|
95561
|
-
"execution.rectification.rethinkAtAttempt": "Attempt number at which 'rethink your approach' language is injected into the prompt (default: 2
|
|
95562
|
-
"execution.rectification.urgencyAtAttempt": "Attempt number at which 'final chance before escalation' urgency is added
|
|
96056
|
+
"execution.rectification.escalateOnExhaustion": "Enable model tier escalation when attempts are exhausted with remaining failures",
|
|
96057
|
+
"execution.rectification.rethinkAtAttempt": "Attempt number at which 'rethink your approach' language is injected into the prompt (default: 2)",
|
|
96058
|
+
"execution.rectification.urgencyAtAttempt": "Attempt number at which 'final chance before escalation' urgency is added (default: 3)",
|
|
95563
96059
|
"execution.regressionGate": "Regression gate settings (full suite after scoped tests)",
|
|
95564
96060
|
"execution.regressionGate.enabled": "Enable full-suite regression gate",
|
|
95565
96061
|
"execution.regressionGate.timeoutSeconds": "Timeout for regression run in seconds",
|