@nathapp/nax 0.67.9 → 0.67.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/nax.js +1241 -620
- package/package.json +1 -1
package/dist/nax.js
CHANGED
|
@@ -16833,7 +16833,8 @@ var init_schemas_execution = __esm(() => {
|
|
|
16833
16833
|
});
|
|
16834
16834
|
RectificationConfigSchema = exports_external.object({
|
|
16835
16835
|
enabled: exports_external.boolean().default(true),
|
|
16836
|
-
|
|
16836
|
+
maxAttemptsTotal: exports_external.number().int().min(1).max(50).default(12),
|
|
16837
|
+
maxAttemptsPerStrategy: exports_external.number().int().min(1).max(20).default(3),
|
|
16837
16838
|
fullSuiteTimeoutSeconds: exports_external.number().int().min(10).max(600).default(120),
|
|
16838
16839
|
maxFailureSummaryChars: exports_external.number().int().min(500).max(1e4).default(2000),
|
|
16839
16840
|
abortOnIncreasingFailures: exports_external.boolean().default(true),
|
|
@@ -16845,8 +16846,7 @@ var init_schemas_execution = __esm(() => {
|
|
|
16845
16846
|
enabled: exports_external.boolean().default(true),
|
|
16846
16847
|
timeoutSeconds: exports_external.number().int().min(10).max(600).default(120),
|
|
16847
16848
|
acceptOnTimeout: exports_external.boolean().default(true),
|
|
16848
|
-
mode: exports_external.enum(["deferred", "per-story", "disabled"]).default("deferred")
|
|
16849
|
-
maxRectificationAttempts: exports_external.number().int().min(1).default(2)
|
|
16849
|
+
mode: exports_external.enum(["deferred", "per-story", "disabled"]).default("deferred")
|
|
16850
16850
|
});
|
|
16851
16851
|
SmartTestRunnerConfigSchema = exports_external.object({
|
|
16852
16852
|
enabled: exports_external.boolean().default(true),
|
|
@@ -16928,16 +16928,10 @@ var init_schemas_execution = __esm(() => {
|
|
|
16928
16928
|
autofix: exports_external.object({
|
|
16929
16929
|
enabled: exports_external.boolean().default(true),
|
|
16930
16930
|
maxAttempts: exports_external.number().int().min(1).default(3),
|
|
16931
|
-
maxTotalAttempts: exports_external.number().int().min(1).default(12),
|
|
16932
|
-
rethinkAtAttempt: exports_external.number().int().min(1).default(2),
|
|
16933
|
-
urgencyAtAttempt: exports_external.number().int().min(1).default(3),
|
|
16934
16931
|
enforceTestWriterIsolation: exports_external.boolean().default(true)
|
|
16935
16932
|
}).default({
|
|
16936
16933
|
enabled: true,
|
|
16937
16934
|
maxAttempts: 3,
|
|
16938
|
-
maxTotalAttempts: 12,
|
|
16939
|
-
rethinkAtAttempt: 2,
|
|
16940
|
-
urgencyAtAttempt: 3,
|
|
16941
16935
|
enforceTestWriterIsolation: true
|
|
16942
16936
|
}),
|
|
16943
16937
|
forceExit: exports_external.boolean().default(false),
|
|
@@ -17224,7 +17218,12 @@ var init_schemas_review = __esm(() => {
|
|
|
17224
17218
|
timeoutMs: exports_external.number().int().positive().default(600000),
|
|
17225
17219
|
excludePatterns: exports_external.array(exports_external.string()).optional(),
|
|
17226
17220
|
parallel: exports_external.boolean().default(false),
|
|
17227
|
-
maxConcurrentSessions: exports_external.number().int().min(1).max(4).default(2)
|
|
17221
|
+
maxConcurrentSessions: exports_external.number().int().min(1).max(4).default(2),
|
|
17222
|
+
acRegroundOnDrop: exports_external.boolean().default(true),
|
|
17223
|
+
substantiation: exports_external.object({
|
|
17224
|
+
requote: exports_external.boolean().default(true),
|
|
17225
|
+
maxRequotes: exports_external.number().int().min(0).default(5)
|
|
17226
|
+
}).optional()
|
|
17228
17227
|
});
|
|
17229
17228
|
ReviewConfigSchema = exports_external.object({
|
|
17230
17229
|
enabled: exports_external.boolean(),
|
|
@@ -17320,7 +17319,8 @@ var init_schemas3 = __esm(() => {
|
|
|
17320
17319
|
maxStoriesPerFeature: 500,
|
|
17321
17320
|
rectification: {
|
|
17322
17321
|
enabled: true,
|
|
17323
|
-
|
|
17322
|
+
maxAttemptsTotal: 12,
|
|
17323
|
+
maxAttemptsPerStrategy: 3,
|
|
17324
17324
|
fullSuiteTimeoutSeconds: 300,
|
|
17325
17325
|
maxFailureSummaryChars: 2000,
|
|
17326
17326
|
abortOnIncreasingFailures: true,
|
|
@@ -17332,8 +17332,7 @@ var init_schemas3 = __esm(() => {
|
|
|
17332
17332
|
enabled: true,
|
|
17333
17333
|
timeoutSeconds: 300,
|
|
17334
17334
|
acceptOnTimeout: true,
|
|
17335
|
-
mode: "deferred"
|
|
17336
|
-
maxRectificationAttempts: 3
|
|
17335
|
+
mode: "deferred"
|
|
17337
17336
|
},
|
|
17338
17337
|
contextProviderTokenBudget: 2000,
|
|
17339
17338
|
permissionProfile: "unrestricted",
|
|
@@ -17359,9 +17358,6 @@ var init_schemas3 = __esm(() => {
|
|
|
17359
17358
|
autofix: {
|
|
17360
17359
|
enabled: true,
|
|
17361
17360
|
maxAttempts: 3,
|
|
17362
|
-
maxTotalAttempts: 12,
|
|
17363
|
-
rethinkAtAttempt: 2,
|
|
17364
|
-
urgencyAtAttempt: 3,
|
|
17365
17361
|
enforceTestWriterIsolation: true
|
|
17366
17362
|
},
|
|
17367
17363
|
forceExit: false,
|
|
@@ -17450,6 +17446,19 @@ var init_schemas3 = __esm(() => {
|
|
|
17450
17446
|
":!.nax/",
|
|
17451
17447
|
":!.nax-pids"
|
|
17452
17448
|
]
|
|
17449
|
+
},
|
|
17450
|
+
adversarial: {
|
|
17451
|
+
model: "balanced",
|
|
17452
|
+
diffMode: "ref",
|
|
17453
|
+
rules: [],
|
|
17454
|
+
timeoutMs: 600000,
|
|
17455
|
+
parallel: false,
|
|
17456
|
+
maxConcurrentSessions: 2,
|
|
17457
|
+
acRegroundOnDrop: true,
|
|
17458
|
+
substantiation: {
|
|
17459
|
+
requote: true,
|
|
17460
|
+
maxRequotes: 5
|
|
17461
|
+
}
|
|
17453
17462
|
}
|
|
17454
17463
|
}),
|
|
17455
17464
|
plan: PlanConfigSchema.default({
|
|
@@ -18662,6 +18671,47 @@ function rejectLegacyAgentKeys(conf) {
|
|
|
18662
18671
|
`);
|
|
18663
18672
|
throw new NaxError(message, "CONFIG_LEGACY_AGENT_KEYS", { stage: "config", legacyKeys });
|
|
18664
18673
|
}
|
|
18674
|
+
/**
 * Fails fast when a raw config still carries rectification-cap keys that were
 * consolidated under `execution.rectification.*`.
 *
 * The config is only inspected, never modified. A missing or non-object
 * `conf` (or section) is treated as "no legacy keys" so that schema validation,
 * which runs right after this check, is the one to report it.
 *
 * @param {object} conf - Raw, not-yet-validated configuration object.
 * @returns {void} Returns normally when no legacy key is present.
 * @throws {NaxError} With code `CONFIG_LEGACY_RECTIFICATION_KEYS` and context
 *   `{ stage: "config", legacyKeys }`; the message lists every offending key
 *   followed by one migration hint per key.
 */
function rejectLegacyRectificationKeys(conf) {
  // One entry per legacy key. Array order is the order keys and hints are reported in.
  const legacyRules = [
    {
      section: ["quality", "autofix"],
      key: "maxTotalAttempts",
      hint: "- Move `quality.autofix.maxTotalAttempts` \u2192 `execution.rectification.maxAttemptsTotal`"
    },
    {
      section: ["quality", "autofix"],
      key: "rethinkAtAttempt",
      hint: "- Move `quality.autofix.rethinkAtAttempt` \u2192 `execution.rectification.rethinkAtAttempt`"
    },
    {
      section: ["quality", "autofix"],
      key: "urgencyAtAttempt",
      hint: "- Move `quality.autofix.urgencyAtAttempt` \u2192 `execution.rectification.urgencyAtAttempt`"
    },
    {
      section: ["execution", "rectification"],
      key: "maxRetries",
      hint: "- Rename `execution.rectification.maxRetries` \u2192 `execution.rectification.maxAttemptsTotal` (default changed from 2 to 12)"
    },
    {
      section: ["execution", "regressionGate"],
      key: "maxRectificationAttempts",
      hint: "- Remove `execution.regressionGate.maxRectificationAttempts` \u2014 the regression cycle now shares `execution.rectification.maxAttemptsTotal`"
    }
  ];
  const legacyKeys = [];
  const migrationHints = [];
  for (const { section, key, hint } of legacyRules) {
    // Null-safe walk: any missing level (including `conf` itself) yields undefined.
    const holder = section.reduce((node, part) => node?.[part], conf);
    if (holder && typeof holder === "object" && key in holder) {
      legacyKeys.push(`${section.join(".")}.${key}`);
      migrationHints.push(hint);
    }
  }
  if (legacyKeys.length === 0)
    return;
  const message = [
    `Invalid configuration \u2014 legacy rectification-cap keys detected: ${legacyKeys.join(", ")}.`,
    "These were consolidated under `execution.rectification.*` so one config controls the unified",
    "fix cycle (semantic + adversarial + mechanical + regression). Migrate as follows:",
    ...migrationHints
  ].join("\n");
  throw new NaxError(message, "CONFIG_LEGACY_RECTIFICATION_KEYS", { stage: "config", legacyKeys });
}
|
|
18665
18715
|
function applyBatchModeCompat(conf) {
|
|
18666
18716
|
const routing = conf.routing;
|
|
18667
18717
|
const llm = routing?.llm;
|
|
@@ -18768,6 +18818,7 @@ async function loadConfig(startDir, cliOverrides) {
|
|
|
18768
18818
|
return structuredClone(DEFAULT_CONFIG);
|
|
18769
18819
|
}
|
|
18770
18820
|
rejectLegacyAgentKeys(rawConfig);
|
|
18821
|
+
rejectLegacyRectificationKeys(rawConfig);
|
|
18771
18822
|
const result = NaxConfigSchema.safeParse(rawConfig);
|
|
18772
18823
|
if (!result.success) {
|
|
18773
18824
|
const errors3 = result.error.issues.map((err) => {
|
|
@@ -18820,6 +18871,7 @@ async function loadConfigForWorkdir(rootConfigPath, packageDir, cliOverrides) {
|
|
|
18820
18871
|
const rawMerged = deepMergeConfig(merged, profileData);
|
|
18821
18872
|
rawMerged.profile = packageProfile;
|
|
18822
18873
|
rejectLegacyAgentKeys(rawMerged);
|
|
18874
|
+
rejectLegacyRectificationKeys(rawMerged);
|
|
18823
18875
|
const result = NaxConfigSchema.safeParse(rawMerged);
|
|
18824
18876
|
if (result.success) {
|
|
18825
18877
|
merged = result.data;
|
|
@@ -21043,6 +21095,7 @@ class DispatchEventBus {
|
|
|
21043
21095
|
_completedListeners = new Set;
|
|
21044
21096
|
_errorListeners = new Set;
|
|
21045
21097
|
_reviewDecisionListeners = new Set;
|
|
21098
|
+
_reviewRepromptListeners = new Set;
|
|
21046
21099
|
onDispatch(l) {
|
|
21047
21100
|
this._dispatchListeners.add(l);
|
|
21048
21101
|
return () => this._dispatchListeners.delete(l);
|
|
@@ -21059,6 +21112,10 @@ class DispatchEventBus {
|
|
|
21059
21112
|
this._reviewDecisionListeners.add(l);
|
|
21060
21113
|
return () => this._reviewDecisionListeners.delete(l);
|
|
21061
21114
|
}
|
|
21115
|
+
onReviewReprompt(l) {
|
|
21116
|
+
this._reviewRepromptListeners.add(l);
|
|
21117
|
+
return () => this._reviewRepromptListeners.delete(l);
|
|
21118
|
+
}
|
|
21062
21119
|
emitDispatch(event) {
|
|
21063
21120
|
for (const l of this._dispatchListeners) {
|
|
21064
21121
|
try {
|
|
@@ -21095,6 +21152,15 @@ class DispatchEventBus {
|
|
|
21095
21152
|
}
|
|
21096
21153
|
}
|
|
21097
21154
|
}
|
|
21155
|
+
emitReviewReprompt(event) {
|
|
21156
|
+
for (const l of this._reviewRepromptListeners) {
|
|
21157
|
+
try {
|
|
21158
|
+
l(event);
|
|
21159
|
+
} catch (err) {
|
|
21160
|
+
getSafeLogger()?.warn("dispatch-bus", "review-reprompt-listener threw", { error: errorMessage(err) });
|
|
21161
|
+
}
|
|
21162
|
+
}
|
|
21163
|
+
}
|
|
21098
21164
|
}
|
|
21099
21165
|
var init_dispatch_events = __esm(() => {
|
|
21100
21166
|
init_logger2();
|
|
@@ -21922,7 +21988,8 @@ function makeParseRetryStrategy(opts) {
|
|
|
21922
21988
|
if (ctx.site === "complete") {
|
|
21923
21989
|
getSafeLogger()?.warn(opts.reviewerKind, "makeParseRetryStrategy: lastOutput is not populated on complete-kind ops \u2014 retry will never fire", { storyId: ctx.storyId });
|
|
21924
21990
|
}
|
|
21925
|
-
|
|
21991
|
+
const fallback = opts.exhaustedFallback ? opts.exhaustedFallback("") : undefined;
|
|
21992
|
+
return { retry: false, ...fallback !== undefined ? { fallback } : {} };
|
|
21926
21993
|
}
|
|
21927
21994
|
let parsed;
|
|
21928
21995
|
try {
|
|
@@ -30325,78 +30392,6 @@ function truncate(s, max) {
|
|
|
30325
30392
|
var MAX_BLOCK_CHARS = 6000;
|
|
30326
30393
|
|
|
30327
30394
|
// src/prompts/builders/review-builder.ts
|
|
30328
|
-
class ReviewPromptBuilder {
|
|
30329
|
-
buildSemanticReviewPrompt(story, semanticConfig, options) {
|
|
30330
|
-
const acList = story.acceptanceCriteria.map((ac, i) => `${i + 1}. ${ac}`).join(`
|
|
30331
|
-
`);
|
|
30332
|
-
const customRulesBlock = semanticConfig.rules.length > 0 ? `
|
|
30333
|
-
## Additional Review Rules
|
|
30334
|
-
${semanticConfig.rules.map((r, i) => `${i + 1}. ${r}`).join(`
|
|
30335
|
-
`)}
|
|
30336
|
-
` : "";
|
|
30337
|
-
const priorIterationsBlock = buildPriorIterationsBlock(options.priorSemanticIterations ?? []);
|
|
30338
|
-
let diffSection;
|
|
30339
|
-
if (options.mode === "ref") {
|
|
30340
|
-
diffSection = buildRefDiffSection(options.storyGitRef ?? "", options.stat ?? "", options.excludePatterns ?? []);
|
|
30341
|
-
} else {
|
|
30342
|
-
diffSection = buildEmbeddedDiffSection(options.diff ?? "");
|
|
30343
|
-
}
|
|
30344
|
-
const core2 = `${SEMANTIC_ROLE}
|
|
30345
|
-
|
|
30346
|
-
## Story: ${story.title}
|
|
30347
|
-
|
|
30348
|
-
### Description
|
|
30349
|
-
${story.description}
|
|
30350
|
-
|
|
30351
|
-
### Acceptance Criteria
|
|
30352
|
-
${acList}
|
|
30353
|
-
${customRulesBlock}${priorIterationsBlock}
|
|
30354
|
-
${SEMANTIC_INSTRUCTIONS}
|
|
30355
|
-
${SEMANTIC_OUTPUT_SCHEMA}
|
|
30356
|
-
|
|
30357
|
-
${diffSection}`;
|
|
30358
|
-
return wrapJsonPrompt(core2);
|
|
30359
|
-
}
|
|
30360
|
-
static jsonRetry() {
|
|
30361
|
-
return `Your previous response could not be parsed as valid JSON.
|
|
30362
|
-
` + `Output ONLY the JSON object from your review \u2014 no markdown fences, no explanation.
|
|
30363
|
-
` + "The object must start with { and end with }.";
|
|
30364
|
-
}
|
|
30365
|
-
static jsonRetryCondensed(opts) {
|
|
30366
|
-
const threshold = opts?.blockingThreshold ?? "error";
|
|
30367
|
-
const advisoryCap = opts?.advisoryCap ?? 3;
|
|
30368
|
-
const blockingList = threshold === "error" ? '"error"' : threshold === "warning" ? '"error" and "warning"' : '"error", "warning", and "info"';
|
|
30369
|
-
const blockingClause = threshold === "info" ? "Include ALL findings \u2014 do not drop any by severity." : `Include ALL findings with severity ${blockingList} (these are blocking \u2014 do not drop them).`;
|
|
30370
|
-
const advisoryClause = threshold === "info" ? "If your response would still exceed limits, prioritize the highest-severity findings first." : `Below that, include at most ${advisoryCap} additional findings (highest severity first).`;
|
|
30371
|
-
return `Your previous response was truncated and could not be parsed as valid JSON.
|
|
30372
|
-
Respond with a condensed summary:
|
|
30373
|
-
- ${blockingClause}
|
|
30374
|
-
- ${advisoryClause}
|
|
30375
|
-
- Keep \`verifiedBy\` for every finding. If \`verifiedBy.observed\` is long, abbreviate it to one line \u2014 never drop the field.
|
|
30376
|
-
Output ONLY a complete, valid JSON object. It must start with { and end with }.
|
|
30377
|
-
Schema: {"passed": boolean, "findings": [{"severity": string, "category": string, "file": string, "line": number, "issue": string, "suggestion": string, "verifiedBy": {"command": string, "file": string, "line": number, "observed": string}}]}`;
|
|
30378
|
-
}
|
|
30379
|
-
static requoteVerbatim(opts) {
|
|
30380
|
-
const file3 = opts.finding.verifiedBy?.file ?? opts.finding.file;
|
|
30381
|
-
const line = opts.finding.verifiedBy?.line ?? opts.finding.line;
|
|
30382
|
-
return `Your previous verifiedBy.observed value did not match the referenced file on disk.
|
|
30383
|
-
|
|
30384
|
-
You MUST use your file-reading tool to open ${file3} and copy the actual bytes around line ${line}. Do NOT quote from memory or from the prior conversation \u2014 the previous quote was wrong precisely because it was not read from disk. If you reply without a file-read tool call, the quote will be rejected.
|
|
30385
|
-
|
|
30386
|
-
Return ONLY this JSON object:
|
|
30387
|
-
{"file":"${file3}","line":${line},"observed":"exact 1-3 line quote"}
|
|
30388
|
-
|
|
30389
|
-
Finding issue: ${opts.finding.issue}
|
|
30390
|
-
Referenced file: ${file3}
|
|
30391
|
-
Referenced line: ${line}
|
|
30392
|
-
|
|
30393
|
-
Rules:
|
|
30394
|
-
- Read ${file3} with your file tool first. Then copy observed verbatim from the read result.
|
|
30395
|
-
- observed must be a 1-3 line excerpt that proves the claim, taken from at or near line ${line}.
|
|
30396
|
-
- If after reading the file you cannot find anything that proves the claim, set observed to "".
|
|
30397
|
-
- Do not return a full review. Do not include markdown fences or explanation.`;
|
|
30398
|
-
}
|
|
30399
|
-
}
|
|
30400
30395
|
function buildEmbeddedDiffSection(diff) {
|
|
30401
30396
|
return `## Git Diff (production code only \u2014 test files excluded)
|
|
30402
30397
|
|
|
@@ -30476,9 +30471,114 @@ Notes:
|
|
|
30476
30471
|
- \`acIndex\` is required when severity is "error" (1-based, into the Acceptance Criteria list above).
|
|
30477
30472
|
- \`acQuote\` is optional advisory metadata for human auditors \u2014 not validated.
|
|
30478
30473
|
- Omit both for "warning", "info", "unverifiable".
|
|
30479
|
-
If all ACs are correctly implemented, respond with { "passed": true, "findings": [] }
|
|
30474
|
+
If all ACs are correctly implemented, respond with { "passed": true, "findings": [] }.`, ReviewPromptBuilder;
|
|
30480
30475
|
var init_review_builder = __esm(() => {
|
|
30481
30476
|
SEMANTIC_ROLE = "You are a semantic code reviewer with access to the repository files. " + "Your job is to walk each acceptance criterion (AC) and judge whether the production code fulfills it \u2014 fully, partially, or not at all. " + "Test coverage gaps and convention/lint issues are out of scope \u2014 adversarial review and lint/typecheck handle those.";
|
|
30477
|
+
ReviewPromptBuilder = class ReviewPromptBuilder {
|
|
30478
|
+
buildSemanticReviewPrompt(story, semanticConfig, options) {
|
|
30479
|
+
const acList = story.acceptanceCriteria.map((ac, i) => `${i + 1}. ${ac}`).join(`
|
|
30480
|
+
`);
|
|
30481
|
+
const customRulesBlock = semanticConfig.rules.length > 0 ? `
|
|
30482
|
+
## Additional Review Rules
|
|
30483
|
+
${semanticConfig.rules.map((r, i) => `${i + 1}. ${r}`).join(`
|
|
30484
|
+
`)}
|
|
30485
|
+
` : "";
|
|
30486
|
+
const priorIterationsBlock = buildPriorIterationsBlock(options.priorSemanticIterations ?? []);
|
|
30487
|
+
let diffSection;
|
|
30488
|
+
if (options.mode === "ref") {
|
|
30489
|
+
diffSection = buildRefDiffSection(options.storyGitRef ?? "", options.stat ?? "", options.excludePatterns ?? []);
|
|
30490
|
+
} else {
|
|
30491
|
+
diffSection = buildEmbeddedDiffSection(options.diff ?? "");
|
|
30492
|
+
}
|
|
30493
|
+
const core2 = `${SEMANTIC_ROLE}
|
|
30494
|
+
|
|
30495
|
+
## Story: ${story.title}
|
|
30496
|
+
|
|
30497
|
+
### Description
|
|
30498
|
+
${story.description}
|
|
30499
|
+
|
|
30500
|
+
### Acceptance Criteria
|
|
30501
|
+
${acList}
|
|
30502
|
+
${customRulesBlock}${priorIterationsBlock}
|
|
30503
|
+
${SEMANTIC_INSTRUCTIONS}
|
|
30504
|
+
${SEMANTIC_OUTPUT_SCHEMA}
|
|
30505
|
+
|
|
30506
|
+
${diffSection}`;
|
|
30507
|
+
return wrapJsonPrompt(core2);
|
|
30508
|
+
}
|
|
30509
|
+
static jsonRetry() {
|
|
30510
|
+
return `Your previous response could not be parsed as valid JSON.
|
|
30511
|
+
` + `Output ONLY the JSON object from your review \u2014 no markdown fences, no explanation.
|
|
30512
|
+
` + "The object must start with { and end with }.";
|
|
30513
|
+
}
|
|
30514
|
+
static jsonRetryCondensed(opts) {
|
|
30515
|
+
const threshold = opts?.blockingThreshold ?? "error";
|
|
30516
|
+
const advisoryCap = opts?.advisoryCap ?? 3;
|
|
30517
|
+
const blockingList = threshold === "error" ? '"error"' : threshold === "warning" ? '"error" and "warning"' : '"error", "warning", and "info"';
|
|
30518
|
+
const blockingClause = threshold === "info" ? "Include ALL findings \u2014 do not drop any by severity." : `Include ALL findings with severity ${blockingList} (these are blocking \u2014 do not drop them).`;
|
|
30519
|
+
const advisoryClause = threshold === "info" ? "If your response would still exceed limits, prioritize the highest-severity findings first." : `Below that, include at most ${advisoryCap} additional findings (highest severity first).`;
|
|
30520
|
+
return `Your previous response was truncated and could not be parsed as valid JSON.
|
|
30521
|
+
Respond with a condensed summary:
|
|
30522
|
+
- ${blockingClause}
|
|
30523
|
+
- ${advisoryClause}
|
|
30524
|
+
- Keep \`verifiedBy\` for every finding. If \`verifiedBy.observed\` is long, abbreviate it to one line \u2014 never drop the field.
|
|
30525
|
+
Output ONLY a complete, valid JSON object. It must start with { and end with }.
|
|
30526
|
+
Schema: {"passed": boolean, "findings": [{"severity": string, "category": string, "file": string, "line": number, "issue": string, "suggestion": string, "verifiedBy": {"command": string, "file": string, "line": number, "observed": string}}]}`;
|
|
30527
|
+
}
|
|
30528
|
+
static requoteVerbatim(opts) {
|
|
30529
|
+
const file3 = opts.finding.verifiedBy?.file ?? opts.finding.file;
|
|
30530
|
+
const line = opts.finding.verifiedBy?.line ?? opts.finding.line;
|
|
30531
|
+
return `Your previous verifiedBy.observed value did not match the referenced file on disk.
|
|
30532
|
+
|
|
30533
|
+
You MUST use your file-reading tool to open ${file3} and copy the actual bytes around line ${line}. Do NOT quote from memory or from the prior conversation \u2014 the previous quote was wrong precisely because it was not read from disk. If you reply without a file-read tool call, the quote will be rejected.
|
|
30534
|
+
|
|
30535
|
+
Return ONLY this JSON object:
|
|
30536
|
+
{"file":"${file3}","line":${line},"observed":"exact 1-3 line quote"}
|
|
30537
|
+
|
|
30538
|
+
Finding issue: ${opts.finding.issue}
|
|
30539
|
+
Referenced file: ${file3}
|
|
30540
|
+
Referenced line: ${line}
|
|
30541
|
+
|
|
30542
|
+
Rules:
|
|
30543
|
+
- Read ${file3} with your file tool first. Then copy observed verbatim from the read result.
|
|
30544
|
+
- observed must be a 1-3 line excerpt that proves the claim, taken from at or near line ${line}.
|
|
30545
|
+
- If after reading the file you cannot find anything that proves the claim, set observed to "".
|
|
30546
|
+
- Do not return a full review. Do not include markdown fences or explanation.`;
|
|
30547
|
+
}
|
|
30548
|
+
static DROP_CODE_MESSAGES_MINIMAL = {
|
|
30549
|
+
missing_ac_index: "no `acIndex` field was provided \u2014 every blocking finding must cite an AC by 1-based index",
|
|
30550
|
+
ac_index_out_of_range: "`acIndex` is 0 or larger than the AC list \u2014 ACs are 1-indexed; the lowest valid value is 1"
|
|
30551
|
+
};
|
|
30552
|
+
static regroundDroppedFindings(opts) {
|
|
30553
|
+
const { drops, acceptanceCriteria } = opts;
|
|
30554
|
+
if (drops.length === 0)
|
|
30555
|
+
return "";
|
|
30556
|
+
const firstDrop = drops[0];
|
|
30557
|
+
const codeMessage = ReviewPromptBuilder.DROP_CODE_MESSAGES_MINIMAL[firstDrop.code];
|
|
30558
|
+
const acList = acceptanceCriteria.map((ac, i) => `${i + 1}. ${ac}`).join(`
|
|
30559
|
+
`);
|
|
30560
|
+
return `Your previous review produced ${drops.length} finding${drops.length > 1 ? "s" : ""} that ${drops.length > 1 ? "were" : "was"} dropped because:
|
|
30561
|
+
|
|
30562
|
+
${codeMessage}
|
|
30563
|
+
|
|
30564
|
+
The dropped finding${drops.length > 1 ? "s" : ""} ${drops.length > 1 ? "are" : "is"}:
|
|
30565
|
+
${drops.map((d, i) => `${i + 1}. [${d.finding.severity}] ${d.finding.issue}`).join(`
|
|
30566
|
+
`)}
|
|
30567
|
+
|
|
30568
|
+
Please re-review the code and re-issue any valid findings. For each finding you re-issue:
|
|
30569
|
+
- You MUST include a valid \`acIndex\` (1-based index into the AC list below)
|
|
30570
|
+
- You MUST include a \`verifiedBy\` field with verified evidence
|
|
30571
|
+
|
|
30572
|
+
## Acceptance Criteria
|
|
30573
|
+
${acList}
|
|
30574
|
+
|
|
30575
|
+
## Rules
|
|
30576
|
+
- If a finding's locus (file / symbol) is not named in any AC bullet, downgrade it to \`"info"\` or \`"warning"\`
|
|
30577
|
+
- Only re-issue findings that are genuinely substantiated by the code and constrained by an AC
|
|
30578
|
+
- Return ONLY a JSON object with the same shape as before:
|
|
30579
|
+
{"passed":true|false,"findings":[...]}`;
|
|
30580
|
+
}
|
|
30581
|
+
};
|
|
30482
30582
|
});
|
|
30483
30583
|
|
|
30484
30584
|
// src/prompts/builders/adversarial-review-builder.ts
|
|
@@ -30561,72 +30661,6 @@ ${diff}\`\`\`
|
|
|
30561
30661
|
|
|
30562
30662
|
`;
|
|
30563
30663
|
}
|
|
30564
|
-
|
|
30565
|
-
class AdversarialReviewPromptBuilder {
|
|
30566
|
-
buildAdversarialReviewPrompt(story, config2, options) {
|
|
30567
|
-
const {
|
|
30568
|
-
mode,
|
|
30569
|
-
diff,
|
|
30570
|
-
storyGitRef,
|
|
30571
|
-
stat,
|
|
30572
|
-
testInventory,
|
|
30573
|
-
excludePatterns,
|
|
30574
|
-
testGlobs,
|
|
30575
|
-
refExcludePatterns,
|
|
30576
|
-
priorAdversarialIterations,
|
|
30577
|
-
blockingThreshold
|
|
30578
|
-
} = options;
|
|
30579
|
-
const priorFindingsBlock = buildPriorIterationsBlock(priorAdversarialIterations ?? []);
|
|
30580
|
-
const storyBlock = `## Story Under Review
|
|
30581
|
-
|
|
30582
|
-
**ID:** ${story.id}
|
|
30583
|
-
**Title:** ${story.title}
|
|
30584
|
-
**Description:** ${story.description || "(none)"}
|
|
30585
|
-
|
|
30586
|
-
**Acceptance Criteria:**
|
|
30587
|
-
${story.acceptanceCriteria.map((ac, i) => `${i + 1}. ${ac}`).join(`
|
|
30588
|
-
`)}
|
|
30589
|
-
|
|
30590
|
-
`;
|
|
30591
|
-
const customRulesBlock = config2.rules.length > 0 ? `## Project-Specific Adversarial Rules
|
|
30592
|
-
|
|
30593
|
-
${config2.rules.map((r) => `- ${r}`).join(`
|
|
30594
|
-
`)}
|
|
30595
|
-
|
|
30596
|
-
` : "";
|
|
30597
|
-
let diffBlock;
|
|
30598
|
-
if (mode === "ref" && storyGitRef) {
|
|
30599
|
-
diffBlock = buildAdversarialRefDiffSection(storyGitRef, stat, excludePatterns ?? [], testGlobs ?? [], refExcludePatterns ?? []);
|
|
30600
|
-
} else if (mode === "embedded" && diff) {
|
|
30601
|
-
diffBlock = buildAdversarialEmbeddedDiffSection(diff, testInventory);
|
|
30602
|
-
} else {
|
|
30603
|
-
diffBlock = `## Diff
|
|
30604
|
-
|
|
30605
|
-
(No diff available \u2014 review based on story context only)
|
|
30606
|
-
|
|
30607
|
-
`;
|
|
30608
|
-
}
|
|
30609
|
-
return [
|
|
30610
|
-
ADVERSARIAL_ROLE,
|
|
30611
|
-
`
|
|
30612
|
-
|
|
30613
|
-
`,
|
|
30614
|
-
priorFindingsBlock,
|
|
30615
|
-
storyBlock,
|
|
30616
|
-
ADVERSARIAL_INSTRUCTIONS,
|
|
30617
|
-
`
|
|
30618
|
-
|
|
30619
|
-
`,
|
|
30620
|
-
customRulesBlock,
|
|
30621
|
-
buildBlockingThresholdBlock(blockingThreshold ?? "error"),
|
|
30622
|
-
OUTPUT_SCHEMA,
|
|
30623
|
-
`
|
|
30624
|
-
|
|
30625
|
-
`,
|
|
30626
|
-
diffBlock
|
|
30627
|
-
].join("");
|
|
30628
|
-
}
|
|
30629
|
-
}
|
|
30630
30664
|
var ADVERSARIAL_ROLE = `You are an adversarial code reviewer with full access to the repository.
|
|
30631
30665
|
|
|
30632
30666
|
Your job is NOT to re-verify that the code satisfies the acceptance criteria \u2014 semantic review owns that question. Don't re-litigate AC correctness.
|
|
@@ -30735,8 +30769,130 @@ Worked example:
|
|
|
30735
30769
|
**Scope constraints are not Acceptance Criteria:**
|
|
30736
30770
|
The story description may contain a "Scope" section with "In:" and "Out:" bullets. These are implementation guidelines, not ACs. A finding about code changed outside the stated scope (e.g., a file listed under "Out:") cannot cite a scope constraint as its \`acQuote\`/\`acIndex\` because scope text is not in the numbered AC list. Emit scope-violation findings as \`"warning"\` \u2014 never \`"error"\`. Never use \`acIndex: 0\`; \`acIndex\` is 1-based (first AC bullet = 1).
|
|
30737
30771
|
|
|
30738
|
-
If you cannot find an AC that names the **specific symbol** in your finding, downgrade to \`"info"\` or \`"warning"\`. A finding dropped by the validator is worse than one correctly classified as advisory
|
|
30739
|
-
var init_adversarial_review_builder = () => {
|
|
30772
|
+
If you cannot find an AC that names the **specific symbol** in your finding, downgrade to \`"info"\` or \`"warning"\`. A finding dropped by the validator is worse than one correctly classified as advisory.`, AdversarialReviewPromptBuilder;
|
|
30773
|
+
var init_adversarial_review_builder = __esm(() => {
|
|
30774
|
+
AdversarialReviewPromptBuilder = class AdversarialReviewPromptBuilder {
|
|
30775
|
+
buildAdversarialReviewPrompt(story, config2, options) {
|
|
30776
|
+
const {
|
|
30777
|
+
mode,
|
|
30778
|
+
diff,
|
|
30779
|
+
storyGitRef,
|
|
30780
|
+
stat,
|
|
30781
|
+
testInventory,
|
|
30782
|
+
excludePatterns,
|
|
30783
|
+
testGlobs,
|
|
30784
|
+
refExcludePatterns,
|
|
30785
|
+
priorAdversarialIterations,
|
|
30786
|
+
blockingThreshold
|
|
30787
|
+
} = options;
|
|
30788
|
+
const priorFindingsBlock = buildPriorIterationsBlock(priorAdversarialIterations ?? []);
|
|
30789
|
+
const storyBlock = `## Story Under Review
|
|
30790
|
+
|
|
30791
|
+
**ID:** ${story.id}
|
|
30792
|
+
**Title:** ${story.title}
|
|
30793
|
+
**Description:** ${story.description || "(none)"}
|
|
30794
|
+
|
|
30795
|
+
**Acceptance Criteria:**
|
|
30796
|
+
${story.acceptanceCriteria.map((ac, i) => `${i + 1}. ${ac}`).join(`
|
|
30797
|
+
`)}
|
|
30798
|
+
|
|
30799
|
+
`;
|
|
30800
|
+
const customRulesBlock = config2.rules.length > 0 ? `## Project-Specific Adversarial Rules
|
|
30801
|
+
|
|
30802
|
+
${config2.rules.map((r) => `- ${r}`).join(`
|
|
30803
|
+
`)}
|
|
30804
|
+
|
|
30805
|
+
` : "";
|
|
30806
|
+
let diffBlock;
|
|
30807
|
+
if (mode === "ref" && storyGitRef) {
|
|
30808
|
+
diffBlock = buildAdversarialRefDiffSection(storyGitRef, stat, excludePatterns ?? [], testGlobs ?? [], refExcludePatterns ?? []);
|
|
30809
|
+
} else if (mode === "embedded" && diff) {
|
|
30810
|
+
diffBlock = buildAdversarialEmbeddedDiffSection(diff, testInventory);
|
|
30811
|
+
} else {
|
|
30812
|
+
diffBlock = `## Diff
|
|
30813
|
+
|
|
30814
|
+
(No diff available \u2014 review based on story context only)
|
|
30815
|
+
|
|
30816
|
+
`;
|
|
30817
|
+
}
|
|
30818
|
+
return [
|
|
30819
|
+
ADVERSARIAL_ROLE,
|
|
30820
|
+
`
|
|
30821
|
+
|
|
30822
|
+
`,
|
|
30823
|
+
priorFindingsBlock,
|
|
30824
|
+
storyBlock,
|
|
30825
|
+
ADVERSARIAL_INSTRUCTIONS,
|
|
30826
|
+
`
|
|
30827
|
+
|
|
30828
|
+
`,
|
|
30829
|
+
customRulesBlock,
|
|
30830
|
+
buildBlockingThresholdBlock(blockingThreshold ?? "error"),
|
|
30831
|
+
OUTPUT_SCHEMA,
|
|
30832
|
+
`
|
|
30833
|
+
|
|
30834
|
+
`,
|
|
30835
|
+
diffBlock
|
|
30836
|
+
].join("");
|
|
30837
|
+
}
|
|
30838
|
+
static requoteVerbatim(opts) {
|
|
30839
|
+
const file3 = opts.finding.verifiedBy?.file ?? opts.finding.file;
|
|
30840
|
+
const line = opts.finding.verifiedBy?.line ?? opts.finding.line;
|
|
30841
|
+
return `Your previous verifiedBy.observed value did not match the referenced file on disk.
|
|
30842
|
+
|
|
30843
|
+
You MUST use your file-reading tool to open ${file3} and copy the actual bytes around line ${line}. Do NOT quote from memory or from the prior conversation \u2014 the previous quote was wrong precisely because it was not read from disk. If you reply without a file-read tool call, the quote will be rejected.
|
|
30844
|
+
|
|
30845
|
+
Return ONLY this JSON object:
|
|
30846
|
+
{"file":"${file3}","line":${line},"observed":"exact 1-3 line quote"}
|
|
30847
|
+
|
|
30848
|
+
Finding issue: ${opts.finding.issue}
|
|
30849
|
+
Referenced file: ${file3}
|
|
30850
|
+
Referenced line: ${line}
|
|
30851
|
+
|
|
30852
|
+
Rules:
|
|
30853
|
+
- Read ${file3} with your file tool first. Then copy observed verbatim from the read result.
|
|
30854
|
+
- observed must be a 1-3 line excerpt that proves the claim, taken from at or near line ${line}.
|
|
30855
|
+
- If after reading the file you cannot find anything that proves the claim, set observed to "".
|
|
30856
|
+
- Do not return a full review. Do not include markdown fences or explanation.`;
|
|
30857
|
+
}
|
|
30858
|
+
static DROP_CODE_MESSAGES_QUOTE = {
|
|
30859
|
+
missing_ac_quote: "no `acQuote` field was provided \u2014 every blocking finding must cite an AC",
|
|
30860
|
+
ac_index_out_of_range: "`acIndex` is 0 or larger than the AC list \u2014 ACs are 1-indexed; the lowest valid value is 1",
|
|
30861
|
+
ac_quote_not_substring: "`acQuote` text does not appear verbatim in any AC bullet \u2014 copy the AC text character-for-character",
|
|
30862
|
+
ac_quote_does_not_constrain_locus: "the cited AC mentions the file but not the specific symbol your finding flags \u2014 pick a different AC, or downgrade to `info` / `warning`"
|
|
30863
|
+
};
|
|
30864
|
+
static regroundDroppedFindings(opts) {
|
|
30865
|
+
const { drops, acceptanceCriteria } = opts;
|
|
30866
|
+
if (drops.length === 0)
|
|
30867
|
+
return "";
|
|
30868
|
+
const firstDrop = drops[0];
|
|
30869
|
+
const codeMessage = AdversarialReviewPromptBuilder.DROP_CODE_MESSAGES_QUOTE[firstDrop.code] ?? `rejection code: ${firstDrop.code}`;
|
|
30870
|
+
const acList = acceptanceCriteria.map((ac, i) => `${i + 1}. ${ac}`).join(`
|
|
30871
|
+
`);
|
|
30872
|
+
return `Your previous review produced ${drops.length} finding${drops.length > 1 ? "s" : ""} that ${drops.length > 1 ? "were" : "was"} dropped because:
|
|
30873
|
+
|
|
30874
|
+
${codeMessage}
|
|
30875
|
+
|
|
30876
|
+
The dropped finding${drops.length > 1 ? "s" : ""} ${drops.length > 1 ? "are" : "is"}:
|
|
30877
|
+
${drops.map((d, i) => `${i + 1}. [${d.finding.severity}] ${d.finding.issue}`).join(`
|
|
30878
|
+
`)}
|
|
30879
|
+
|
|
30880
|
+
Please re-review the code and re-issue any valid findings. For each finding you re-issue:
|
|
30881
|
+
- You MUST include a valid \`acQuote\` that appears verbatim in one of the AC bullets below
|
|
30882
|
+
- You MUST include a valid \`acIndex\` (1-based index into the AC list)
|
|
30883
|
+
- The \`acQuote\` must cite the specific symbol you are flagging, not just the file
|
|
30884
|
+
|
|
30885
|
+
## Acceptance Criteria
|
|
30886
|
+
${acList}
|
|
30887
|
+
|
|
30888
|
+
## Rules
|
|
30889
|
+
- If a finding's locus (file / symbol) is not named in any AC bullet, downgrade it to \`"info"\` or \`"warning"\`
|
|
30890
|
+
- Only re-issue findings that are genuinely substantiated by the code and constrained by an AC
|
|
30891
|
+
- Return ONLY a JSON object with the same shape as before:
|
|
30892
|
+
{"passed":true|false,"findings":[...]}`;
|
|
30893
|
+
}
|
|
30894
|
+
};
|
|
30895
|
+
});
|
|
30740
30896
|
|
|
30741
30897
|
// src/prompts/builders/acceptance-builder-helpers.ts
|
|
30742
30898
|
function formatTestOutputForFix(rawOutput) {
|
|
@@ -31249,8 +31405,420 @@ var init_adversarial_helpers = __esm(() => {
|
|
|
31249
31405
|
init_severity();
|
|
31250
31406
|
});
|
|
31251
31407
|
|
|
31408
|
+
// src/review/semantic-helpers.ts
|
|
31409
|
+
/**
 * Check that a parsed LLM reply has a boolean `passed` and an array
 * `findings`.
 *
 * @param {unknown} parsed - Already-parsed JSON value.
 * @returns {{ passed: boolean, findings: Array }|null} Only those two fields,
 *   or null when the shape is wrong.
 */
function validateLLMShape(parsed) {
  const isObject = parsed !== null && typeof parsed === "object";
  if (!isObject) {
    return null;
  }
  const { passed, findings } = parsed;
  const wellFormed = typeof passed === "boolean" && Array.isArray(findings);
  return wellFormed ? { passed, findings } : null;
}
|
|
31419
|
+
/**
 * Parse raw reviewer output and validate its shape.
 *
 * @param {string} raw - Raw LLM output.
 * @returns {{ passed: boolean, findings: Array }|null} The validated shape,
 *   or null when parsing or validation fails or throws.
 */
function parseLLMResponse(raw) {
  try {
    const decoded = tryParseLLMJson(raw);
    return validateLLMShape(decoded);
  } catch {
    return null;
  }
}
|
|
31426
|
+
/**
 * Render findings as text, one `[severity] file:line — issue` header per
 * finding followed by its suggestion line.
 *
 * The suggestion line is omitted when a finding has none, instead of
 * printing the literal text "Suggestion: undefined".
 *
 * @param {Array<{severity: string, file: string, line: number, issue: string, suggestion?: string}>} findings
 * @returns {string} Newline-joined text ("" for an empty list).
 */
function formatFindings2(findings) {
  return findings.map((f) => {
    const header = `[${f.severity}] ${f.file}:${f.line} \u2014 ${f.issue}`;
    return f.suggestion == null ? header : `${header}
Suggestion: ${f.suggestion}`;
  }).join("\n");
}
|
|
31431
|
+
/**
 * Map a reviewer-supplied severity onto the known set.
 *
 * @param {unknown} sev - Severity as emitted by the model.
 * @returns {string} "warning" for "warn", the value itself when recognised,
 *   otherwise "info".
 */
function normalizeSeverity2(sev) {
  if (sev === "warn") {
    return "warning";
  }
  const recognised = ["critical", "error", "warning", "info", "low", "unverifiable"];
  return recognised.includes(sev) ? sev : "info";
}
|
|
31438
|
+
/**
 * In "ref" diff mode, downgrade blocking findings that lack evidence to
 * "unverifiable"; in any other mode return the input list untouched.
 *
 * @param {Array<object>} findings
 * @param {string} diffMode
 * @param {string} [blockingThreshold="error"]
 * @returns {Array<object>}
 */
function sanitizeRefModeFindings(findings, diffMode, blockingThreshold = "error") {
  if (diffMode === "ref") {
    return findings.map((item) => {
      if (needsDowngradeForMissingEvidence(item, blockingThreshold)) {
        return downgradeToUnverifiable(item);
      }
      return item;
    });
  }
  return findings;
}
|
|
31443
|
+
/**
 * Decide whether a finding must be downgraded: it is blocking at the given
 * threshold and either admits it could not verify its source or carries no
 * usable `verifiedBy` evidence.
 *
 * @param {object} finding
 * @param {string} blockingThreshold
 * @returns {boolean}
 */
function needsDowngradeForMissingEvidence(finding, blockingThreshold) {
  const blocking = isBlockingSeverity(finding.severity, blockingThreshold);
  if (!blocking) {
    return false;
  }
  if (mentionsUnverifiedSource(finding)) {
    return true;
  }
  return !hasVerifiedEvidence(finding);
}
|
|
31448
|
+
/**
 * Report whether the finding's issue or suggestion text contains one of the
 * UNVERIFIED_FINDING_PATTERNS phrases (case-insensitive).
 *
 * @param {{ issue: string, suggestion?: string }} finding
 * @returns {boolean}
 */
function mentionsUnverifiedSource(finding) {
  const haystack = `${finding.issue} ${finding.suggestion}`.toLowerCase();
  for (const phrase of UNVERIFIED_FINDING_PATTERNS) {
    if (haystack.includes(phrase)) {
      return true;
    }
  }
  return false;
}
|
|
31452
|
+
/**
 * True when `finding.verifiedBy` carries both a non-blank `file` and a
 * non-blank `observed` quote.
 *
 * @param {{ verifiedBy?: { file?: string, observed?: string } }} finding
 * @returns {boolean}
 */
function hasVerifiedEvidence(finding) {
  const proof = finding.verifiedBy;
  const citedFile = proof?.file?.trim();
  if (!citedFile) {
    return false;
  }
  const quoted = proof.observed?.trim();
  return Boolean(quoted);
}
|
|
31456
|
+
/**
 * Return a shallow copy of the finding with severity "unverifiable".
 * The input object is not modified.
 *
 * @param {object} finding
 * @returns {object}
 */
function downgradeToUnverifiable(finding) {
  const downgraded = { ...finding };
  downgraded.severity = "unverifiable";
  return downgraded;
}
|
|
31462
|
+
/**
 * Convert one raw semantic-review finding into the normalized finding
 * record: fixed source/fixTarget, normalized severity, and a `meta` bag
 * holding verifiedBy / acQuote / acIndex when the model supplied them.
 *
 * @param {object} f - Raw finding from the model.
 * @returns {object} Normalized finding; `meta` is undefined when empty.
 */
function llmFindingToFinding(f) {
  const { verifiedBy, acQuote, acIndex } = f;
  const meta = {
    ...(verifiedBy ? { verifiedBy } : {}),
    ...(acQuote ? { acQuote } : {}),
    // acIndex uses a null check rather than truthiness so 0 is preserved.
    ...(acIndex != null ? { acIndex } : {})
  };
  const hasMeta = Object.keys(meta).length > 0;
  return {
    source: "semantic-review",
    severity: normalizeSeverity2(f.severity),
    category: "",
    file: f.file,
    line: f.line,
    message: f.issue,
    suggestion: f.suggestion ?? undefined,
    fixTarget: "source",
    meta: hasMeta ? meta : undefined
  };
}
|
|
31482
|
+
/**
 * Normalize every raw finding in the list via `llmFindingToFinding`.
 *
 * @param {Array<object>} findings
 * @returns {Array<object>}
 */
function toReviewFindings(findings) {
  return findings.map((entry) => llmFindingToFinding(entry));
}
|
|
31485
|
+
// Lower-case phrases that `mentionsUnverifiedSource` searches for in a
// finding's issue/suggestion text; assigned when the module initializes.
var UNVERIFIED_FINDING_PATTERNS;
// Module initializer for src/review/semantic-helpers.ts.
var init_semantic_helpers = __esm(() => {
  init_severity();
  UNVERIFIED_FINDING_PATTERNS = [
    "cannot verify",
    "can't verify",
    "from diff alone",
    "missing from diff",
    "not found in diff",
    "not present in diff",
    "does not appear in diff"
  ];
});
|
|
31498
|
+
|
|
31499
|
+
// src/review/semantic-evidence.ts
|
|
31500
|
+
import { isAbsolute as isAbsolute8 } from "path";
|
|
31501
|
+
/**
 * In "ref" diff mode, check each blocking finding's quoted evidence against
 * the file on disk and downgrade the ones whose quote does not match.
 * Findings with any other evidence status are returned unchanged.
 *
 * @param {Array<object>} findings
 * @param {string} diffMode
 * @param {string} workdir
 * @param {string} storyId - Used only for the downgrade log entry.
 * @param {string} [blockingThreshold="error"]
 * @returns {Promise<Array<object>>}
 */
async function substantiateSemanticEvidence(findings, diffMode, workdir, storyId, blockingThreshold = "error") {
  if (diffMode !== "ref") {
    return findings;
  }
  const vetOne = async (finding) => {
    if (!isBlockingSeverity(finding.severity, blockingThreshold)) {
      return finding;
    }
    const evidence = await checkFindingEvidence({ finding, workdir });
    if (evidence.status === "unmatched") {
      return downgradeUnsubstantiatedFinding({ finding, storyId, ...evidence });
    }
    return finding;
  };
  return Promise.all(findings.map((finding) => vetOne(finding)));
}
|
|
31513
|
+
/**
 * Compare a finding's `verifiedBy.observed` quote with the referenced file.
 *
 * @param {{ finding: object, workdir: string }} opts
 * @returns {Promise<{ status: string, file: string, line: number|undefined, observed?: string }>}
 *   status is "missing-observed" (no quote), "unreadable" (file could not be
 *   read), "matched" or "unmatched".
 */
async function checkFindingEvidence(opts) {
  const { finding, workdir } = opts;
  const cited = finding.verifiedBy;
  const observed = cited?.observed?.trim();
  // `||` on purpose: a blank verifiedBy.file falls back to the finding's file.
  const file3 = cited?.file?.trim() || finding.file;
  const line = cited?.line ?? finding.line;
  if (!observed) {
    return { status: "missing-observed", file: file3, line };
  }
  const contents = await readSafeFile(workdir, file3);
  if (contents === null) {
    return { status: "unreadable", file: file3, line, observed };
  }
  const status = matchesEvidence(contents, observed, line) ? "matched" : "unmatched";
  return { status, file: file3, line, observed };
}
|
|
31524
|
+
/**
 * Check whether `observed` occurs in `contents`. With a usable line number
 * the search is limited to EVIDENCE_LINE_WINDOW lines either side of that
 * line (clamped to the file); otherwise the whole file is searched.
 *
 * @param {string} contents - Full file text.
 * @param {string} observed - Quote to look for.
 * @param {number|undefined} line - 1-based cited line.
 * @returns {boolean}
 */
function matchesEvidence(contents, observed, line) {
  let haystack = contents;
  if (line && !(line <= 0)) {
    const rows = contents.split("\n");
    // A cited line past the end of the file is clamped to the last row.
    const anchor = Math.min(Math.max(0, line - 1), rows.length - 1);
    const from = Math.max(0, anchor - EVIDENCE_LINE_WINDOW);
    const to = Math.min(rows.length, anchor + EVIDENCE_LINE_WINDOW + 1);
    haystack = rows.slice(from, to).join("\n");
  }
  return normalizedIncludes(haystack, observed);
}
|
|
31537
|
+
/**
 * Log a warning about an unsubstantiated finding and return a copy of it
 * with severity "unverifiable".
 *
 * @param {{ finding: object, storyId: string, event?: string, file?: string, line?: number, observed?: string }} opts
 *   `event` defaults to SEMANTIC_FINDING_DOWNGRADED_EVENT; file/line fall
 *   back to the finding's verifiedBy, then to the finding itself.
 * @returns {object} The downgraded copy.
 */
function downgradeUnsubstantiatedFinding(opts) {
  const { finding, storyId } = opts;
  const cited = finding.verifiedBy;
  const logger = _evidenceDeps.getLogger();
  logger?.warn("review", "Downgraded unsubstantiated review finding", {
    storyId,
    event: opts.event ?? SEMANTIC_FINDING_DOWNGRADED_EVENT,
    file: opts.file ?? cited?.file ?? finding.file,
    line: opts.line ?? cited?.line ?? finding.line,
    // Previews are truncated to keep log entries short.
    issue: finding.issue?.slice(0, ISSUE_PREVIEW_CHARS),
    observed: opts.observed?.slice(0, OBSERVED_PREVIEW_CHARS)
  });
  return { ...finding, severity: "unverifiable" };
}
|
|
31548
|
+
/**
 * Read a file cited by a finding.
 *
 * The path is first validated against `workdir`; when that succeeds the
 * validated absolute path is read. Otherwise an absolute `file3` is read
 * as-is, and anything else yields null. Read errors also yield null.
 *
 * NOTE(review): the absolute-path fallback reads files that did not pass the
 * workdir validation — confirm this is intentional, since the path
 * originates from reviewer output.
 *
 * @param {string} workdir
 * @param {string} file3 - Relative or absolute path.
 * @returns {Promise<string|null>}
 */
async function readSafeFile(workdir, file3) {
  const readOrNull = async (target) => {
    try {
      return await Bun.file(target).text();
    } catch {
      return null;
    }
  };
  const check = validateModulePath(file3, [workdir]);
  if (check.valid && check.absolutePath) {
    return readOrNull(check.absolutePath);
  }
  return isAbsolute8(file3) ? readOrNull(file3) : null;
}
|
|
31566
|
+
/**
 * Whitespace- and quote-insensitive substring test. An `observed` value
 * that normalizes to the empty string never matches.
 *
 * @param {string} contents
 * @param {string} observed
 * @returns {boolean}
 */
function normalizedIncludes(contents, observed) {
  const needle = normalizeEvidenceText(observed);
  if (needle.length === 0) {
    return false;
  }
  return normalizeEvidenceText(contents).includes(needle);
}
|
|
31570
|
+
/**
 * Normalize text for evidence comparison: strip wrapping quotes, collapse
 * every whitespace run to a single space, then trim.
 *
 * @param {string} text
 * @returns {string}
 */
function normalizeEvidenceText(text) {
  const unwrapped = stripWrappingQuotes(text);
  const collapsed = unwrapped.replace(/\s+/g, " ");
  return collapsed.trim();
}
|
|
31573
|
+
/**
 * Trim the text and repeatedly peel off a matching pair of wrapping
 * backticks, double quotes or single quotes, trimming after each pass.
 *
 * @param {string} text
 * @returns {string}
 */
function stripWrappingQuotes(text) {
  const WRAPPERS = "`\"'";
  let current = text.trim();
  for (;;) {
    if (current.length < 2) {
      break;
    }
    const head = current[0];
    const wrapped = head === current.at(-1) && WRAPPERS.includes(head);
    if (!wrapped) {
      break;
    }
    current = current.slice(1, -1).trim();
  }
  return current;
}
|
|
31580
|
+
/**
 * True when both characters are the same quote character: backtick, double
 * quote or single quote.
 *
 * @param {string} first
 * @param {string} last
 * @returns {boolean}
 */
function isMatchingWrapper(first, last) {
  if (first !== last) {
    return false;
  }
  return first === "`" || first === '"' || first === "'";
}
|
|
31583
|
+
// Maximum characters of the observed quote / issue text written to the log.
var OBSERVED_PREVIEW_CHARS = 160;
var ISSUE_PREVIEW_CHARS = 200;
// Lines searched on each side of the cited line in `matchesEvidence`.
var EVIDENCE_LINE_WINDOW = 10;
// Event names attached to downgrade log entries.
var SEMANTIC_FINDING_DOWNGRADED_EVENT = "review.semantic.finding.downgraded";
var ADVERSARIAL_FINDING_DOWNGRADED_EVENT = "review.adversarial.finding.downgraded";
// Dependency holder for the logger lookup; populated at module init.
var _evidenceDeps;
// Module initializer for src/review/semantic-evidence.ts.
var init_semantic_evidence = __esm(() => {
  init_logger2();
  init_path_security2();
  init_semantic_helpers();
  _evidenceDeps = {
    getLogger: getSafeLogger
  };
});
|
|
31592
|
+
|
|
31593
|
+
// src/review/finding-filters.ts
|
|
31594
|
+
/**
 * Check each blocking adversarial finding's evidence and downgrade those
 * whose quote is missing or does not match the file. Non-blocking findings
 * and findings with any other evidence status are returned unchanged.
 *
 * @param {{ findings: Array<object>, workdir: string, storyId: string, blockingThreshold: string }} opts
 * @returns {Promise<Array<object>>}
 */
async function substantiateAdversarialFindings(opts) {
  const { findings, workdir, storyId, blockingThreshold } = opts;
  const downgradeStatuses = new Set(["unmatched", "missing-observed"]);
  const vetOne = async (finding) => {
    if (!isBlockingSeverity(finding.severity, blockingThreshold)) {
      return finding;
    }
    const { status, file, line, observed } = await checkFindingEvidence({ finding, workdir });
    if (!downgradeStatuses.has(status)) {
      return finding;
    }
    return downgradeUnsubstantiatedFinding({
      finding,
      storyId,
      event: ADVERSARIAL_FINDING_DOWNGRADED_EVENT,
      file,
      line,
      observed
    });
  };
  return Promise.all(findings.map((finding) => vetOne(finding)));
}
|
|
31612
|
+
// Module initializer for src/review/finding-filters.ts: initializes each
// dependency module once (the repeated init_semantic_evidence() call was
// redundant and has been dropped).
var init_finding_filters = __esm(() => {
  init_adversarial_helpers();
  init_semantic_evidence();
  init_semantic_helpers();
  init_ac_quote_validator();
});
|
|
31619
|
+
|
|
31620
|
+
// src/review/requote-response.ts
|
|
31621
|
+
/**
 * Extract a `{ file, line, observed }` requote from reviewer output.
 *
 * Accepts the canonical object directly, or a review-shaped reply with
 * exactly one finding, in which case that finding's `verifiedBy` is tried
 * first and then the finding itself.
 *
 * @param {string} output - Raw reviewer output.
 * @returns {{ file: string, line: number|undefined, observed: string }|null}
 */
function parseRequoteResponse(output) {
  const parsed = tryParseLLMJson(output);
  if (!isRecord(parsed)) {
    return null;
  }
  const direct = extractCanonical(parsed);
  if (direct) {
    return direct;
  }
  const { findings } = parsed;
  const soleFinding = Array.isArray(findings) && findings.length === 1 ? findings[0] : null;
  if (!isRecord(soleFinding)) {
    return null;
  }
  return extractCanonical(soleFinding.verifiedBy) ?? extractCanonical(soleFinding);
}
|
|
31636
|
+
/**
 * Pull a canonical `{ file, line, observed }` triple out of a value.
 *
 * @param {unknown} value
 * @returns {{ file: string, line: number|undefined, observed: string }|null}
 *   null when the value is not a record, `file`/`observed` are not strings,
 *   `file` is blank, or `line` cannot be coerced.
 */
function extractCanonical(value) {
  if (!isRecord(value)) {
    return null;
  }
  const { file, observed } = value;
  if (typeof file !== "string" || typeof observed !== "string") {
    return null;
  }
  const file3 = file.trim();
  if (file3 === "") {
    return null;
  }
  const line = coerceLine(value.line);
  return line === null ? null : { file: file3, line, observed };
}
|
|
31653
|
+
/**
 * Coerce a reviewer-supplied line value.
 *
 * Numbers are now held to the same rule as digit strings: only non-negative
 * integers are accepted, so NaN, Infinity, negative and fractional values
 * are rejected instead of being passed through.
 *
 * @param {unknown} value
 * @returns {number|undefined|null} The line number; undefined when the value
 *   is null/undefined (line not given); null when it is present but invalid.
 */
function coerceLine(value) {
  if (value == null) {
    return undefined;
  }
  if (typeof value === "number") {
    return Number.isSafeInteger(value) && value >= 0 ? value : null;
  }
  if (typeof value === "string" && /^\d+$/.test(value)) {
    return Number.parseInt(value, 10);
  }
  return null;
}
|
|
31662
|
+
/**
 * True for non-null, non-array objects.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isRecord(value) {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
}
|
|
31665
|
+
// Module initializer for src/review/requote-response.ts; it has nothing to set up.
var init_requote_response = () => {
};
|
|
31666
|
+
|
|
31252
31667
|
// src/operations/adversarial-review.ts
|
|
31253
|
-
|
|
31668
|
+
/**
 * Re-serialize reviewer output with an added `_repromptInfo` field.
 *
 * @param {string} output - Raw reviewer output.
 * @param {object} info - Value stored under `_repromptInfo`.
 * @returns {string} The augmented JSON, or `output` unchanged when it does
 *   not parse to an object.
 */
function withRepromptMarker(output, info) {
  const parsed = tryParseLLMJson(output);
  const isObject = typeof parsed === "object" && parsed !== null;
  return isObject ? JSON.stringify({ ...parsed, _repromptInfo: info }) : output;
}
|
|
31674
|
+
/**
 * Read the `_repromptInfo` marker back out of a parsed reply.
 *
 * @param {unknown} raw - Parsed reviewer output.
 * @returns {{ dropCount: number, costUsd: number, outcome: string }|undefined}
 *   The three validated fields, or undefined when the marker is absent or
 *   any field has the wrong type.
 */
function extractRepromptInfo(raw) {
  const info = raw && typeof raw === "object" ? raw._repromptInfo : undefined;
  if (!info || typeof info !== "object") {
    return undefined;
  }
  const { dropCount, costUsd, outcome } = info;
  const valid = typeof dropCount === "number" && typeof costUsd === "number" && typeof outcome === "string";
  return valid ? { dropCount, costUsd, outcome } : undefined;
}
|
|
31690
|
+
/**
 * For blocking findings whose evidence quote did not match the file, ask
 * the reviewer (same session, one prompt per finding) for a verbatim
 * requote, up to `maxRequotes` prompts.
 *
 * A finding whose requote matches keeps its severity and gets the new
 * `verifiedBy`; a finding whose requote cannot be parsed or still does not
 * match is downgraded. Findings left over once the budget is spent are not
 * touched here.
 *
 * @param {Array<object>} findings
 * @param {object} ctx - Provides `input` (config, story, workdir, mode) and
 *   `send(prompt)`.
 * @returns {Promise<{ findings: Array<object>, changed: boolean, extraCostUsd: number }>}
 */
async function requoteBlockingAdversarialFindings(findings, ctx) {
  const { input } = ctx;
  const threshold = input.blockingThreshold ?? "error";
  const maxRequotes = input.adversarialConfig.substantiation?.maxRequotes ?? DEFAULT_MAX_REQUOTES;
  const requoteEnabled = input.adversarialConfig.substantiation?.requote ?? true;
  if (input.mode !== "ref" || !requoteEnabled || maxRequotes <= 0) {
    return { findings, changed: false, extraCostUsd: 0 };
  }

  const next = [...findings];
  let changed = false;
  let extraCostUsd = 0;
  let used = 0;
  const replaceAt = (position, replacement) => {
    next[position] = replacement;
    changed = true;
  };

  for (let index = 0; index < next.length; index += 1) {
    const finding = next[index];
    if (!isBlockingSeverity(finding.severity, threshold)) {
      continue;
    }
    const initialEvidence = await checkFindingEvidence({ finding, workdir: input.workdir });
    if (initialEvidence.status !== "unmatched") {
      continue;
    }
    if (used >= maxRequotes) {
      break;
    }
    used += 1;

    // Sequential on purpose: each requote is a turn in the same session.
    const retry = await ctx.send(AdversarialReviewPromptBuilder.requoteVerbatim({ finding }));
    extraCostUsd += retry.estimatedCostUsd ?? 0;
    const requote = parseRequoteResponse(retry.output);
    if (!requote) {
      replaceAt(index, downgradeUnsubstantiatedFinding({
        finding,
        storyId: input.story.id,
        event: ADVERSARIAL_REQUOTE_FAILED_EVENT,
        ...initialEvidence
      }));
      continue;
    }

    const updatedFinding = {
      ...finding,
      verifiedBy: {
        file: requote.file,
        line: requote.line,
        observed: requote.observed
      }
    };
    const requotedEvidence = await checkFindingEvidence({
      finding: updatedFinding,
      workdir: input.workdir
    });
    if (requotedEvidence.status === "matched") {
      getSafeLogger()?.info("review", "Recovered adversarial finding via same-session requote", {
        storyId: input.story.id,
        event: ADVERSARIAL_REQUOTE_RECOVERED_EVENT,
        file: requotedEvidence.file,
        line: requotedEvidence.line
      });
      replaceAt(index, updatedFinding);
      continue;
    }
    replaceAt(index, downgradeUnsubstantiatedFinding({
      finding: updatedFinding,
      storyId: input.story.id,
      event: ADVERSARIAL_REQUOTE_FAILED_EVENT,
      file: requotedEvidence.file,
      line: requotedEvidence.line,
      observed: requotedEvidence.observed
    }));
  }
  return { findings: next, changed, extraCostUsd };
}
|
|
31758
|
+
/**
 * Decide whether the reviewer should be re-prompted about AC-dropped
 * findings. That happens only when regrounding is not disabled, the review
 * failed, no blocking finding survived AC-quote filtering, and at least one
 * finding was dropped.
 *
 * @param {{ passed: boolean, findings: Array<object> }} shape
 * @param {object} input - Operation input (config, story, blockingThreshold).
 * @returns {{ shouldReprompt: boolean, acDropped?: Array<object> }}
 */
function evaluateRepromptTrigger(shape, input) {
  const noReprompt = { shouldReprompt: false };
  if (input.adversarialConfig.acRegroundOnDrop === false || shape.passed) {
    return noReprompt;
  }
  const { accepted, dropped } = filterByAcQuote(shape.findings, input.story.acceptanceCriteria);
  const threshold = input.blockingThreshold ?? "error";
  const hasBlockingAccepted = accepted.some((f) => isBlockingSeverity(f.severity, threshold));
  if (hasBlockingAccepted || dropped.length === 0) {
    return noReprompt;
  }
  return { shouldReprompt: true, acDropped: dropped };
}
|
|
31772
|
+
/**
 * Send the AC-reground re-prompt and merge its result into the first turn.
 *
 * Outcomes, recorded in `_repromptInfo.outcome`:
 * - "parse-failed": second reply is not valid; first output kept, marked.
 * - "recovered-blocking": second reply has an accepted blocking finding;
 *   output becomes a failed review carrying the second reply's findings.
 * - "recovered-advisory-only": second reply passed; output becomes a passed
 *   review with the advisory findings of both turns.
 * - "still-dropped": anything else; first output kept, marked.
 *
 * Every outcome reports the combined cost of both turns in
 * `estimatedCostUsd`; previously the two "first output kept" paths left the
 * second turn's cost out of it.
 *
 * @param {object} turn - First-turn result (`output`, `estimatedCostUsd`).
 * @param {{ findings: Array<object> }} firstParsed - Validated first reply.
 * @param {Array<object>} drops - Findings dropped by AC-quote filtering.
 * @param {object} ctx - Provides `input` and `send(prompt)`.
 * @returns {Promise<object>} A turn-shaped result.
 */
async function performAdversarialReground(turn, firstParsed, drops, ctx) {
  const threshold = ctx.input.blockingThreshold ?? "error";
  const acceptanceCriteria = ctx.input.story.acceptanceCriteria;
  const isBlocking = (f) => isBlockingSeverity(f.severity, threshold);

  const { accepted: firstAccepted } = filterByAcQuote(firstParsed.findings, acceptanceCriteria);
  const firstAdvisory = firstAccepted.filter((f) => !isBlocking(f));

  const repromptPrompt = AdversarialReviewPromptBuilder.regroundDroppedFindings({
    drops,
    acceptanceCriteria
  });
  const secondTurn = await ctx.send(repromptPrompt);
  const secondParsed = validateAdversarialShape(tryParseLLMJson(secondTurn.output));
  const costUsd = (turn.estimatedCostUsd ?? 0) + (secondTurn.estimatedCostUsd ?? 0);
  const dropCount = drops.length;

  // Keep the first turn's output, tagged with the reprompt outcome.
  const keepFirstOutput = (outcome) => ({
    ...turn,
    output: withRepromptMarker(turn.output, { dropCount, outcome, costUsd }),
    estimatedCostUsd: costUsd
  });
  // Replace the output with a freshly serialized review.
  const replaceOutput = (passed, findings, outcome) => ({
    ...turn,
    output: JSON.stringify({
      passed,
      findings,
      _repromptInfo: { dropCount, outcome, costUsd }
    }),
    estimatedCostUsd: costUsd
  });

  if (!secondParsed) {
    return keepFirstOutput("parse-failed");
  }
  const { accepted: secondAccepted } = filterByAcQuote(secondParsed.findings, acceptanceCriteria);
  if (secondAccepted.some((f) => isBlocking(f))) {
    return replaceOutput(false, secondParsed.findings, "recovered-blocking");
  }
  if (secondParsed.passed) {
    const secondAdvisory = secondAccepted.filter((f) => !isBlocking(f));
    return replaceOutput(true, [...firstAdvisory, ...secondAdvisory], "recovered-advisory-only");
  }
  return keepFirstOutput("still-dropped");
}
|
|
31821
|
+
var FAIL_OPEN, ADVERSARIAL_REQUOTE_RECOVERED_EVENT = "review.adversarial.finding.requote_recovered", ADVERSARIAL_REQUOTE_FAILED_EVENT = "review.adversarial.finding.requote_failed", DEFAULT_MAX_REQUOTES = 5, adversarialParseRetry = (input) => makeParseRetryStrategy({
|
|
31254
31822
|
validate: (parsed) => validateAdversarialShape(parsed) !== null,
|
|
31255
31823
|
reviewerKind: "adversarial",
|
|
31256
31824
|
maxAttempts: 2,
|
|
@@ -31258,15 +31826,24 @@ var FAIL_OPEN, adversarialParseRetry = (input) => makeParseRetryStrategy({
|
|
|
31258
31826
|
invalid: () => ReviewPromptBuilder.jsonRetry(),
|
|
31259
31827
|
truncated: () => ReviewPromptBuilder.jsonRetryCondensed({ blockingThreshold: input.blockingThreshold })
|
|
31260
31828
|
},
|
|
31261
|
-
exhaustedFallback: (lastOutput) => /"passed"\s*:\s*false/.test(lastOutput) ? { passed: false, findings: [], looksLikeFail: true } : FAIL_OPEN,
|
|
31829
|
+
exhaustedFallback: (lastOutput) => /"passed"\s*:\s*false/.test(lastOutput) ? { passed: false, findings: [], normalizedFindings: [], acDropped: [], looksLikeFail: true } : FAIL_OPEN,
|
|
31262
31830
|
logContext: { blockingThreshold: input.blockingThreshold ?? "error" }
|
|
31263
31831
|
}), adversarialReviewOp;
|
|
31264
31832
|
var init_adversarial_review = __esm(() => {
|
|
31265
31833
|
init_retry();
|
|
31266
31834
|
init_config();
|
|
31835
|
+
init_logger2();
|
|
31267
31836
|
init_prompts();
|
|
31268
31837
|
init_adversarial_helpers();
|
|
31269
|
-
|
|
31838
|
+
init_finding_filters();
|
|
31839
|
+
init_requote_response();
|
|
31840
|
+
FAIL_OPEN = {
|
|
31841
|
+
passed: true,
|
|
31842
|
+
findings: [],
|
|
31843
|
+
normalizedFindings: [],
|
|
31844
|
+
acDropped: [],
|
|
31845
|
+
failOpen: true
|
|
31846
|
+
};
|
|
31270
31847
|
adversarialReviewOp = {
|
|
31271
31848
|
kind: "run",
|
|
31272
31849
|
name: "adversarial-review",
|
|
@@ -31276,6 +31853,36 @@ var init_adversarial_review = __esm(() => {
|
|
|
31276
31853
|
model: (input) => input.adversarialConfig.model,
|
|
31277
31854
|
timeoutMs: (input) => input.adversarialConfig.timeoutMs,
|
|
31278
31855
|
retry: (input) => adversarialParseRetry(input),
|
|
31856
|
+
async hopBody(initialPrompt, ctx) {
|
|
31857
|
+
const turn = await ctx.sendWithParseRetry(initialPrompt);
|
|
31858
|
+
const parsed = validateAdversarialShape(tryParseLLMJson(turn.output));
|
|
31859
|
+
if (!parsed)
|
|
31860
|
+
return turn;
|
|
31861
|
+
if (ctx.input.mode !== "ref")
|
|
31862
|
+
return turn;
|
|
31863
|
+
const regroundEnabled = ctx.input.adversarialConfig.acRegroundOnDrop !== false;
|
|
31864
|
+
if (regroundEnabled) {
|
|
31865
|
+
const firstShape = { passed: parsed.passed, findings: parsed.findings };
|
|
31866
|
+
const trigger = evaluateRepromptTrigger(firstShape, ctx.input);
|
|
31867
|
+
if (trigger.shouldReprompt) {
|
|
31868
|
+
return await performAdversarialReground(turn, parsed, trigger.acDropped, ctx);
|
|
31869
|
+
}
|
|
31870
|
+
}
|
|
31871
|
+
const requoteEnabled = ctx.input.adversarialConfig.substantiation?.requote ?? true;
|
|
31872
|
+
const maxRequotes = ctx.input.adversarialConfig.substantiation?.maxRequotes ?? DEFAULT_MAX_REQUOTES;
|
|
31873
|
+
if (!requoteEnabled || maxRequotes <= 0)
|
|
31874
|
+
return turn;
|
|
31875
|
+
const requoted = await requoteBlockingAdversarialFindings(parsed.findings, ctx);
|
|
31876
|
+
if (requoted.changed) {
|
|
31877
|
+
const passed = !requoted.findings.some((finding) => isBlockingSeverity(finding.severity, ctx.input.blockingThreshold ?? "error"));
|
|
31878
|
+
return {
|
|
31879
|
+
...turn,
|
|
31880
|
+
output: JSON.stringify({ passed, findings: requoted.findings }),
|
|
31881
|
+
estimatedCostUsd: (turn.estimatedCostUsd ?? 0) + requoted.extraCostUsd
|
|
31882
|
+
};
|
|
31883
|
+
}
|
|
31884
|
+
return turn;
|
|
31885
|
+
},
|
|
31279
31886
|
build(input, _ctx) {
|
|
31280
31887
|
const base = new AdversarialReviewPromptBuilder().buildAdversarialReviewPrompt(input.story, input.adversarialConfig, {
|
|
31281
31888
|
mode: input.mode,
|
|
@@ -31298,12 +31905,51 @@ var init_adversarial_review = __esm(() => {
|
|
|
31298
31905
|
parse(output, _input, _ctx) {
|
|
31299
31906
|
const raw = tryParseLLMJson(output);
|
|
31300
31907
|
const parsed = validateAdversarialShape(raw);
|
|
31301
|
-
|
|
31302
|
-
|
|
31908
|
+
const repromptEvent = extractRepromptInfo(raw);
|
|
31909
|
+
if (parsed) {
|
|
31910
|
+
return {
|
|
31911
|
+
passed: parsed.passed,
|
|
31912
|
+
findings: parsed.findings,
|
|
31913
|
+
normalizedFindings: [],
|
|
31914
|
+
acDropped: [],
|
|
31915
|
+
repromptEvent
|
|
31916
|
+
};
|
|
31917
|
+
}
|
|
31303
31918
|
if (/"passed"\s*:\s*false/.test(output) && !/"findings"\s*:\s*\[\s*\{/.test(output)) {
|
|
31304
|
-
return {
|
|
31919
|
+
return {
|
|
31920
|
+
passed: false,
|
|
31921
|
+
findings: [],
|
|
31922
|
+
normalizedFindings: [],
|
|
31923
|
+
acDropped: [],
|
|
31924
|
+
looksLikeFail: true,
|
|
31925
|
+
repromptEvent
|
|
31926
|
+
};
|
|
31305
31927
|
}
|
|
31306
31928
|
throw new ParseValidationError("[adversarial-review] parse failed: invalid JSON shape");
|
|
31929
|
+
},
|
|
31930
|
+
async verify(parsed, input, _verifyCtx) {
|
|
31931
|
+
if (parsed.failOpen || parsed.looksLikeFail)
|
|
31932
|
+
return parsed;
|
|
31933
|
+
if (parsed.findings.length === 0)
|
|
31934
|
+
return parsed;
|
|
31935
|
+
const threshold = input.blockingThreshold ?? "error";
|
|
31936
|
+
const findings = parsed.findings;
|
|
31937
|
+
const substantiated = await substantiateAdversarialFindings({
|
|
31938
|
+
findings,
|
|
31939
|
+
workdir: input.workdir,
|
|
31940
|
+
storyId: input.story.id,
|
|
31941
|
+
blockingThreshold: threshold
|
|
31942
|
+
});
|
|
31943
|
+
const { accepted, dropped } = filterByAcQuote(substantiated, input.story.acceptanceCriteria);
|
|
31944
|
+
const blocking = accepted.filter((f) => isBlockingSeverity(f.severity, threshold));
|
|
31945
|
+
const passed = parsed.passed && blocking.length === 0;
|
|
31946
|
+
return {
|
|
31947
|
+
...parsed,
|
|
31948
|
+
passed,
|
|
31949
|
+
findings: accepted,
|
|
31950
|
+
normalizedFindings: toAdversarialReviewFindings(blocking),
|
|
31951
|
+
acDropped: dropped
|
|
31952
|
+
};
|
|
31307
31953
|
}
|
|
31308
31954
|
};
|
|
31309
31955
|
});
|
|
@@ -31738,191 +32384,6 @@ var init_review_audit = __esm(() => {
|
|
|
31738
32384
|
};
|
|
31739
32385
|
});
|
|
31740
32386
|
|
|
31741
|
-
// src/review/semantic-helpers.ts
|
|
31742
|
-
function validateLLMShape(parsed) {
|
|
31743
|
-
if (typeof parsed !== "object" || parsed === null)
|
|
31744
|
-
return null;
|
|
31745
|
-
const obj = parsed;
|
|
31746
|
-
if (typeof obj.passed !== "boolean")
|
|
31747
|
-
return null;
|
|
31748
|
-
if (!Array.isArray(obj.findings))
|
|
31749
|
-
return null;
|
|
31750
|
-
return { passed: obj.passed, findings: obj.findings };
|
|
31751
|
-
}
|
|
31752
|
-
function parseLLMResponse(raw) {
|
|
31753
|
-
try {
|
|
31754
|
-
return validateLLMShape(tryParseLLMJson(raw));
|
|
31755
|
-
} catch {
|
|
31756
|
-
return null;
|
|
31757
|
-
}
|
|
31758
|
-
}
|
|
31759
|
-
function formatFindings2(findings) {
|
|
31760
|
-
return findings.map((f) => `[${f.severity}] ${f.file}:${f.line} \u2014 ${f.issue}
|
|
31761
|
-
Suggestion: ${f.suggestion}`).join(`
|
|
31762
|
-
`);
|
|
31763
|
-
}
|
|
31764
|
-
function normalizeSeverity2(sev) {
|
|
31765
|
-
if (sev === "warn")
|
|
31766
|
-
return "warning";
|
|
31767
|
-
if (sev === "critical" || sev === "error" || sev === "warning" || sev === "info" || sev === "low" || sev === "unverifiable")
|
|
31768
|
-
return sev;
|
|
31769
|
-
return "info";
|
|
31770
|
-
}
|
|
31771
|
-
function sanitizeRefModeFindings(findings, diffMode, blockingThreshold = "error") {
|
|
31772
|
-
if (diffMode !== "ref")
|
|
31773
|
-
return findings;
|
|
31774
|
-
return findings.map((finding) => needsDowngradeForMissingEvidence(finding, blockingThreshold) ? downgradeToUnverifiable(finding) : finding);
|
|
31775
|
-
}
|
|
31776
|
-
function needsDowngradeForMissingEvidence(finding, blockingThreshold) {
|
|
31777
|
-
if (!isBlockingSeverity(finding.severity, blockingThreshold))
|
|
31778
|
-
return false;
|
|
31779
|
-
return mentionsUnverifiedSource(finding) || !hasVerifiedEvidence(finding);
|
|
31780
|
-
}
|
|
31781
|
-
function mentionsUnverifiedSource(finding) {
|
|
31782
|
-
const text = `${finding.issue} ${finding.suggestion}`.toLowerCase();
|
|
31783
|
-
return UNVERIFIED_FINDING_PATTERNS.some((pattern) => text.includes(pattern));
|
|
31784
|
-
}
|
|
31785
|
-
function hasVerifiedEvidence(finding) {
|
|
31786
|
-
const evidence = finding.verifiedBy;
|
|
31787
|
-
return !!evidence?.file?.trim() && !!evidence.observed?.trim();
|
|
31788
|
-
}
|
|
31789
|
-
function downgradeToUnverifiable(finding) {
|
|
31790
|
-
return {
|
|
31791
|
-
...finding,
|
|
31792
|
-
severity: "unverifiable"
|
|
31793
|
-
};
|
|
31794
|
-
}
|
|
31795
|
-
function llmFindingToFinding(f) {
|
|
31796
|
-
const metaExtras = {};
|
|
31797
|
-
if (f.verifiedBy)
|
|
31798
|
-
metaExtras.verifiedBy = f.verifiedBy;
|
|
31799
|
-
if (f.acQuote)
|
|
31800
|
-
metaExtras.acQuote = f.acQuote;
|
|
31801
|
-
if (f.acIndex != null)
|
|
31802
|
-
metaExtras.acIndex = f.acIndex;
|
|
31803
|
-
return {
|
|
31804
|
-
source: "semantic-review",
|
|
31805
|
-
severity: normalizeSeverity2(f.severity),
|
|
31806
|
-
category: "",
|
|
31807
|
-
file: f.file,
|
|
31808
|
-
line: f.line,
|
|
31809
|
-
message: f.issue,
|
|
31810
|
-
suggestion: f.suggestion ?? undefined,
|
|
31811
|
-
fixTarget: "source",
|
|
31812
|
-
meta: Object.keys(metaExtras).length > 0 ? metaExtras : undefined
|
|
31813
|
-
};
|
|
31814
|
-
}
|
|
31815
|
-
function toReviewFindings(findings) {
|
|
31816
|
-
return findings.map(llmFindingToFinding);
|
|
31817
|
-
}
|
|
31818
|
-
var UNVERIFIED_FINDING_PATTERNS;
|
|
31819
|
-
var init_semantic_helpers = __esm(() => {
|
|
31820
|
-
init_severity();
|
|
31821
|
-
UNVERIFIED_FINDING_PATTERNS = [
|
|
31822
|
-
"cannot verify",
|
|
31823
|
-
"can't verify",
|
|
31824
|
-
"from diff alone",
|
|
31825
|
-
"missing from diff",
|
|
31826
|
-
"not found in diff",
|
|
31827
|
-
"not present in diff",
|
|
31828
|
-
"does not appear in diff"
|
|
31829
|
-
];
|
|
31830
|
-
});
|
|
31831
|
-
|
|
31832
|
-
// src/review/semantic-evidence.ts
|
|
31833
|
-
import { isAbsolute as isAbsolute8 } from "path";
|
|
31834
|
-
/**
 * Downgrades blocking findings whose quoted evidence cannot be found on disk.
 *
 * Only active when `diffMode` is "ref"; any other mode returns the input array
 * unchanged. In ref mode every finding is checked concurrently: findings
 * below the blocking threshold pass through, and a blocking finding is
 * downgraded only when its evidence check reports "unmatched".
 *
 * @param {object[]} findings - Findings to check.
 * @param {string} diffMode - Review diff mode.
 * @param {string} workdir - Directory handed to the evidence check.
 * @param {string} storyId - Story id included in the downgrade log entry.
 * @param {string} [blockingThreshold="error"] - Severity threshold for "blocking".
 * @returns {Promise<object[]>} Findings, in order, with unsubstantiated ones downgraded.
 */
async function substantiateSemanticEvidence(findings, diffMode, workdir, storyId, blockingThreshold = "error") {
  const substantiateOne = async (candidate) => {
    if (isBlockingSeverity(candidate.severity, blockingThreshold)) {
      const check = await checkFindingEvidence({ finding: candidate, workdir });
      if (check.status === "unmatched") {
        return downgradeUnsubstantiatedFinding({ finding: candidate, storyId, ...check });
      }
    }
    return candidate;
  };
  if (diffMode === "ref") {
    return Promise.all(findings.map((candidate) => substantiateOne(candidate)));
  }
  return findings;
}
|
|
31846
|
-
/**
 * Checks whether the text a finding claims to have observed exists in the
 * cited file.
 *
 * File and line come from `finding.verifiedBy` when set there, otherwise from
 * the finding itself.
 *
 * @param {{finding: object, workdir: string}} opts
 * @returns {Promise<{status: string, file: *, line: *, observed?: string}>}
 *   status is "missing-observed" (no quoted text), "unreadable" (file could
 *   not be read), "matched" or "unmatched".
 */
async function checkFindingEvidence(opts) {
  const { finding, workdir } = opts;
  const citation = finding.verifiedBy;
  const quoted = citation?.observed?.trim();
  const targetFile = citation?.file?.trim() || finding.file;
  const citedLine = citation?.line ?? finding.line;
  const located = { file: targetFile, line: citedLine };
  if (!quoted) {
    return { status: "missing-observed", ...located };
  }
  const text = await readSafeFile(workdir, targetFile);
  if (text === null) {
    return { status: "unreadable", ...located, observed: quoted };
  }
  const status = matchesEvidence(text, quoted, citedLine) ? "matched" : "unmatched";
  return { status, ...located, observed: quoted };
}
|
|
31857
|
-
/**
 * Decides whether `observed` occurs in `contents`, optionally restricted to a
 * window of lines around a cited line.
 *
 * Without a usable line number the whole text is searched. Otherwise the
 * 1-based `line` is clamped into the file and only the lines within
 * EVIDENCE_LINE_WINDOW on either side of it are searched. Comparison is
 * delegated to normalizedIncludes.
 *
 * @param {string} contents - Full file text.
 * @param {string} observed - Text the finding claims to have seen.
 * @param {number} [line] - 1-based cited line, if any.
 * @returns {boolean} true when the observed text is found in the search area.
 */
function matchesEvidence(contents, observed, line) {
  let searchArea = contents;
  // `!(line <= 0)` rather than `line > 0`: the two differ for non-numeric
  // values, and this keeps the original comparison exactly.
  if (line && !(line <= 0)) {
    const rows = contents.split("\n");
    const lastIndex = rows.length - 1;
    const citedIndex = Math.min(Math.max(0, line - 1), lastIndex);
    const from = Math.max(0, citedIndex - EVIDENCE_LINE_WINDOW);
    const to = Math.min(rows.length, citedIndex + EVIDENCE_LINE_WINDOW + 1);
    searchArea = rows.slice(from, to).join("\n");
  }
  return normalizedIncludes(searchArea, observed);
}
|
|
31870
|
-
/**
 * Logs a warning about an unsubstantiated finding and returns a copy of it
 * with severity "unverifiable".
 *
 * File and line in the log entry fall back from `opts` to the finding's
 * `verifiedBy` citation and then to the finding itself. The issue and observed
 * texts are truncated to ISSUE_PREVIEW_CHARS / OBSERVED_PREVIEW_CHARS.
 *
 * @param {{finding: object, storyId?: string, event?: string, file?: string,
 *   line?: number, observed?: string}} opts
 * @returns {object} Shallow copy of `opts.finding` with severity "unverifiable".
 */
function downgradeUnsubstantiatedFinding(opts) {
  const logger = _evidenceDeps.getLogger();
  // The payload is built only when a logger exists, as with an optional call.
  if (logger != null) {
    const { finding } = opts;
    logger.warn("review", "Downgraded unsubstantiated review finding", {
      storyId: opts.storyId,
      event: opts.event ?? SEMANTIC_FINDING_DOWNGRADED_EVENT,
      file: opts.file ?? finding.verifiedBy?.file ?? finding.file,
      line: opts.line ?? finding.verifiedBy?.line ?? finding.line,
      issue: finding.issue?.slice(0, ISSUE_PREVIEW_CHARS),
      observed: opts.observed?.slice(0, OBSERVED_PREVIEW_CHARS)
    });
  }
  const downgraded = { ...opts.finding };
  downgraded.severity = "unverifiable";
  return downgraded;
}
|
|
31881
|
-
/**
 * Reads the text of a file cited by a review finding, or returns null when it
 * cannot be read.
 *
 * Resolution order:
 *   1. If validateModulePath accepts `file3` relative to `workdir`, read the
 *      resolved absolute path.
 *   2. Otherwise, if `file3` is itself an absolute path, read it directly.
 *   3. Otherwise return null.
 *
 * Any read error is reported as null instead of being thrown.
 *
 * @param {string} workdir - Root directory allowed by validateModulePath.
 * @param {string} file3 - Path cited by the finding. May be missing when the
 *   finding carries no file; that case now yields null instead of throwing.
 * @returns {Promise<string|null>} File text, or null when unreadable.
 */
async function readSafeFile(workdir, file3) {
  // A finding without a file reaches here as undefined. path.isAbsolute throws
  // on non-strings, which would reject the caller's whole Promise.all, so such
  // input is treated as "unreadable".
  if (typeof file3 !== "string" || file3.length === 0) {
    return null;
  }
  const readTextOrNull = async (target) => {
    try {
      return await Bun.file(target).text();
    } catch {
      return null;
    }
  };
  const validated = validateModulePath(file3, [workdir]);
  if (validated.valid && validated.absolutePath) {
    return readTextOrNull(validated.absolutePath);
  }
  // NOTE(review): this fallback reads any absolute path, including ones that
  // validateModulePath rejected — confirm that is intended.
  if (isAbsolute8(file3)) {
    return readTextOrNull(file3);
  }
  return null;
}
|
|
31899
|
-
/**
 * Substring test after both sides are passed through normalizeEvidenceText.
 * An `observed` value that normalizes to the empty string never matches.
 *
 * @param {string} contents - Text to search in.
 * @param {string} observed - Text to look for.
 * @returns {boolean} true when the normalized needle occurs in the normalized text.
 */
function normalizedIncludes(contents, observed) {
  const needle = normalizeEvidenceText(observed);
  if (needle.length === 0) {
    return false;
  }
  const haystack = normalizeEvidenceText(contents);
  return haystack.includes(needle);
}
|
|
31903
|
-
/**
 * Canonicalizes text for evidence comparison: wrapping quotes are removed via
 * stripWrappingQuotes, every whitespace run becomes a single space, and the
 * result is trimmed.
 *
 * @param {string} text - Raw text.
 * @returns {string} Normalized text.
 */
function normalizeEvidenceText(text) {
  const unquoted = stripWrappingQuotes(text);
  const singleSpaced = unquoted.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
|
|
31906
|
-
/**
 * Trims the text and repeatedly peels off one matching pair of wrapping quote
 * characters (as judged by isMatchingWrapper), trimming again after each peel.
 *
 * @param {string} text - Raw text.
 * @returns {string} Text with all outer matching wrappers removed.
 */
function stripWrappingQuotes(text) {
  let current = text.trim();
  for (;;) {
    // A wrapper pair needs at least two characters.
    if (current.length < 2) {
      break;
    }
    const head = current[0];
    const tail = current[current.length - 1];
    if (!isMatchingWrapper(head, tail)) {
      break;
    }
    current = current.slice(1, -1).trim();
  }
  return current;
}
|
|
31913
|
-
/**
 * Tells whether two characters form a matching quote pair: both backticks,
 * both double quotes, or both single quotes.
 *
 * @param {string} first - Leading character.
 * @param {string} last - Trailing character.
 * @returns {boolean} true for a matching pair of the same quote character.
 */
function isMatchingWrapper(first, last) {
  if (first !== last) {
    return false;
  }
  switch (first) {
    case "`":
    case '"':
    case "'":
      return true;
    default:
      return false;
  }
}
|
|
31916
|
-
// Constants for the evidence checks:
//   OBSERVED_PREVIEW_CHARS / ISSUE_PREVIEW_CHARS — maximum characters of the
//     observed text / issue text written to the downgrade log entry.
//   EVIDENCE_LINE_WINDOW — lines searched on each side of a cited line.
//   *_FINDING_DOWNGRADED_EVENT — event names for the downgrade log entry.
// `_evidenceDeps` is declared here and assigned in init_semantic_evidence().
var OBSERVED_PREVIEW_CHARS = 160, ISSUE_PREVIEW_CHARS = 200, EVIDENCE_LINE_WINDOW = 10, SEMANTIC_FINDING_DOWNGRADED_EVENT = "review.semantic.finding.downgraded", ADVERSARIAL_FINDING_DOWNGRADED_EVENT = "review.adversarial.finding.downgraded", _evidenceDeps;
// Module initializer, wrapped in the bundle's `__esm` helper (defined outside
// this excerpt). It runs the initializers of the modules this one uses, then
// builds the dependency object.
var init_semantic_evidence = __esm(() => {
  init_logger2();
  init_path_security2();
  init_semantic_helpers();
  // The logger is looked up through this object rather than called directly
  // (presumably so it can be replaced in tests — confirm).
  _evidenceDeps = {
    getLogger: getSafeLogger
  };
});
|
|
31925
|
-
|
|
31926
32387
|
// src/review/adversarial.ts
|
|
31927
32388
|
import { relative as relative7, sep } from "path";
|
|
31928
32389
|
function recordAdversarialAudit(opts) {
|
|
@@ -31955,7 +32416,6 @@ async function runAdversarialReview(opts) {
|
|
|
31955
32416
|
agentManager,
|
|
31956
32417
|
config: naxConfig,
|
|
31957
32418
|
featureName,
|
|
31958
|
-
priorFailures,
|
|
31959
32419
|
blockingThreshold,
|
|
31960
32420
|
featureContextMarkdown,
|
|
31961
32421
|
contextBundle,
|
|
@@ -31966,7 +32426,7 @@ async function runAdversarialReview(opts) {
|
|
|
31966
32426
|
} = opts;
|
|
31967
32427
|
const startTime = Date.now();
|
|
31968
32428
|
const logger = getSafeLogger();
|
|
31969
|
-
const effectiveRef = await resolveEffectiveRef(workdir, storyGitRef, story.id);
|
|
32429
|
+
const effectiveRef = await _adversarialDeps.resolveEffectiveRef(workdir, storyGitRef, story.id);
|
|
31970
32430
|
if (!effectiveRef) {
|
|
31971
32431
|
return {
|
|
31972
32432
|
check: "adversarial",
|
|
@@ -31985,7 +32445,7 @@ async function runAdversarialReview(opts) {
|
|
|
31985
32445
|
});
|
|
31986
32446
|
const repoRoot = projectDir ?? workdir;
|
|
31987
32447
|
const packageDir = workdir !== repoRoot ? workdir : undefined;
|
|
31988
|
-
const stat = await collectDiffStat(workdir, effectiveRef, { naxIgnoreIndex, packageDir });
|
|
32448
|
+
const stat = await _adversarialDeps.collectDiffStat(workdir, effectiveRef, { naxIgnoreIndex, packageDir });
|
|
31989
32449
|
if (!stat) {
|
|
31990
32450
|
return {
|
|
31991
32451
|
check: "adversarial",
|
|
@@ -32076,13 +32536,13 @@ async function runAdversarialReview(opts) {
|
|
|
32076
32536
|
let opResult;
|
|
32077
32537
|
try {
|
|
32078
32538
|
opResult = await _adversarialDeps.callOp(callCtx, adversarialReviewOp, {
|
|
32539
|
+
workdir,
|
|
32079
32540
|
story,
|
|
32080
32541
|
adversarialConfig,
|
|
32081
32542
|
mode: diffMode,
|
|
32082
32543
|
diff,
|
|
32083
32544
|
storyGitRef: effectiveRef,
|
|
32084
32545
|
stat,
|
|
32085
|
-
priorFailures,
|
|
32086
32546
|
testInventory,
|
|
32087
32547
|
excludePatterns: adversarialConfig.excludePatterns,
|
|
32088
32548
|
testGlobs: resolvedTestPatterns.globs,
|
|
@@ -32167,34 +32627,28 @@ async function runAdversarialReview(opts) {
|
|
|
32167
32627
|
durationMs: Date.now() - startTime
|
|
32168
32628
|
};
|
|
32169
32629
|
}
|
|
32170
|
-
|
|
32171
|
-
|
|
32172
|
-
|
|
32173
|
-
};
|
|
32174
|
-
const blockingThresholdEffective = blockingThreshold ?? "error";
|
|
32175
|
-
const substantiatedFindings = await Promise.all(rawParsedRaw.findings.map(async (finding) => {
|
|
32176
|
-
if (!isBlockingSeverity(finding.severity, blockingThresholdEffective))
|
|
32177
|
-
return finding;
|
|
32178
|
-
const evidence = await checkFindingEvidence({ finding, workdir });
|
|
32179
|
-
if (evidence.status !== "unmatched" && evidence.status !== "missing-observed")
|
|
32180
|
-
return finding;
|
|
32181
|
-
return downgradeUnsubstantiatedFinding({
|
|
32182
|
-
finding,
|
|
32630
|
+
if (opResult.repromptEvent) {
|
|
32631
|
+
runtime.dispatchEvents.emitReviewReprompt({
|
|
32632
|
+
kind: "review-reprompt-on-drop",
|
|
32183
32633
|
storyId: story.id,
|
|
32184
|
-
|
|
32185
|
-
|
|
32186
|
-
|
|
32187
|
-
|
|
32634
|
+
reviewer: "adversarial",
|
|
32635
|
+
dropCount: opResult.repromptEvent.dropCount,
|
|
32636
|
+
repromptOutcome: opResult.repromptEvent.outcome,
|
|
32637
|
+
costUsd: opResult.repromptEvent.costUsd
|
|
32188
32638
|
});
|
|
32189
|
-
}
|
|
32190
|
-
const
|
|
32639
|
+
}
|
|
32640
|
+
const threshold = blockingThreshold ?? "error";
|
|
32641
|
+
const allFindings = opResult.findings;
|
|
32642
|
+
const blockingFindings = allFindings.filter((f) => isBlockingSeverity(f.severity, threshold));
|
|
32643
|
+
const advisoryFindings = allFindings.filter((f) => !isBlockingSeverity(f.severity, threshold));
|
|
32644
|
+
const acDropped = opResult.acDropped ?? [];
|
|
32191
32645
|
let diffFiles;
|
|
32192
32646
|
let diffAvailable;
|
|
32193
32647
|
if (diff && diff.length > 0) {
|
|
32194
32648
|
diffFiles = extractDiffFiles(diff);
|
|
32195
32649
|
diffAvailable = true;
|
|
32196
32650
|
} else {
|
|
32197
|
-
const list = await collectDiffFileList(workdir, effectiveRef, { naxIgnoreIndex, packageDir });
|
|
32651
|
+
const list = await _adversarialDeps.collectDiffFileList(workdir, effectiveRef, { naxIgnoreIndex, packageDir });
|
|
32198
32652
|
if (list === undefined) {
|
|
32199
32653
|
diffFiles = new Set;
|
|
32200
32654
|
diffAvailable = false;
|
|
@@ -32203,13 +32657,6 @@ async function runAdversarialReview(opts) {
|
|
|
32203
32657
|
diffAvailable = true;
|
|
32204
32658
|
}
|
|
32205
32659
|
}
|
|
32206
|
-
const { accepted: acGroundedFindings, dropped: acDropped } = filterByAcQuote(rawParsed.findings, story.acceptanceCriteria);
|
|
32207
|
-
if (acDropped.length > 0) {
|
|
32208
|
-
logger?.warn("review", "Adversarial findings dropped: acQuote validation failed", {
|
|
32209
|
-
storyId: story.id,
|
|
32210
|
-
dropped: acDropped.map((d) => ({ file: d.finding.file, issue: d.finding.issue, code: d.code }))
|
|
32211
|
-
});
|
|
32212
|
-
}
|
|
32213
32660
|
const adversarialDropAnalysis = acDropped.map((d) => ({
|
|
32214
32661
|
finding: {
|
|
32215
32662
|
file: d.finding.file ?? "<unknown>",
|
|
@@ -32223,10 +32670,6 @@ async function runAdversarialReview(opts) {
|
|
|
32223
32670
|
rawCategory: d.finding.category ?? "",
|
|
32224
32671
|
counterfactual: analyzeStructuralCounterfactual({ acIndex: d.finding.acIndex, category: d.finding.category, file: d.finding.file }, story.acceptanceCriteria, diffFiles)
|
|
32225
32672
|
}));
|
|
32226
|
-
const parsed = { ...rawParsed, findings: acGroundedFindings };
|
|
32227
|
-
const threshold = blockingThresholdEffective;
|
|
32228
|
-
const blockingFindings = parsed.findings.filter((f) => isBlockingSeverity(f.severity, threshold));
|
|
32229
|
-
const advisoryFindings = parsed.findings.filter((f) => !isBlockingSeverity(f.severity, threshold));
|
|
32230
32673
|
const adversarialAcceptAnalysis = blockingFindings.map((f) => ({
|
|
32231
32674
|
finding: {
|
|
32232
32675
|
file: f.file,
|
|
@@ -32249,11 +32692,11 @@ async function runAdversarialReview(opts) {
|
|
|
32249
32692
|
}))
|
|
32250
32693
|
});
|
|
32251
32694
|
}
|
|
32695
|
+
const durationMs = Date.now() - startTime;
|
|
32252
32696
|
if (blockingFindings.length > 0) {
|
|
32253
|
-
const durationMs2 = Date.now() - startTime;
|
|
32254
32697
|
logger?.warn("review", `Adversarial review failed: ${blockingFindings.length} blocking findings`, {
|
|
32255
32698
|
storyId: story.id,
|
|
32256
|
-
durationMs
|
|
32699
|
+
durationMs,
|
|
32257
32700
|
findings: blockingFindings.map((f) => ({
|
|
32258
32701
|
severity: f.severity,
|
|
32259
32702
|
category: f.category,
|
|
@@ -32274,72 +32717,37 @@ async function runAdversarialReview(opts) {
|
|
|
32274
32717
|
blockingThreshold: threshold,
|
|
32275
32718
|
result: {
|
|
32276
32719
|
passed: false,
|
|
32277
|
-
findings: llmFindingsToReviewFindings(
|
|
32720
|
+
findings: llmFindingsToReviewFindings(allFindings, { source: "adversarial-review" })
|
|
32278
32721
|
},
|
|
32279
32722
|
advisoryFindings: advisoryFindings.length > 0 ? llmFindingsToReviewFindings(advisoryFindings, { source: "adversarial-review" }) : undefined,
|
|
32280
32723
|
diffAvailable,
|
|
32281
32724
|
adversarialDropAnalysis,
|
|
32282
32725
|
adversarialAcceptAnalysis
|
|
32283
32726
|
});
|
|
32727
|
+
const output = blockingFindings.length > 0 ? `Adversarial review failed:
|
|
32728
|
+
|
|
32729
|
+
${formatFindings(blockingFindings)}` : "Adversarial review failed (no findings)";
|
|
32284
32730
|
return {
|
|
32285
32731
|
check: "adversarial",
|
|
32286
32732
|
success: false,
|
|
32287
32733
|
command: "",
|
|
32288
32734
|
exitCode: 1,
|
|
32289
|
-
output
|
|
32290
|
-
|
|
32291
|
-
|
|
32292
|
-
durationMs: durationMs2,
|
|
32293
|
-
findings: toAdversarialReviewFindings(blockingFindings),
|
|
32735
|
+
output,
|
|
32736
|
+
durationMs,
|
|
32737
|
+
findings: blockingFindings.length > 0 ? toAdversarialReviewFindings(blockingFindings) : undefined,
|
|
32294
32738
|
advisoryFindings: advisoryFindings.length > 0 ? toAdversarialReviewFindings(advisoryFindings) : undefined,
|
|
32295
32739
|
cost: llmCost
|
|
32296
32740
|
};
|
|
32297
32741
|
}
|
|
32298
|
-
if (!
|
|
32299
|
-
|
|
32300
|
-
const durationMs3 = Date.now() - startTime;
|
|
32301
|
-
logger?.warn("review", "Adversarial review fail-closed: blocking findings dropped as ungrounded", {
|
|
32302
|
-
storyId: story.id,
|
|
32303
|
-
durationMs: durationMs3,
|
|
32304
|
-
droppedCount: acDropped.length,
|
|
32305
|
-
dropCodes: acDropped.map((d) => d.code)
|
|
32306
|
-
});
|
|
32307
|
-
const dropSummary = acDropped.map((d, i) => `${i + 1}. [${d.code}] ${d.finding.file ?? "<unknown>"}: ${d.finding.issue}`).join(`
|
|
32308
|
-
`);
|
|
32309
|
-
recordAdversarialAudit({
|
|
32310
|
-
runtime,
|
|
32311
|
-
workdir,
|
|
32312
|
-
projectDir,
|
|
32313
|
-
storyId: story.id,
|
|
32314
|
-
featureName,
|
|
32315
|
-
parsed: true,
|
|
32316
|
-
failOpen: false,
|
|
32317
|
-
passed: false,
|
|
32318
|
-
blockingThreshold: threshold,
|
|
32319
|
-
result: { passed: false, findings: [] },
|
|
32320
|
-
advisoryFindings: advisoryFindings.length > 0 ? llmFindingsToReviewFindings(advisoryFindings, { source: "adversarial-review" }) : undefined,
|
|
32321
|
-
diffAvailable,
|
|
32322
|
-
adversarialDropAnalysis,
|
|
32323
|
-
adversarialAcceptAnalysis: []
|
|
32324
|
-
});
|
|
32325
|
-
return {
|
|
32326
|
-
check: "adversarial",
|
|
32327
|
-
success: false,
|
|
32328
|
-
command: "",
|
|
32329
|
-
exitCode: 1,
|
|
32330
|
-
output: `Adversarial review failed: ${acDropped.length} blocking finding(s) dropped as ungrounded \u2014 the model emitted "passed: false" with concerns it could not ground in any acceptance criterion. Either re-classify these as "info" upstream or extend the ACs. Drops:
|
|
32331
|
-
|
|
32332
|
-
${dropSummary}`,
|
|
32333
|
-
durationMs: durationMs3,
|
|
32334
|
-
advisoryFindings: advisoryFindings.length > 0 ? toAdversarialReviewFindings(advisoryFindings) : undefined,
|
|
32335
|
-
cost: llmCost
|
|
32336
|
-
};
|
|
32337
|
-
}
|
|
32338
|
-
const durationMs2 = Date.now() - startTime;
|
|
32339
|
-
logger?.info("review", "Adversarial review passed (all findings below blocking threshold)", {
|
|
32742
|
+
if (!opResult.passed && acDropped.length > 0) {
|
|
32743
|
+
logger?.warn("review", "Adversarial review fail-closed: blocking findings dropped as ungrounded", {
|
|
32340
32744
|
storyId: story.id,
|
|
32341
|
-
durationMs
|
|
32745
|
+
durationMs,
|
|
32746
|
+
droppedCount: acDropped.length,
|
|
32747
|
+
dropCodes: acDropped.map((d) => d.code)
|
|
32342
32748
|
});
|
|
32749
|
+
const dropSummary = acDropped.map((d, i) => `${i + 1}. [${d.code}] ${d.finding.file ?? "<unknown>"}: ${d.finding.issue}`).join(`
|
|
32750
|
+
`);
|
|
32343
32751
|
recordAdversarialAudit({
|
|
32344
32752
|
runtime,
|
|
32345
32753
|
workdir,
|
|
@@ -32348,12 +32756,9 @@ ${dropSummary}`,
|
|
|
32348
32756
|
featureName,
|
|
32349
32757
|
parsed: true,
|
|
32350
32758
|
failOpen: false,
|
|
32351
|
-
passed:
|
|
32759
|
+
passed: false,
|
|
32352
32760
|
blockingThreshold: threshold,
|
|
32353
|
-
result: {
|
|
32354
|
-
passed: true,
|
|
32355
|
-
findings: llmFindingsToReviewFindings(parsed.findings, { source: "adversarial-review" })
|
|
32356
|
-
},
|
|
32761
|
+
result: { passed: false, findings: [] },
|
|
32357
32762
|
advisoryFindings: advisoryFindings.length > 0 ? llmFindingsToReviewFindings(advisoryFindings, { source: "adversarial-review" }) : undefined,
|
|
32358
32763
|
diffAvailable,
|
|
32359
32764
|
adversarialDropAnalysis,
|
|
@@ -32361,19 +32766,18 @@ ${dropSummary}`,
|
|
|
32361
32766
|
});
|
|
32362
32767
|
return {
|
|
32363
32768
|
check: "adversarial",
|
|
32364
|
-
success:
|
|
32769
|
+
success: false,
|
|
32365
32770
|
command: "",
|
|
32366
|
-
exitCode:
|
|
32367
|
-
output:
|
|
32368
|
-
|
|
32771
|
+
exitCode: 1,
|
|
32772
|
+
output: `Adversarial review failed: ${acDropped.length} blocking finding(s) dropped as ungrounded \u2014 the model emitted "passed: false" with concerns it could not ground in any acceptance criterion. Drops:
|
|
32773
|
+
|
|
32774
|
+
${dropSummary}`,
|
|
32775
|
+
durationMs,
|
|
32369
32776
|
advisoryFindings: advisoryFindings.length > 0 ? toAdversarialReviewFindings(advisoryFindings) : undefined,
|
|
32370
32777
|
cost: llmCost
|
|
32371
32778
|
};
|
|
32372
32779
|
}
|
|
32373
|
-
|
|
32374
|
-
if (parsed.passed) {
|
|
32375
|
-
logger?.info("review", "Adversarial review passed", { storyId: story.id, durationMs });
|
|
32376
|
-
}
|
|
32780
|
+
logger?.info("review", "Adversarial review passed", { storyId: story.id, durationMs });
|
|
32377
32781
|
recordAdversarialAudit({
|
|
32378
32782
|
runtime,
|
|
32379
32783
|
workdir,
|
|
@@ -32382,23 +32786,23 @@ ${dropSummary}`,
|
|
|
32382
32786
|
featureName,
|
|
32383
32787
|
parsed: true,
|
|
32384
32788
|
failOpen: false,
|
|
32385
|
-
passed:
|
|
32789
|
+
passed: true,
|
|
32386
32790
|
blockingThreshold: threshold,
|
|
32387
32791
|
result: {
|
|
32388
|
-
passed:
|
|
32389
|
-
findings: llmFindingsToReviewFindings(
|
|
32792
|
+
passed: true,
|
|
32793
|
+
findings: llmFindingsToReviewFindings(allFindings, { source: "adversarial-review" })
|
|
32390
32794
|
},
|
|
32391
32795
|
advisoryFindings: advisoryFindings.length > 0 ? llmFindingsToReviewFindings(advisoryFindings, { source: "adversarial-review" }) : undefined,
|
|
32392
32796
|
diffAvailable,
|
|
32393
32797
|
adversarialDropAnalysis,
|
|
32394
|
-
adversarialAcceptAnalysis
|
|
32798
|
+
adversarialAcceptAnalysis: []
|
|
32395
32799
|
});
|
|
32396
32800
|
return {
|
|
32397
32801
|
check: "adversarial",
|
|
32398
|
-
success:
|
|
32802
|
+
success: true,
|
|
32399
32803
|
command: "",
|
|
32400
|
-
exitCode:
|
|
32401
|
-
output:
|
|
32804
|
+
exitCode: 0,
|
|
32805
|
+
output: allFindings.length === 0 ? "Adversarial review passed" : "Adversarial review passed (all findings were advisory \u2014 below blocking threshold)",
|
|
32402
32806
|
durationMs,
|
|
32403
32807
|
advisoryFindings: advisoryFindings.length > 0 ? toAdversarialReviewFindings(advisoryFindings) : undefined,
|
|
32404
32808
|
cost: llmCost
|
|
@@ -32413,16 +32817,17 @@ var init_adversarial = __esm(() => {
|
|
|
32413
32817
|
init_adversarial_review();
|
|
32414
32818
|
init_call();
|
|
32415
32819
|
init_test_runners();
|
|
32416
|
-
init_ac_quote_validator();
|
|
32417
32820
|
init_ac_structural_counterfactual();
|
|
32418
32821
|
init_adversarial_helpers();
|
|
32419
32822
|
init_diff_utils();
|
|
32420
32823
|
init_finding_projection();
|
|
32421
32824
|
init_review_audit();
|
|
32422
|
-
init_semantic_evidence();
|
|
32423
32825
|
_adversarialDeps = {
|
|
32424
32826
|
writeReviewAudit,
|
|
32425
|
-
callOp
|
|
32827
|
+
callOp,
|
|
32828
|
+
resolveEffectiveRef,
|
|
32829
|
+
collectDiffStat,
|
|
32830
|
+
collectDiffFileList
|
|
32426
32831
|
};
|
|
32427
32832
|
});
|
|
32428
32833
|
|
|
@@ -33125,6 +33530,13 @@ class ScopedStrategy {
|
|
|
33125
33530
|
const durationMs = Date.now() - start;
|
|
33126
33531
|
if (result.success) {
|
|
33127
33532
|
const parsed2 = result.output ? parseTestOutput(result.output) : { passed: 0, failed: 0, failures: [] };
|
|
33533
|
+
logger.info("verify[scoped]", "Scoped tests passed", {
|
|
33534
|
+
storyId: ctx.storyId,
|
|
33535
|
+
passCount: parsed2.passed,
|
|
33536
|
+
durationMs,
|
|
33537
|
+
scopeTestFallback: scopeTestFallback ?? false,
|
|
33538
|
+
isFullSuite
|
|
33539
|
+
});
|
|
33128
33540
|
return makePassResult(ctx.storyId, "scoped", {
|
|
33129
33541
|
rawOutput: result.output,
|
|
33130
33542
|
passCount: parsed2.passed,
|
|
@@ -33133,6 +33545,12 @@ class ScopedStrategy {
|
|
|
33133
33545
|
});
|
|
33134
33546
|
}
|
|
33135
33547
|
if (result.status === "TIMEOUT") {
|
|
33548
|
+
logger.warn("verify[scoped]", "Scoped tests timed out", {
|
|
33549
|
+
storyId: ctx.storyId,
|
|
33550
|
+
durationMs,
|
|
33551
|
+
scopeTestFallback: scopeTestFallback ?? false,
|
|
33552
|
+
isFullSuite
|
|
33553
|
+
});
|
|
33136
33554
|
return makeFailResult(ctx.storyId, "scoped", "TIMEOUT", {
|
|
33137
33555
|
rawOutput: result.output,
|
|
33138
33556
|
durationMs,
|
|
@@ -33141,6 +33559,14 @@ class ScopedStrategy {
|
|
|
33141
33559
|
});
|
|
33142
33560
|
}
|
|
33143
33561
|
const parsed = result.output ? parseTestOutput(result.output) : { passed: 0, failed: 0, failures: [] };
|
|
33562
|
+
logger.warn("verify[scoped]", "Scoped tests failed", {
|
|
33563
|
+
storyId: ctx.storyId,
|
|
33564
|
+
passCount: parsed.passed,
|
|
33565
|
+
failCount: parsed.failed,
|
|
33566
|
+
durationMs,
|
|
33567
|
+
scopeTestFallback: scopeTestFallback ?? false,
|
|
33568
|
+
isFullSuite
|
|
33569
|
+
});
|
|
33144
33570
|
return makeFailResult(ctx.storyId, "scoped", "TEST_FAILURE", {
|
|
33145
33571
|
rawOutput: result.output,
|
|
33146
33572
|
passCount: parsed.passed,
|
|
@@ -35003,57 +35429,95 @@ var init_acceptance_fix = __esm(() => {
|
|
|
35003
35429
|
};
|
|
35004
35430
|
});
|
|
35005
35431
|
|
|
35006
|
-
// src/
|
|
35007
|
-
function
|
|
35432
|
+
// src/operations/semantic-review.ts
|
|
35433
|
+
/**
 * Attaches reprompt telemetry to an LLM JSON output.
 *
 * The output is parsed with tryParseLLMJson. When the result is a non-null
 * object, it is re-serialized with an added `_repromptInfo` property;
 * otherwise the original output string is returned unchanged.
 *
 * @param {string} output - Raw LLM output.
 * @param {{dropCount: number, outcome: string, costUsd: number}} info
 * @returns {string} JSON string carrying `_repromptInfo`, or the original output.
 */
function withRepromptMarker2(output, info) {
  const parsed = tryParseLLMJson(output);
  const isObjectLike = Boolean(parsed) && typeof parsed === "object";
  return isObjectLike ? JSON.stringify({ ...parsed, _repromptInfo: info }) : output;
}
|
|
35022
|
-
function
|
|
35023
|
-
if (!
|
|
35024
|
-
return
|
|
35025
|
-
|
|
35026
|
-
|
|
35027
|
-
|
|
35028
|
-
|
|
35029
|
-
|
|
35030
|
-
|
|
35031
|
-
|
|
35032
|
-
return null;
|
|
35439
|
+
/**
 * Reads back the `_repromptInfo` marker from a parsed LLM response.
 *
 * @param {*} raw - Parsed response (any value).
 * @returns {{dropCount: number, costUsd: number, outcome: string}|undefined}
 *   The three marker fields when `raw._repromptInfo` is an object holding a
 *   numeric `dropCount`, a numeric `costUsd` and a string `outcome`;
 *   undefined in every other case.
 */
function extractRepromptInfo2(raw) {
  const isObjectLike = (value) => Boolean(value) && typeof value === "object";
  if (!isObjectLike(raw)) {
    return undefined;
  }
  const marker = raw._repromptInfo;
  if (!isObjectLike(marker)) {
    return undefined;
  }
  const { dropCount, costUsd, outcome } = marker;
  const wellFormed = typeof dropCount === "number" && typeof costUsd === "number" && typeof outcome === "string";
  // Only the three known fields are copied; extra marker fields are dropped.
  return wellFormed ? { dropCount, costUsd, outcome } : undefined;
}
|
|
35455
|
+
/**
 * Decides whether a failed semantic review should be re-prompted.
 *
 * A reprompt is requested only when all of the following hold:
 *   - `semanticConfig.acRegroundOnDrop` is not explicitly false,
 *   - the review did not pass,
 *   - none of the findings accepted by filterByAcGroundingMinimal is blocking,
 *   - that filter dropped at least one finding.
 *
 * @param {{passed: boolean, findings: object[]}} shape - Parsed first response.
 * @param {object} input - Operation input (reads semanticConfig, story,
 *   blockingThreshold).
 * @returns {{shouldReprompt: boolean, acDropped?: object[]}} Decision; the
 *   dropped findings are included when a reprompt is requested.
 */
function evaluateRepromptTrigger2(shape, input) {
  // A fresh object per call, so no caller ever shares a result instance.
  const skip = () => ({ shouldReprompt: false });
  if (input.semanticConfig.acRegroundOnDrop === false || shape.passed) {
    return skip();
  }
  const { accepted, dropped } = filterByAcGroundingMinimal(shape.findings, input.story.acceptanceCriteria);
  const threshold = input.blockingThreshold ?? "error";
  const hasBlockingAccepted = accepted.some((f) => isBlockingSeverity(f.severity, threshold));
  if (hasBlockingAccepted || dropped.length === 0) {
    return skip();
  }
  return { shouldReprompt: true, acDropped: dropped };
}
|
|
35469
|
+
/**
 * Sends one follow-up prompt asking the model to re-ground findings that were
 * dropped, then merges the outcome into the first turn.
 *
 * The returned turn is a copy of `turn` whose `output` carries a
 * `_repromptInfo` marker with one of four outcomes:
 *   - "parse-failed":            second response did not validate; first output kept.
 *   - "recovered-blocking":      second response has accepted blocking findings;
 *                                output becomes a failing result with its findings.
 *   - "recovered-advisory-only": second response passed; output becomes a passing
 *                                result with the advisory findings of both turns.
 *   - "still-dropped":           anything else; first output kept.
 *
 * `estimatedCostUsd` on the returned turn is the sum of both turns in every
 * branch. Previously the "parse-failed" and "still-dropped" branches kept only
 * the first turn's cost, so the follow-up request went unaccounted there.
 *
 * @param {object} turn - First turn (reads output, estimatedCostUsd).
 * @param {{findings: object[]}} firstParsed - Parsed first response.
 * @param {object[]} drops - Findings dropped from the first response.
 * @param {object} ctx - Hop context (uses ctx.input and ctx.send).
 * @returns {Promise<object>} Turn-shaped result as described above.
 */
async function performSemanticReground(turn, firstParsed, drops, ctx) {
  const threshold = ctx.input.blockingThreshold ?? "error";
  const acceptanceCriteria = ctx.input.story.acceptanceCriteria;
  const isBlocking = (f) => isBlockingSeverity(f.severity, threshold);
  const { accepted: firstAccepted } = filterByAcGroundingMinimal(firstParsed.findings, acceptanceCriteria);
  const firstAdvisory = firstAccepted.filter((f) => !isBlocking(f));
  const repromptPrompt = ReviewPromptBuilder.regroundDroppedFindings({
    drops,
    acceptanceCriteria
  });
  const secondTurn = await ctx.send(repromptPrompt);
  const secondParsed = validateLLMShape(tryParseLLMJson(secondTurn.output));
  const costUsd = (turn.estimatedCostUsd ?? 0) + (secondTurn.estimatedCostUsd ?? 0);
  const dropCount = drops.length;
  // Keeps the first output and only tags it with the outcome; the cost of the
  // follow-up request is still reported.
  const keepFirstOutput = (outcome) => ({
    ...turn,
    output: withRepromptMarker2(turn.output, { dropCount, outcome, costUsd }),
    estimatedCostUsd: costUsd
  });
  // Replaces the output with a synthesized result built from the follow-up.
  const replaceOutput = (passed, findings, outcome) => ({
    ...turn,
    output: JSON.stringify({
      passed,
      findings,
      _repromptInfo: { dropCount, outcome, costUsd }
    }),
    estimatedCostUsd: costUsd
  });
  if (!secondParsed) {
    return keepFirstOutput("parse-failed");
  }
  const { accepted: secondAccepted } = filterByAcGroundingMinimal(secondParsed.findings, acceptanceCriteria);
  const secondBlocking = secondAccepted.filter(isBlocking);
  if (secondBlocking.length > 0) {
    return replaceOutput(false, secondParsed.findings, "recovered-blocking");
  }
  if (secondParsed.passed) {
    const secondAdvisory = secondAccepted.filter((f) => !isBlocking(f));
    return replaceOutput(true, [...firstAdvisory, ...secondAdvisory], "recovered-advisory-only");
  }
  return keepFirstOutput("still-dropped");
}
|
|
35039
|
-
function coerceLine(value) {
|
|
35040
|
-
if (value == null)
|
|
35041
|
-
return;
|
|
35042
|
-
if (typeof value === "number")
|
|
35043
|
-
return value;
|
|
35044
|
-
if (typeof value === "string" && /^\d+$/.test(value))
|
|
35045
|
-
return Number.parseInt(value, 10);
|
|
35046
|
-
return null;
|
|
35047
|
-
}
|
|
35048
|
-
function isRecord(value) {
|
|
35049
|
-
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
35050
|
-
}
|
|
35051
|
-
var init_requote_response = () => {};
|
|
35052
|
-
|
|
35053
|
-
// src/operations/semantic-review.ts
|
|
35054
35518
|
async function requoteBlockingFindings(findings, ctx) {
|
|
35055
35519
|
const threshold = ctx.input.blockingThreshold ?? "error";
|
|
35056
|
-
const maxRequotes = ctx.input.semanticConfig.substantiation?.maxRequotes ??
|
|
35520
|
+
const maxRequotes = ctx.input.semanticConfig.substantiation?.maxRequotes ?? DEFAULT_MAX_REQUOTES2;
|
|
35057
35521
|
const requoteEnabled = ctx.input.semanticConfig.substantiation?.requote ?? true;
|
|
35058
35522
|
if (ctx.input.mode !== "ref" || !requoteEnabled || maxRequotes <= 0) {
|
|
35059
35523
|
return { findings, changed: false, extraCostUsd: 0 };
|
|
@@ -35120,30 +35584,45 @@ async function requoteBlockingFindings(findings, ctx) {
|
|
|
35120
35584
|
}
|
|
35121
35585
|
return { findings: next, changed, extraCostUsd };
|
|
35122
35586
|
}
|
|
35123
|
-
var FAIL_OPEN2, SEMANTIC_REQUOTE_RECOVERED_EVENT = "review.semantic.finding.requote_recovered", SEMANTIC_REQUOTE_FAILED_EVENT = "review.semantic.finding.requote_failed",
|
|
35587
|
+
// FAIL_OPEN2 and semanticReviewOp are only declared here; they are assigned
// inside init_semantic_review(). The two event names and the default requote
// limit are plain constants.
var FAIL_OPEN2,
  SEMANTIC_REQUOTE_RECOVERED_EVENT = "review.semantic.finding.requote_recovered",
  SEMANTIC_REQUOTE_FAILED_EVENT = "review.semantic.finding.requote_failed",
  DEFAULT_MAX_REQUOTES2 = 5,
  /**
   * Hop body for the semantic review operation.
   *
   * Sends the initial prompt, then:
   *   1. returns the turn untouched when its output does not validate;
   *   2. runs requoteBlockingFindings — when that changed anything, returns a
   *      rewritten output (passed = no blocking finding left) with the requote
   *      cost added;
   *   3. otherwise, in "ref" mode with re-grounding enabled, asks
   *      evaluateRepromptTrigger2 whether to re-prompt and, if so, delegates
   *      to performSemanticReground;
   *   4. in every remaining case returns the first turn as is.
   *
   * @param {string} initialPrompt - Prompt for the first turn.
   * @param {object} ctx - Hop context (uses sendWithParseRetry and input).
   * @returns {Promise<object>} Turn-shaped result.
   */
  semanticReviewHopBody = async (initialPrompt, ctx) => {
    const firstTurn = await ctx.sendWithParseRetry(initialPrompt);
    const shape = validateLLMShape(tryParseLLMJson(firstTurn.output));
    if (!shape) {
      return firstTurn;
    }
    const requoted = await requoteBlockingFindings(shape.findings, ctx);
    if (requoted.changed) {
      const stillBlocked = requoted.findings.some((item) => isBlockingSeverity(item.severity, ctx.input.blockingThreshold ?? "error"));
      return {
        ...firstTurn,
        output: JSON.stringify({ passed: !stillBlocked, findings: requoted.findings }),
        estimatedCostUsd: (firstTurn.estimatedCostUsd ?? 0) + requoted.extraCostUsd
      };
    }
    // Re-grounding applies to "ref" mode only and can be switched off in config.
    const regroundAllowed = ctx.input.mode === "ref" && ctx.input.semanticConfig.acRegroundOnDrop !== false;
    if (!regroundAllowed) {
      return firstTurn;
    }
    const firstShape = { passed: shape.passed, findings: requoted.findings };
    const decision = evaluateRepromptTrigger2(firstShape, ctx.input);
    return decision.shouldReprompt
      ? performSemanticReground(firstTurn, firstShape, decision.acDropped, ctx)
      : firstTurn;
  },
  semanticReviewOp;
|
|
35138
35612
|
var init_semantic_review = __esm(() => {
|
|
35139
35613
|
init_retry();
|
|
35140
35614
|
init_config();
|
|
35141
35615
|
init_logger2();
|
|
35142
35616
|
init_prompts();
|
|
35617
|
+
init_finding_filters();
|
|
35143
35618
|
init_requote_response();
|
|
35144
|
-
|
|
35145
|
-
|
|
35146
|
-
|
|
35619
|
+
FAIL_OPEN2 = {
|
|
35620
|
+
passed: true,
|
|
35621
|
+
findings: [],
|
|
35622
|
+
normalizedFindings: [],
|
|
35623
|
+
acDropped: [],
|
|
35624
|
+
failOpen: true
|
|
35625
|
+
};
|
|
35147
35626
|
semanticReviewOp = {
|
|
35148
35627
|
kind: "run",
|
|
35149
35628
|
name: "semantic-review",
|
|
@@ -35160,6 +35639,7 @@ var init_semantic_review = __esm(() => {
|
|
|
35160
35639
|
invalid: () => ReviewPromptBuilder.jsonRetry(),
|
|
35161
35640
|
truncated: () => ReviewPromptBuilder.jsonRetryCondensed({ blockingThreshold: input.blockingThreshold })
|
|
35162
35641
|
},
|
|
35642
|
+
exhaustedFallback: (lastOutput) => /"passed"\s*:\s*false/.test(lastOutput) ? { passed: false, findings: [], normalizedFindings: [], acDropped: [], looksLikeFail: true } : FAIL_OPEN2,
|
|
35163
35643
|
logContext: { blockingThreshold: input.blockingThreshold ?? "error" }
|
|
35164
35644
|
}),
|
|
35165
35645
|
hopBody: semanticReviewHopBody,
|
|
@@ -35181,11 +35661,47 @@ var init_semantic_review = __esm(() => {
|
|
|
35181
35661
|
parse(output, _input, _ctx) {
|
|
35182
35662
|
const raw = tryParseLLMJson(output);
|
|
35183
35663
|
const parsed = validateLLMShape(raw);
|
|
35184
|
-
|
|
35185
|
-
|
|
35186
|
-
|
|
35187
|
-
|
|
35664
|
+
const repromptEvent = extractRepromptInfo2(raw);
|
|
35665
|
+
if (parsed) {
|
|
35666
|
+
return {
|
|
35667
|
+
passed: parsed.passed,
|
|
35668
|
+
findings: parsed.findings,
|
|
35669
|
+
normalizedFindings: [],
|
|
35670
|
+
acDropped: [],
|
|
35671
|
+
repromptEvent
|
|
35672
|
+
};
|
|
35673
|
+
}
|
|
35674
|
+
if (/"passed"\s*:\s*false/.test(output)) {
|
|
35675
|
+
return {
|
|
35676
|
+
passed: false,
|
|
35677
|
+
findings: [],
|
|
35678
|
+
normalizedFindings: [],
|
|
35679
|
+
acDropped: [],
|
|
35680
|
+
looksLikeFail: true,
|
|
35681
|
+
repromptEvent
|
|
35682
|
+
};
|
|
35683
|
+
}
|
|
35188
35684
|
return FAIL_OPEN2;
|
|
35685
|
+
},
|
|
35686
|
+
async verify(parsed, input, _verifyCtx) {
|
|
35687
|
+
if (parsed.failOpen || parsed.looksLikeFail)
|
|
35688
|
+
return parsed;
|
|
35689
|
+
if (parsed.findings.length === 0)
|
|
35690
|
+
return parsed;
|
|
35691
|
+
const threshold = input.blockingThreshold ?? "error";
|
|
35692
|
+
const findings = parsed.findings;
|
|
35693
|
+
const sanitized = sanitizeRefModeFindings(findings, input.mode, threshold);
|
|
35694
|
+
const substantiated = await substantiateSemanticEvidence(sanitized, input.mode, input.workdir, input.story.id, threshold);
|
|
35695
|
+
const { accepted, dropped } = filterByAcGroundingMinimal(substantiated, input.story.acceptanceCriteria);
|
|
35696
|
+
const blocking = accepted.filter((f) => isBlockingSeverity(f.severity, threshold));
|
|
35697
|
+
const passed = parsed.passed && blocking.length === 0;
|
|
35698
|
+
return {
|
|
35699
|
+
...parsed,
|
|
35700
|
+
passed,
|
|
35701
|
+
findings: accepted,
|
|
35702
|
+
normalizedFindings: toReviewFindings(blocking),
|
|
35703
|
+
acDropped: dropped
|
|
35704
|
+
};
|
|
35189
35705
|
}
|
|
35190
35706
|
};
|
|
35191
35707
|
});
|
|
@@ -37226,7 +37742,7 @@ var init_greenfield_gate = __esm(() => {
|
|
|
37226
37742
|
});
|
|
37227
37743
|
// src/verification/rectification.ts
|
|
37228
37744
|
function shouldRetryRectification(state, config2) {
|
|
37229
|
-
if (state.attempt >= config2.
|
|
37745
|
+
if (state.attempt >= config2.maxAttemptsTotal) {
|
|
37230
37746
|
return false;
|
|
37231
37747
|
}
|
|
37232
37748
|
if (state.lastExitCode !== undefined && state.lastExitCode !== 0 && state.currentFailures === 0) {
|
|
@@ -37329,7 +37845,7 @@ var init_full_suite_gate = __esm(() => {
|
|
|
37329
37845
|
});
|
|
37330
37846
|
|
|
37331
37847
|
// src/operations/full-suite-rectify.ts
|
|
37332
|
-
function makeFullSuiteRectifyStrategy(story) {
|
|
37848
|
+
function makeFullSuiteRectifyStrategy(story, config2) {
|
|
37333
37849
|
return {
|
|
37334
37850
|
name: "full-suite-rectify",
|
|
37335
37851
|
appliesTo: (finding) => finding.source === "test-runner" && finding.category === "failed-test",
|
|
@@ -37339,7 +37855,7 @@ function makeFullSuiteRectifyStrategy(story) {
|
|
|
37339
37855
|
contextMarkdown: RectifierPromptBuilder.failingTestContext(findings)
|
|
37340
37856
|
}),
|
|
37341
37857
|
extractApplied: () => ({ targetFiles: [], summary: "Fixed failing tests" }),
|
|
37342
|
-
maxAttempts:
|
|
37858
|
+
maxAttempts: config2.execution.rectification.maxAttemptsPerStrategy,
|
|
37343
37859
|
coRun: "exclusive"
|
|
37344
37860
|
};
|
|
37345
37861
|
}
|
|
@@ -37380,7 +37896,7 @@ var init__finding_to_check = __esm(() => {
|
|
|
37380
37896
|
});
|
|
37381
37897
|
|
|
37382
37898
|
// src/operations/autofix-implementer-strategy.ts
|
|
37383
|
-
function makeAutofixImplementerStrategy(story) {
|
|
37899
|
+
function makeAutofixImplementerStrategy(story, config2) {
|
|
37384
37900
|
return {
|
|
37385
37901
|
name: "autofix-implementer",
|
|
37386
37902
|
appliesTo: (f) => f.fixTarget === "source" && IMPLEMENTER_SOURCES.has(f.source),
|
|
@@ -37393,7 +37909,7 @@ function makeAutofixImplementerStrategy(story) {
|
|
|
37393
37909
|
summary: output.unresolvedReason ?? "",
|
|
37394
37910
|
unresolved: output.unresolvedReason
|
|
37395
37911
|
}),
|
|
37396
|
-
maxAttempts:
|
|
37912
|
+
maxAttempts: config2.execution.rectification.maxAttemptsPerStrategy,
|
|
37397
37913
|
coRun: "co-run-sequential"
|
|
37398
37914
|
};
|
|
37399
37915
|
}
|
|
@@ -37415,7 +37931,7 @@ function makeAutofixTestWriterStrategy(story, config2) {
|
|
|
37415
37931
|
story,
|
|
37416
37932
|
blockingThreshold: config2.review?.blockingThreshold
|
|
37417
37933
|
}),
|
|
37418
|
-
maxAttempts:
|
|
37934
|
+
maxAttempts: config2.execution.rectification.maxAttemptsPerStrategy,
|
|
37419
37935
|
coRun: "co-run-sequential"
|
|
37420
37936
|
};
|
|
37421
37937
|
}
|
|
@@ -39360,30 +39876,57 @@ async function runSemanticReview(opts) {
|
|
|
39360
39876
|
durationMs: Date.now() - startTime
|
|
39361
39877
|
};
|
|
39362
39878
|
}
|
|
39363
|
-
|
|
39364
|
-
|
|
39365
|
-
|
|
39366
|
-
|
|
39367
|
-
|
|
39879
|
+
if (opResult.looksLikeFail) {
|
|
39880
|
+
logger?.warn("semantic", "LLM returned truncated JSON with passed:false \u2014 treating as failure", {
|
|
39881
|
+
storyId: story.id
|
|
39882
|
+
});
|
|
39883
|
+
recordSemanticAudit({
|
|
39884
|
+
runtime,
|
|
39885
|
+
workdir,
|
|
39886
|
+
projectDir,
|
|
39887
|
+
storyId: story.id,
|
|
39888
|
+
featureName,
|
|
39889
|
+
parsed: false,
|
|
39890
|
+
looksLikeFail: true,
|
|
39891
|
+
failOpen: false,
|
|
39892
|
+
passed: false,
|
|
39893
|
+
blockingThreshold,
|
|
39894
|
+
result: null
|
|
39895
|
+
});
|
|
39896
|
+
return {
|
|
39897
|
+
check: "semantic",
|
|
39898
|
+
success: false,
|
|
39899
|
+
command: "",
|
|
39900
|
+
exitCode: 1,
|
|
39901
|
+
output: "semantic review: LLM response truncated but indicated failure (passed:false found in partial response)",
|
|
39902
|
+
durationMs: Date.now() - startTime
|
|
39903
|
+
};
|
|
39904
|
+
}
|
|
39905
|
+
if (opResult.repromptEvent) {
|
|
39906
|
+
runtime.dispatchEvents.emitReviewReprompt({
|
|
39907
|
+
kind: "review-reprompt-on-drop",
|
|
39368
39908
|
storyId: story.id,
|
|
39369
|
-
|
|
39909
|
+
reviewer: "semantic",
|
|
39910
|
+
dropCount: opResult.repromptEvent.dropCount,
|
|
39911
|
+
repromptOutcome: opResult.repromptEvent.outcome,
|
|
39912
|
+
costUsd: opResult.repromptEvent.costUsd
|
|
39370
39913
|
});
|
|
39371
39914
|
}
|
|
39372
|
-
const sanitizedParsed = { ...parsed, findings: acGroundedFindings };
|
|
39373
39915
|
const threshold = blockingThreshold ?? "error";
|
|
39374
|
-
const
|
|
39375
|
-
const
|
|
39916
|
+
const allFindings = opResult.findings;
|
|
39917
|
+
const blockingFindings = allFindings.filter((f) => isBlockingSeverity(f.severity, threshold));
|
|
39918
|
+
const advisoryFindings = allFindings.filter((f) => !isBlockingSeverity(f.severity, threshold));
|
|
39376
39919
|
if (advisoryFindings.length > 0) {
|
|
39377
39920
|
logger?.debug("review", `Semantic review: ${advisoryFindings.length} advisory findings (below threshold '${threshold}')`, {
|
|
39378
39921
|
storyId: story.id,
|
|
39379
39922
|
findings: advisoryFindings.map((f) => ({ severity: f.severity, file: f.file, issue: f.issue }))
|
|
39380
39923
|
});
|
|
39381
39924
|
}
|
|
39382
|
-
|
|
39383
|
-
|
|
39925
|
+
const durationMs = Date.now() - startTime;
|
|
39926
|
+
if (blockingFindings.length > 0) {
|
|
39384
39927
|
logger?.warn("review", `Semantic review failed: ${blockingFindings.length} blocking findings`, {
|
|
39385
39928
|
storyId: story.id,
|
|
39386
|
-
durationMs
|
|
39929
|
+
durationMs
|
|
39387
39930
|
});
|
|
39388
39931
|
logger?.debug("review", "Semantic review findings", {
|
|
39389
39932
|
storyId: story.id,
|
|
@@ -39410,7 +39953,7 @@ ${formatFindings2(blockingFindings)}`;
|
|
|
39410
39953
|
blockingThreshold: threshold,
|
|
39411
39954
|
result: {
|
|
39412
39955
|
passed: false,
|
|
39413
|
-
findings: llmFindingsToReviewFindings(
|
|
39956
|
+
findings: llmFindingsToReviewFindings(allFindings, { source: "semantic-review" })
|
|
39414
39957
|
},
|
|
39415
39958
|
advisoryFindings: advisoryFindings.length > 0 ? llmFindingsToReviewFindings(advisoryFindings, { source: "semantic-review" }) : undefined
|
|
39416
39959
|
});
|
|
@@ -39420,53 +39963,16 @@ ${formatFindings2(blockingFindings)}`;
|
|
|
39420
39963
|
command: "",
|
|
39421
39964
|
exitCode: 1,
|
|
39422
39965
|
output,
|
|
39423
|
-
durationMs
|
|
39966
|
+
durationMs,
|
|
39424
39967
|
findings: toReviewFindings(blockingFindings),
|
|
39425
39968
|
advisoryFindings: advisoryFindings.length > 0 ? toReviewFindings(advisoryFindings) : undefined,
|
|
39426
39969
|
cost: llmCost
|
|
39427
39970
|
};
|
|
39428
39971
|
}
|
|
39429
|
-
if (!
|
|
39430
|
-
|
|
39431
|
-
const durationMs3 = Date.now() - startTime;
|
|
39432
|
-
logger?.warn("review", "Semantic review fail-closed: blocking findings dropped (acIndex invalid)", {
|
|
39433
|
-
storyId: story.id,
|
|
39434
|
-
durationMs: durationMs3,
|
|
39435
|
-
droppedCount: acDropped.length,
|
|
39436
|
-
dropCodes: acDropped.map((d) => d.code)
|
|
39437
|
-
});
|
|
39438
|
-
const dropSummary = acDropped.map((d, i) => `${i + 1}. [${d.code}] ${d.finding.file ?? "<unknown>"}: ${d.finding.issue}`).join(`
|
|
39439
|
-
`);
|
|
39440
|
-
recordSemanticAudit({
|
|
39441
|
-
runtime,
|
|
39442
|
-
workdir,
|
|
39443
|
-
projectDir,
|
|
39444
|
-
storyId: story.id,
|
|
39445
|
-
featureName,
|
|
39446
|
-
parsed: true,
|
|
39447
|
-
failOpen: false,
|
|
39448
|
-
passed: false,
|
|
39449
|
-
blockingThreshold: threshold,
|
|
39450
|
-
result: { passed: false, findings: [] },
|
|
39451
|
-
advisoryFindings: advisoryFindings.length > 0 ? llmFindingsToReviewFindings(advisoryFindings, { source: "semantic-review" }) : undefined
|
|
39452
|
-
});
|
|
39453
|
-
return {
|
|
39454
|
-
check: "semantic",
|
|
39455
|
-
success: false,
|
|
39456
|
-
command: "",
|
|
39457
|
-
exitCode: 1,
|
|
39458
|
-
output: `Semantic review failed: ${acDropped.length} blocking finding(s) dropped \u2014 acIndex was missing or out of range. The model emitted "passed: false" without valid AC attribution. Either re-classify these as "info" or ensure each error finding includes a valid acIndex. Drops:
|
|
39459
|
-
|
|
39460
|
-
${dropSummary}`,
|
|
39461
|
-
durationMs: durationMs3,
|
|
39462
|
-
advisoryFindings: advisoryFindings.length > 0 ? toReviewFindings(advisoryFindings) : undefined,
|
|
39463
|
-
cost: llmCost
|
|
39464
|
-
};
|
|
39465
|
-
}
|
|
39466
|
-
const durationMs2 = Date.now() - startTime;
|
|
39467
|
-
logger?.info("review", "Semantic review passed (all findings below blocking threshold)", {
|
|
39972
|
+
if (!opResult.passed && allFindings.length === 0) {
|
|
39973
|
+
logger?.warn("review", "Semantic review fail-closed: blocking findings dropped (acIndex invalid)", {
|
|
39468
39974
|
storyId: story.id,
|
|
39469
|
-
durationMs
|
|
39975
|
+
durationMs
|
|
39470
39976
|
});
|
|
39471
39977
|
recordSemanticAudit({
|
|
39472
39978
|
runtime,
|
|
@@ -39476,29 +39982,23 @@ ${dropSummary}`,
|
|
|
39476
39982
|
featureName,
|
|
39477
39983
|
parsed: true,
|
|
39478
39984
|
failOpen: false,
|
|
39479
|
-
passed:
|
|
39985
|
+
passed: false,
|
|
39480
39986
|
blockingThreshold: threshold,
|
|
39481
|
-
result: {
|
|
39482
|
-
passed: true,
|
|
39483
|
-
findings: llmFindingsToReviewFindings(sanitizedParsed.findings, { source: "semantic-review" })
|
|
39484
|
-
},
|
|
39987
|
+
result: { passed: false, findings: [] },
|
|
39485
39988
|
advisoryFindings: advisoryFindings.length > 0 ? llmFindingsToReviewFindings(advisoryFindings, { source: "semantic-review" }) : undefined
|
|
39486
39989
|
});
|
|
39487
39990
|
return {
|
|
39488
39991
|
check: "semantic",
|
|
39489
|
-
success:
|
|
39992
|
+
success: false,
|
|
39490
39993
|
command: "",
|
|
39491
|
-
exitCode:
|
|
39492
|
-
output:
|
|
39493
|
-
durationMs
|
|
39994
|
+
exitCode: 1,
|
|
39995
|
+
output: 'Semantic review failed: blocking finding(s) were dropped \u2014 acIndex was missing or out of range. The model emitted "passed: false" without valid AC attribution.',
|
|
39996
|
+
durationMs,
|
|
39494
39997
|
advisoryFindings: advisoryFindings.length > 0 ? toReviewFindings(advisoryFindings) : undefined,
|
|
39495
39998
|
cost: llmCost
|
|
39496
39999
|
};
|
|
39497
40000
|
}
|
|
39498
|
-
|
|
39499
|
-
if (sanitizedParsed.passed) {
|
|
39500
|
-
logger?.info("review", "Semantic review passed", { storyId: story.id, durationMs });
|
|
39501
|
-
}
|
|
40001
|
+
logger?.info("review", "Semantic review passed", { storyId: story.id, durationMs });
|
|
39502
40002
|
recordSemanticAudit({
|
|
39503
40003
|
runtime,
|
|
39504
40004
|
workdir,
|
|
@@ -39507,20 +40007,20 @@ ${dropSummary}`,
|
|
|
39507
40007
|
featureName,
|
|
39508
40008
|
parsed: true,
|
|
39509
40009
|
failOpen: false,
|
|
39510
|
-
passed:
|
|
40010
|
+
passed: true,
|
|
39511
40011
|
blockingThreshold: threshold,
|
|
39512
40012
|
result: {
|
|
39513
|
-
passed:
|
|
39514
|
-
findings: llmFindingsToReviewFindings(
|
|
40013
|
+
passed: true,
|
|
40014
|
+
findings: llmFindingsToReviewFindings(allFindings, { source: "semantic-review" })
|
|
39515
40015
|
},
|
|
39516
40016
|
advisoryFindings: advisoryFindings.length > 0 ? llmFindingsToReviewFindings(advisoryFindings, { source: "semantic-review" }) : undefined
|
|
39517
40017
|
});
|
|
39518
40018
|
return {
|
|
39519
40019
|
check: "semantic",
|
|
39520
|
-
success:
|
|
40020
|
+
success: true,
|
|
39521
40021
|
command: "",
|
|
39522
|
-
exitCode:
|
|
39523
|
-
output:
|
|
40022
|
+
exitCode: 0,
|
|
40023
|
+
output: allFindings.length === 0 ? "Semantic review passed" : "Semantic review passed (all findings were advisory \u2014 below blocking threshold)",
|
|
39524
40024
|
durationMs,
|
|
39525
40025
|
advisoryFindings: advisoryFindings.length > 0 ? toReviewFindings(advisoryFindings) : undefined,
|
|
39526
40026
|
cost: llmCost
|
|
@@ -39537,12 +40037,10 @@ var init_semantic = __esm(() => {
|
|
|
39537
40037
|
init_semantic_review();
|
|
39538
40038
|
init_prompts();
|
|
39539
40039
|
init_test_runners();
|
|
39540
|
-
init_ac_quote_validator();
|
|
39541
40040
|
init_diff_utils();
|
|
39542
40041
|
init_finding_projection();
|
|
39543
40042
|
init_review_audit();
|
|
39544
40043
|
init_semantic_debate();
|
|
39545
|
-
init_semantic_evidence();
|
|
39546
40044
|
init_semantic_helpers();
|
|
39547
40045
|
_semanticDeps = {
|
|
39548
40046
|
createDebateRunner: (opts) => new DebateRunner(opts),
|
|
@@ -39815,6 +40313,18 @@ async function runReview(opts) {
|
|
|
39815
40313
|
naxIgnoreIndex
|
|
39816
40314
|
}) : normalizeMechanicalFindings(checkName, await runCheck(checkName, command, workdir, storyId, env2), workdir);
|
|
39817
40315
|
checks3.push(result);
|
|
40316
|
+
if (result.success) {
|
|
40317
|
+
logger?.info("review", `${checkName} passed`, {
|
|
40318
|
+
storyId,
|
|
40319
|
+
durationMs: result.durationMs
|
|
40320
|
+
});
|
|
40321
|
+
} else {
|
|
40322
|
+
logger?.warn("review", `${checkName} failed`, {
|
|
40323
|
+
storyId,
|
|
40324
|
+
exitCode: result.exitCode,
|
|
40325
|
+
durationMs: result.durationMs
|
|
40326
|
+
});
|
|
40327
|
+
}
|
|
39818
40328
|
if (!result.success && !firstFailure) {
|
|
39819
40329
|
firstFailure = `${checkName} failed (exit code ${result.exitCode})`;
|
|
39820
40330
|
}
|
|
@@ -41869,6 +42379,38 @@ async function callOp(ctx, op, input) {
|
|
|
41869
42379
|
const rawOutput = outcome.result.output;
|
|
41870
42380
|
const totalCost = outcome.result.estimatedCostUsd ?? 0;
|
|
41871
42381
|
if (!rawOutput) {
|
|
42382
|
+
if (maxRetriesExceeded) {
|
|
42383
|
+
getSafeLogger()?.error("callop", "Op retry budget exhausted (empty output)", {
|
|
42384
|
+
storyId: ctx.storyId,
|
|
42385
|
+
opName: op.name,
|
|
42386
|
+
site: "run",
|
|
42387
|
+
totalAttempts: MAX_COMPLETE_RETRY_ATTEMPTS + 1
|
|
42388
|
+
});
|
|
42389
|
+
throw new NaxError(`callOp[${op.name}]: CALL_OP_MAX_RETRIES \u2014 exceeded MAX_COMPLETE_RETRY_ATTEMPTS (${MAX_COMPLETE_RETRY_ATTEMPTS})`, "CALL_OP_MAX_RETRIES", { stage: op.stage, storyId: ctx.storyId });
|
|
42390
|
+
}
|
|
42391
|
+
if (retryFallback !== undefined) {
|
|
42392
|
+
if (typeof retryFallback !== "object" || retryFallback === null) {
|
|
42393
|
+
throw new NaxError(`callOp[${op.name}]: exhaustedFallback returned a non-object (${typeof retryFallback}); fallback must be a plain object`, "CALL_OP_INVALID_FALLBACK", { stage: op.stage, storyId: ctx.storyId });
|
|
42394
|
+
}
|
|
42395
|
+
getSafeLogger()?.warn("callop", "Returning exhaustedFallback on empty output", {
|
|
42396
|
+
storyId: ctx.storyId,
|
|
42397
|
+
opName: op.name,
|
|
42398
|
+
agentName: dispatchAgent
|
|
42399
|
+
});
|
|
42400
|
+
return { ...retryFallback, estimatedCostUsd: totalCost };
|
|
42401
|
+
}
|
|
42402
|
+
if (op.recover) {
|
|
42403
|
+
const verifyCtx = makeVerifyCtx(buildCtx);
|
|
42404
|
+
const recovered = await op.recover(input, verifyCtx);
|
|
42405
|
+
if (recovered !== null) {
|
|
42406
|
+
getSafeLogger()?.warn("callop", "Recovered from empty output via op.recover", {
|
|
42407
|
+
storyId: ctx.storyId,
|
|
42408
|
+
opName: op.name,
|
|
42409
|
+
agentName: dispatchAgent
|
|
42410
|
+
});
|
|
42411
|
+
return recovered;
|
|
42412
|
+
}
|
|
42413
|
+
}
|
|
41872
42414
|
throw new NaxError(`callOp[${op.name}]: agent returned no output`, "CALL_OP_NO_OUTPUT", {
|
|
41873
42415
|
stage: op.stage,
|
|
41874
42416
|
storyId: ctx.storyId,
|
|
@@ -52017,12 +52559,16 @@ function phasePassed(opName, output) {
|
|
|
52017
52559
|
});
|
|
52018
52560
|
return true;
|
|
52019
52561
|
}
|
|
52562
|
+
function isFinding(value) {
|
|
52563
|
+
return typeof value === "object" && value !== null && typeof value.source === "string" && value.source.length > 0;
|
|
52564
|
+
}
|
|
52020
52565
|
function extractPhaseFindings(output) {
|
|
52021
52566
|
if (output === null || output === undefined || typeof output !== "object") {
|
|
52022
52567
|
return [];
|
|
52023
52568
|
}
|
|
52024
52569
|
const record2 = output;
|
|
52025
|
-
const
|
|
52570
|
+
const rawArray = Array.isArray(record2.normalizedFindings) ? record2.normalizedFindings : Array.isArray(record2.findings) ? record2.findings : [];
|
|
52571
|
+
const findings = rawArray.filter(isFinding);
|
|
52026
52572
|
const success2 = "success" in record2 ? record2.success === true : ("passed" in record2) ? record2.passed === true : findings.length === 0;
|
|
52027
52573
|
return success2 ? [] : findings;
|
|
52028
52574
|
}
|
|
@@ -52085,11 +52631,24 @@ function toReviewDecisionPayload(opName, output) {
|
|
|
52085
52631
|
if (typeof record2.passed !== "boolean" || !Array.isArray(record2.findings)) {
|
|
52086
52632
|
return null;
|
|
52087
52633
|
}
|
|
52634
|
+
const acDropped = Array.isArray(record2.acDropped) ? record2.acDropped.map((d) => {
|
|
52635
|
+
const entry = d ?? {};
|
|
52636
|
+
const finding = entry.finding ?? {};
|
|
52637
|
+
return {
|
|
52638
|
+
code: typeof entry.code === "string" ? entry.code : undefined,
|
|
52639
|
+
severity: typeof finding.severity === "string" ? finding.severity : undefined,
|
|
52640
|
+
file: typeof finding.file === "string" ? finding.file : undefined,
|
|
52641
|
+
line: typeof finding.line === "number" ? finding.line : undefined,
|
|
52642
|
+
issue: typeof finding.issue === "string" ? finding.issue : undefined,
|
|
52643
|
+
acIndex: typeof finding.acIndex === "number" ? finding.acIndex : undefined
|
|
52644
|
+
};
|
|
52645
|
+
}) : undefined;
|
|
52088
52646
|
return {
|
|
52089
52647
|
reviewer,
|
|
52090
52648
|
parsed: true,
|
|
52091
52649
|
passed: record2.passed,
|
|
52092
|
-
result: { passed: record2.passed, findings: record2.findings }
|
|
52650
|
+
result: { passed: record2.passed, findings: record2.findings },
|
|
52651
|
+
acDropped
|
|
52093
52652
|
};
|
|
52094
52653
|
}
|
|
52095
52654
|
function emitReviewDecision(ctx, opName, output) {
|
|
@@ -52138,12 +52697,38 @@ function logUnifiedReviewPhaseResult(storyId, opName, output) {
|
|
|
52138
52697
|
const title = payload.reviewer === "semantic" ? "Semantic review" : "Adversarial review";
|
|
52139
52698
|
if (payload.passed) {
|
|
52140
52699
|
logger?.info("review", `${title} passed`, { storyId });
|
|
52141
|
-
|
|
52142
|
-
|
|
52700
|
+
return;
|
|
52701
|
+
}
|
|
52702
|
+
if (findingsCount === 0) {
|
|
52703
|
+
const dropped = payload.acDropped ?? [];
|
|
52704
|
+
const droppedSummary = dropped.slice(0, 5);
|
|
52705
|
+
logger?.warn("review", `${title} failed: 0 findings \u2014 ${dropped.length > 0 ? `${dropped.length} blocking finding(s) dropped as ungrounded by AC-grounding filter` : "model emitted passed:false but produced no findings (likely empty output)"}`, {
|
|
52143
52706
|
storyId,
|
|
52144
|
-
findingsCount
|
|
52707
|
+
findingsCount,
|
|
52708
|
+
reason: dropped.length > 0 ? "ac-grounding-drop" : "passed-false-no-findings",
|
|
52709
|
+
droppedCount: dropped.length || undefined,
|
|
52710
|
+
droppedFindings: droppedSummary.length > 0 ? droppedSummary : undefined,
|
|
52711
|
+
droppedTruncated: dropped.length > droppedSummary.length || undefined
|
|
52145
52712
|
});
|
|
52713
|
+
return;
|
|
52146
52714
|
}
|
|
52715
|
+
const findingsSummary = payload.result.findings.slice(0, 5).map((f) => {
|
|
52716
|
+
const r = f ?? {};
|
|
52717
|
+
return {
|
|
52718
|
+
severity: typeof r.severity === "string" ? r.severity : undefined,
|
|
52719
|
+
file: typeof r.file === "string" ? r.file : undefined,
|
|
52720
|
+
line: typeof r.line === "number" ? r.line : undefined,
|
|
52721
|
+
rule: typeof r.rule === "string" ? r.rule : undefined,
|
|
52722
|
+
issue: typeof r.issue === "string" ? r.issue : typeof r.message === "string" ? r.message : undefined,
|
|
52723
|
+
acIndex: typeof r.acIndex === "number" ? r.acIndex : undefined
|
|
52724
|
+
};
|
|
52725
|
+
});
|
|
52726
|
+
logger?.warn("review", `${title} failed: ${findingsCount} findings`, {
|
|
52727
|
+
storyId,
|
|
52728
|
+
findingsCount,
|
|
52729
|
+
findings: findingsSummary,
|
|
52730
|
+
truncated: findingsCount > findingsSummary.length
|
|
52731
|
+
});
|
|
52147
52732
|
}
|
|
52148
52733
|
async function runPhase(ctx, slot, phaseCosts, phaseOutputs, isThreeSession = false) {
|
|
52149
52734
|
const logger = getSafeLogger();
|
|
@@ -52515,10 +53100,10 @@ function buildPlanForStrategy(ctx, story, config2, testStrategy, inputs) {
|
|
|
52515
53100
|
strategies.push(makeMechanicalFormatFixStrategy());
|
|
52516
53101
|
}
|
|
52517
53102
|
if (isThreeSession && inputs.fullSuiteGate) {
|
|
52518
|
-
strategies.push(makeFullSuiteRectifyStrategy(story));
|
|
53103
|
+
strategies.push(makeFullSuiteRectifyStrategy(story, config2));
|
|
52519
53104
|
}
|
|
52520
53105
|
if (config2.quality.autofix?.enabled !== false) {
|
|
52521
|
-
strategies.push(makeAutofixImplementerStrategy(story));
|
|
53106
|
+
strategies.push(makeAutofixImplementerStrategy(story, config2));
|
|
52522
53107
|
strategies.push(makeAutofixTestWriterStrategy(story, config2));
|
|
52523
53108
|
}
|
|
52524
53109
|
const rectOpts = {
|
|
@@ -52650,6 +53235,7 @@ async function assemblePlanInputsFromCtx(ctx) {
|
|
|
52650
53235
|
blockingThreshold: ctx.config.review.blockingThreshold
|
|
52651
53236
|
} : undefined;
|
|
52652
53237
|
const adversarialReviewInput = ctx.config.review?.enabled === true && ctx.config.review.checks?.includes("adversarial") && ctx.config.review.adversarial ? {
|
|
53238
|
+
workdir: ctx.workdir,
|
|
52653
53239
|
story,
|
|
52654
53240
|
adversarialConfig: ctx.config.review.adversarial,
|
|
52655
53241
|
mode: ctx.config.review.adversarial.diffMode,
|
|
@@ -52658,9 +53244,9 @@ async function assemblePlanInputsFromCtx(ctx) {
|
|
|
52658
53244
|
blockingThreshold: ctx.config.review.blockingThreshold
|
|
52659
53245
|
} : undefined;
|
|
52660
53246
|
const rectificationInput = ctx.config.execution?.rectification?.enabled === true ? {
|
|
52661
|
-
maxAttempts: ctx.config.execution.rectification.
|
|
53247
|
+
maxAttempts: ctx.config.execution.rectification.maxAttemptsTotal,
|
|
52662
53248
|
strategies: [],
|
|
52663
|
-
abortOnIncreasingFailures: ctx.config.execution.rectification.abortOnIncreasingFailures
|
|
53249
|
+
abortOnIncreasingFailures: ctx.config.execution.rectification.abortOnIncreasingFailures
|
|
52664
53250
|
} : undefined;
|
|
52665
53251
|
return {
|
|
52666
53252
|
story,
|
|
@@ -53124,10 +53710,29 @@ Category: ${failureCategory ?? "unknown"}`,
|
|
|
53124
53710
|
}
|
|
53125
53711
|
}
|
|
53126
53712
|
if (!planResult.success) {
|
|
53713
|
+
const failedPhases = {};
|
|
53714
|
+
for (const [name, output] of Object.entries(planResult.phaseOutputs)) {
|
|
53715
|
+
if (!output || typeof output !== "object")
|
|
53716
|
+
continue;
|
|
53717
|
+
const r = output;
|
|
53718
|
+
const passed = typeof r.passed === "boolean" ? r.passed : undefined;
|
|
53719
|
+
const success2 = typeof r.success === "boolean" ? r.success : undefined;
|
|
53720
|
+
const explicitFail = passed === false || success2 === false;
|
|
53721
|
+
if (!explicitFail)
|
|
53722
|
+
continue;
|
|
53723
|
+
const findings = Array.isArray(r.findings) ? r.findings.length : undefined;
|
|
53724
|
+
failedPhases[name] = { passed, success: success2, findingsCount: findings };
|
|
53725
|
+
}
|
|
53726
|
+
const stderrTail = (agentResult.stderr ?? "").slice(-500);
|
|
53727
|
+
const outputTail = (agentResult.output ?? "").slice(-500);
|
|
53127
53728
|
logger.error("execution", "Agent session failed", {
|
|
53128
53729
|
storyId: ctx.story.id,
|
|
53129
53730
|
exitCode: agentResult.exitCode,
|
|
53130
|
-
rateLimited: agentResult.rateLimited
|
|
53731
|
+
rateLimited: agentResult.rateLimited,
|
|
53732
|
+
failureCategory: failureCategory ?? "unknown",
|
|
53733
|
+
failedPhases: Object.keys(failedPhases).length > 0 ? failedPhases : undefined,
|
|
53734
|
+
stderrTail: stderrTail || undefined,
|
|
53735
|
+
outputTail: outputTail || undefined
|
|
53131
53736
|
});
|
|
53132
53737
|
if (agentResult.rateLimited) {
|
|
53133
53738
|
logger.warn("execution", "Rate limited \u2014 will retry", { storyId: ctx.story.id });
|
|
@@ -53851,6 +54456,11 @@ class RegressionStrategy {
|
|
|
53851
54456
|
const durationMs = Date.now() - start;
|
|
53852
54457
|
if (result.success) {
|
|
53853
54458
|
const parsed2 = result.output ? parseTestOutput(result.output) : { passed: 0, failed: 0, failures: [] };
|
|
54459
|
+
logger?.info("verify[regression]", "Full-suite regression gate passed", {
|
|
54460
|
+
storyId: ctx.storyId,
|
|
54461
|
+
passCount: parsed2.passed,
|
|
54462
|
+
durationMs
|
|
54463
|
+
});
|
|
53854
54464
|
return makePassResult(ctx.storyId, "regression", {
|
|
53855
54465
|
rawOutput: result.output,
|
|
53856
54466
|
passCount: parsed2.passed,
|
|
@@ -53864,9 +54474,19 @@ class RegressionStrategy {
|
|
|
53864
54474
|
return makePassResult(ctx.storyId, "regression", { durationMs });
|
|
53865
54475
|
}
|
|
53866
54476
|
if (result.status === "TIMEOUT") {
|
|
54477
|
+
logger?.warn("verify[regression]", "Full-suite regression gate timed out", {
|
|
54478
|
+
storyId: ctx.storyId,
|
|
54479
|
+
durationMs
|
|
54480
|
+
});
|
|
53867
54481
|
return makeFailResult(ctx.storyId, "regression", "TIMEOUT", { rawOutput: result.output, durationMs });
|
|
53868
54482
|
}
|
|
53869
54483
|
const parsed = result.output ? parseTestOutput(result.output) : { passed: 0, failed: 0, failures: [] };
|
|
54484
|
+
logger?.warn("verify[regression]", "Full-suite regression gate failed", {
|
|
54485
|
+
storyId: ctx.storyId,
|
|
54486
|
+
passCount: parsed.passed,
|
|
54487
|
+
failCount: parsed.failed,
|
|
54488
|
+
durationMs
|
|
54489
|
+
});
|
|
53870
54490
|
return makeFailResult(ctx.storyId, "regression", "TEST_FAILURE", {
|
|
53871
54491
|
rawOutput: result.output,
|
|
53872
54492
|
passCount: parsed.passed,
|
|
@@ -56943,7 +57563,7 @@ var package_default;
|
|
|
56943
57563
|
var init_package = __esm(() => {
|
|
56944
57564
|
package_default = {
|
|
56945
57565
|
name: "@nathapp/nax",
|
|
56946
|
-
version: "0.67.
|
|
57566
|
+
version: "0.67.11",
|
|
56947
57567
|
description: "AI Coding Agent Orchestrator \u2014 loops until done",
|
|
56948
57568
|
type: "module",
|
|
56949
57569
|
bin: {
|
|
@@ -57038,8 +57658,8 @@ var init_version = __esm(() => {
|
|
|
57038
57658
|
NAX_VERSION = package_default.version;
|
|
57039
57659
|
NAX_COMMIT = (() => {
|
|
57040
57660
|
try {
|
|
57041
|
-
if (/^[0-9a-f]{6,10}$/.test("
|
|
57042
|
-
return "
|
|
57661
|
+
if (/^[0-9a-f]{6,10}$/.test("0db5c72e"))
|
|
57662
|
+
return "0db5c72e";
|
|
57043
57663
|
} catch {}
|
|
57044
57664
|
try {
|
|
57045
57665
|
const result = Bun.spawnSync(["git", "rev-parse", "--short", "HEAD"], {
|
|
@@ -58013,7 +58633,7 @@ async function runDeferredRegression(options) {
|
|
|
58013
58633
|
}
|
|
58014
58634
|
const testCommand = config2.quality.commands.test ?? "bun test";
|
|
58015
58635
|
const timeoutSeconds = config2.execution.regressionGate?.timeoutSeconds ?? 120;
|
|
58016
|
-
const maxRectificationAttempts = config2.execution.
|
|
58636
|
+
const maxRectificationAttempts = config2.execution.rectification.maxAttemptsTotal;
|
|
58017
58637
|
const acceptOnTimeout = config2.execution.regressionGate?.acceptOnTimeout ?? true;
|
|
58018
58638
|
const verifyOpts = {
|
|
58019
58639
|
workdir,
|
|
@@ -58177,7 +58797,7 @@ async function runDeferredRegression(options) {
|
|
|
58177
58797
|
const cycle = {
|
|
58178
58798
|
findings: initialFindings,
|
|
58179
58799
|
iterations: [],
|
|
58180
|
-
strategies: [makeFullSuiteRectifyStrategy(story)],
|
|
58800
|
+
strategies: [makeFullSuiteRectifyStrategy(story, config2)],
|
|
58181
58801
|
config: { maxAttemptsTotal: maxRectificationAttempts, validatorRetries: 1 },
|
|
58182
58802
|
validate: async (_cycleCtx, _opts) => {
|
|
58183
58803
|
const verification = await _regressionDeps.runVerification(verifyOpts);
|
|
@@ -60223,7 +60843,7 @@ async function handlePipelineFailure(ctx, pipelineResult) {
|
|
|
60223
60843
|
feature: ctx.feature,
|
|
60224
60844
|
attempts: ctx.story.attempts
|
|
60225
60845
|
});
|
|
60226
|
-
if (ctx.story.attempts !== undefined && ctx.story.attempts >= ctx.config.execution.rectification.
|
|
60846
|
+
if (ctx.story.attempts !== undefined && ctx.story.attempts >= ctx.config.execution.rectification.maxAttemptsTotal) {
|
|
60227
60847
|
await pipelineEventBus.emitAsync({
|
|
60228
60848
|
type: "human-review:requested",
|
|
60229
60849
|
storyId: ctx.story.id,
|
|
@@ -60506,7 +61126,7 @@ function selectNextStories(prd, config2, batchPlan, currentBatchIndex, lastStory
|
|
|
60506
61126
|
nextBatchIndex: currentBatchIndex + 1
|
|
60507
61127
|
};
|
|
60508
61128
|
}
|
|
60509
|
-
const story = getNextStory(prd, lastStoryId, config2.execution.rectification?.
|
|
61129
|
+
const story = getNextStory(prd, lastStoryId, config2.execution.rectification?.maxAttemptsTotal ?? 12);
|
|
60510
61130
|
if (!story)
|
|
60511
61131
|
return null;
|
|
60512
61132
|
return {
|
|
@@ -95426,15 +96046,16 @@ var FIELD_DESCRIPTIONS = {
|
|
|
95426
96046
|
"execution.contextProviderTokenBudget": "Token budget for plugin context providers",
|
|
95427
96047
|
"execution.lintCommand": "Lint command override (null=disabled, undefined=auto-detect)",
|
|
95428
96048
|
"execution.typecheckCommand": "Typecheck command override (null=disabled, undefined=auto-detect)",
|
|
95429
|
-
"execution.rectification": "
|
|
96049
|
+
"execution.rectification": "Unified fix-cycle settings \u2014 shared by story-orchestrator (semantic + adversarial + mechanical) and post-run regression cycles",
|
|
95430
96050
|
"execution.rectification.enabled": "Enable rectification loop",
|
|
95431
|
-
"execution.rectification.
|
|
96051
|
+
"execution.rectification.maxAttemptsTotal": "Total iteration cap for the unified fix cycle (default: 12). Per-strategy caps are the granular bound.",
|
|
96052
|
+
"execution.rectification.maxAttemptsPerStrategy": "Default per-strategy cap for LLM-driven strategies \u2014 autofix-implementer / autofix-test-writer / full-suite-rectify (default: 3). Mechanical strategies stay at 1.",
|
|
95432
96053
|
"execution.rectification.fullSuiteTimeoutSeconds": "Timeout for full test suite run in seconds",
|
|
95433
96054
|
"execution.rectification.maxFailureSummaryChars": "Max characters in failure summary",
|
|
95434
96055
|
"execution.rectification.abortOnIncreasingFailures": "Abort if failure count increases",
|
|
95435
|
-
"execution.rectification.escalateOnExhaustion": "Enable model tier escalation when
|
|
95436
|
-
"execution.rectification.rethinkAtAttempt": "Attempt number at which 'rethink your approach' language is injected into the prompt (default: 2
|
|
95437
|
-
"execution.rectification.urgencyAtAttempt": "Attempt number at which 'final chance before escalation' urgency is added
|
|
96056
|
+
"execution.rectification.escalateOnExhaustion": "Enable model tier escalation when attempts are exhausted with remaining failures",
|
|
96057
|
+
"execution.rectification.rethinkAtAttempt": "Attempt number at which 'rethink your approach' language is injected into the prompt (default: 2)",
|
|
96058
|
+
"execution.rectification.urgencyAtAttempt": "Attempt number at which 'final chance before escalation' urgency is added (default: 3)",
|
|
95438
96059
|
"execution.regressionGate": "Regression gate settings (full suite after scoped tests)",
|
|
95439
96060
|
"execution.regressionGate.enabled": "Enable full-suite regression gate",
|
|
95440
96061
|
"execution.regressionGate.timeoutSeconds": "Timeout for regression run in seconds",
|