@nathapp/nax 0.67.9 → 0.67.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/nax.js +563 -438
- package/package.json +1 -1
package/dist/nax.js
CHANGED
|
@@ -17224,7 +17224,11 @@ var init_schemas_review = __esm(() => {
|
|
|
17224
17224
|
timeoutMs: exports_external.number().int().positive().default(600000),
|
|
17225
17225
|
excludePatterns: exports_external.array(exports_external.string()).optional(),
|
|
17226
17226
|
parallel: exports_external.boolean().default(false),
|
|
17227
|
-
maxConcurrentSessions: exports_external.number().int().min(1).max(4).default(2)
|
|
17227
|
+
maxConcurrentSessions: exports_external.number().int().min(1).max(4).default(2),
|
|
17228
|
+
substantiation: exports_external.object({
|
|
17229
|
+
requote: exports_external.boolean().default(true),
|
|
17230
|
+
maxRequotes: exports_external.number().int().min(0).default(5)
|
|
17231
|
+
}).optional()
|
|
17228
17232
|
});
|
|
17229
17233
|
ReviewConfigSchema = exports_external.object({
|
|
17230
17234
|
enabled: exports_external.boolean(),
|
|
@@ -17450,6 +17454,18 @@ var init_schemas3 = __esm(() => {
|
|
|
17450
17454
|
":!.nax/",
|
|
17451
17455
|
":!.nax-pids"
|
|
17452
17456
|
]
|
|
17457
|
+
},
|
|
17458
|
+
adversarial: {
|
|
17459
|
+
model: "balanced",
|
|
17460
|
+
diffMode: "ref",
|
|
17461
|
+
rules: [],
|
|
17462
|
+
timeoutMs: 600000,
|
|
17463
|
+
parallel: false,
|
|
17464
|
+
maxConcurrentSessions: 2,
|
|
17465
|
+
substantiation: {
|
|
17466
|
+
requote: true,
|
|
17467
|
+
maxRequotes: 5
|
|
17468
|
+
}
|
|
17453
17469
|
}
|
|
17454
17470
|
}),
|
|
17455
17471
|
plan: PlanConfigSchema.default({
|
|
@@ -21922,7 +21938,8 @@ function makeParseRetryStrategy(opts) {
|
|
|
21922
21938
|
if (ctx.site === "complete") {
|
|
21923
21939
|
getSafeLogger()?.warn(opts.reviewerKind, "makeParseRetryStrategy: lastOutput is not populated on complete-kind ops \u2014 retry will never fire", { storyId: ctx.storyId });
|
|
21924
21940
|
}
|
|
21925
|
-
|
|
21941
|
+
const fallback = opts.exhaustedFallback ? opts.exhaustedFallback("") : undefined;
|
|
21942
|
+
return { retry: false, ...fallback !== undefined ? { fallback } : {} };
|
|
21926
21943
|
}
|
|
21927
21944
|
let parsed;
|
|
21928
21945
|
try {
|
|
@@ -30626,6 +30643,26 @@ ${config2.rules.map((r) => `- ${r}`).join(`
|
|
|
30626
30643
|
diffBlock
|
|
30627
30644
|
].join("");
|
|
30628
30645
|
}
|
|
30646
|
+
static requoteVerbatim(opts) {
|
|
30647
|
+
const file3 = opts.finding.verifiedBy?.file ?? opts.finding.file;
|
|
30648
|
+
const line = opts.finding.verifiedBy?.line ?? opts.finding.line;
|
|
30649
|
+
return `Your previous verifiedBy.observed value did not match the referenced file on disk.
|
|
30650
|
+
|
|
30651
|
+
You MUST use your file-reading tool to open ${file3} and copy the actual bytes around line ${line}. Do NOT quote from memory or from the prior conversation \u2014 the previous quote was wrong precisely because it was not read from disk. If you reply without a file-read tool call, the quote will be rejected.
|
|
30652
|
+
|
|
30653
|
+
Return ONLY this JSON object:
|
|
30654
|
+
{"file":"${file3}","line":${line},"observed":"exact 1-3 line quote"}
|
|
30655
|
+
|
|
30656
|
+
Finding issue: ${opts.finding.issue}
|
|
30657
|
+
Referenced file: ${file3}
|
|
30658
|
+
Referenced line: ${line}
|
|
30659
|
+
|
|
30660
|
+
Rules:
|
|
30661
|
+
- Read ${file3} with your file tool first. Then copy observed verbatim from the read result.
|
|
30662
|
+
- observed must be a 1-3 line excerpt that proves the claim, taken from at or near line ${line}.
|
|
30663
|
+
- If after reading the file you cannot find anything that proves the claim, set observed to "".
|
|
30664
|
+
- Do not return a full review. Do not include markdown fences or explanation.`;
|
|
30665
|
+
}
|
|
30629
30666
|
}
|
|
30630
30667
|
var ADVERSARIAL_ROLE = `You are an adversarial code reviewer with full access to the repository.
|
|
30631
30668
|
|
|
@@ -31249,8 +31286,335 @@ var init_adversarial_helpers = __esm(() => {
|
|
|
31249
31286
|
init_severity();
|
|
31250
31287
|
});
|
|
31251
31288
|
|
|
31289
|
+
// src/review/semantic-helpers.ts
|
|
31290
|
+
/**
 * Check that a parsed LLM reply has the `{ passed, findings }` shape.
 *
 * @param {unknown} parsed - Value produced by the JSON parser.
 * @returns {{ passed: boolean, findings: Array }|null} A new object holding
 *   only `passed` and `findings`, or null when the shape is wrong.
 */
function validateLLMShape(parsed) {
  const isObject = parsed !== null && typeof parsed === "object";
  if (!isObject) {
    return null;
  }
  const { passed, findings } = parsed;
  const shapeOk = typeof passed === "boolean" && Array.isArray(findings);
  return shapeOk ? { passed, findings } : null;
}
|
|
31300
|
+
/**
 * Parse raw reviewer output and validate its shape.
 *
 * @param {string} raw - Raw model output.
 * @returns {{ passed: boolean, findings: Array }|null} The validated reply,
 *   or null when parsing throws or the shape is invalid.
 */
function parseLLMResponse(raw) {
  let shaped = null;
  try {
    shaped = validateLLMShape(tryParseLLMJson(raw));
  } catch {
    // Any parser/validator failure is reported as "no usable reply".
    shaped = null;
  }
  return shaped;
}
|
|
31307
|
+
/**
 * Render findings as text: for each finding a "[severity] file:line — issue"
 * line followed by a "Suggestion:" line, with findings joined by a newline.
 *
 * NOTE(review): `f.suggestion` is interpolated as-is, so a finding without a
 * suggestion renders the literal text "undefined" — confirm callers always
 * supply one.
 *
 * @param {Array<object>} findings - Findings with severity/file/line/issue/suggestion.
 * @returns {string} The joined text.
 */
function formatFindings2(findings) {
  return findings.map((f) => `[${f.severity}] ${f.file}:${f.line} \u2014 ${f.issue}
Suggestion: ${f.suggestion}`).join(`
`);
}
|
|
31312
|
+
/**
 * Map a reviewer-supplied severity onto the known severity set.
 *
 * "warn" becomes "warning"; recognised values pass through unchanged;
 * anything else is reported as "info".
 *
 * @param {unknown} sev - Severity as emitted by the model.
 * @returns {string} A recognised severity.
 */
function normalizeSeverity2(sev) {
  switch (sev) {
    case "warn":
      return "warning";
    case "critical":
    case "error":
    case "warning":
    case "info":
    case "low":
    case "unverifiable":
      return sev;
    default:
      return "info";
  }
}
|
|
31319
|
+
/**
 * In "ref" diff mode, downgrade blocking findings that lack evidence.
 * In any other mode the input array is returned untouched.
 *
 * @param {Array<object>} findings - Findings to inspect.
 * @param {string} diffMode - Review diff mode.
 * @param {string} [blockingThreshold="error"] - Severity threshold treated as blocking.
 * @returns {Array<object>} The same array (non-ref mode) or a mapped copy.
 */
function sanitizeRefModeFindings(findings, diffMode, blockingThreshold = "error") {
  if (diffMode === "ref") {
    return findings.map((item) => {
      if (needsDowngradeForMissingEvidence(item, blockingThreshold)) {
        return downgradeToUnverifiable(item);
      }
      return item;
    });
  }
  return findings;
}
|
|
31324
|
+
/**
 * Decide whether a finding must be downgraded for lack of evidence.
 *
 * Only findings at or above the blocking threshold are candidates; a
 * candidate is downgraded when its text mentions an unverified source or it
 * carries no verified evidence.
 *
 * @param {object} finding - Finding to inspect.
 * @param {string} blockingThreshold - Severity threshold treated as blocking.
 * @returns {boolean} True when the finding should be downgraded.
 */
function needsDowngradeForMissingEvidence(finding, blockingThreshold) {
  const blocking = isBlockingSeverity(finding.severity, blockingThreshold);
  if (!blocking) {
    return false;
  }
  if (mentionsUnverifiedSource(finding)) {
    return true;
  }
  return !hasVerifiedEvidence(finding);
}
|
|
31329
|
+
/**
 * Report whether the finding's issue/suggestion text contains any phrase
 * from UNVERIFIED_FINDING_PATTERNS (case-insensitive on the finding text).
 *
 * @param {object} finding - Finding with `issue` and `suggestion`.
 * @returns {boolean} True when a listed phrase occurs in the text.
 */
function mentionsUnverifiedSource(finding) {
  const haystack = `${finding.issue} ${finding.suggestion}`.toLowerCase();
  for (const phrase of UNVERIFIED_FINDING_PATTERNS) {
    if (haystack.includes(phrase)) {
      return true;
    }
  }
  return false;
}
|
|
31333
|
+
/**
 * Report whether a finding carries evidence: a `verifiedBy` entry whose
 * `file` and `observed` are both non-blank after trimming.
 *
 * @param {object} finding - Finding to inspect.
 * @returns {boolean} True when both fields are present and non-blank.
 */
function hasVerifiedEvidence(finding) {
  const { verifiedBy } = finding;
  const citedFile = verifiedBy?.file?.trim();
  if (!citedFile) {
    return false;
  }
  return Boolean(verifiedBy.observed?.trim());
}
|
|
31337
|
+
/**
 * Return a shallow copy of the finding with severity "unverifiable".
 * The input object is not modified.
 *
 * @param {object} finding - Finding to copy.
 * @returns {object} The downgraded copy.
 */
function downgradeToUnverifiable(finding) {
  const downgraded = { ...finding };
  downgraded.severity = "unverifiable";
  return downgraded;
}
|
|
31343
|
+
/**
 * Convert one LLM finding into the review-finding record used downstream.
 *
 * `verifiedBy`, `acQuote` and `acIndex` are carried under `meta` when set;
 * `meta` is undefined when none of them is present.
 *
 * @param {object} f - Finding as returned by the model.
 * @returns {object} Record with source "semantic-review" and fixTarget "source".
 */
function llmFindingToFinding(f) {
  const meta = {
    ...(f.verifiedBy ? { verifiedBy: f.verifiedBy } : {}),
    ...(f.acQuote ? { acQuote: f.acQuote } : {}),
    ...(f.acIndex != null ? { acIndex: f.acIndex } : {})
  };
  const hasMeta = Object.keys(meta).length > 0;
  return {
    source: "semantic-review",
    severity: normalizeSeverity2(f.severity),
    category: "",
    file: f.file,
    line: f.line,
    message: f.issue,
    // A null suggestion is normalised to undefined.
    suggestion: f.suggestion ?? undefined,
    fixTarget: "source",
    meta: hasMeta ? meta : undefined
  };
}
|
|
31363
|
+
/**
 * Convert every LLM finding in the list with llmFindingToFinding.
 *
 * @param {Array<object>} findings - Findings as returned by the model.
 * @returns {Array<object>} Converted records, in the same order.
 */
function toReviewFindings(findings) {
  return findings.map((item) => llmFindingToFinding(item));
}
|
|
31366
|
+
// Lowercase phrases that mentionsUnverifiedSource searches for in a finding's
// lowercased issue/suggestion text. Assigned inside init_semantic_helpers, so
// it is undefined until that initializer has run.
var UNVERIFIED_FINDING_PATTERNS;
// Module initializer for src/review/semantic-helpers.ts: runs init_severity()
// and then fills in the phrase list.
// NOTE(review): __esm is defined outside this view — presumably the bundler's
// run-once lazy-init wrapper; confirm.
var init_semantic_helpers = __esm(() => {
  init_severity();
  UNVERIFIED_FINDING_PATTERNS = [
    "cannot verify",
    "can't verify",
    "from diff alone",
    "missing from diff",
    "not found in diff",
    "not present in diff",
    "does not appear in diff"
  ];
});
|
|
31379
|
+
|
|
31380
|
+
// src/review/semantic-evidence.ts
|
|
31381
|
+
import { isAbsolute as isAbsolute8 } from "path";
|
|
31382
|
+
/**
 * In "ref" diff mode, check each blocking finding's quoted evidence against
 * the file on disk and downgrade those whose quote does not match.
 *
 * Only the "unmatched" evidence status triggers a downgrade; every other
 * status leaves the finding as it was. Findings are checked concurrently.
 *
 * @param {Array<object>} findings - Findings to check.
 * @param {string} diffMode - Review diff mode; anything but "ref" is a no-op.
 * @param {string} workdir - Directory evidence paths are resolved against.
 * @param {string} storyId - Story id attached to the downgrade log entry.
 * @param {string} [blockingThreshold="error"] - Severity threshold treated as blocking.
 * @returns {Promise<Array<object>>} The input array (non-ref mode) or the checked list.
 */
async function substantiateSemanticEvidence(findings, diffMode, workdir, storyId, blockingThreshold = "error") {
  if (diffMode !== "ref") {
    return findings;
  }
  const verifyOne = async (finding) => {
    if (isBlockingSeverity(finding.severity, blockingThreshold)) {
      const evidence = await checkFindingEvidence({ finding, workdir });
      if (evidence.status === "unmatched") {
        return downgradeUnsubstantiatedFinding({ finding, storyId, ...evidence });
      }
    }
    return finding;
  };
  return Promise.all(findings.map((finding) => verifyOne(finding)));
}
|
|
31394
|
+
/**
 * Classify the evidence attached to one finding.
 *
 * File and line come from `verifiedBy` when set (file must be non-blank
 * after trimming) and otherwise from the finding itself.
 *
 * @param {{ finding: object, workdir: string }} opts
 * @returns {Promise<object>} `{ status, file, line[, observed] }` where status is
 *   "missing-observed" (no quote), "unreadable" (file could not be read),
 *   "matched" or "unmatched".
 */
async function checkFindingEvidence(opts) {
  const { finding, workdir } = opts;
  const cited = finding.verifiedBy;
  const observed = cited?.observed?.trim();
  const file3 = cited?.file?.trim() || finding.file;
  const line = cited?.line ?? finding.line;
  const located = { file: file3, line };
  if (!observed) {
    return { status: "missing-observed", ...located };
  }
  const contents = await readSafeFile(workdir, file3);
  if (contents === null) {
    return { status: "unreadable", ...located, observed };
  }
  const status = matchesEvidence(contents, observed, line) ? "matched" : "unmatched";
  return { status, ...located, observed };
}
|
|
31405
|
+
/**
 * Test whether the quoted text occurs in the file contents.
 *
 * Without a usable line number the whole file is searched. Otherwise only a
 * window of EVIDENCE_LINE_WINDOW lines on each side of the cited line
 * (clamped to the file bounds) is searched.
 *
 * @param {string} contents - Full file text.
 * @param {string} observed - Quote to look for.
 * @param {number} [line] - 1-based cited line.
 * @returns {boolean} True when the normalised quote is found.
 */
function matchesEvidence(contents, observed, line) {
  if (!line || line <= 0) {
    return normalizedIncludes(contents, observed);
  }
  const fileLines = contents.split("\n");
  const lastIndex = fileLines.length - 1;
  // Convert to a 0-based index and clamp it into the file.
  const citedIndex = Math.min(Math.max(0, line - 1), lastIndex);
  const from = Math.max(0, citedIndex - EVIDENCE_LINE_WINDOW);
  const to = Math.min(fileLines.length, citedIndex + EVIDENCE_LINE_WINDOW + 1);
  const windowText = fileLines.slice(from, to).join("\n");
  return normalizedIncludes(windowText, observed);
}
|
|
31418
|
+
/**
 * Log a downgrade warning (when a logger is available) and return a copy of
 * the finding with severity "unverifiable".
 *
 * The log entry uses `opts.event`, `opts.file` and `opts.line` when given,
 * falling back to the semantic downgrade event and the finding's own
 * location; `issue` and `observed` are truncated to their preview lengths.
 *
 * @param {{ finding: object, storyId?: string, event?: string, file?: string,
 *   line?: number, observed?: string }} opts
 * @returns {object} The downgraded copy of `opts.finding`.
 */
function downgradeUnsubstantiatedFinding(opts) {
  const { finding } = opts;
  const logger = _evidenceDeps.getLogger();
  // Build the payload only when there is a logger to receive it.
  if (logger != null) {
    logger.warn("review", "Downgraded unsubstantiated review finding", {
      storyId: opts.storyId,
      event: opts.event ?? SEMANTIC_FINDING_DOWNGRADED_EVENT,
      file: opts.file ?? finding.verifiedBy?.file ?? finding.file,
      line: opts.line ?? finding.verifiedBy?.line ?? finding.line,
      issue: finding.issue?.slice(0, ISSUE_PREVIEW_CHARS),
      observed: opts.observed?.slice(0, OBSERVED_PREVIEW_CHARS)
    });
  }
  return { ...finding, severity: "unverifiable" };
}
|
|
31429
|
+
/**
 * Read a cited file as text, returning null when it cannot be read.
 *
 * The path is first validated against `workdir`; if validation fails, an
 * absolute path is still read directly.
 *
 * NOTE(review): the absolute-path fallback is not constrained to `workdir`,
 * and `file3` originates from reviewer output — confirm this is intended.
 *
 * @param {string} workdir - Allowed root passed to validateModulePath.
 * @param {string} file3 - Path cited by the finding.
 * @returns {Promise<string|null>} File text, or null on any read failure.
 */
async function readSafeFile(workdir, file3) {
  const readOrNull = async (target) => {
    try {
      return await Bun.file(target).text();
    } catch {
      return null;
    }
  };
  const { valid, absolutePath } = validateModulePath(file3, [workdir]);
  if (valid && absolutePath) {
    return readOrNull(absolutePath);
  }
  return isAbsolute8(file3) ? readOrNull(file3) : null;
}
|
|
31447
|
+
/**
 * Substring test on normalised text. An empty normalised quote never matches.
 *
 * @param {string} contents - Text to search.
 * @param {string} observed - Quote to look for.
 * @returns {boolean} True when the normalised quote occurs in the normalised text.
 */
function normalizedIncludes(contents, observed) {
  const needle = normalizeEvidenceText(observed);
  if (needle.length === 0) {
    return false;
  }
  return normalizeEvidenceText(contents).includes(needle);
}
|
|
31451
|
+
/**
 * Normalise text for comparison: strip wrapping quote characters, collapse
 * each run of whitespace to one space, and trim the ends.
 *
 * @param {string} text - Text to normalise.
 * @returns {string} Normalised text.
 */
function normalizeEvidenceText(text) {
  const unwrapped = stripWrappingQuotes(text);
  const collapsed = unwrapped.replace(/\s+/g, " ");
  return collapsed.trim();
}
|
|
31454
|
+
/**
 * Trim the text and repeatedly remove a matching pair of wrapping quote
 * characters, re-trimming after each removal.
 *
 * @param {string} text - Text that may be wrapped in quotes or backticks.
 * @returns {string} The unwrapped text.
 */
function stripWrappingQuotes(text) {
  let current = text.trim();
  for (;;) {
    if (current.length < 2) {
      break;
    }
    if (!isMatchingWrapper(current[0], current[current.length - 1])) {
      break;
    }
    current = current.slice(1, -1).trim();
  }
  return current;
}
|
|
31461
|
+
/**
 * Report whether two characters are the same wrapping quote character:
 * a backtick, a double quote or a single quote.
 *
 * @param {string} first - Leading character.
 * @param {string} last - Trailing character.
 * @returns {boolean} True when both are the same quote character.
 */
function isMatchingWrapper(first, last) {
  if (first !== last) {
    return false;
  }
  return first === "`" || first === `"` || first === "'";
}
|
|
31464
|
+
// Tunables and event names for the evidence checks in this module:
// - OBSERVED_PREVIEW_CHARS / ISSUE_PREVIEW_CHARS: maximum characters of
//   `observed` / `issue` kept in the downgrade log entry.
// - EVIDENCE_LINE_WINDOW: lines searched on each side of the cited line by
//   matchesEvidence.
// - *_FINDING_DOWNGRADED_EVENT: `event` values attached to the downgrade log entry.
// - _evidenceDeps: assigned inside init_semantic_evidence; holds the logger accessor.
var OBSERVED_PREVIEW_CHARS = 160, ISSUE_PREVIEW_CHARS = 200, EVIDENCE_LINE_WINDOW = 10, SEMANTIC_FINDING_DOWNGRADED_EVENT = "review.semantic.finding.downgraded", ADVERSARIAL_FINDING_DOWNGRADED_EVENT = "review.adversarial.finding.downgraded", _evidenceDeps;
// Module initializer for src/review/semantic-evidence.ts: runs the logger,
// path-security and semantic-helpers initializers, then sets _evidenceDeps so
// that downgradeUnsubstantiatedFinding obtains its logger via getSafeLogger.
var init_semantic_evidence = __esm(() => {
  init_logger2();
  init_path_security2();
  init_semantic_helpers();
  _evidenceDeps = {
    getLogger: getSafeLogger
  };
});
|
|
31473
|
+
|
|
31474
|
+
// src/review/finding-filters.ts
|
|
31475
|
+
/**
 * Check each blocking adversarial finding's quoted evidence and downgrade
 * those whose quote is missing or does not match the file on disk.
 *
 * Both "unmatched" and "missing-observed" evidence statuses trigger a
 * downgrade; other statuses leave the finding unchanged. Findings are
 * checked concurrently.
 *
 * @param {{ findings: Array<object>, workdir: string, storyId: string,
 *   blockingThreshold: string }} opts
 * @returns {Promise<Array<object>>} Findings in the original order.
 */
async function substantiateAdversarialFindings(opts) {
  const { findings, workdir, storyId, blockingThreshold } = opts;
  const verifyOne = async (finding) => {
    if (!isBlockingSeverity(finding.severity, blockingThreshold)) {
      return finding;
    }
    const { status, file, line, observed } = await checkFindingEvidence({ finding, workdir });
    const unsubstantiated = status === "unmatched" || status === "missing-observed";
    if (!unsubstantiated) {
      return finding;
    }
    return downgradeUnsubstantiatedFinding({
      finding,
      storyId,
      event: ADVERSARIAL_FINDING_DOWNGRADED_EVENT,
      file,
      line,
      observed
    });
  };
  return Promise.all(findings.map((finding) => verifyOne(finding)));
}
|
|
31493
|
+
// Module initializer for src/review/finding-filters.ts: runs the initializers
// of the modules this one relies on, in the order listed.
// NOTE(review): init_semantic_evidence() appears twice below — presumably a
// harmless repeat if __esm initializers run only once; confirm.
var init_finding_filters = __esm(() => {
  init_adversarial_helpers();
  init_semantic_evidence();
  init_semantic_helpers();
  init_semantic_evidence();
  init_ac_quote_validator();
});
|
|
31500
|
+
|
|
31501
|
+
// src/review/requote-response.ts
|
|
31502
|
+
/**
 * Extract a `{ file, line, observed }` requote from the model's reply.
 *
 * Accepted shapes, tried in order:
 *   1. the reply object itself is the requote;
 *   2. the reply has a `findings` array with exactly one entry, whose
 *      `verifiedBy` (preferred) or the entry itself is the requote.
 *
 * @param {string} output - Raw model output.
 * @returns {{ file: string, line: (number|undefined), observed: string }|null}
 *   The requote, or null when the reply is unparseable or has another shape.
 */
function parseRequoteResponse(output) {
  let parsed;
  try {
    parsed = tryParseLLMJson(output);
  } catch {
    // parseLLMResponse in this file guards the same parser call; an
    // unparseable reply must mean "no usable requote" rather than abort
    // the caller's loop over the remaining findings.
    return null;
  }
  if (!isRecord(parsed)) {
    return null;
  }
  const canonical = extractCanonical(parsed);
  if (canonical) {
    return canonical;
  }
  const findings = parsed.findings;
  if (!Array.isArray(findings) || findings.length !== 1) {
    return null;
  }
  const finding = findings[0];
  if (!isRecord(finding)) {
    return null;
  }
  return extractCanonical(finding.verifiedBy) ?? extractCanonical(finding);
}
|
|
31517
|
+
/**
 * Read a `{ file, line, observed }` requote out of a candidate value.
 *
 * `file` and `observed` must be strings, `file` must be non-blank after
 * trimming, and `line` must be acceptable to coerceLine. `observed` is
 * returned exactly as given.
 *
 * @param {unknown} value - Candidate object.
 * @returns {{ file: string, line: (number|undefined), observed: string }|null}
 */
function extractCanonical(value) {
  if (!isRecord(value)) {
    return null;
  }
  const { file: rawFile, observed } = value;
  if (typeof rawFile !== "string" || typeof observed !== "string") {
    return null;
  }
  const file3 = rawFile.trim();
  if (file3 === "") {
    return null;
  }
  const line = coerceLine(value.line);
  if (line === null) {
    return null;
  }
  return { file: file3, line, observed };
}
|
|
31534
|
+
/**
 * Coerce a reviewer-supplied line value.
 *
 * Numbers are held to the same rule the string form already enforces
 * (`/^\d+$/`): only non-negative integers are accepted, so NaN, Infinity,
 * negative and fractional values are rejected instead of passed through.
 *
 * @param {unknown} value - Raw `line` value from the model reply.
 * @returns {number|undefined|null} The line number; undefined when no line
 *   was given (null/undefined input); null when the value is not a valid line.
 */
function coerceLine(value) {
  if (value == null) {
    return undefined;
  }
  if (typeof value === "number") {
    return Number.isInteger(value) && value >= 0 ? value : null;
  }
  if (typeof value === "string" && /^\d+$/.test(value)) {
    return Number.parseInt(value, 10);
  }
  return null;
}
|
|
31543
|
+
/**
 * Report whether a value is a non-null, non-array object.
 *
 * @param {unknown} value - Value to test.
 * @returns {boolean} True for plain objects and other non-array objects.
 */
function isRecord(value) {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
}
|
|
31546
|
+
// Module initializer for src/review/requote-response.ts; intentionally a
// no-op because this module has nothing to set up.
var init_requote_response = () => {};
|
|
31547
|
+
|
|
31252
31548
|
// src/operations/adversarial-review.ts
|
|
31253
|
-
|
|
31549
|
+
/**
 * Give the reviewer a chance to re-quote evidence for blocking findings
 * whose quote did not match the file on disk.
 *
 * Runs only in "ref" mode with requoting enabled and a positive budget.
 * Findings are processed one at a time, in order; each blocking finding
 * with "unmatched" evidence consumes one requote from the budget, and
 * processing stops once the budget is spent. After a requote the finding is
 * either replaced by a copy carrying the new `verifiedBy` (quote now
 * matches) or downgraded (reply unusable, or the new quote still fails).
 *
 * @param {Array<object>} findings - Findings from the parsed review.
 * @param {object} ctx - Hop context; reads `ctx.input` and calls `ctx.send`.
 * @returns {Promise<{ findings: Array<object>, changed: boolean, extraCostUsd: number }>}
 *   The input array itself when nothing ran, otherwise an updated copy.
 */
async function requoteBlockingAdversarialFindings(findings, ctx) {
  const { input } = ctx;
  const threshold = input.blockingThreshold ?? "error";
  const substantiation = input.adversarialConfig.substantiation;
  const maxRequotes = substantiation?.maxRequotes ?? DEFAULT_MAX_REQUOTES;
  const requoteEnabled = substantiation?.requote ?? true;
  if (input.mode !== "ref" || !requoteEnabled || maxRequotes <= 0) {
    return { findings, changed: false, extraCostUsd: 0 };
  }
  const next = [...findings];
  let changed = false;
  let extraCostUsd = 0;
  let used = 0;
  for (let index = 0; index < next.length; index += 1) {
    const finding = next[index];
    if (!isBlockingSeverity(finding.severity, threshold)) {
      continue;
    }
    const initialEvidence = await checkFindingEvidence({ finding, workdir: input.workdir });
    if (initialEvidence.status !== "unmatched") {
      continue;
    }
    if (used >= maxRequotes) {
      break;
    }
    used += 1;
    const retry = await ctx.send(AdversarialReviewPromptBuilder.requoteVerbatim({ finding }));
    extraCostUsd += retry.estimatedCostUsd ?? 0;
    const requote = parseRequoteResponse(retry.output);
    let replacement;
    if (!requote) {
      // Unusable reply: downgrade using the evidence from the first check.
      replacement = downgradeUnsubstantiatedFinding({
        finding,
        storyId: input.story.id,
        event: ADVERSARIAL_REQUOTE_FAILED_EVENT,
        ...initialEvidence
      });
    } else {
      const { file, line, observed } = requote;
      const updatedFinding = { ...finding, verifiedBy: { file, line, observed } };
      const requotedEvidence = await checkFindingEvidence({
        finding: updatedFinding,
        workdir: input.workdir
      });
      if (requotedEvidence.status === "matched") {
        getSafeLogger()?.info("review", "Recovered adversarial finding via same-session requote", {
          storyId: input.story.id,
          event: ADVERSARIAL_REQUOTE_RECOVERED_EVENT,
          file: requotedEvidence.file,
          line: requotedEvidence.line
        });
        replacement = updatedFinding;
      } else {
        replacement = downgradeUnsubstantiatedFinding({
          finding: updatedFinding,
          storyId: input.story.id,
          event: ADVERSARIAL_REQUOTE_FAILED_EVENT,
          file: requotedEvidence.file,
          line: requotedEvidence.line,
          observed: requotedEvidence.observed
        });
      }
    }
    next[index] = replacement;
    changed = true;
  }
  return { findings: next, changed, extraCostUsd };
}
|
|
31617
|
+
var FAIL_OPEN, ADVERSARIAL_REQUOTE_RECOVERED_EVENT = "review.adversarial.finding.requote_recovered", ADVERSARIAL_REQUOTE_FAILED_EVENT = "review.adversarial.finding.requote_failed", DEFAULT_MAX_REQUOTES = 5, adversarialParseRetry = (input) => makeParseRetryStrategy({
|
|
31254
31618
|
validate: (parsed) => validateAdversarialShape(parsed) !== null,
|
|
31255
31619
|
reviewerKind: "adversarial",
|
|
31256
31620
|
maxAttempts: 2,
|
|
@@ -31258,15 +31622,24 @@ var FAIL_OPEN, adversarialParseRetry = (input) => makeParseRetryStrategy({
|
|
|
31258
31622
|
invalid: () => ReviewPromptBuilder.jsonRetry(),
|
|
31259
31623
|
truncated: () => ReviewPromptBuilder.jsonRetryCondensed({ blockingThreshold: input.blockingThreshold })
|
|
31260
31624
|
},
|
|
31261
|
-
exhaustedFallback: (lastOutput) => /"passed"\s*:\s*false/.test(lastOutput) ? { passed: false, findings: [], looksLikeFail: true } : FAIL_OPEN,
|
|
31625
|
+
exhaustedFallback: (lastOutput) => /"passed"\s*:\s*false/.test(lastOutput) ? { passed: false, findings: [], normalizedFindings: [], acDropped: [], looksLikeFail: true } : FAIL_OPEN,
|
|
31262
31626
|
logContext: { blockingThreshold: input.blockingThreshold ?? "error" }
|
|
31263
31627
|
}), adversarialReviewOp;
|
|
31264
31628
|
var init_adversarial_review = __esm(() => {
|
|
31265
31629
|
init_retry();
|
|
31266
31630
|
init_config();
|
|
31631
|
+
init_logger2();
|
|
31267
31632
|
init_prompts();
|
|
31268
31633
|
init_adversarial_helpers();
|
|
31269
|
-
|
|
31634
|
+
init_finding_filters();
|
|
31635
|
+
init_requote_response();
|
|
31636
|
+
FAIL_OPEN = {
|
|
31637
|
+
passed: true,
|
|
31638
|
+
findings: [],
|
|
31639
|
+
normalizedFindings: [],
|
|
31640
|
+
acDropped: [],
|
|
31641
|
+
failOpen: true
|
|
31642
|
+
};
|
|
31270
31643
|
adversarialReviewOp = {
|
|
31271
31644
|
kind: "run",
|
|
31272
31645
|
name: "adversarial-review",
|
|
@@ -31276,6 +31649,21 @@ var init_adversarial_review = __esm(() => {
|
|
|
31276
31649
|
model: (input) => input.adversarialConfig.model,
|
|
31277
31650
|
timeoutMs: (input) => input.adversarialConfig.timeoutMs,
|
|
31278
31651
|
retry: (input) => adversarialParseRetry(input),
|
|
31652
|
+
async hopBody(initialPrompt, ctx) {
|
|
31653
|
+
const turn = await ctx.sendWithParseRetry(initialPrompt);
|
|
31654
|
+
const parsed = validateAdversarialShape(tryParseLLMJson(turn.output));
|
|
31655
|
+
if (!parsed)
|
|
31656
|
+
return turn;
|
|
31657
|
+
const requoted = await requoteBlockingAdversarialFindings(parsed.findings, ctx);
|
|
31658
|
+
if (!requoted.changed)
|
|
31659
|
+
return turn;
|
|
31660
|
+
const passed = !requoted.findings.some((finding) => isBlockingSeverity(finding.severity, ctx.input.blockingThreshold ?? "error"));
|
|
31661
|
+
return {
|
|
31662
|
+
...turn,
|
|
31663
|
+
output: JSON.stringify({ passed, findings: requoted.findings }),
|
|
31664
|
+
estimatedCostUsd: (turn.estimatedCostUsd ?? 0) + requoted.extraCostUsd
|
|
31665
|
+
};
|
|
31666
|
+
},
|
|
31279
31667
|
build(input, _ctx) {
|
|
31280
31668
|
const base = new AdversarialReviewPromptBuilder().buildAdversarialReviewPrompt(input.story, input.adversarialConfig, {
|
|
31281
31669
|
mode: input.mode,
|
|
@@ -31298,12 +31686,42 @@ var init_adversarial_review = __esm(() => {
|
|
|
31298
31686
|
parse(output, _input, _ctx) {
|
|
31299
31687
|
const raw = tryParseLLMJson(output);
|
|
31300
31688
|
const parsed = validateAdversarialShape(raw);
|
|
31301
|
-
if (parsed)
|
|
31302
|
-
return {
|
|
31689
|
+
if (parsed) {
|
|
31690
|
+
return {
|
|
31691
|
+
passed: parsed.passed,
|
|
31692
|
+
findings: parsed.findings,
|
|
31693
|
+
normalizedFindings: [],
|
|
31694
|
+
acDropped: []
|
|
31695
|
+
};
|
|
31696
|
+
}
|
|
31303
31697
|
if (/"passed"\s*:\s*false/.test(output) && !/"findings"\s*:\s*\[\s*\{/.test(output)) {
|
|
31304
|
-
return { passed: false, findings: [], looksLikeFail: true };
|
|
31698
|
+
return { passed: false, findings: [], normalizedFindings: [], acDropped: [], looksLikeFail: true };
|
|
31305
31699
|
}
|
|
31306
31700
|
throw new ParseValidationError("[adversarial-review] parse failed: invalid JSON shape");
|
|
31701
|
+
},
|
|
31702
|
+
async verify(parsed, input, _verifyCtx) {
|
|
31703
|
+
if (parsed.failOpen || parsed.looksLikeFail)
|
|
31704
|
+
return parsed;
|
|
31705
|
+
if (parsed.findings.length === 0)
|
|
31706
|
+
return parsed;
|
|
31707
|
+
const threshold = input.blockingThreshold ?? "error";
|
|
31708
|
+
const findings = parsed.findings;
|
|
31709
|
+
const substantiated = await substantiateAdversarialFindings({
|
|
31710
|
+
findings,
|
|
31711
|
+
workdir: input.workdir,
|
|
31712
|
+
storyId: input.story.id,
|
|
31713
|
+
blockingThreshold: threshold
|
|
31714
|
+
});
|
|
31715
|
+
const { accepted, dropped } = filterByAcQuote(substantiated, input.story.acceptanceCriteria);
|
|
31716
|
+
const blocking = accepted.filter((f) => isBlockingSeverity(f.severity, threshold));
|
|
31717
|
+
const passed = parsed.passed && blocking.length === 0;
|
|
31718
|
+
return {
|
|
31719
|
+
...parsed,
|
|
31720
|
+
passed,
|
|
31721
|
+
findings: accepted,
|
|
31722
|
+
normalizedFindings: toAdversarialReviewFindings(blocking),
|
|
31723
|
+
acDropped: dropped
|
|
31724
|
+
};
|
|
31307
31725
|
}
|
|
31308
31726
|
};
|
|
31309
31727
|
});
|
|
@@ -31738,191 +32156,6 @@ var init_review_audit = __esm(() => {
|
|
|
31738
32156
|
};
|
|
31739
32157
|
});
|
|
31740
32158
|
|
|
31741
|
-
// src/review/semantic-helpers.ts
|
|
31742
|
-
function validateLLMShape(parsed) {
|
|
31743
|
-
if (typeof parsed !== "object" || parsed === null)
|
|
31744
|
-
return null;
|
|
31745
|
-
const obj = parsed;
|
|
31746
|
-
if (typeof obj.passed !== "boolean")
|
|
31747
|
-
return null;
|
|
31748
|
-
if (!Array.isArray(obj.findings))
|
|
31749
|
-
return null;
|
|
31750
|
-
return { passed: obj.passed, findings: obj.findings };
|
|
31751
|
-
}
|
|
31752
|
-
function parseLLMResponse(raw) {
|
|
31753
|
-
try {
|
|
31754
|
-
return validateLLMShape(tryParseLLMJson(raw));
|
|
31755
|
-
} catch {
|
|
31756
|
-
return null;
|
|
31757
|
-
}
|
|
31758
|
-
}
|
|
31759
|
-
function formatFindings2(findings) {
|
|
31760
|
-
return findings.map((f) => `[${f.severity}] ${f.file}:${f.line} \u2014 ${f.issue}
|
|
31761
|
-
Suggestion: ${f.suggestion}`).join(`
|
|
31762
|
-
`);
|
|
31763
|
-
}
|
|
31764
|
-
function normalizeSeverity2(sev) {
|
|
31765
|
-
if (sev === "warn")
|
|
31766
|
-
return "warning";
|
|
31767
|
-
if (sev === "critical" || sev === "error" || sev === "warning" || sev === "info" || sev === "low" || sev === "unverifiable")
|
|
31768
|
-
return sev;
|
|
31769
|
-
return "info";
|
|
31770
|
-
}
|
|
31771
|
-
function sanitizeRefModeFindings(findings, diffMode, blockingThreshold = "error") {
|
|
31772
|
-
if (diffMode !== "ref")
|
|
31773
|
-
return findings;
|
|
31774
|
-
return findings.map((finding) => needsDowngradeForMissingEvidence(finding, blockingThreshold) ? downgradeToUnverifiable(finding) : finding);
|
|
31775
|
-
}
|
|
31776
|
-
function needsDowngradeForMissingEvidence(finding, blockingThreshold) {
|
|
31777
|
-
if (!isBlockingSeverity(finding.severity, blockingThreshold))
|
|
31778
|
-
return false;
|
|
31779
|
-
return mentionsUnverifiedSource(finding) || !hasVerifiedEvidence(finding);
|
|
31780
|
-
}
|
|
31781
|
-
function mentionsUnverifiedSource(finding) {
|
|
31782
|
-
const text = `${finding.issue} ${finding.suggestion}`.toLowerCase();
|
|
31783
|
-
return UNVERIFIED_FINDING_PATTERNS.some((pattern) => text.includes(pattern));
|
|
31784
|
-
}
|
|
31785
|
-
function hasVerifiedEvidence(finding) {
|
|
31786
|
-
const evidence = finding.verifiedBy;
|
|
31787
|
-
return !!evidence?.file?.trim() && !!evidence.observed?.trim();
|
|
31788
|
-
}
|
|
31789
|
-
function downgradeToUnverifiable(finding) {
|
|
31790
|
-
return {
|
|
31791
|
-
...finding,
|
|
31792
|
-
severity: "unverifiable"
|
|
31793
|
-
};
|
|
31794
|
-
}
|
|
31795
|
-
function llmFindingToFinding(f) {
|
|
31796
|
-
const metaExtras = {};
|
|
31797
|
-
if (f.verifiedBy)
|
|
31798
|
-
metaExtras.verifiedBy = f.verifiedBy;
|
|
31799
|
-
if (f.acQuote)
|
|
31800
|
-
metaExtras.acQuote = f.acQuote;
|
|
31801
|
-
if (f.acIndex != null)
|
|
31802
|
-
metaExtras.acIndex = f.acIndex;
|
|
31803
|
-
return {
|
|
31804
|
-
source: "semantic-review",
|
|
31805
|
-
severity: normalizeSeverity2(f.severity),
|
|
31806
|
-
category: "",
|
|
31807
|
-
file: f.file,
|
|
31808
|
-
line: f.line,
|
|
31809
|
-
message: f.issue,
|
|
31810
|
-
suggestion: f.suggestion ?? undefined,
|
|
31811
|
-
fixTarget: "source",
|
|
31812
|
-
meta: Object.keys(metaExtras).length > 0 ? metaExtras : undefined
|
|
31813
|
-
};
|
|
31814
|
-
}
|
|
31815
|
-
function toReviewFindings(findings) {
|
|
31816
|
-
return findings.map(llmFindingToFinding);
|
|
31817
|
-
}
|
|
31818
|
-
var UNVERIFIED_FINDING_PATTERNS;
|
|
31819
|
-
var init_semantic_helpers = __esm(() => {
|
|
31820
|
-
init_severity();
|
|
31821
|
-
UNVERIFIED_FINDING_PATTERNS = [
|
|
31822
|
-
"cannot verify",
|
|
31823
|
-
"can't verify",
|
|
31824
|
-
"from diff alone",
|
|
31825
|
-
"missing from diff",
|
|
31826
|
-
"not found in diff",
|
|
31827
|
-
"not present in diff",
|
|
31828
|
-
"does not appear in diff"
|
|
31829
|
-
];
|
|
31830
|
-
});
|
|
31831
|
-
|
|
31832
|
-
// src/review/semantic-evidence.ts
|
|
31833
|
-
import { isAbsolute as isAbsolute8 } from "path";
|
|
31834
|
-
async function substantiateSemanticEvidence(findings, diffMode, workdir, storyId, blockingThreshold = "error") {
|
|
31835
|
-
if (diffMode !== "ref")
|
|
31836
|
-
return findings;
|
|
31837
|
-
return Promise.all(findings.map(async (finding) => {
|
|
31838
|
-
if (!isBlockingSeverity(finding.severity, blockingThreshold))
|
|
31839
|
-
return finding;
|
|
31840
|
-
const evidence = await checkFindingEvidence({ finding, workdir });
|
|
31841
|
-
if (evidence.status !== "unmatched")
|
|
31842
|
-
return finding;
|
|
31843
|
-
return downgradeUnsubstantiatedFinding({ finding, storyId, ...evidence });
|
|
31844
|
-
}));
|
|
31845
|
-
}
|
|
31846
|
-
async function checkFindingEvidence(opts) {
|
|
31847
|
-
const observed = opts.finding.verifiedBy?.observed?.trim();
|
|
31848
|
-
const file3 = opts.finding.verifiedBy?.file?.trim() || opts.finding.file;
|
|
31849
|
-
const line = opts.finding.verifiedBy?.line ?? opts.finding.line;
|
|
31850
|
-
if (!observed)
|
|
31851
|
-
return { status: "missing-observed", file: file3, line };
|
|
31852
|
-
const contents = await readSafeFile(opts.workdir, file3);
|
|
31853
|
-
if (contents === null)
|
|
31854
|
-
return { status: "unreadable", file: file3, line, observed };
|
|
31855
|
-
return matchesEvidence(contents, observed, line) ? { status: "matched", file: file3, line, observed } : { status: "unmatched", file: file3, line, observed };
|
|
31856
|
-
}
|
|
31857
|
-
function matchesEvidence(contents, observed, line) {
|
|
31858
|
-
if (!line || line <= 0) {
|
|
31859
|
-
return normalizedIncludes(contents, observed);
|
|
31860
|
-
}
|
|
31861
|
-
const lines = contents.split(`
|
|
31862
|
-
`);
|
|
31863
|
-
const cited = Math.min(Math.max(0, line - 1), lines.length - 1);
|
|
31864
|
-
const start = Math.max(0, cited - EVIDENCE_LINE_WINDOW);
|
|
31865
|
-
const end = Math.min(lines.length, cited + EVIDENCE_LINE_WINDOW + 1);
|
|
31866
|
-
const windowText = lines.slice(start, end).join(`
|
|
31867
|
-
`);
|
|
31868
|
-
return normalizedIncludes(windowText, observed);
|
|
31869
|
-
}
|
|
31870
|
-
function downgradeUnsubstantiatedFinding(opts) {
|
|
31871
|
-
_evidenceDeps.getLogger()?.warn("review", "Downgraded unsubstantiated review finding", {
|
|
31872
|
-
storyId: opts.storyId,
|
|
31873
|
-
event: opts.event ?? SEMANTIC_FINDING_DOWNGRADED_EVENT,
|
|
31874
|
-
file: opts.file ?? opts.finding.verifiedBy?.file ?? opts.finding.file,
|
|
31875
|
-
line: opts.line ?? opts.finding.verifiedBy?.line ?? opts.finding.line,
|
|
31876
|
-
issue: opts.finding.issue?.slice(0, ISSUE_PREVIEW_CHARS),
|
|
31877
|
-
observed: opts.observed?.slice(0, OBSERVED_PREVIEW_CHARS)
|
|
31878
|
-
});
|
|
31879
|
-
return { ...opts.finding, severity: "unverifiable" };
|
|
31880
|
-
}
|
|
31881
|
-
async function readSafeFile(workdir, file3) {
|
|
31882
|
-
const validated = validateModulePath(file3, [workdir]);
|
|
31883
|
-
if (validated.valid && validated.absolutePath) {
|
|
31884
|
-
try {
|
|
31885
|
-
return await Bun.file(validated.absolutePath).text();
|
|
31886
|
-
} catch {
|
|
31887
|
-
return null;
|
|
31888
|
-
}
|
|
31889
|
-
}
|
|
31890
|
-
if (isAbsolute8(file3)) {
|
|
31891
|
-
try {
|
|
31892
|
-
return await Bun.file(file3).text();
|
|
31893
|
-
} catch {
|
|
31894
|
-
return null;
|
|
31895
|
-
}
|
|
31896
|
-
}
|
|
31897
|
-
return null;
|
|
31898
|
-
}
|
|
31899
|
-
/**
 * Substring test performed on the normalized forms of both arguments.
 * An `observed` value that normalizes to the empty string never matches.
 *
 * @param {string} contents - Text to search in.
 * @param {string} observed - Text to search for.
 * @returns {boolean}
 */
function normalizedIncludes(contents, observed) {
  const needle = normalizeEvidenceText(observed);
  if (needle.length === 0) {
    return false;
  }
  return normalizeEvidenceText(contents).includes(needle);
}
|
|
31903
|
-
/**
 * Normalizes text for evidence comparison: strips wrapping quote characters,
 * collapses every whitespace run into a single space, and trims the ends.
 *
 * @param {string} text
 * @returns {string}
 */
function normalizeEvidenceText(text) {
  const unquoted = stripWrappingQuotes(text);
  const collapsed = unquoted.replace(/\s+/g, " ");
  return collapsed.trim();
}
|
|
31906
|
-
/**
 * Trims `text` and repeatedly removes one matching pair of wrapper characters
 * (as decided by `isMatchingWrapper`) from its ends, trimming after each pass.
 *
 * @param {string} text
 * @returns {string} The text with all outer wrapper pairs removed.
 */
function stripWrappingQuotes(text) {
  let result = text.trim();
  for (;;) {
    // A wrapper pair needs at least two characters.
    if (result.length < 2) {
      break;
    }
    const head = result[0];
    const tail = result[result.length - 1];
    if (!isMatchingWrapper(head, tail)) {
      break;
    }
    result = result.slice(1, -1).trim();
  }
  return result;
}
|
|
31913
|
-
/**
 * Tells whether `first` and `last` are the same wrapper character:
 * a backtick, a double quote, or a single quote.
 *
 * @param {string} first - Leading character.
 * @param {string} last - Trailing character.
 * @returns {boolean}
 */
function isMatchingWrapper(first, last) {
  if (first !== last) {
    return false;
  }
  return first === "`" || first === `"` || first === "'";
}
|
|
31916
|
-
// Maximum number of characters of `observed` / `issue` text included in log entries.
var OBSERVED_PREVIEW_CHARS = 160;
var ISSUE_PREVIEW_CHARS = 200;
// Number of lines inspected on each side of a cited line when matching evidence.
var EVIDENCE_LINE_WINDOW = 10;
// Event names attached to "finding downgraded" log entries.
var SEMANTIC_FINDING_DOWNGRADED_EVENT = "review.semantic.finding.downgraded";
var ADVERSARIAL_FINDING_DOWNGRADED_EVENT = "review.adversarial.finding.downgraded";
// Assigned later by init_semantic_evidence; kept as `var` so it is hoisted.
var _evidenceDeps;
|
|
31917
|
-
// Initializer for the semantic-evidence module, wrapped in `__esm`
// (presumably the bundler's lazy-init helper — confirm against its definition).
var init_semantic_evidence = __esm(() => {
  // Initialize the modules this one depends on before wiring its own state.
  init_logger2();
  init_path_security2();
  init_semantic_helpers();

  // Indirection object through which this module obtains its logger.
  _evidenceDeps = { getLogger: getSafeLogger };
});
|
|
31925
|
-
|
|
31926
32159
|
// src/review/adversarial.ts
|
|
31927
32160
|
import { relative as relative7, sep } from "path";
|
|
31928
32161
|
function recordAdversarialAudit(opts) {
|
|
@@ -31955,7 +32188,6 @@ async function runAdversarialReview(opts) {
|
|
|
31955
32188
|
agentManager,
|
|
31956
32189
|
config: naxConfig,
|
|
31957
32190
|
featureName,
|
|
31958
|
-
priorFailures,
|
|
31959
32191
|
blockingThreshold,
|
|
31960
32192
|
featureContextMarkdown,
|
|
31961
32193
|
contextBundle,
|
|
@@ -32076,13 +32308,13 @@ async function runAdversarialReview(opts) {
|
|
|
32076
32308
|
let opResult;
|
|
32077
32309
|
try {
|
|
32078
32310
|
opResult = await _adversarialDeps.callOp(callCtx, adversarialReviewOp, {
|
|
32311
|
+
workdir,
|
|
32079
32312
|
story,
|
|
32080
32313
|
adversarialConfig,
|
|
32081
32314
|
mode: diffMode,
|
|
32082
32315
|
diff,
|
|
32083
32316
|
storyGitRef: effectiveRef,
|
|
32084
32317
|
stat,
|
|
32085
|
-
priorFailures,
|
|
32086
32318
|
testInventory,
|
|
32087
32319
|
excludePatterns: adversarialConfig.excludePatterns,
|
|
32088
32320
|
testGlobs: resolvedTestPatterns.globs,
|
|
@@ -32167,27 +32399,11 @@ async function runAdversarialReview(opts) {
|
|
|
32167
32399
|
durationMs: Date.now() - startTime
|
|
32168
32400
|
};
|
|
32169
32401
|
}
|
|
32170
|
-
const
|
|
32171
|
-
|
|
32172
|
-
|
|
32173
|
-
|
|
32174
|
-
const
|
|
32175
|
-
const substantiatedFindings = await Promise.all(rawParsedRaw.findings.map(async (finding) => {
|
|
32176
|
-
if (!isBlockingSeverity(finding.severity, blockingThresholdEffective))
|
|
32177
|
-
return finding;
|
|
32178
|
-
const evidence = await checkFindingEvidence({ finding, workdir });
|
|
32179
|
-
if (evidence.status !== "unmatched" && evidence.status !== "missing-observed")
|
|
32180
|
-
return finding;
|
|
32181
|
-
return downgradeUnsubstantiatedFinding({
|
|
32182
|
-
finding,
|
|
32183
|
-
storyId: story.id,
|
|
32184
|
-
event: ADVERSARIAL_FINDING_DOWNGRADED_EVENT,
|
|
32185
|
-
file: evidence.file,
|
|
32186
|
-
line: evidence.line,
|
|
32187
|
-
observed: evidence.observed
|
|
32188
|
-
});
|
|
32189
|
-
}));
|
|
32190
|
-
const rawParsed = { ...rawParsedRaw, findings: substantiatedFindings };
|
|
32402
|
+
const threshold = blockingThreshold ?? "error";
|
|
32403
|
+
const allFindings = opResult.findings;
|
|
32404
|
+
const blockingFindings = allFindings.filter((f) => isBlockingSeverity(f.severity, threshold));
|
|
32405
|
+
const advisoryFindings = allFindings.filter((f) => !isBlockingSeverity(f.severity, threshold));
|
|
32406
|
+
const acDropped = opResult.acDropped ?? [];
|
|
32191
32407
|
let diffFiles;
|
|
32192
32408
|
let diffAvailable;
|
|
32193
32409
|
if (diff && diff.length > 0) {
|
|
@@ -32203,13 +32419,6 @@ async function runAdversarialReview(opts) {
|
|
|
32203
32419
|
diffAvailable = true;
|
|
32204
32420
|
}
|
|
32205
32421
|
}
|
|
32206
|
-
const { accepted: acGroundedFindings, dropped: acDropped } = filterByAcQuote(rawParsed.findings, story.acceptanceCriteria);
|
|
32207
|
-
if (acDropped.length > 0) {
|
|
32208
|
-
logger?.warn("review", "Adversarial findings dropped: acQuote validation failed", {
|
|
32209
|
-
storyId: story.id,
|
|
32210
|
-
dropped: acDropped.map((d) => ({ file: d.finding.file, issue: d.finding.issue, code: d.code }))
|
|
32211
|
-
});
|
|
32212
|
-
}
|
|
32213
32422
|
const adversarialDropAnalysis = acDropped.map((d) => ({
|
|
32214
32423
|
finding: {
|
|
32215
32424
|
file: d.finding.file ?? "<unknown>",
|
|
@@ -32223,10 +32432,6 @@ async function runAdversarialReview(opts) {
|
|
|
32223
32432
|
rawCategory: d.finding.category ?? "",
|
|
32224
32433
|
counterfactual: analyzeStructuralCounterfactual({ acIndex: d.finding.acIndex, category: d.finding.category, file: d.finding.file }, story.acceptanceCriteria, diffFiles)
|
|
32225
32434
|
}));
|
|
32226
|
-
const parsed = { ...rawParsed, findings: acGroundedFindings };
|
|
32227
|
-
const threshold = blockingThresholdEffective;
|
|
32228
|
-
const blockingFindings = parsed.findings.filter((f) => isBlockingSeverity(f.severity, threshold));
|
|
32229
|
-
const advisoryFindings = parsed.findings.filter((f) => !isBlockingSeverity(f.severity, threshold));
|
|
32230
32435
|
const adversarialAcceptAnalysis = blockingFindings.map((f) => ({
|
|
32231
32436
|
finding: {
|
|
32232
32437
|
file: f.file,
|
|
@@ -32249,11 +32454,11 @@ async function runAdversarialReview(opts) {
|
|
|
32249
32454
|
}))
|
|
32250
32455
|
});
|
|
32251
32456
|
}
|
|
32457
|
+
const durationMs = Date.now() - startTime;
|
|
32252
32458
|
if (blockingFindings.length > 0) {
|
|
32253
|
-
const durationMs2 = Date.now() - startTime;
|
|
32254
32459
|
logger?.warn("review", `Adversarial review failed: ${blockingFindings.length} blocking findings`, {
|
|
32255
32460
|
storyId: story.id,
|
|
32256
|
-
durationMs
|
|
32461
|
+
durationMs,
|
|
32257
32462
|
findings: blockingFindings.map((f) => ({
|
|
32258
32463
|
severity: f.severity,
|
|
32259
32464
|
category: f.category,
|
|
@@ -32274,72 +32479,37 @@ async function runAdversarialReview(opts) {
|
|
|
32274
32479
|
blockingThreshold: threshold,
|
|
32275
32480
|
result: {
|
|
32276
32481
|
passed: false,
|
|
32277
|
-
findings: llmFindingsToReviewFindings(
|
|
32482
|
+
findings: llmFindingsToReviewFindings(allFindings, { source: "adversarial-review" })
|
|
32278
32483
|
},
|
|
32279
32484
|
advisoryFindings: advisoryFindings.length > 0 ? llmFindingsToReviewFindings(advisoryFindings, { source: "adversarial-review" }) : undefined,
|
|
32280
32485
|
diffAvailable,
|
|
32281
32486
|
adversarialDropAnalysis,
|
|
32282
32487
|
adversarialAcceptAnalysis
|
|
32283
32488
|
});
|
|
32489
|
+
const output = blockingFindings.length > 0 ? `Adversarial review failed:
|
|
32490
|
+
|
|
32491
|
+
${formatFindings(blockingFindings)}` : "Adversarial review failed (no findings)";
|
|
32284
32492
|
return {
|
|
32285
32493
|
check: "adversarial",
|
|
32286
32494
|
success: false,
|
|
32287
32495
|
command: "",
|
|
32288
32496
|
exitCode: 1,
|
|
32289
|
-
output
|
|
32290
|
-
|
|
32291
|
-
|
|
32292
|
-
durationMs: durationMs2,
|
|
32293
|
-
findings: toAdversarialReviewFindings(blockingFindings),
|
|
32497
|
+
output,
|
|
32498
|
+
durationMs,
|
|
32499
|
+
findings: blockingFindings.length > 0 ? toAdversarialReviewFindings(blockingFindings) : undefined,
|
|
32294
32500
|
advisoryFindings: advisoryFindings.length > 0 ? toAdversarialReviewFindings(advisoryFindings) : undefined,
|
|
32295
32501
|
cost: llmCost
|
|
32296
32502
|
};
|
|
32297
32503
|
}
|
|
32298
|
-
if (!
|
|
32299
|
-
|
|
32300
|
-
const durationMs3 = Date.now() - startTime;
|
|
32301
|
-
logger?.warn("review", "Adversarial review fail-closed: blocking findings dropped as ungrounded", {
|
|
32302
|
-
storyId: story.id,
|
|
32303
|
-
durationMs: durationMs3,
|
|
32304
|
-
droppedCount: acDropped.length,
|
|
32305
|
-
dropCodes: acDropped.map((d) => d.code)
|
|
32306
|
-
});
|
|
32307
|
-
const dropSummary = acDropped.map((d, i) => `${i + 1}. [${d.code}] ${d.finding.file ?? "<unknown>"}: ${d.finding.issue}`).join(`
|
|
32308
|
-
`);
|
|
32309
|
-
recordAdversarialAudit({
|
|
32310
|
-
runtime,
|
|
32311
|
-
workdir,
|
|
32312
|
-
projectDir,
|
|
32313
|
-
storyId: story.id,
|
|
32314
|
-
featureName,
|
|
32315
|
-
parsed: true,
|
|
32316
|
-
failOpen: false,
|
|
32317
|
-
passed: false,
|
|
32318
|
-
blockingThreshold: threshold,
|
|
32319
|
-
result: { passed: false, findings: [] },
|
|
32320
|
-
advisoryFindings: advisoryFindings.length > 0 ? llmFindingsToReviewFindings(advisoryFindings, { source: "adversarial-review" }) : undefined,
|
|
32321
|
-
diffAvailable,
|
|
32322
|
-
adversarialDropAnalysis,
|
|
32323
|
-
adversarialAcceptAnalysis: []
|
|
32324
|
-
});
|
|
32325
|
-
return {
|
|
32326
|
-
check: "adversarial",
|
|
32327
|
-
success: false,
|
|
32328
|
-
command: "",
|
|
32329
|
-
exitCode: 1,
|
|
32330
|
-
output: `Adversarial review failed: ${acDropped.length} blocking finding(s) dropped as ungrounded \u2014 the model emitted "passed: false" with concerns it could not ground in any acceptance criterion. Either re-classify these as "info" upstream or extend the ACs. Drops:
|
|
32331
|
-
|
|
32332
|
-
${dropSummary}`,
|
|
32333
|
-
durationMs: durationMs3,
|
|
32334
|
-
advisoryFindings: advisoryFindings.length > 0 ? toAdversarialReviewFindings(advisoryFindings) : undefined,
|
|
32335
|
-
cost: llmCost
|
|
32336
|
-
};
|
|
32337
|
-
}
|
|
32338
|
-
const durationMs2 = Date.now() - startTime;
|
|
32339
|
-
logger?.info("review", "Adversarial review passed (all findings below blocking threshold)", {
|
|
32504
|
+
if (!opResult.passed && acDropped.length > 0) {
|
|
32505
|
+
logger?.warn("review", "Adversarial review fail-closed: blocking findings dropped as ungrounded", {
|
|
32340
32506
|
storyId: story.id,
|
|
32341
|
-
durationMs
|
|
32507
|
+
durationMs,
|
|
32508
|
+
droppedCount: acDropped.length,
|
|
32509
|
+
dropCodes: acDropped.map((d) => d.code)
|
|
32342
32510
|
});
|
|
32511
|
+
const dropSummary = acDropped.map((d, i) => `${i + 1}. [${d.code}] ${d.finding.file ?? "<unknown>"}: ${d.finding.issue}`).join(`
|
|
32512
|
+
`);
|
|
32343
32513
|
recordAdversarialAudit({
|
|
32344
32514
|
runtime,
|
|
32345
32515
|
workdir,
|
|
@@ -32348,12 +32518,9 @@ ${dropSummary}`,
|
|
|
32348
32518
|
featureName,
|
|
32349
32519
|
parsed: true,
|
|
32350
32520
|
failOpen: false,
|
|
32351
|
-
passed:
|
|
32521
|
+
passed: false,
|
|
32352
32522
|
blockingThreshold: threshold,
|
|
32353
|
-
result: {
|
|
32354
|
-
passed: true,
|
|
32355
|
-
findings: llmFindingsToReviewFindings(parsed.findings, { source: "adversarial-review" })
|
|
32356
|
-
},
|
|
32523
|
+
result: { passed: false, findings: [] },
|
|
32357
32524
|
advisoryFindings: advisoryFindings.length > 0 ? llmFindingsToReviewFindings(advisoryFindings, { source: "adversarial-review" }) : undefined,
|
|
32358
32525
|
diffAvailable,
|
|
32359
32526
|
adversarialDropAnalysis,
|
|
@@ -32361,19 +32528,18 @@ ${dropSummary}`,
|
|
|
32361
32528
|
});
|
|
32362
32529
|
return {
|
|
32363
32530
|
check: "adversarial",
|
|
32364
|
-
success:
|
|
32531
|
+
success: false,
|
|
32365
32532
|
command: "",
|
|
32366
|
-
exitCode:
|
|
32367
|
-
output:
|
|
32368
|
-
|
|
32533
|
+
exitCode: 1,
|
|
32534
|
+
output: `Adversarial review failed: ${acDropped.length} blocking finding(s) dropped as ungrounded \u2014 the model emitted "passed: false" with concerns it could not ground in any acceptance criterion. Drops:
|
|
32535
|
+
|
|
32536
|
+
${dropSummary}`,
|
|
32537
|
+
durationMs,
|
|
32369
32538
|
advisoryFindings: advisoryFindings.length > 0 ? toAdversarialReviewFindings(advisoryFindings) : undefined,
|
|
32370
32539
|
cost: llmCost
|
|
32371
32540
|
};
|
|
32372
32541
|
}
|
|
32373
|
-
|
|
32374
|
-
if (parsed.passed) {
|
|
32375
|
-
logger?.info("review", "Adversarial review passed", { storyId: story.id, durationMs });
|
|
32376
|
-
}
|
|
32542
|
+
logger?.info("review", "Adversarial review passed", { storyId: story.id, durationMs });
|
|
32377
32543
|
recordAdversarialAudit({
|
|
32378
32544
|
runtime,
|
|
32379
32545
|
workdir,
|
|
@@ -32382,23 +32548,23 @@ ${dropSummary}`,
|
|
|
32382
32548
|
featureName,
|
|
32383
32549
|
parsed: true,
|
|
32384
32550
|
failOpen: false,
|
|
32385
|
-
passed:
|
|
32551
|
+
passed: true,
|
|
32386
32552
|
blockingThreshold: threshold,
|
|
32387
32553
|
result: {
|
|
32388
|
-
passed:
|
|
32389
|
-
findings: llmFindingsToReviewFindings(
|
|
32554
|
+
passed: true,
|
|
32555
|
+
findings: llmFindingsToReviewFindings(allFindings, { source: "adversarial-review" })
|
|
32390
32556
|
},
|
|
32391
32557
|
advisoryFindings: advisoryFindings.length > 0 ? llmFindingsToReviewFindings(advisoryFindings, { source: "adversarial-review" }) : undefined,
|
|
32392
32558
|
diffAvailable,
|
|
32393
32559
|
adversarialDropAnalysis,
|
|
32394
|
-
adversarialAcceptAnalysis
|
|
32560
|
+
adversarialAcceptAnalysis: []
|
|
32395
32561
|
});
|
|
32396
32562
|
return {
|
|
32397
32563
|
check: "adversarial",
|
|
32398
|
-
success:
|
|
32564
|
+
success: true,
|
|
32399
32565
|
command: "",
|
|
32400
|
-
exitCode:
|
|
32401
|
-
output:
|
|
32566
|
+
exitCode: 0,
|
|
32567
|
+
output: allFindings.length === 0 ? "Adversarial review passed" : "Adversarial review passed (all findings were advisory \u2014 below blocking threshold)",
|
|
32402
32568
|
durationMs,
|
|
32403
32569
|
advisoryFindings: advisoryFindings.length > 0 ? toAdversarialReviewFindings(advisoryFindings) : undefined,
|
|
32404
32570
|
cost: llmCost
|
|
@@ -32413,13 +32579,11 @@ var init_adversarial = __esm(() => {
|
|
|
32413
32579
|
init_adversarial_review();
|
|
32414
32580
|
init_call();
|
|
32415
32581
|
init_test_runners();
|
|
32416
|
-
init_ac_quote_validator();
|
|
32417
32582
|
init_ac_structural_counterfactual();
|
|
32418
32583
|
init_adversarial_helpers();
|
|
32419
32584
|
init_diff_utils();
|
|
32420
32585
|
init_finding_projection();
|
|
32421
32586
|
init_review_audit();
|
|
32422
|
-
init_semantic_evidence();
|
|
32423
32587
|
_adversarialDeps = {
|
|
32424
32588
|
writeReviewAudit,
|
|
32425
32589
|
callOp
|
|
@@ -35003,57 +35167,10 @@ var init_acceptance_fix = __esm(() => {
|
|
|
35003
35167
|
};
|
|
35004
35168
|
});
|
|
35005
35169
|
|
|
35006
|
-
// src/review/requote-response.ts
|
|
35007
|
-
/**
 * Extracts a canonical `{ file, line, observed }` record from requote output.
 *
 * Accepted shapes, tried in order:
 *   1. the parsed object itself is canonical;
 *   2. the parsed object has a `findings` array with exactly one record, whose
 *      `verifiedBy` (preferred) or the record itself is canonical.
 *
 * @param {string} output - Raw output handed to `tryParseLLMJson`.
 * @returns {{file: string, line: (number|undefined), observed: string}|null}
 *   The canonical record, or `null` when no accepted shape is found.
 */
function parseRequoteResponse(output) {
  const payload = tryParseLLMJson(output);
  if (!isRecord(payload)) {
    return null;
  }
  const topLevel = extractCanonical(payload);
  if (topLevel) {
    return topLevel;
  }
  const { findings: list } = payload;
  // Only a single-element findings array is accepted.
  const sole = Array.isArray(list) && list.length === 1 ? list[0] : null;
  if (!isRecord(sole)) {
    return null;
  }
  const nested = extractCanonical(sole.verifiedBy);
  return nested ?? extractCanonical(sole);
}
|
|
35022
|
-
/**
 * Validates `value` as a canonical evidence record and returns a cleaned copy.
 *
 * Requirements: `value` is a record, `file` and `observed` are strings, `file`
 * is non-empty after trimming, and `line` is acceptable to `coerceLine`.
 *
 * @param {unknown} value - Candidate record.
 * @returns {{file: string, line: (number|undefined), observed: string}|null}
 *   The cleaned record (trimmed `file`, coerced `line`), or `null` if invalid.
 */
function extractCanonical(value) {
  if (!isRecord(value)) {
    return null;
  }
  const hasStringFields = typeof value.file === "string" && typeof value.observed === "string";
  if (!hasStringFields) {
    return null;
  }
  const file3 = value.file.trim();
  if (!file3) {
    return null;
  }
  const line = coerceLine(value.line);
  // `null` signals an invalid line; `undefined` means no line was given.
  if (line === null) {
    return null;
  }
  return { file: file3, line, observed: value.observed };
}
|
|
35039
|
-
/**
 * Coerces a loosely-typed line value.
 *
 * @param {unknown} value - Candidate line value.
 * @returns {number|undefined|null}
 *   `undefined` when the value is null/undefined (no line given), the value
 *   itself when it is a number, the base-10 integer when it is a digits-only
 *   string, and `null` for anything else (invalid).
 */
function coerceLine(value) {
  if (value === null || value === undefined) {
    return undefined;
  }
  switch (typeof value) {
    case "number":
      return value;
    case "string":
      return /^\d+$/.test(value) ? Number.parseInt(value, 10) : null;
    default:
      return null;
  }
}
|
|
35048
|
-
/**
 * Tells whether `value` is a non-null object that is not an array.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isRecord(value) {
  if (value === null) {
    return false;
  }
  if (Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
}
|
|
35051
|
-
// No-op initializer: calling it does nothing (presumably because the requote-response module has nothing to set up — confirm).
var init_requote_response = () => {};
|
|
35052
|
-
|
|
35053
35170
|
// src/operations/semantic-review.ts
|
|
35054
35171
|
async function requoteBlockingFindings(findings, ctx) {
|
|
35055
35172
|
const threshold = ctx.input.blockingThreshold ?? "error";
|
|
35056
|
-
const maxRequotes = ctx.input.semanticConfig.substantiation?.maxRequotes ??
|
|
35173
|
+
const maxRequotes = ctx.input.semanticConfig.substantiation?.maxRequotes ?? DEFAULT_MAX_REQUOTES2;
|
|
35057
35174
|
const requoteEnabled = ctx.input.semanticConfig.substantiation?.requote ?? true;
|
|
35058
35175
|
if (ctx.input.mode !== "ref" || !requoteEnabled || maxRequotes <= 0) {
|
|
35059
35176
|
return { findings, changed: false, extraCostUsd: 0 };
|
|
@@ -35120,7 +35237,7 @@ async function requoteBlockingFindings(findings, ctx) {
|
|
|
35120
35237
|
}
|
|
35121
35238
|
return { findings: next, changed, extraCostUsd };
|
|
35122
35239
|
}
|
|
35123
|
-
var FAIL_OPEN2, SEMANTIC_REQUOTE_RECOVERED_EVENT = "review.semantic.finding.requote_recovered", SEMANTIC_REQUOTE_FAILED_EVENT = "review.semantic.finding.requote_failed",
|
|
35240
|
+
var FAIL_OPEN2, SEMANTIC_REQUOTE_RECOVERED_EVENT = "review.semantic.finding.requote_recovered", SEMANTIC_REQUOTE_FAILED_EVENT = "review.semantic.finding.requote_failed", DEFAULT_MAX_REQUOTES2 = 5, semanticReviewHopBody = async (initialPrompt, ctx) => {
|
|
35124
35241
|
const turn = await ctx.sendWithParseRetry(initialPrompt);
|
|
35125
35242
|
const parsed = validateLLMShape(tryParseLLMJson(turn.output));
|
|
35126
35243
|
if (!parsed)
|
|
@@ -35140,10 +35257,9 @@ var init_semantic_review = __esm(() => {
|
|
|
35140
35257
|
init_config();
|
|
35141
35258
|
init_logger2();
|
|
35142
35259
|
init_prompts();
|
|
35260
|
+
init_finding_filters();
|
|
35143
35261
|
init_requote_response();
|
|
35144
|
-
|
|
35145
|
-
init_semantic_helpers();
|
|
35146
|
-
FAIL_OPEN2 = { passed: true, findings: [], failOpen: true };
|
|
35262
|
+
FAIL_OPEN2 = { passed: true, findings: [], normalizedFindings: [], failOpen: true };
|
|
35147
35263
|
semanticReviewOp = {
|
|
35148
35264
|
kind: "run",
|
|
35149
35265
|
name: "semantic-review",
|
|
@@ -35160,6 +35276,7 @@ var init_semantic_review = __esm(() => {
|
|
|
35160
35276
|
invalid: () => ReviewPromptBuilder.jsonRetry(),
|
|
35161
35277
|
truncated: () => ReviewPromptBuilder.jsonRetryCondensed({ blockingThreshold: input.blockingThreshold })
|
|
35162
35278
|
},
|
|
35279
|
+
exhaustedFallback: (lastOutput) => /"passed"\s*:\s*false/.test(lastOutput) ? { passed: false, findings: [], normalizedFindings: [], looksLikeFail: true } : FAIL_OPEN2,
|
|
35163
35280
|
logContext: { blockingThreshold: input.blockingThreshold ?? "error" }
|
|
35164
35281
|
}),
|
|
35165
35282
|
hopBody: semanticReviewHopBody,
|
|
@@ -35181,11 +35298,36 @@ var init_semantic_review = __esm(() => {
|
|
|
35181
35298
|
parse(output, _input, _ctx) {
|
|
35182
35299
|
const raw = tryParseLLMJson(output);
|
|
35183
35300
|
const parsed = validateLLMShape(raw);
|
|
35184
|
-
if (parsed)
|
|
35185
|
-
return {
|
|
35186
|
-
|
|
35187
|
-
|
|
35301
|
+
if (parsed) {
|
|
35302
|
+
return {
|
|
35303
|
+
passed: parsed.passed,
|
|
35304
|
+
findings: parsed.findings,
|
|
35305
|
+
normalizedFindings: []
|
|
35306
|
+
};
|
|
35307
|
+
}
|
|
35308
|
+
if (/"passed"\s*:\s*false/.test(output)) {
|
|
35309
|
+
return { passed: false, findings: [], normalizedFindings: [], looksLikeFail: true };
|
|
35310
|
+
}
|
|
35188
35311
|
return FAIL_OPEN2;
|
|
35312
|
+
},
|
|
35313
|
+
async verify(parsed, input, _verifyCtx) {
|
|
35314
|
+
if (parsed.failOpen || parsed.looksLikeFail)
|
|
35315
|
+
return parsed;
|
|
35316
|
+
if (parsed.findings.length === 0)
|
|
35317
|
+
return parsed;
|
|
35318
|
+
const threshold = input.blockingThreshold ?? "error";
|
|
35319
|
+
const findings = parsed.findings;
|
|
35320
|
+
const sanitized = sanitizeRefModeFindings(findings, input.mode, threshold);
|
|
35321
|
+
const substantiated = await substantiateSemanticEvidence(sanitized, input.mode, input.workdir, input.story.id, threshold);
|
|
35322
|
+
const { accepted } = filterByAcGroundingMinimal(substantiated, input.story.acceptanceCriteria);
|
|
35323
|
+
const blocking = accepted.filter((f) => isBlockingSeverity(f.severity, threshold));
|
|
35324
|
+
const passed = parsed.passed && blocking.length === 0;
|
|
35325
|
+
return {
|
|
35326
|
+
...parsed,
|
|
35327
|
+
passed,
|
|
35328
|
+
findings: accepted,
|
|
35329
|
+
normalizedFindings: toReviewFindings(blocking)
|
|
35330
|
+
};
|
|
35189
35331
|
}
|
|
35190
35332
|
};
|
|
35191
35333
|
});
|
|
@@ -39360,30 +39502,21 @@ async function runSemanticReview(opts) {
|
|
|
39360
39502
|
durationMs: Date.now() - startTime
|
|
39361
39503
|
};
|
|
39362
39504
|
}
|
|
39363
|
-
const parsed = { passed: opResult.passed, findings: opResult.findings };
|
|
39364
|
-
const sanitizedFindings = await substantiateSemanticEvidence(sanitizeRefModeFindings(parsed.findings, diffMode, blockingThreshold ?? "error"), diffMode, workdir, story.id, blockingThreshold ?? "error");
|
|
39365
|
-
const { accepted: acGroundedFindings, dropped: acDropped } = filterByAcGroundingMinimal(sanitizedFindings, story.acceptanceCriteria);
|
|
39366
|
-
if (acDropped.length > 0) {
|
|
39367
|
-
logger?.warn("review", "Semantic findings dropped: acIndex missing or out of range", {
|
|
39368
|
-
storyId: story.id,
|
|
39369
|
-
dropped: acDropped.map((d) => ({ file: d.finding.file, issue: d.finding.issue, code: d.code }))
|
|
39370
|
-
});
|
|
39371
|
-
}
|
|
39372
|
-
const sanitizedParsed = { ...parsed, findings: acGroundedFindings };
|
|
39373
39505
|
const threshold = blockingThreshold ?? "error";
|
|
39374
|
-
const
|
|
39375
|
-
const
|
|
39506
|
+
const allFindings = opResult.findings;
|
|
39507
|
+
const blockingFindings = allFindings.filter((f) => isBlockingSeverity(f.severity, threshold));
|
|
39508
|
+
const advisoryFindings = allFindings.filter((f) => !isBlockingSeverity(f.severity, threshold));
|
|
39376
39509
|
if (advisoryFindings.length > 0) {
|
|
39377
39510
|
logger?.debug("review", `Semantic review: ${advisoryFindings.length} advisory findings (below threshold '${threshold}')`, {
|
|
39378
39511
|
storyId: story.id,
|
|
39379
39512
|
findings: advisoryFindings.map((f) => ({ severity: f.severity, file: f.file, issue: f.issue }))
|
|
39380
39513
|
});
|
|
39381
39514
|
}
|
|
39382
|
-
|
|
39383
|
-
|
|
39515
|
+
const durationMs = Date.now() - startTime;
|
|
39516
|
+
if (blockingFindings.length > 0) {
|
|
39384
39517
|
logger?.warn("review", `Semantic review failed: ${blockingFindings.length} blocking findings`, {
|
|
39385
39518
|
storyId: story.id,
|
|
39386
|
-
durationMs
|
|
39519
|
+
durationMs
|
|
39387
39520
|
});
|
|
39388
39521
|
logger?.debug("review", "Semantic review findings", {
|
|
39389
39522
|
storyId: story.id,
|
|
@@ -39410,7 +39543,7 @@ ${formatFindings2(blockingFindings)}`;
|
|
|
39410
39543
|
blockingThreshold: threshold,
|
|
39411
39544
|
result: {
|
|
39412
39545
|
passed: false,
|
|
39413
|
-
findings: llmFindingsToReviewFindings(
|
|
39546
|
+
findings: llmFindingsToReviewFindings(allFindings, { source: "semantic-review" })
|
|
39414
39547
|
},
|
|
39415
39548
|
advisoryFindings: advisoryFindings.length > 0 ? llmFindingsToReviewFindings(advisoryFindings, { source: "semantic-review" }) : undefined
|
|
39416
39549
|
});
|
|
@@ -39420,53 +39553,16 @@ ${formatFindings2(blockingFindings)}`;
|
|
|
39420
39553
|
command: "",
|
|
39421
39554
|
exitCode: 1,
|
|
39422
39555
|
output,
|
|
39423
|
-
durationMs
|
|
39556
|
+
durationMs,
|
|
39424
39557
|
findings: toReviewFindings(blockingFindings),
|
|
39425
39558
|
advisoryFindings: advisoryFindings.length > 0 ? toReviewFindings(advisoryFindings) : undefined,
|
|
39426
39559
|
cost: llmCost
|
|
39427
39560
|
};
|
|
39428
39561
|
}
|
|
39429
|
-
if (!
|
|
39430
|
-
|
|
39431
|
-
const durationMs3 = Date.now() - startTime;
|
|
39432
|
-
logger?.warn("review", "Semantic review fail-closed: blocking findings dropped (acIndex invalid)", {
|
|
39433
|
-
storyId: story.id,
|
|
39434
|
-
durationMs: durationMs3,
|
|
39435
|
-
droppedCount: acDropped.length,
|
|
39436
|
-
dropCodes: acDropped.map((d) => d.code)
|
|
39437
|
-
});
|
|
39438
|
-
const dropSummary = acDropped.map((d, i) => `${i + 1}. [${d.code}] ${d.finding.file ?? "<unknown>"}: ${d.finding.issue}`).join(`
|
|
39439
|
-
`);
|
|
39440
|
-
recordSemanticAudit({
|
|
39441
|
-
runtime,
|
|
39442
|
-
workdir,
|
|
39443
|
-
projectDir,
|
|
39444
|
-
storyId: story.id,
|
|
39445
|
-
featureName,
|
|
39446
|
-
parsed: true,
|
|
39447
|
-
failOpen: false,
|
|
39448
|
-
passed: false,
|
|
39449
|
-
blockingThreshold: threshold,
|
|
39450
|
-
result: { passed: false, findings: [] },
|
|
39451
|
-
advisoryFindings: advisoryFindings.length > 0 ? llmFindingsToReviewFindings(advisoryFindings, { source: "semantic-review" }) : undefined
|
|
39452
|
-
});
|
|
39453
|
-
return {
|
|
39454
|
-
check: "semantic",
|
|
39455
|
-
success: false,
|
|
39456
|
-
command: "",
|
|
39457
|
-
exitCode: 1,
|
|
39458
|
-
output: `Semantic review failed: ${acDropped.length} blocking finding(s) dropped \u2014 acIndex was missing or out of range. The model emitted "passed: false" without valid AC attribution. Either re-classify these as "info" or ensure each error finding includes a valid acIndex. Drops:
|
|
39459
|
-
|
|
39460
|
-
${dropSummary}`,
|
|
39461
|
-
durationMs: durationMs3,
|
|
39462
|
-
advisoryFindings: advisoryFindings.length > 0 ? toReviewFindings(advisoryFindings) : undefined,
|
|
39463
|
-
cost: llmCost
|
|
39464
|
-
};
|
|
39465
|
-
}
|
|
39466
|
-
const durationMs2 = Date.now() - startTime;
|
|
39467
|
-
logger?.info("review", "Semantic review passed (all findings below blocking threshold)", {
|
|
39562
|
+
if (!opResult.passed && allFindings.length === 0) {
|
|
39563
|
+
logger?.warn("review", "Semantic review fail-closed: blocking findings dropped (acIndex invalid)", {
|
|
39468
39564
|
storyId: story.id,
|
|
39469
|
-
durationMs
|
|
39565
|
+
durationMs
|
|
39470
39566
|
});
|
|
39471
39567
|
recordSemanticAudit({
|
|
39472
39568
|
runtime,
|
|
@@ -39476,29 +39572,23 @@ ${dropSummary}`,
|
|
|
39476
39572
|
featureName,
|
|
39477
39573
|
parsed: true,
|
|
39478
39574
|
failOpen: false,
|
|
39479
|
-
passed:
|
|
39575
|
+
passed: false,
|
|
39480
39576
|
blockingThreshold: threshold,
|
|
39481
|
-
result: {
|
|
39482
|
-
passed: true,
|
|
39483
|
-
findings: llmFindingsToReviewFindings(sanitizedParsed.findings, { source: "semantic-review" })
|
|
39484
|
-
},
|
|
39577
|
+
result: { passed: false, findings: [] },
|
|
39485
39578
|
advisoryFindings: advisoryFindings.length > 0 ? llmFindingsToReviewFindings(advisoryFindings, { source: "semantic-review" }) : undefined
|
|
39486
39579
|
});
|
|
39487
39580
|
return {
|
|
39488
39581
|
check: "semantic",
|
|
39489
|
-
success:
|
|
39582
|
+
success: false,
|
|
39490
39583
|
command: "",
|
|
39491
|
-
exitCode:
|
|
39492
|
-
output:
|
|
39493
|
-
durationMs
|
|
39584
|
+
exitCode: 1,
|
|
39585
|
+
output: 'Semantic review failed: blocking finding(s) were dropped \u2014 acIndex was missing or out of range. The model emitted "passed: false" without valid AC attribution.',
|
|
39586
|
+
durationMs,
|
|
39494
39587
|
advisoryFindings: advisoryFindings.length > 0 ? toReviewFindings(advisoryFindings) : undefined,
|
|
39495
39588
|
cost: llmCost
|
|
39496
39589
|
};
|
|
39497
39590
|
}
|
|
39498
|
-
|
|
39499
|
-
if (sanitizedParsed.passed) {
|
|
39500
|
-
logger?.info("review", "Semantic review passed", { storyId: story.id, durationMs });
|
|
39501
|
-
}
|
|
39591
|
+
logger?.info("review", "Semantic review passed", { storyId: story.id, durationMs });
|
|
39502
39592
|
recordSemanticAudit({
|
|
39503
39593
|
runtime,
|
|
39504
39594
|
workdir,
|
|
@@ -39507,20 +39597,20 @@ ${dropSummary}`,
|
|
|
39507
39597
|
featureName,
|
|
39508
39598
|
parsed: true,
|
|
39509
39599
|
failOpen: false,
|
|
39510
|
-
passed:
|
|
39600
|
+
passed: true,
|
|
39511
39601
|
blockingThreshold: threshold,
|
|
39512
39602
|
result: {
|
|
39513
|
-
passed:
|
|
39514
|
-
findings: llmFindingsToReviewFindings(
|
|
39603
|
+
passed: true,
|
|
39604
|
+
findings: llmFindingsToReviewFindings(allFindings, { source: "semantic-review" })
|
|
39515
39605
|
},
|
|
39516
39606
|
advisoryFindings: advisoryFindings.length > 0 ? llmFindingsToReviewFindings(advisoryFindings, { source: "semantic-review" }) : undefined
|
|
39517
39607
|
});
|
|
39518
39608
|
return {
|
|
39519
39609
|
check: "semantic",
|
|
39520
|
-
success:
|
|
39610
|
+
success: true,
|
|
39521
39611
|
command: "",
|
|
39522
|
-
exitCode:
|
|
39523
|
-
output:
|
|
39612
|
+
exitCode: 0,
|
|
39613
|
+
output: allFindings.length === 0 ? "Semantic review passed" : "Semantic review passed (all findings were advisory \u2014 below blocking threshold)",
|
|
39524
39614
|
durationMs,
|
|
39525
39615
|
advisoryFindings: advisoryFindings.length > 0 ? toReviewFindings(advisoryFindings) : undefined,
|
|
39526
39616
|
cost: llmCost
|
|
@@ -39537,12 +39627,10 @@ var init_semantic = __esm(() => {
|
|
|
39537
39627
|
init_semantic_review();
|
|
39538
39628
|
init_prompts();
|
|
39539
39629
|
init_test_runners();
|
|
39540
|
-
init_ac_quote_validator();
|
|
39541
39630
|
init_diff_utils();
|
|
39542
39631
|
init_finding_projection();
|
|
39543
39632
|
init_review_audit();
|
|
39544
39633
|
init_semantic_debate();
|
|
39545
|
-
init_semantic_evidence();
|
|
39546
39634
|
init_semantic_helpers();
|
|
39547
39635
|
_semanticDeps = {
|
|
39548
39636
|
createDebateRunner: (opts) => new DebateRunner(opts),
|
|
@@ -41869,6 +41957,38 @@ async function callOp(ctx, op, input) {
|
|
|
41869
41957
|
const rawOutput = outcome.result.output;
|
|
41870
41958
|
const totalCost = outcome.result.estimatedCostUsd ?? 0;
|
|
41871
41959
|
if (!rawOutput) {
|
|
41960
|
+
if (maxRetriesExceeded) {
|
|
41961
|
+
getSafeLogger()?.error("callop", "Op retry budget exhausted (empty output)", {
|
|
41962
|
+
storyId: ctx.storyId,
|
|
41963
|
+
opName: op.name,
|
|
41964
|
+
site: "run",
|
|
41965
|
+
totalAttempts: MAX_COMPLETE_RETRY_ATTEMPTS + 1
|
|
41966
|
+
});
|
|
41967
|
+
throw new NaxError(`callOp[${op.name}]: CALL_OP_MAX_RETRIES \u2014 exceeded MAX_COMPLETE_RETRY_ATTEMPTS (${MAX_COMPLETE_RETRY_ATTEMPTS})`, "CALL_OP_MAX_RETRIES", { stage: op.stage, storyId: ctx.storyId });
|
|
41968
|
+
}
|
|
41969
|
+
if (retryFallback !== undefined) {
|
|
41970
|
+
if (typeof retryFallback !== "object" || retryFallback === null) {
|
|
41971
|
+
throw new NaxError(`callOp[${op.name}]: exhaustedFallback returned a non-object (${typeof retryFallback}); fallback must be a plain object`, "CALL_OP_INVALID_FALLBACK", { stage: op.stage, storyId: ctx.storyId });
|
|
41972
|
+
}
|
|
41973
|
+
getSafeLogger()?.warn("callop", "Returning exhaustedFallback on empty output", {
|
|
41974
|
+
storyId: ctx.storyId,
|
|
41975
|
+
opName: op.name,
|
|
41976
|
+
agentName: dispatchAgent
|
|
41977
|
+
});
|
|
41978
|
+
return { ...retryFallback, estimatedCostUsd: totalCost };
|
|
41979
|
+
}
|
|
41980
|
+
if (op.recover) {
|
|
41981
|
+
const verifyCtx = makeVerifyCtx(buildCtx);
|
|
41982
|
+
const recovered = await op.recover(input, verifyCtx);
|
|
41983
|
+
if (recovered !== null) {
|
|
41984
|
+
getSafeLogger()?.warn("callop", "Recovered from empty output via op.recover", {
|
|
41985
|
+
storyId: ctx.storyId,
|
|
41986
|
+
opName: op.name,
|
|
41987
|
+
agentName: dispatchAgent
|
|
41988
|
+
});
|
|
41989
|
+
return recovered;
|
|
41990
|
+
}
|
|
41991
|
+
}
|
|
41872
41992
|
throw new NaxError(`callOp[${op.name}]: agent returned no output`, "CALL_OP_NO_OUTPUT", {
|
|
41873
41993
|
stage: op.stage,
|
|
41874
41994
|
storyId: ctx.storyId,
|
|
@@ -52017,12 +52137,16 @@ function phasePassed(opName, output) {
|
|
|
52017
52137
|
});
|
|
52018
52138
|
return true;
|
|
52019
52139
|
}
|
|
52140
|
+
/**
 * Structural check for a single review finding.
 *
 * A value is accepted when it is a non-null object whose `source` property
 * is a non-empty string. No other property is inspected.
 *
 * @param {unknown} value - Candidate entry to test.
 * @returns {boolean} `true` when `value` has the minimal finding shape.
 */
function isFinding(value) {
  return typeof value === "object" && value !== null && typeof value.source === "string" && value.source.length > 0;
}
|
|
52020
52143
|
/**
 * Extracts the findings carried by a phase output.
 *
 * The findings list is read from `output.normalizedFindings` when that is an
 * array, otherwise from `output.findings` when that is an array, otherwise it
 * is empty. Entries that fail `isFinding` are dropped.
 *
 * Whether the phase succeeded is decided in this order:
 *   1. if a `success` key is present, `success === true`;
 *   2. else if a `passed` key is present, `passed === true`;
 *   3. else the phase counts as successful only when no valid findings remain.
 *
 * @param {unknown} output - Raw phase output; non-objects and nullish values
 *   yield an empty result.
 * @returns {Array<object>} The filtered findings when the phase did not
 *   succeed, or an empty array when it did.
 */
function extractPhaseFindings(output) {
  if (output === null || output === undefined || typeof output !== "object") {
    return [];
  }
  const record2 = output;
  // Prefer the normalized list; fall back to the plain one, then to nothing.
  const rawArray = Array.isArray(record2.normalizedFindings)
    ? record2.normalizedFindings
    : Array.isArray(record2.findings)
      ? record2.findings
      : [];
  // Discard malformed entries so callers only ever see well-shaped findings.
  const findings = rawArray.filter(isFinding);
  // An explicit `success` flag wins over `passed`; with neither, infer from the list.
  const success2 = "success" in record2
    ? record2.success === true
    : "passed" in record2
      ? record2.passed === true
      : findings.length === 0;
  return success2 ? [] : findings;
}
|
|
@@ -52650,6 +52774,7 @@ async function assemblePlanInputsFromCtx(ctx) {
|
|
|
52650
52774
|
blockingThreshold: ctx.config.review.blockingThreshold
|
|
52651
52775
|
} : undefined;
|
|
52652
52776
|
const adversarialReviewInput = ctx.config.review?.enabled === true && ctx.config.review.checks?.includes("adversarial") && ctx.config.review.adversarial ? {
|
|
52777
|
+
workdir: ctx.workdir,
|
|
52653
52778
|
story,
|
|
52654
52779
|
adversarialConfig: ctx.config.review.adversarial,
|
|
52655
52780
|
mode: ctx.config.review.adversarial.diffMode,
|
|
@@ -56943,7 +57068,7 @@ var package_default;
|
|
|
56943
57068
|
var init_package = __esm(() => {
|
|
56944
57069
|
package_default = {
|
|
56945
57070
|
name: "@nathapp/nax",
|
|
56946
|
-
version: "0.67.
|
|
57071
|
+
version: "0.67.10",
|
|
56947
57072
|
description: "AI Coding Agent Orchestrator \u2014 loops until done",
|
|
56948
57073
|
type: "module",
|
|
56949
57074
|
bin: {
|
|
@@ -57038,8 +57163,8 @@ var init_version = __esm(() => {
|
|
|
57038
57163
|
NAX_VERSION = package_default.version;
|
|
57039
57164
|
NAX_COMMIT = (() => {
|
|
57040
57165
|
try {
|
|
57041
|
-
if (/^[0-9a-f]{6,10}$/.test("
|
|
57042
|
-
return "
|
|
57166
|
+
if (/^[0-9a-f]{6,10}$/.test("1d0ef5ac"))
|
|
57167
|
+
return "1d0ef5ac";
|
|
57043
57168
|
} catch {}
|
|
57044
57169
|
try {
|
|
57045
57170
|
const result = Bun.spawnSync(["git", "rev-parse", "--short", "HEAD"], {
|