@nathapp/nax 0.67.9 → 0.67.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2):
  1. package/dist/nax.js +563 -438
  2. package/package.json +1 -1
package/dist/nax.js CHANGED
@@ -17224,7 +17224,11 @@ var init_schemas_review = __esm(() => {
17224
17224
  timeoutMs: exports_external.number().int().positive().default(600000),
17225
17225
  excludePatterns: exports_external.array(exports_external.string()).optional(),
17226
17226
  parallel: exports_external.boolean().default(false),
17227
- maxConcurrentSessions: exports_external.number().int().min(1).max(4).default(2)
17227
+ maxConcurrentSessions: exports_external.number().int().min(1).max(4).default(2),
17228
+ substantiation: exports_external.object({
17229
+ requote: exports_external.boolean().default(true),
17230
+ maxRequotes: exports_external.number().int().min(0).default(5)
17231
+ }).optional()
17228
17232
  });
17229
17233
  ReviewConfigSchema = exports_external.object({
17230
17234
  enabled: exports_external.boolean(),
@@ -17450,6 +17454,18 @@ var init_schemas3 = __esm(() => {
17450
17454
  ":!.nax/",
17451
17455
  ":!.nax-pids"
17452
17456
  ]
17457
+ },
17458
+ adversarial: {
17459
+ model: "balanced",
17460
+ diffMode: "ref",
17461
+ rules: [],
17462
+ timeoutMs: 600000,
17463
+ parallel: false,
17464
+ maxConcurrentSessions: 2,
17465
+ substantiation: {
17466
+ requote: true,
17467
+ maxRequotes: 5
17468
+ }
17453
17469
  }
17454
17470
  }),
17455
17471
  plan: PlanConfigSchema.default({
@@ -21922,7 +21938,8 @@ function makeParseRetryStrategy(opts) {
21922
21938
  if (ctx.site === "complete") {
21923
21939
  getSafeLogger()?.warn(opts.reviewerKind, "makeParseRetryStrategy: lastOutput is not populated on complete-kind ops \u2014 retry will never fire", { storyId: ctx.storyId });
21924
21940
  }
21925
- return { retry: false };
21941
+ const fallback = opts.exhaustedFallback ? opts.exhaustedFallback("") : undefined;
21942
+ return { retry: false, ...fallback !== undefined ? { fallback } : {} };
21926
21943
  }
21927
21944
  let parsed;
21928
21945
  try {
@@ -30626,6 +30643,26 @@ ${config2.rules.map((r) => `- ${r}`).join(`
30626
30643
  diffBlock
30627
30644
  ].join("");
30628
30645
  }
30646
+ static requoteVerbatim(opts) {
30647
+ const file3 = opts.finding.verifiedBy?.file ?? opts.finding.file;
30648
+ const line = opts.finding.verifiedBy?.line ?? opts.finding.line;
30649
+ return `Your previous verifiedBy.observed value did not match the referenced file on disk.
30650
+
30651
+ You MUST use your file-reading tool to open ${file3} and copy the actual bytes around line ${line}. Do NOT quote from memory or from the prior conversation \u2014 the previous quote was wrong precisely because it was not read from disk. If you reply without a file-read tool call, the quote will be rejected.
30652
+
30653
+ Return ONLY this JSON object:
30654
+ {"file":"${file3}","line":${line},"observed":"exact 1-3 line quote"}
30655
+
30656
+ Finding issue: ${opts.finding.issue}
30657
+ Referenced file: ${file3}
30658
+ Referenced line: ${line}
30659
+
30660
+ Rules:
30661
+ - Read ${file3} with your file tool first. Then copy observed verbatim from the read result.
30662
+ - observed must be a 1-3 line excerpt that proves the claim, taken from at or near line ${line}.
30663
+ - If after reading the file you cannot find anything that proves the claim, set observed to "".
30664
+ - Do not return a full review. Do not include markdown fences or explanation.`;
30665
+ }
30629
30666
  }
30630
30667
  var ADVERSARIAL_ROLE = `You are an adversarial code reviewer with full access to the repository.
30631
30668
 
@@ -31249,8 +31286,335 @@ var init_adversarial_helpers = __esm(() => {
31249
31286
  init_severity();
31250
31287
  });
31251
31288
 
31289
+ // src/review/semantic-helpers.ts
31290
/**
 * Checks that a decoded LLM reply has the `{ passed: boolean, findings: [] }` shape.
 *
 * @param {unknown} parsed - Decoded reply.
 * @returns {{ passed: boolean, findings: Array }|null} The two fields, or null when the shape is wrong.
 */
function validateLLMShape(parsed) {
  const isObject = parsed !== null && typeof parsed === "object";
  if (!isObject) {
    return null;
  }
  const { passed, findings } = parsed;
  const wellFormed = typeof passed === "boolean" && Array.isArray(findings);
  return wellFormed ? { passed, findings } : null;
}
31300
/**
 * Decodes a raw LLM reply and validates its shape.
 *
 * @param {string} raw - Raw reply text.
 * @returns {{ passed: boolean, findings: Array }|null} The validated reply, or
 *   null when decoding or validation fails (including when either step throws).
 */
function parseLLMResponse(raw) {
  let shaped = null;
  try {
    const decoded = tryParseLLMJson(raw);
    shaped = validateLLMShape(decoded);
  } catch {
    shaped = null;
  }
  return shaped;
}
31307
/**
 * Renders findings as text, one entry per finding, joined by newlines.
 *
 * Each entry is "[severity] file:line — issue" followed by a "Suggestion:" line.
 * The ":line" suffix and the suggestion line are left out when the finding has
 * no line or no suggestion, instead of printing the literal text "undefined".
 *
 * @param {Array<object>} findings - Findings with severity, file, line, issue and suggestion fields.
 * @returns {string} The formatted text ("" for an empty list).
 */
function formatFindings2(findings) {
  return findings.map((f) => {
    const location = f.line == null ? `${f.file}` : `${f.file}:${f.line}`;
    const headline = `[${f.severity}] ${location} \u2014 ${f.issue}`;
    return f.suggestion == null ? headline : `${headline}\nSuggestion: ${f.suggestion}`;
  }).join("\n");
}
31312
/**
 * Maps a severity label from the reviewer onto the known severity set.
 *
 * Matching ignores case and surrounding whitespace, so "ERROR" or " Critical "
 * keeps its level instead of silently falling back to "info". "warn" is an
 * alias for "warning". Anything else, including non-string values, becomes "info".
 *
 * @param {unknown} sev - Severity label as returned by the reviewer.
 * @returns {"critical"|"error"|"warning"|"info"|"low"|"unverifiable"} Normalized severity.
 */
function normalizeSeverity2(sev) {
  if (typeof sev !== "string") {
    return "info";
  }
  const label = sev.trim().toLowerCase();
  if (label === "warn") {
    return "warning";
  }
  const known = ["critical", "error", "warning", "info", "low", "unverifiable"];
  return known.includes(label) ? label : "info";
}
31319
/**
 * In "ref" diff mode, downgrades findings that need it for missing evidence.
 * In any other mode the input array is returned untouched.
 *
 * @param {Array<object>} findings - Reviewer findings.
 * @param {string} diffMode - Review diff mode; only "ref" triggers sanitizing.
 * @param {string} [blockingThreshold="error"] - Severity threshold for blocking findings.
 * @returns {Array<object>} The original array, or a mapped copy in "ref" mode.
 */
function sanitizeRefModeFindings(findings, diffMode, blockingThreshold = "error") {
  if (diffMode === "ref") {
    return findings.map((entry) => {
      if (needsDowngradeForMissingEvidence(entry, blockingThreshold)) {
        return downgradeToUnverifiable(entry);
      }
      return entry;
    });
  }
  return findings;
}
31324
/**
 * Decides whether a finding should be downgraded for lack of evidence.
 * Only blocking findings qualify; among those, one that mentions an unverified
 * source or carries no verified evidence needs the downgrade.
 *
 * @param {object} finding - Reviewer finding.
 * @param {string} blockingThreshold - Severity threshold for blocking findings.
 * @returns {boolean} True when the finding should be downgraded.
 */
function needsDowngradeForMissingEvidence(finding, blockingThreshold) {
  if (isBlockingSeverity(finding.severity, blockingThreshold)) {
    return mentionsUnverifiedSource(finding) || !hasVerifiedEvidence(finding);
  }
  return false;
}
31329
/**
 * Tells whether the finding's issue or suggestion text contains one of the
 * UNVERIFIED_FINDING_PATTERNS phrases (case-insensitive on the finding text).
 *
 * @param {object} finding - Reviewer finding with `issue` and `suggestion`.
 * @returns {boolean} True when any pattern occurs in the combined text.
 */
function mentionsUnverifiedSource(finding) {
  const haystack = `${finding.issue} ${finding.suggestion}`.toLowerCase();
  for (const marker of UNVERIFIED_FINDING_PATTERNS) {
    if (haystack.includes(marker)) {
      return true;
    }
  }
  return false;
}
31333
/**
 * Tells whether a finding carries usable evidence: a `verifiedBy` object whose
 * `file` and `observed` are both non-blank strings.
 *
 * The finding comes from unvalidated LLM JSON, so non-string values (numbers,
 * objects, null) count as missing rather than throwing on `.trim()`.
 *
 * @param {object} finding - Reviewer finding.
 * @returns {boolean} True when both evidence fields are non-blank strings.
 */
function hasVerifiedEvidence(finding) {
  const evidence = finding.verifiedBy;
  const isNonBlank = (value) => typeof value === "string" && value.trim().length > 0;
  return isNonBlank(evidence?.file) && isNonBlank(evidence?.observed);
}
31337
/**
 * Returns a shallow copy of the finding with its severity set to "unverifiable".
 * The input object is not modified.
 *
 * @param {object} finding - Reviewer finding.
 * @returns {object} Copy with `severity: "unverifiable"`.
 */
function downgradeToUnverifiable(finding) {
  const downgraded = { ...finding };
  downgraded.severity = "unverifiable";
  return downgraded;
}
31343
/**
 * Converts one LLM finding into the review finding record used downstream.
 *
 * `verifiedBy`, `acQuote` and `acIndex` are carried over under `meta` when
 * present; `meta` is undefined when none of them is set.
 *
 * @param {object} f - LLM finding (severity, file, line, issue, suggestion, ...).
 * @returns {object} Finding record with source "semantic-review".
 */
function llmFindingToFinding(f) {
  const { verifiedBy, acQuote, acIndex } = f;
  const extras = {
    ...(verifiedBy ? { verifiedBy } : {}),
    ...(acQuote ? { acQuote } : {}),
    ...(acIndex != null ? { acIndex } : {})
  };
  const hasExtras = Object.keys(extras).length > 0;
  return {
    source: "semantic-review",
    severity: normalizeSeverity2(f.severity),
    category: "",
    file: f.file,
    line: f.line,
    message: f.issue,
    suggestion: f.suggestion ?? undefined,
    fixTarget: "source",
    meta: hasExtras ? extras : undefined
  };
}
31363
/**
 * Converts a list of LLM findings with llmFindingToFinding.
 *
 * @param {Array<object>} findings - LLM findings.
 * @returns {Array<object>} Converted finding records, in the same order.
 */
function toReviewFindings(findings) {
  return findings.map((finding) => llmFindingToFinding(finding));
}
31366
// Phrases that mentionsUnverifiedSource() looks for in a finding's lower-cased issue/suggestion text.
// The list is assigned inside the module initializer below, after init_severity() has run.
// NOTE(review): __esm is defined outside this view; presumably it wraps the callback as a lazy, run-once initializer — confirm.
+ var UNVERIFIED_FINDING_PATTERNS;
31367
+ var init_semantic_helpers = __esm(() => {
31368
+ init_severity();
31369
+ UNVERIFIED_FINDING_PATTERNS = [
31370
+ "cannot verify",
31371
+ "can't verify",
31372
+ "from diff alone",
31373
+ "missing from diff",
31374
+ "not found in diff",
31375
+ "not present in diff",
31376
+ "does not appear in diff"
31377
+ ];
31378
+ });
31379
+
31380
+ // src/review/semantic-evidence.ts
31381
+ import { isAbsolute as isAbsolute8 } from "path";
31382
/**
 * In "ref" diff mode, checks each blocking finding's quoted evidence against
 * the file on disk and downgrades those whose quote does not match.
 * In any other mode the input array is returned untouched.
 *
 * Only the "unmatched" status triggers a downgrade; every other evidence
 * status leaves the finding as it is.
 *
 * @param {Array<object>} findings - Reviewer findings.
 * @param {string} diffMode - Review diff mode; only "ref" triggers checking.
 * @param {string} workdir - Directory evidence paths are resolved against.
 * @param {string} storyId - Story id included in the downgrade log entry.
 * @param {string} [blockingThreshold="error"] - Severity threshold for blocking findings.
 * @returns {Promise<Array<object>>} Findings in the same order, some possibly downgraded.
 */
async function substantiateSemanticEvidence(findings, diffMode, workdir, storyId, blockingThreshold = "error") {
  if (diffMode !== "ref") {
    return findings;
  }
  const reviewOne = async (finding) => {
    if (!isBlockingSeverity(finding.severity, blockingThreshold)) {
      return finding;
    }
    const evidence = await checkFindingEvidence({ finding, workdir });
    return evidence.status === "unmatched"
      ? downgradeUnsubstantiatedFinding({ finding, storyId, ...evidence })
      : finding;
  };
  return Promise.all(findings.map((finding) => reviewOne(finding)));
}
31394
/**
 * Checks whether a finding's quoted evidence appears in the cited file.
 *
 * The cited file and line come from `finding.verifiedBy` when present,
 * otherwise from the finding itself. Findings are unvalidated LLM JSON, so
 * non-string `observed`/`file` values count as absent instead of throwing on
 * `.trim()`, and a non-string path is reported as unreadable.
 *
 * @param {{ finding: object, workdir: string }} opts - Finding and base directory.
 * @returns {Promise<{ status: "missing-observed"|"unreadable"|"matched"|"unmatched",
 *   file: *, line: *, observed?: string }>} The outcome plus the resolved location.
 */
async function checkFindingEvidence(opts) {
  const cited = opts.finding.verifiedBy;
  const observed = typeof cited?.observed === "string" ? cited.observed.trim() : "";
  const citedFile = typeof cited?.file === "string" ? cited.file.trim() : "";
  const file3 = citedFile || opts.finding.file;
  const line = cited?.line ?? opts.finding.line;
  if (!observed) {
    return { status: "missing-observed", file: file3, line };
  }
  if (typeof file3 !== "string") {
    // No usable path at all: nothing can be read, so report it like an unreadable file.
    return { status: "unreadable", file: file3, line, observed };
  }
  const contents = await readSafeFile(opts.workdir, file3);
  if (contents === null) {
    return { status: "unreadable", file: file3, line, observed };
  }
  const status = matchesEvidence(contents, observed, line) ? "matched" : "unmatched";
  return { status, file: file3, line, observed };
}
31405
/**
 * Tells whether the quoted text occurs in the file contents.
 *
 * Without a positive line number the whole file is searched. Otherwise the
 * search is limited to EVIDENCE_LINE_WINDOW lines on either side of the cited
 * line, with the cited line clamped into the file's range.
 *
 * @param {string} contents - Full file text.
 * @param {string} observed - Quoted evidence.
 * @param {number} [line] - 1-based cited line.
 * @returns {boolean} True when the normalized quote is found.
 */
function matchesEvidence(contents, observed, line) {
  let haystack = contents;
  if (line && !(line <= 0)) {
    const rows = contents.split("\n");
    const cited = Math.min(Math.max(0, line - 1), rows.length - 1);
    const from = Math.max(0, cited - EVIDENCE_LINE_WINDOW);
    const to = Math.min(rows.length, cited + EVIDENCE_LINE_WINDOW + 1);
    haystack = rows.slice(from, to).join("\n");
  }
  return normalizedIncludes(haystack, observed);
}
31418
/**
 * Logs a warning about an unsubstantiated finding and returns a copy of it
 * with severity "unverifiable".
 *
 * The log entry carries the story id, an event name (defaulting to
 * SEMANTIC_FINDING_DOWNGRADED_EVENT), the cited file and line, and truncated
 * previews of the issue and the observed quote. Nothing is logged when the
 * logger is unavailable.
 *
 * @param {{ finding: object, storyId: *, event?: string, file?: *, line?: *, observed?: string }} opts
 * @returns {object} Copy of the finding with `severity: "unverifiable"`.
 */
function downgradeUnsubstantiatedFinding(opts) {
  const { finding } = opts;
  const logger = _evidenceDeps.getLogger();
  if (logger != null) {
    logger.warn("review", "Downgraded unsubstantiated review finding", {
      storyId: opts.storyId,
      event: opts.event ?? SEMANTIC_FINDING_DOWNGRADED_EVENT,
      file: opts.file ?? finding.verifiedBy?.file ?? finding.file,
      line: opts.line ?? finding.verifiedBy?.line ?? finding.line,
      issue: finding.issue?.slice(0, ISSUE_PREVIEW_CHARS),
      observed: opts.observed?.slice(0, OBSERVED_PREVIEW_CHARS)
    });
  }
  return { ...finding, severity: "unverifiable" };
}
31429
/**
 * Reads the text of a file cited by a finding.
 *
 * The path is first validated against `workdir`; when that succeeds the
 * validated absolute path is read. Otherwise the path is read as given, but
 * only if it is absolute. Any read failure yields null.
 *
 * NOTE(review): the absolute-path fallback reads a reviewer-supplied path that
 * did not pass validation — confirm this is intended.
 *
 * @param {string} workdir - Base directory used for path validation.
 * @param {string} file3 - Path cited by the finding.
 * @returns {Promise<string|null>} File text, or null when it cannot be read.
 */
async function readSafeFile(workdir, file3) {
  const check = validateModulePath(file3, [workdir]);
  let target = null;
  if (check.valid && check.absolutePath) {
    target = check.absolutePath;
  } else if (isAbsolute8(file3)) {
    target = file3;
  }
  if (target === null) {
    return null;
  }
  try {
    return await Bun.file(target).text();
  } catch {
    return null;
  }
}
31447
/**
 * Substring test on normalized text. A quote that is empty after
 * normalization never matches.
 *
 * @param {string} contents - Text to search in.
 * @param {string} observed - Quote to look for.
 * @returns {boolean} True when the normalized quote occurs in the normalized contents.
 */
function normalizedIncludes(contents, observed) {
  const needle = normalizeEvidenceText(observed);
  if (needle.length === 0) {
    return false;
  }
  return normalizeEvidenceText(contents).includes(needle);
}
31451
/**
 * Normalizes text for evidence comparison: strips wrapping quotes, collapses
 * every whitespace run to one space, and trims the ends.
 *
 * @param {string} text - Raw text.
 * @returns {string} Normalized text.
 */
function normalizeEvidenceText(text) {
  const unwrapped = stripWrappingQuotes(text);
  const collapsed = unwrapped.replace(/\s+/g, " ");
  return collapsed.trim();
}
31454
/**
 * Trims the text and repeatedly removes a matching pair of wrapping quote
 * characters (backtick, double quote or single quote), trimming after each
 * removal.
 *
 * @param {string} text - Raw text.
 * @returns {string} Text without surrounding whitespace or wrapping quote pairs.
 */
function stripWrappingQuotes(text) {
  const wrappers = "`\"'";
  let current = text.trim();
  for (;;) {
    if (current.length < 2) {
      break;
    }
    const head = current[0];
    const tail = current[current.length - 1];
    // Same test as isMatchingWrapper: both ends must be the same quote character.
    if (head !== tail || !wrappers.includes(head)) {
      break;
    }
    current = current.slice(1, -1).trim();
  }
  return current;
}
31461
/**
 * Tells whether two characters are the same quote character: backtick,
 * double quote or single quote.
 *
 * @param {string} first - Leading character.
 * @param {string} last - Trailing character.
 * @returns {boolean} True for a matching quote pair.
 */
function isMatchingWrapper(first, last) {
  if (first !== last) {
    return false;
  }
  return first === "`" || first === `"` || first === "'";
}
31464
// Module-level values for src/review/semantic-evidence.ts:
//  - OBSERVED_PREVIEW_CHARS / ISSUE_PREVIEW_CHARS: slice lengths used for the log previews in downgradeUnsubstantiatedFinding().
//  - EVIDENCE_LINE_WINDOW: number of lines searched on each side of the cited line in matchesEvidence().
//  - *_FINDING_DOWNGRADED_EVENT: event names written to the downgrade log entry.
//  - _evidenceDeps: holder for the logger accessor, assigned in the initializer below.
// NOTE(review): __esm is defined outside this view; presumably it wraps the callback as a lazy, run-once initializer — confirm.
+ var OBSERVED_PREVIEW_CHARS = 160, ISSUE_PREVIEW_CHARS = 200, EVIDENCE_LINE_WINDOW = 10, SEMANTIC_FINDING_DOWNGRADED_EVENT = "review.semantic.finding.downgraded", ADVERSARIAL_FINDING_DOWNGRADED_EVENT = "review.adversarial.finding.downgraded", _evidenceDeps;
31465
+ var init_semantic_evidence = __esm(() => {
31466
+ init_logger2();
31467
+ init_path_security2();
31468
+ init_semantic_helpers();
31469
+ _evidenceDeps = {
31470
+ getLogger: getSafeLogger
31471
+ };
31472
+ });
31473
+
31474
+ // src/review/finding-filters.ts
31475
/**
 * Checks each blocking adversarial finding's quoted evidence and downgrades
 * those whose quote is missing or does not match the cited file.
 *
 * Unlike the semantic variant, a finding with no observed quote
 * ("missing-observed") is downgraded as well. Downgrades are logged under
 * ADVERSARIAL_FINDING_DOWNGRADED_EVENT.
 *
 * @param {{ findings: Array<object>, workdir: string, storyId: *, blockingThreshold: string }} opts
 * @returns {Promise<Array<object>>} Findings in the same order, some possibly downgraded.
 */
async function substantiateAdversarialFindings(opts) {
  const { findings, workdir, storyId, blockingThreshold } = opts;
  const reviewOne = async (finding) => {
    if (!isBlockingSeverity(finding.severity, blockingThreshold)) {
      return finding;
    }
    const evidence = await checkFindingEvidence({ finding, workdir });
    const unsubstantiated = evidence.status === "unmatched" || evidence.status === "missing-observed";
    if (!unsubstantiated) {
      return finding;
    }
    const { file, line, observed } = evidence;
    return downgradeUnsubstantiatedFinding({
      finding,
      storyId,
      event: ADVERSARIAL_FINDING_DOWNGRADED_EVENT,
      file,
      line,
      observed
    });
  };
  return Promise.all(findings.map((finding) => reviewOne(finding)));
}
31493
// Module initializer for src/review/finding-filters.ts: invokes the initializers of the modules it depends on, in this order.
// NOTE(review): init_semantic_evidence() is invoked twice; presumably harmless if __esm initializers run only once — confirm.
+ var init_finding_filters = __esm(() => {
31494
+ init_adversarial_helpers();
31495
+ init_semantic_evidence();
31496
+ init_semantic_helpers();
31497
+ init_semantic_evidence();
31498
+ init_ac_quote_validator();
31499
+ });
31500
+
31501
+ // src/review/requote-response.ts
31502
/**
 * Extracts a `{ file, line, observed }` quote from the reply to a requote prompt.
 *
 * Accepted shapes, tried in order:
 *  1. the canonical object itself;
 *  2. a review-shaped reply with exactly one finding, using that finding's
 *     `verifiedBy` object or, failing that, the finding's own fields.
 *
 * The decoder call is guarded, as parseLLMResponse does, so a reply that makes
 * the decoder throw counts as a failed requote (null) instead of aborting the
 * caller's loop.
 *
 * @param {string} output - Raw reply text.
 * @returns {{ file: string, line: (number|undefined), observed: string }|null}
 *   The quote, or null when the reply is not usable.
 */
function parseRequoteResponse(output) {
  let parsed;
  try {
    parsed = tryParseLLMJson(output);
  } catch {
    // Undecodable reply: the caller treats null as "requote failed" and downgrades the finding.
    return null;
  }
  if (!isRecord(parsed)) {
    return null;
  }
  const canonical = extractCanonical(parsed);
  if (canonical) {
    return canonical;
  }
  const findings = parsed.findings;
  if (!Array.isArray(findings) || findings.length !== 1) {
    return null;
  }
  const finding = findings[0];
  if (!isRecord(finding)) {
    return null;
  }
  return extractCanonical(finding.verifiedBy) ?? extractCanonical(finding);
}
31517
/**
 * Reads a `{ file, line, observed }` quote out of a decoded value.
 *
 * `file` and `observed` must be strings and `file` must be non-blank; `file`
 * is returned trimmed and `observed` unchanged. `line` goes through
 * coerceLine, and a line that cannot be coerced rejects the whole value.
 *
 * @param {unknown} value - Decoded candidate object.
 * @returns {{ file: string, line: (number|undefined), observed: string }|null}
 */
function extractCanonical(value) {
  if (!isRecord(value)) {
    return null;
  }
  const { file: rawFile, observed } = value;
  if (typeof rawFile !== "string" || typeof observed !== "string") {
    return null;
  }
  const file3 = rawFile.trim();
  if (file3 === "") {
    return null;
  }
  const line = coerceLine(value.line);
  return line === null ? null : { file: file3, line, observed };
}
31534
/**
 * Coerces a decoded `line` value.
 *
 * @param {unknown} value - Raw line value.
 * @returns {number|undefined|null} The number as given; the parsed integer for
 *   an all-digit string; undefined when the value is null or undefined; null
 *   for anything else.
 */
function coerceLine(value) {
  switch (typeof value) {
    case "number":
      return value;
    case "string":
      return /^\d+$/.test(value) ? Number.parseInt(value, 10) : null;
    case "undefined":
      return undefined;
    default:
      // typeof null is "object": null means "absent", every other type is invalid.
      return value === null ? undefined : null;
  }
}
31543
/**
 * Tells whether a value is a non-null, non-array object.
 *
 * @param {unknown} value - Value to test.
 * @returns {boolean} True for plain-object-like values.
 */
function isRecord(value) {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
}
31546
// Initializer for src/review/requote-response.ts; it is an empty function, so calling it does nothing.
+ var init_requote_response = () => {};
31547
+
31252
31548
  // src/operations/adversarial-review.ts
31253
- var FAIL_OPEN, adversarialParseRetry = (input) => makeParseRetryStrategy({
31549
/**
 * Gives the reviewer a chance to re-quote evidence for blocking findings whose
 * quote did not match the cited file.
 *
 * Runs only in "ref" mode with requoting enabled and a positive requote
 * budget. Findings are handled one at a time, in order. For each blocking
 * finding with "unmatched" evidence a requote prompt is sent through
 * `ctx.send`; the finding is then either updated with the new quote (when it
 * matches) or downgraded to "unverifiable". Once the budget is used up the
 * remaining findings are left as they are.
 *
 * @param {Array<object>} findings - Findings from the reviewer's reply.
 * @param {object} ctx - Hop context providing `input` and `send`.
 * @returns {Promise<{ findings: Array<object>, changed: boolean, extraCostUsd: number }>}
 *   The findings (a new array once the loop runs), whether any entry was
 *   replaced, and the summed estimated cost of the requote turns.
 */
async function requoteBlockingAdversarialFindings(findings, ctx) {
  const { input } = ctx;
  const threshold = input.blockingThreshold ?? "error";
  const substantiation = input.adversarialConfig.substantiation;
  const maxRequotes = substantiation?.maxRequotes ?? DEFAULT_MAX_REQUOTES;
  const requoteEnabled = substantiation?.requote ?? true;
  const skipRequote = input.mode !== "ref" || !requoteEnabled || maxRequotes <= 0;
  if (skipRequote) {
    return { findings, changed: false, extraCostUsd: 0 };
  }

  const next = Array.from(findings);
  const storyId = input.story.id;
  let changed = false;
  let extraCostUsd = 0;
  let used = 0;

  for (let index = 0; index < next.length; index += 1) {
    const finding = next[index];
    if (!isBlockingSeverity(finding.severity, threshold)) {
      continue;
    }
    const initialEvidence = await checkFindingEvidence({ finding, workdir: input.workdir });
    if (initialEvidence.status !== "unmatched") {
      continue;
    }
    if (used >= maxRequotes) {
      break;
    }
    used += 1;

    const retry = await ctx.send(AdversarialReviewPromptBuilder.requoteVerbatim({ finding }));
    extraCostUsd += retry.estimatedCostUsd ?? 0;
    // Every outcome below replaces next[index], so the result counts as changed from here on.
    changed = true;

    const requote = parseRequoteResponse(retry.output);
    if (!requote) {
      next[index] = downgradeUnsubstantiatedFinding({
        finding,
        storyId,
        event: ADVERSARIAL_REQUOTE_FAILED_EVENT,
        ...initialEvidence
      });
      continue;
    }

    const { file, line, observed } = requote;
    const updatedFinding = { ...finding, verifiedBy: { file, line, observed } };
    const requotedEvidence = await checkFindingEvidence({
      finding: updatedFinding,
      workdir: input.workdir
    });

    if (requotedEvidence.status !== "matched") {
      next[index] = downgradeUnsubstantiatedFinding({
        finding: updatedFinding,
        storyId,
        event: ADVERSARIAL_REQUOTE_FAILED_EVENT,
        file: requotedEvidence.file,
        line: requotedEvidence.line,
        observed: requotedEvidence.observed
      });
      continue;
    }

    getSafeLogger()?.info("review", "Recovered adversarial finding via same-session requote", {
      storyId,
      event: ADVERSARIAL_REQUOTE_RECOVERED_EVENT,
      file: requotedEvidence.file,
      line: requotedEvidence.line
    });
    next[index] = updatedFinding;
  }

  return { findings: next, changed, extraCostUsd };
}
31617
+ var FAIL_OPEN, ADVERSARIAL_REQUOTE_RECOVERED_EVENT = "review.adversarial.finding.requote_recovered", ADVERSARIAL_REQUOTE_FAILED_EVENT = "review.adversarial.finding.requote_failed", DEFAULT_MAX_REQUOTES = 5, adversarialParseRetry = (input) => makeParseRetryStrategy({
31254
31618
  validate: (parsed) => validateAdversarialShape(parsed) !== null,
31255
31619
  reviewerKind: "adversarial",
31256
31620
  maxAttempts: 2,
@@ -31258,15 +31622,24 @@ var FAIL_OPEN, adversarialParseRetry = (input) => makeParseRetryStrategy({
31258
31622
  invalid: () => ReviewPromptBuilder.jsonRetry(),
31259
31623
  truncated: () => ReviewPromptBuilder.jsonRetryCondensed({ blockingThreshold: input.blockingThreshold })
31260
31624
  },
31261
- exhaustedFallback: (lastOutput) => /"passed"\s*:\s*false/.test(lastOutput) ? { passed: false, findings: [], looksLikeFail: true } : FAIL_OPEN,
31625
+ exhaustedFallback: (lastOutput) => /"passed"\s*:\s*false/.test(lastOutput) ? { passed: false, findings: [], normalizedFindings: [], acDropped: [], looksLikeFail: true } : FAIL_OPEN,
31262
31626
  logContext: { blockingThreshold: input.blockingThreshold ?? "error" }
31263
31627
  }), adversarialReviewOp;
31264
31628
  var init_adversarial_review = __esm(() => {
31265
31629
  init_retry();
31266
31630
  init_config();
31631
+ init_logger2();
31267
31632
  init_prompts();
31268
31633
  init_adversarial_helpers();
31269
- FAIL_OPEN = { passed: true, findings: [], failOpen: true };
31634
+ init_finding_filters();
31635
+ init_requote_response();
31636
+ FAIL_OPEN = {
31637
+ passed: true,
31638
+ findings: [],
31639
+ normalizedFindings: [],
31640
+ acDropped: [],
31641
+ failOpen: true
31642
+ };
31270
31643
  adversarialReviewOp = {
31271
31644
  kind: "run",
31272
31645
  name: "adversarial-review",
@@ -31276,6 +31649,21 @@ var init_adversarial_review = __esm(() => {
31276
31649
  model: (input) => input.adversarialConfig.model,
31277
31650
  timeoutMs: (input) => input.adversarialConfig.timeoutMs,
31278
31651
  retry: (input) => adversarialParseRetry(input),
31652
+ async hopBody(initialPrompt, ctx) {
31653
+ const turn = await ctx.sendWithParseRetry(initialPrompt);
31654
+ const parsed = validateAdversarialShape(tryParseLLMJson(turn.output));
31655
+ if (!parsed)
31656
+ return turn;
31657
+ const requoted = await requoteBlockingAdversarialFindings(parsed.findings, ctx);
31658
+ if (!requoted.changed)
31659
+ return turn;
31660
+ const passed = !requoted.findings.some((finding) => isBlockingSeverity(finding.severity, ctx.input.blockingThreshold ?? "error"));
31661
+ return {
31662
+ ...turn,
31663
+ output: JSON.stringify({ passed, findings: requoted.findings }),
31664
+ estimatedCostUsd: (turn.estimatedCostUsd ?? 0) + requoted.extraCostUsd
31665
+ };
31666
+ },
31279
31667
  build(input, _ctx) {
31280
31668
  const base = new AdversarialReviewPromptBuilder().buildAdversarialReviewPrompt(input.story, input.adversarialConfig, {
31281
31669
  mode: input.mode,
@@ -31298,12 +31686,42 @@ var init_adversarial_review = __esm(() => {
31298
31686
  parse(output, _input, _ctx) {
31299
31687
  const raw = tryParseLLMJson(output);
31300
31688
  const parsed = validateAdversarialShape(raw);
31301
- if (parsed)
31302
- return { passed: parsed.passed, findings: parsed.findings };
31689
+ if (parsed) {
31690
+ return {
31691
+ passed: parsed.passed,
31692
+ findings: parsed.findings,
31693
+ normalizedFindings: [],
31694
+ acDropped: []
31695
+ };
31696
+ }
31303
31697
  if (/"passed"\s*:\s*false/.test(output) && !/"findings"\s*:\s*\[\s*\{/.test(output)) {
31304
- return { passed: false, findings: [], looksLikeFail: true };
31698
+ return { passed: false, findings: [], normalizedFindings: [], acDropped: [], looksLikeFail: true };
31305
31699
  }
31306
31700
  throw new ParseValidationError("[adversarial-review] parse failed: invalid JSON shape");
31701
+ },
31702
+ async verify(parsed, input, _verifyCtx) {
31703
+ if (parsed.failOpen || parsed.looksLikeFail)
31704
+ return parsed;
31705
+ if (parsed.findings.length === 0)
31706
+ return parsed;
31707
+ const threshold = input.blockingThreshold ?? "error";
31708
+ const findings = parsed.findings;
31709
+ const substantiated = await substantiateAdversarialFindings({
31710
+ findings,
31711
+ workdir: input.workdir,
31712
+ storyId: input.story.id,
31713
+ blockingThreshold: threshold
31714
+ });
31715
+ const { accepted, dropped } = filterByAcQuote(substantiated, input.story.acceptanceCriteria);
31716
+ const blocking = accepted.filter((f) => isBlockingSeverity(f.severity, threshold));
31717
+ const passed = parsed.passed && blocking.length === 0;
31718
+ return {
31719
+ ...parsed,
31720
+ passed,
31721
+ findings: accepted,
31722
+ normalizedFindings: toAdversarialReviewFindings(blocking),
31723
+ acDropped: dropped
31724
+ };
31307
31725
  }
31308
31726
  };
31309
31727
  });
@@ -31738,191 +32156,6 @@ var init_review_audit = __esm(() => {
31738
32156
  };
31739
32157
  });
31740
32158
 
31741
- // src/review/semantic-helpers.ts
31742
- function validateLLMShape(parsed) {
31743
- if (typeof parsed !== "object" || parsed === null)
31744
- return null;
31745
- const obj = parsed;
31746
- if (typeof obj.passed !== "boolean")
31747
- return null;
31748
- if (!Array.isArray(obj.findings))
31749
- return null;
31750
- return { passed: obj.passed, findings: obj.findings };
31751
- }
31752
- function parseLLMResponse(raw) {
31753
- try {
31754
- return validateLLMShape(tryParseLLMJson(raw));
31755
- } catch {
31756
- return null;
31757
- }
31758
- }
31759
- function formatFindings2(findings) {
31760
- return findings.map((f) => `[${f.severity}] ${f.file}:${f.line} \u2014 ${f.issue}
31761
- Suggestion: ${f.suggestion}`).join(`
31762
- `);
31763
- }
31764
- function normalizeSeverity2(sev) {
31765
- if (sev === "warn")
31766
- return "warning";
31767
- if (sev === "critical" || sev === "error" || sev === "warning" || sev === "info" || sev === "low" || sev === "unverifiable")
31768
- return sev;
31769
- return "info";
31770
- }
31771
- function sanitizeRefModeFindings(findings, diffMode, blockingThreshold = "error") {
31772
- if (diffMode !== "ref")
31773
- return findings;
31774
- return findings.map((finding) => needsDowngradeForMissingEvidence(finding, blockingThreshold) ? downgradeToUnverifiable(finding) : finding);
31775
- }
31776
- function needsDowngradeForMissingEvidence(finding, blockingThreshold) {
31777
- if (!isBlockingSeverity(finding.severity, blockingThreshold))
31778
- return false;
31779
- return mentionsUnverifiedSource(finding) || !hasVerifiedEvidence(finding);
31780
- }
31781
- function mentionsUnverifiedSource(finding) {
31782
- const text = `${finding.issue} ${finding.suggestion}`.toLowerCase();
31783
- return UNVERIFIED_FINDING_PATTERNS.some((pattern) => text.includes(pattern));
31784
- }
31785
- function hasVerifiedEvidence(finding) {
31786
- const evidence = finding.verifiedBy;
31787
- return !!evidence?.file?.trim() && !!evidence.observed?.trim();
31788
- }
31789
- function downgradeToUnverifiable(finding) {
31790
- return {
31791
- ...finding,
31792
- severity: "unverifiable"
31793
- };
31794
- }
31795
- function llmFindingToFinding(f) {
31796
- const metaExtras = {};
31797
- if (f.verifiedBy)
31798
- metaExtras.verifiedBy = f.verifiedBy;
31799
- if (f.acQuote)
31800
- metaExtras.acQuote = f.acQuote;
31801
- if (f.acIndex != null)
31802
- metaExtras.acIndex = f.acIndex;
31803
- return {
31804
- source: "semantic-review",
31805
- severity: normalizeSeverity2(f.severity),
31806
- category: "",
31807
- file: f.file,
31808
- line: f.line,
31809
- message: f.issue,
31810
- suggestion: f.suggestion ?? undefined,
31811
- fixTarget: "source",
31812
- meta: Object.keys(metaExtras).length > 0 ? metaExtras : undefined
31813
- };
31814
- }
31815
- function toReviewFindings(findings) {
31816
- return findings.map(llmFindingToFinding);
31817
- }
31818
- var UNVERIFIED_FINDING_PATTERNS;
31819
- var init_semantic_helpers = __esm(() => {
31820
- init_severity();
31821
- UNVERIFIED_FINDING_PATTERNS = [
31822
- "cannot verify",
31823
- "can't verify",
31824
- "from diff alone",
31825
- "missing from diff",
31826
- "not found in diff",
31827
- "not present in diff",
31828
- "does not appear in diff"
31829
- ];
31830
- });
31831
-
31832
- // src/review/semantic-evidence.ts
31833
- import { isAbsolute as isAbsolute8 } from "path";
31834
- async function substantiateSemanticEvidence(findings, diffMode, workdir, storyId, blockingThreshold = "error") {
31835
- if (diffMode !== "ref")
31836
- return findings;
31837
- return Promise.all(findings.map(async (finding) => {
31838
- if (!isBlockingSeverity(finding.severity, blockingThreshold))
31839
- return finding;
31840
- const evidence = await checkFindingEvidence({ finding, workdir });
31841
- if (evidence.status !== "unmatched")
31842
- return finding;
31843
- return downgradeUnsubstantiatedFinding({ finding, storyId, ...evidence });
31844
- }));
31845
- }
31846
- async function checkFindingEvidence(opts) {
31847
- const observed = opts.finding.verifiedBy?.observed?.trim();
31848
- const file3 = opts.finding.verifiedBy?.file?.trim() || opts.finding.file;
31849
- const line = opts.finding.verifiedBy?.line ?? opts.finding.line;
31850
- if (!observed)
31851
- return { status: "missing-observed", file: file3, line };
31852
- const contents = await readSafeFile(opts.workdir, file3);
31853
- if (contents === null)
31854
- return { status: "unreadable", file: file3, line, observed };
31855
- return matchesEvidence(contents, observed, line) ? { status: "matched", file: file3, line, observed } : { status: "unmatched", file: file3, line, observed };
31856
- }
31857
- function matchesEvidence(contents, observed, line) {
31858
- if (!line || line <= 0) {
31859
- return normalizedIncludes(contents, observed);
31860
- }
31861
- const lines = contents.split(`
31862
- `);
31863
- const cited = Math.min(Math.max(0, line - 1), lines.length - 1);
31864
- const start = Math.max(0, cited - EVIDENCE_LINE_WINDOW);
31865
- const end = Math.min(lines.length, cited + EVIDENCE_LINE_WINDOW + 1);
31866
- const windowText = lines.slice(start, end).join(`
31867
- `);
31868
- return normalizedIncludes(windowText, observed);
31869
- }
31870
- function downgradeUnsubstantiatedFinding(opts) {
31871
- _evidenceDeps.getLogger()?.warn("review", "Downgraded unsubstantiated review finding", {
31872
- storyId: opts.storyId,
31873
- event: opts.event ?? SEMANTIC_FINDING_DOWNGRADED_EVENT,
31874
- file: opts.file ?? opts.finding.verifiedBy?.file ?? opts.finding.file,
31875
- line: opts.line ?? opts.finding.verifiedBy?.line ?? opts.finding.line,
31876
- issue: opts.finding.issue?.slice(0, ISSUE_PREVIEW_CHARS),
31877
- observed: opts.observed?.slice(0, OBSERVED_PREVIEW_CHARS)
31878
- });
31879
- return { ...opts.finding, severity: "unverifiable" };
31880
- }
31881
- async function readSafeFile(workdir, file3) {
31882
- const validated = validateModulePath(file3, [workdir]);
31883
- if (validated.valid && validated.absolutePath) {
31884
- try {
31885
- return await Bun.file(validated.absolutePath).text();
31886
- } catch {
31887
- return null;
31888
- }
31889
- }
31890
- if (isAbsolute8(file3)) {
31891
- try {
31892
- return await Bun.file(file3).text();
31893
- } catch {
31894
- return null;
31895
- }
31896
- }
31897
- return null;
31898
- }
31899
- function normalizedIncludes(contents, observed) {
31900
- const normalizedObserved = normalizeEvidenceText(observed);
31901
- return normalizedObserved.length > 0 && normalizeEvidenceText(contents).includes(normalizedObserved);
31902
- }
31903
- function normalizeEvidenceText(text) {
31904
- return stripWrappingQuotes(text).replace(/\s+/g, " ").trim();
31905
- }
31906
- function stripWrappingQuotes(text) {
31907
- let trimmed = text.trim();
31908
- while (trimmed.length >= 2 && isMatchingWrapper(trimmed[0], trimmed[trimmed.length - 1])) {
31909
- trimmed = trimmed.slice(1, -1).trim();
31910
- }
31911
- return trimmed;
31912
- }
31913
- function isMatchingWrapper(first, last) {
31914
- return first === "`" && last === "`" || first === `"` && last === `"` || first === "'" && last === "'";
31915
- }
31916
- var OBSERVED_PREVIEW_CHARS = 160, ISSUE_PREVIEW_CHARS = 200, EVIDENCE_LINE_WINDOW = 10, SEMANTIC_FINDING_DOWNGRADED_EVENT = "review.semantic.finding.downgraded", ADVERSARIAL_FINDING_DOWNGRADED_EVENT = "review.adversarial.finding.downgraded", _evidenceDeps;
31917
- var init_semantic_evidence = __esm(() => {
31918
- init_logger2();
31919
- init_path_security2();
31920
- init_semantic_helpers();
31921
- _evidenceDeps = {
31922
- getLogger: getSafeLogger
31923
- };
31924
- });
31925
-
31926
32159
  // src/review/adversarial.ts
31927
32160
  import { relative as relative7, sep } from "path";
31928
32161
  function recordAdversarialAudit(opts) {
@@ -31955,7 +32188,6 @@ async function runAdversarialReview(opts) {
31955
32188
  agentManager,
31956
32189
  config: naxConfig,
31957
32190
  featureName,
31958
- priorFailures,
31959
32191
  blockingThreshold,
31960
32192
  featureContextMarkdown,
31961
32193
  contextBundle,
@@ -32076,13 +32308,13 @@ async function runAdversarialReview(opts) {
32076
32308
  let opResult;
32077
32309
  try {
32078
32310
  opResult = await _adversarialDeps.callOp(callCtx, adversarialReviewOp, {
32311
+ workdir,
32079
32312
  story,
32080
32313
  adversarialConfig,
32081
32314
  mode: diffMode,
32082
32315
  diff,
32083
32316
  storyGitRef: effectiveRef,
32084
32317
  stat,
32085
- priorFailures,
32086
32318
  testInventory,
32087
32319
  excludePatterns: adversarialConfig.excludePatterns,
32088
32320
  testGlobs: resolvedTestPatterns.globs,
@@ -32167,27 +32399,11 @@ async function runAdversarialReview(opts) {
32167
32399
  durationMs: Date.now() - startTime
32168
32400
  };
32169
32401
  }
32170
- const rawParsedRaw = {
32171
- passed: opResult.passed,
32172
- findings: opResult.findings
32173
- };
32174
- const blockingThresholdEffective = blockingThreshold ?? "error";
32175
- const substantiatedFindings = await Promise.all(rawParsedRaw.findings.map(async (finding) => {
32176
- if (!isBlockingSeverity(finding.severity, blockingThresholdEffective))
32177
- return finding;
32178
- const evidence = await checkFindingEvidence({ finding, workdir });
32179
- if (evidence.status !== "unmatched" && evidence.status !== "missing-observed")
32180
- return finding;
32181
- return downgradeUnsubstantiatedFinding({
32182
- finding,
32183
- storyId: story.id,
32184
- event: ADVERSARIAL_FINDING_DOWNGRADED_EVENT,
32185
- file: evidence.file,
32186
- line: evidence.line,
32187
- observed: evidence.observed
32188
- });
32189
- }));
32190
- const rawParsed = { ...rawParsedRaw, findings: substantiatedFindings };
32402
+ const threshold = blockingThreshold ?? "error";
32403
+ const allFindings = opResult.findings;
32404
+ const blockingFindings = allFindings.filter((f) => isBlockingSeverity(f.severity, threshold));
32405
+ const advisoryFindings = allFindings.filter((f) => !isBlockingSeverity(f.severity, threshold));
32406
+ const acDropped = opResult.acDropped ?? [];
32191
32407
  let diffFiles;
32192
32408
  let diffAvailable;
32193
32409
  if (diff && diff.length > 0) {
@@ -32203,13 +32419,6 @@ async function runAdversarialReview(opts) {
32203
32419
  diffAvailable = true;
32204
32420
  }
32205
32421
  }
32206
- const { accepted: acGroundedFindings, dropped: acDropped } = filterByAcQuote(rawParsed.findings, story.acceptanceCriteria);
32207
- if (acDropped.length > 0) {
32208
- logger?.warn("review", "Adversarial findings dropped: acQuote validation failed", {
32209
- storyId: story.id,
32210
- dropped: acDropped.map((d) => ({ file: d.finding.file, issue: d.finding.issue, code: d.code }))
32211
- });
32212
- }
32213
32422
  const adversarialDropAnalysis = acDropped.map((d) => ({
32214
32423
  finding: {
32215
32424
  file: d.finding.file ?? "<unknown>",
@@ -32223,10 +32432,6 @@ async function runAdversarialReview(opts) {
32223
32432
  rawCategory: d.finding.category ?? "",
32224
32433
  counterfactual: analyzeStructuralCounterfactual({ acIndex: d.finding.acIndex, category: d.finding.category, file: d.finding.file }, story.acceptanceCriteria, diffFiles)
32225
32434
  }));
32226
- const parsed = { ...rawParsed, findings: acGroundedFindings };
32227
- const threshold = blockingThresholdEffective;
32228
- const blockingFindings = parsed.findings.filter((f) => isBlockingSeverity(f.severity, threshold));
32229
- const advisoryFindings = parsed.findings.filter((f) => !isBlockingSeverity(f.severity, threshold));
32230
32435
  const adversarialAcceptAnalysis = blockingFindings.map((f) => ({
32231
32436
  finding: {
32232
32437
  file: f.file,
@@ -32249,11 +32454,11 @@ async function runAdversarialReview(opts) {
32249
32454
  }))
32250
32455
  });
32251
32456
  }
32457
+ const durationMs = Date.now() - startTime;
32252
32458
  if (blockingFindings.length > 0) {
32253
- const durationMs2 = Date.now() - startTime;
32254
32459
  logger?.warn("review", `Adversarial review failed: ${blockingFindings.length} blocking findings`, {
32255
32460
  storyId: story.id,
32256
- durationMs: durationMs2,
32461
+ durationMs,
32257
32462
  findings: blockingFindings.map((f) => ({
32258
32463
  severity: f.severity,
32259
32464
  category: f.category,
@@ -32274,72 +32479,37 @@ async function runAdversarialReview(opts) {
32274
32479
  blockingThreshold: threshold,
32275
32480
  result: {
32276
32481
  passed: false,
32277
- findings: llmFindingsToReviewFindings(parsed.findings, { source: "adversarial-review" })
32482
+ findings: llmFindingsToReviewFindings(allFindings, { source: "adversarial-review" })
32278
32483
  },
32279
32484
  advisoryFindings: advisoryFindings.length > 0 ? llmFindingsToReviewFindings(advisoryFindings, { source: "adversarial-review" }) : undefined,
32280
32485
  diffAvailable,
32281
32486
  adversarialDropAnalysis,
32282
32487
  adversarialAcceptAnalysis
32283
32488
  });
32489
+ const output = blockingFindings.length > 0 ? `Adversarial review failed:
32490
+
32491
+ ${formatFindings(blockingFindings)}` : "Adversarial review failed (no findings)";
32284
32492
  return {
32285
32493
  check: "adversarial",
32286
32494
  success: false,
32287
32495
  command: "",
32288
32496
  exitCode: 1,
32289
- output: `Adversarial review failed:
32290
-
32291
- ${formatFindings(blockingFindings)}`,
32292
- durationMs: durationMs2,
32293
- findings: toAdversarialReviewFindings(blockingFindings),
32497
+ output,
32498
+ durationMs,
32499
+ findings: blockingFindings.length > 0 ? toAdversarialReviewFindings(blockingFindings) : undefined,
32294
32500
  advisoryFindings: advisoryFindings.length > 0 ? toAdversarialReviewFindings(advisoryFindings) : undefined,
32295
32501
  cost: llmCost
32296
32502
  };
32297
32503
  }
32298
- if (!parsed.passed && blockingFindings.length === 0) {
32299
- if (acDropped.length > 0) {
32300
- const durationMs3 = Date.now() - startTime;
32301
- logger?.warn("review", "Adversarial review fail-closed: blocking findings dropped as ungrounded", {
32302
- storyId: story.id,
32303
- durationMs: durationMs3,
32304
- droppedCount: acDropped.length,
32305
- dropCodes: acDropped.map((d) => d.code)
32306
- });
32307
- const dropSummary = acDropped.map((d, i) => `${i + 1}. [${d.code}] ${d.finding.file ?? "<unknown>"}: ${d.finding.issue}`).join(`
32308
- `);
32309
- recordAdversarialAudit({
32310
- runtime,
32311
- workdir,
32312
- projectDir,
32313
- storyId: story.id,
32314
- featureName,
32315
- parsed: true,
32316
- failOpen: false,
32317
- passed: false,
32318
- blockingThreshold: threshold,
32319
- result: { passed: false, findings: [] },
32320
- advisoryFindings: advisoryFindings.length > 0 ? llmFindingsToReviewFindings(advisoryFindings, { source: "adversarial-review" }) : undefined,
32321
- diffAvailable,
32322
- adversarialDropAnalysis,
32323
- adversarialAcceptAnalysis: []
32324
- });
32325
- return {
32326
- check: "adversarial",
32327
- success: false,
32328
- command: "",
32329
- exitCode: 1,
32330
- output: `Adversarial review failed: ${acDropped.length} blocking finding(s) dropped as ungrounded \u2014 the model emitted "passed: false" with concerns it could not ground in any acceptance criterion. Either re-classify these as "info" upstream or extend the ACs. Drops:
32331
-
32332
- ${dropSummary}`,
32333
- durationMs: durationMs3,
32334
- advisoryFindings: advisoryFindings.length > 0 ? toAdversarialReviewFindings(advisoryFindings) : undefined,
32335
- cost: llmCost
32336
- };
32337
- }
32338
- const durationMs2 = Date.now() - startTime;
32339
- logger?.info("review", "Adversarial review passed (all findings below blocking threshold)", {
32504
+ if (!opResult.passed && acDropped.length > 0) {
32505
+ logger?.warn("review", "Adversarial review fail-closed: blocking findings dropped as ungrounded", {
32340
32506
  storyId: story.id,
32341
- durationMs: durationMs2
32507
+ durationMs,
32508
+ droppedCount: acDropped.length,
32509
+ dropCodes: acDropped.map((d) => d.code)
32342
32510
  });
32511
+ const dropSummary = acDropped.map((d, i) => `${i + 1}. [${d.code}] ${d.finding.file ?? "<unknown>"}: ${d.finding.issue}`).join(`
32512
+ `);
32343
32513
  recordAdversarialAudit({
32344
32514
  runtime,
32345
32515
  workdir,
@@ -32348,12 +32518,9 @@ ${dropSummary}`,
32348
32518
  featureName,
32349
32519
  parsed: true,
32350
32520
  failOpen: false,
32351
- passed: true,
32521
+ passed: false,
32352
32522
  blockingThreshold: threshold,
32353
- result: {
32354
- passed: true,
32355
- findings: llmFindingsToReviewFindings(parsed.findings, { source: "adversarial-review" })
32356
- },
32523
+ result: { passed: false, findings: [] },
32357
32524
  advisoryFindings: advisoryFindings.length > 0 ? llmFindingsToReviewFindings(advisoryFindings, { source: "adversarial-review" }) : undefined,
32358
32525
  diffAvailable,
32359
32526
  adversarialDropAnalysis,
@@ -32361,19 +32528,18 @@ ${dropSummary}`,
32361
32528
  });
32362
32529
  return {
32363
32530
  check: "adversarial",
32364
- success: true,
32531
+ success: false,
32365
32532
  command: "",
32366
- exitCode: 0,
32367
- output: "Adversarial review passed (all findings were advisory \u2014 below blocking threshold)",
32368
- durationMs: durationMs2,
32533
+ exitCode: 1,
32534
+ output: `Adversarial review failed: ${acDropped.length} blocking finding(s) dropped as ungrounded \u2014 the model emitted "passed: false" with concerns it could not ground in any acceptance criterion. Drops:
32535
+
32536
+ ${dropSummary}`,
32537
+ durationMs,
32369
32538
  advisoryFindings: advisoryFindings.length > 0 ? toAdversarialReviewFindings(advisoryFindings) : undefined,
32370
32539
  cost: llmCost
32371
32540
  };
32372
32541
  }
32373
- const durationMs = Date.now() - startTime;
32374
- if (parsed.passed) {
32375
- logger?.info("review", "Adversarial review passed", { storyId: story.id, durationMs });
32376
- }
32542
+ logger?.info("review", "Adversarial review passed", { storyId: story.id, durationMs });
32377
32543
  recordAdversarialAudit({
32378
32544
  runtime,
32379
32545
  workdir,
@@ -32382,23 +32548,23 @@ ${dropSummary}`,
32382
32548
  featureName,
32383
32549
  parsed: true,
32384
32550
  failOpen: false,
32385
- passed: parsed.passed,
32551
+ passed: true,
32386
32552
  blockingThreshold: threshold,
32387
32553
  result: {
32388
- passed: parsed.passed,
32389
- findings: llmFindingsToReviewFindings(parsed.findings, { source: "adversarial-review" })
32554
+ passed: true,
32555
+ findings: llmFindingsToReviewFindings(allFindings, { source: "adversarial-review" })
32390
32556
  },
32391
32557
  advisoryFindings: advisoryFindings.length > 0 ? llmFindingsToReviewFindings(advisoryFindings, { source: "adversarial-review" }) : undefined,
32392
32558
  diffAvailable,
32393
32559
  adversarialDropAnalysis,
32394
- adversarialAcceptAnalysis
32560
+ adversarialAcceptAnalysis: []
32395
32561
  });
32396
32562
  return {
32397
32563
  check: "adversarial",
32398
- success: parsed.passed,
32564
+ success: true,
32399
32565
  command: "",
32400
- exitCode: parsed.passed ? 0 : 1,
32401
- output: parsed.passed ? "Adversarial review passed" : "Adversarial review failed (no findings)",
32566
+ exitCode: 0,
32567
+ output: allFindings.length === 0 ? "Adversarial review passed" : "Adversarial review passed (all findings were advisory \u2014 below blocking threshold)",
32402
32568
  durationMs,
32403
32569
  advisoryFindings: advisoryFindings.length > 0 ? toAdversarialReviewFindings(advisoryFindings) : undefined,
32404
32570
  cost: llmCost
@@ -32413,13 +32579,11 @@ var init_adversarial = __esm(() => {
32413
32579
  init_adversarial_review();
32414
32580
  init_call();
32415
32581
  init_test_runners();
32416
- init_ac_quote_validator();
32417
32582
  init_ac_structural_counterfactual();
32418
32583
  init_adversarial_helpers();
32419
32584
  init_diff_utils();
32420
32585
  init_finding_projection();
32421
32586
  init_review_audit();
32422
- init_semantic_evidence();
32423
32587
  _adversarialDeps = {
32424
32588
  writeReviewAudit,
32425
32589
  callOp
@@ -35003,57 +35167,10 @@ var init_acceptance_fix = __esm(() => {
35003
35167
  };
35004
35168
  });
35005
35169
 
35006
- // src/review/requote-response.ts
35007
- function parseRequoteResponse(output) {
35008
- const parsed = tryParseLLMJson(output);
35009
- if (!isRecord(parsed))
35010
- return null;
35011
- const canonical = extractCanonical(parsed);
35012
- if (canonical)
35013
- return canonical;
35014
- const findings = parsed.findings;
35015
- if (!Array.isArray(findings) || findings.length !== 1)
35016
- return null;
35017
- const finding = findings[0];
35018
- if (!isRecord(finding))
35019
- return null;
35020
- return extractCanonical(finding.verifiedBy) ?? extractCanonical(finding);
35021
- }
35022
- function extractCanonical(value) {
35023
- if (!isRecord(value))
35024
- return null;
35025
- if (typeof value.file !== "string" || typeof value.observed !== "string")
35026
- return null;
35027
- const file3 = value.file.trim();
35028
- if (!file3)
35029
- return null;
35030
- const line = coerceLine(value.line);
35031
- if (line === null)
35032
- return null;
35033
- return {
35034
- file: file3,
35035
- line: line === undefined ? undefined : line,
35036
- observed: value.observed
35037
- };
35038
- }
35039
- function coerceLine(value) {
35040
- if (value == null)
35041
- return;
35042
- if (typeof value === "number")
35043
- return value;
35044
- if (typeof value === "string" && /^\d+$/.test(value))
35045
- return Number.parseInt(value, 10);
35046
- return null;
35047
- }
35048
- function isRecord(value) {
35049
- return typeof value === "object" && value !== null && !Array.isArray(value);
35050
- }
35051
- var init_requote_response = () => {};
35052
-
35053
35170
  // src/operations/semantic-review.ts
35054
35171
  async function requoteBlockingFindings(findings, ctx) {
35055
35172
  const threshold = ctx.input.blockingThreshold ?? "error";
35056
- const maxRequotes = ctx.input.semanticConfig.substantiation?.maxRequotes ?? DEFAULT_MAX_REQUOTES;
35173
+ const maxRequotes = ctx.input.semanticConfig.substantiation?.maxRequotes ?? DEFAULT_MAX_REQUOTES2;
35057
35174
  const requoteEnabled = ctx.input.semanticConfig.substantiation?.requote ?? true;
35058
35175
  if (ctx.input.mode !== "ref" || !requoteEnabled || maxRequotes <= 0) {
35059
35176
  return { findings, changed: false, extraCostUsd: 0 };
@@ -35120,7 +35237,7 @@ async function requoteBlockingFindings(findings, ctx) {
35120
35237
  }
35121
35238
  return { findings: next, changed, extraCostUsd };
35122
35239
  }
35123
- var FAIL_OPEN2, SEMANTIC_REQUOTE_RECOVERED_EVENT = "review.semantic.finding.requote_recovered", SEMANTIC_REQUOTE_FAILED_EVENT = "review.semantic.finding.requote_failed", DEFAULT_MAX_REQUOTES = 5, semanticReviewHopBody = async (initialPrompt, ctx) => {
35240
+ var FAIL_OPEN2, SEMANTIC_REQUOTE_RECOVERED_EVENT = "review.semantic.finding.requote_recovered", SEMANTIC_REQUOTE_FAILED_EVENT = "review.semantic.finding.requote_failed", DEFAULT_MAX_REQUOTES2 = 5, semanticReviewHopBody = async (initialPrompt, ctx) => {
35124
35241
  const turn = await ctx.sendWithParseRetry(initialPrompt);
35125
35242
  const parsed = validateLLMShape(tryParseLLMJson(turn.output));
35126
35243
  if (!parsed)
@@ -35140,10 +35257,9 @@ var init_semantic_review = __esm(() => {
35140
35257
  init_config();
35141
35258
  init_logger2();
35142
35259
  init_prompts();
35260
+ init_finding_filters();
35143
35261
  init_requote_response();
35144
- init_semantic_evidence();
35145
- init_semantic_helpers();
35146
- FAIL_OPEN2 = { passed: true, findings: [], failOpen: true };
35262
+ FAIL_OPEN2 = { passed: true, findings: [], normalizedFindings: [], failOpen: true };
35147
35263
  semanticReviewOp = {
35148
35264
  kind: "run",
35149
35265
  name: "semantic-review",
@@ -35160,6 +35276,7 @@ var init_semantic_review = __esm(() => {
35160
35276
  invalid: () => ReviewPromptBuilder.jsonRetry(),
35161
35277
  truncated: () => ReviewPromptBuilder.jsonRetryCondensed({ blockingThreshold: input.blockingThreshold })
35162
35278
  },
35279
+ exhaustedFallback: (lastOutput) => /"passed"\s*:\s*false/.test(lastOutput) ? { passed: false, findings: [], normalizedFindings: [], looksLikeFail: true } : FAIL_OPEN2,
35163
35280
  logContext: { blockingThreshold: input.blockingThreshold ?? "error" }
35164
35281
  }),
35165
35282
  hopBody: semanticReviewHopBody,
@@ -35181,11 +35298,36 @@ var init_semantic_review = __esm(() => {
35181
35298
  parse(output, _input, _ctx) {
35182
35299
  const raw = tryParseLLMJson(output);
35183
35300
  const parsed = validateLLMShape(raw);
35184
- if (parsed)
35185
- return { passed: parsed.passed, findings: parsed.findings };
35186
- if (/"passed"\s*:\s*false/.test(output))
35187
- return { passed: false, findings: [], looksLikeFail: true };
35301
+ if (parsed) {
35302
+ return {
35303
+ passed: parsed.passed,
35304
+ findings: parsed.findings,
35305
+ normalizedFindings: []
35306
+ };
35307
+ }
35308
+ if (/"passed"\s*:\s*false/.test(output)) {
35309
+ return { passed: false, findings: [], normalizedFindings: [], looksLikeFail: true };
35310
+ }
35188
35311
  return FAIL_OPEN2;
35312
+ },
35313
+ async verify(parsed, input, _verifyCtx) {
35314
+ if (parsed.failOpen || parsed.looksLikeFail)
35315
+ return parsed;
35316
+ if (parsed.findings.length === 0)
35317
+ return parsed;
35318
+ const threshold = input.blockingThreshold ?? "error";
35319
+ const findings = parsed.findings;
35320
+ const sanitized = sanitizeRefModeFindings(findings, input.mode, threshold);
35321
+ const substantiated = await substantiateSemanticEvidence(sanitized, input.mode, input.workdir, input.story.id, threshold);
35322
+ const { accepted } = filterByAcGroundingMinimal(substantiated, input.story.acceptanceCriteria);
35323
+ const blocking = accepted.filter((f) => isBlockingSeverity(f.severity, threshold));
35324
+ const passed = parsed.passed && blocking.length === 0;
35325
+ return {
35326
+ ...parsed,
35327
+ passed,
35328
+ findings: accepted,
35329
+ normalizedFindings: toReviewFindings(blocking)
35330
+ };
35189
35331
  }
35190
35332
  };
35191
35333
  });
@@ -39360,30 +39502,21 @@ async function runSemanticReview(opts) {
39360
39502
  durationMs: Date.now() - startTime
39361
39503
  };
39362
39504
  }
39363
- const parsed = { passed: opResult.passed, findings: opResult.findings };
39364
- const sanitizedFindings = await substantiateSemanticEvidence(sanitizeRefModeFindings(parsed.findings, diffMode, blockingThreshold ?? "error"), diffMode, workdir, story.id, blockingThreshold ?? "error");
39365
- const { accepted: acGroundedFindings, dropped: acDropped } = filterByAcGroundingMinimal(sanitizedFindings, story.acceptanceCriteria);
39366
- if (acDropped.length > 0) {
39367
- logger?.warn("review", "Semantic findings dropped: acIndex missing or out of range", {
39368
- storyId: story.id,
39369
- dropped: acDropped.map((d) => ({ file: d.finding.file, issue: d.finding.issue, code: d.code }))
39370
- });
39371
- }
39372
- const sanitizedParsed = { ...parsed, findings: acGroundedFindings };
39373
39505
  const threshold = blockingThreshold ?? "error";
39374
- const blockingFindings = sanitizedParsed.findings.filter((f) => isBlockingSeverity(f.severity, threshold));
39375
- const advisoryFindings = sanitizedParsed.findings.filter((f) => !isBlockingSeverity(f.severity, threshold));
39506
+ const allFindings = opResult.findings;
39507
+ const blockingFindings = allFindings.filter((f) => isBlockingSeverity(f.severity, threshold));
39508
+ const advisoryFindings = allFindings.filter((f) => !isBlockingSeverity(f.severity, threshold));
39376
39509
  if (advisoryFindings.length > 0) {
39377
39510
  logger?.debug("review", `Semantic review: ${advisoryFindings.length} advisory findings (below threshold '${threshold}')`, {
39378
39511
  storyId: story.id,
39379
39512
  findings: advisoryFindings.map((f) => ({ severity: f.severity, file: f.file, issue: f.issue }))
39380
39513
  });
39381
39514
  }
39382
- if (!sanitizedParsed.passed && blockingFindings.length > 0) {
39383
- const durationMs2 = Date.now() - startTime;
39515
+ const durationMs = Date.now() - startTime;
39516
+ if (blockingFindings.length > 0) {
39384
39517
  logger?.warn("review", `Semantic review failed: ${blockingFindings.length} blocking findings`, {
39385
39518
  storyId: story.id,
39386
- durationMs: durationMs2
39519
+ durationMs
39387
39520
  });
39388
39521
  logger?.debug("review", "Semantic review findings", {
39389
39522
  storyId: story.id,
@@ -39410,7 +39543,7 @@ ${formatFindings2(blockingFindings)}`;
39410
39543
  blockingThreshold: threshold,
39411
39544
  result: {
39412
39545
  passed: false,
39413
- findings: llmFindingsToReviewFindings(sanitizedParsed.findings, { source: "semantic-review" })
39546
+ findings: llmFindingsToReviewFindings(allFindings, { source: "semantic-review" })
39414
39547
  },
39415
39548
  advisoryFindings: advisoryFindings.length > 0 ? llmFindingsToReviewFindings(advisoryFindings, { source: "semantic-review" }) : undefined
39416
39549
  });
@@ -39420,53 +39553,16 @@ ${formatFindings2(blockingFindings)}`;
39420
39553
  command: "",
39421
39554
  exitCode: 1,
39422
39555
  output,
39423
- durationMs: durationMs2,
39556
+ durationMs,
39424
39557
  findings: toReviewFindings(blockingFindings),
39425
39558
  advisoryFindings: advisoryFindings.length > 0 ? toReviewFindings(advisoryFindings) : undefined,
39426
39559
  cost: llmCost
39427
39560
  };
39428
39561
  }
39429
- if (!sanitizedParsed.passed && blockingFindings.length === 0) {
39430
- if (acDropped.length > 0) {
39431
- const durationMs3 = Date.now() - startTime;
39432
- logger?.warn("review", "Semantic review fail-closed: blocking findings dropped (acIndex invalid)", {
39433
- storyId: story.id,
39434
- durationMs: durationMs3,
39435
- droppedCount: acDropped.length,
39436
- dropCodes: acDropped.map((d) => d.code)
39437
- });
39438
- const dropSummary = acDropped.map((d, i) => `${i + 1}. [${d.code}] ${d.finding.file ?? "<unknown>"}: ${d.finding.issue}`).join(`
39439
- `);
39440
- recordSemanticAudit({
39441
- runtime,
39442
- workdir,
39443
- projectDir,
39444
- storyId: story.id,
39445
- featureName,
39446
- parsed: true,
39447
- failOpen: false,
39448
- passed: false,
39449
- blockingThreshold: threshold,
39450
- result: { passed: false, findings: [] },
39451
- advisoryFindings: advisoryFindings.length > 0 ? llmFindingsToReviewFindings(advisoryFindings, { source: "semantic-review" }) : undefined
39452
- });
39453
- return {
39454
- check: "semantic",
39455
- success: false,
39456
- command: "",
39457
- exitCode: 1,
39458
- output: `Semantic review failed: ${acDropped.length} blocking finding(s) dropped \u2014 acIndex was missing or out of range. The model emitted "passed: false" without valid AC attribution. Either re-classify these as "info" or ensure each error finding includes a valid acIndex. Drops:
39459
-
39460
- ${dropSummary}`,
39461
- durationMs: durationMs3,
39462
- advisoryFindings: advisoryFindings.length > 0 ? toReviewFindings(advisoryFindings) : undefined,
39463
- cost: llmCost
39464
- };
39465
- }
39466
- const durationMs2 = Date.now() - startTime;
39467
- logger?.info("review", "Semantic review passed (all findings below blocking threshold)", {
39562
+ if (!opResult.passed && allFindings.length === 0) {
39563
+ logger?.warn("review", "Semantic review fail-closed: blocking findings dropped (acIndex invalid)", {
39468
39564
  storyId: story.id,
39469
- durationMs: durationMs2
39565
+ durationMs
39470
39566
  });
39471
39567
  recordSemanticAudit({
39472
39568
  runtime,
@@ -39476,29 +39572,23 @@ ${dropSummary}`,
39476
39572
  featureName,
39477
39573
  parsed: true,
39478
39574
  failOpen: false,
39479
- passed: true,
39575
+ passed: false,
39480
39576
  blockingThreshold: threshold,
39481
- result: {
39482
- passed: true,
39483
- findings: llmFindingsToReviewFindings(sanitizedParsed.findings, { source: "semantic-review" })
39484
- },
39577
+ result: { passed: false, findings: [] },
39485
39578
  advisoryFindings: advisoryFindings.length > 0 ? llmFindingsToReviewFindings(advisoryFindings, { source: "semantic-review" }) : undefined
39486
39579
  });
39487
39580
  return {
39488
39581
  check: "semantic",
39489
- success: true,
39582
+ success: false,
39490
39583
  command: "",
39491
- exitCode: 0,
39492
- output: "Semantic review passed (all findings were advisory \u2014 below blocking threshold)",
39493
- durationMs: durationMs2,
39584
+ exitCode: 1,
39585
+ output: 'Semantic review failed: blocking finding(s) were dropped \u2014 acIndex was missing or out of range. The model emitted "passed: false" without valid AC attribution.',
39586
+ durationMs,
39494
39587
  advisoryFindings: advisoryFindings.length > 0 ? toReviewFindings(advisoryFindings) : undefined,
39495
39588
  cost: llmCost
39496
39589
  };
39497
39590
  }
39498
- const durationMs = Date.now() - startTime;
39499
- if (sanitizedParsed.passed) {
39500
- logger?.info("review", "Semantic review passed", { storyId: story.id, durationMs });
39501
- }
39591
+ logger?.info("review", "Semantic review passed", { storyId: story.id, durationMs });
39502
39592
  recordSemanticAudit({
39503
39593
  runtime,
39504
39594
  workdir,
@@ -39507,20 +39597,20 @@ ${dropSummary}`,
39507
39597
  featureName,
39508
39598
  parsed: true,
39509
39599
  failOpen: false,
39510
- passed: sanitizedParsed.passed,
39600
+ passed: true,
39511
39601
  blockingThreshold: threshold,
39512
39602
  result: {
39513
- passed: sanitizedParsed.passed,
39514
- findings: llmFindingsToReviewFindings(sanitizedParsed.findings, { source: "semantic-review" })
39603
+ passed: true,
39604
+ findings: llmFindingsToReviewFindings(allFindings, { source: "semantic-review" })
39515
39605
  },
39516
39606
  advisoryFindings: advisoryFindings.length > 0 ? llmFindingsToReviewFindings(advisoryFindings, { source: "semantic-review" }) : undefined
39517
39607
  });
39518
39608
  return {
39519
39609
  check: "semantic",
39520
- success: sanitizedParsed.passed,
39610
+ success: true,
39521
39611
  command: "",
39522
- exitCode: sanitizedParsed.passed ? 0 : 1,
39523
- output: sanitizedParsed.passed ? "Semantic review passed" : "Semantic review failed (no findings)",
39612
+ exitCode: 0,
39613
+ output: allFindings.length === 0 ? "Semantic review passed" : "Semantic review passed (all findings were advisory \u2014 below blocking threshold)",
39524
39614
  durationMs,
39525
39615
  advisoryFindings: advisoryFindings.length > 0 ? toReviewFindings(advisoryFindings) : undefined,
39526
39616
  cost: llmCost
@@ -39537,12 +39627,10 @@ var init_semantic = __esm(() => {
39537
39627
  init_semantic_review();
39538
39628
  init_prompts();
39539
39629
  init_test_runners();
39540
- init_ac_quote_validator();
39541
39630
  init_diff_utils();
39542
39631
  init_finding_projection();
39543
39632
  init_review_audit();
39544
39633
  init_semantic_debate();
39545
- init_semantic_evidence();
39546
39634
  init_semantic_helpers();
39547
39635
  _semanticDeps = {
39548
39636
  createDebateRunner: (opts) => new DebateRunner(opts),
@@ -41869,6 +41957,38 @@ async function callOp(ctx, op, input) {
41869
41957
  const rawOutput = outcome.result.output;
41870
41958
  const totalCost = outcome.result.estimatedCostUsd ?? 0;
41871
41959
  if (!rawOutput) {
41960
+ if (maxRetriesExceeded) {
41961
+ getSafeLogger()?.error("callop", "Op retry budget exhausted (empty output)", {
41962
+ storyId: ctx.storyId,
41963
+ opName: op.name,
41964
+ site: "run",
41965
+ totalAttempts: MAX_COMPLETE_RETRY_ATTEMPTS + 1
41966
+ });
41967
+ throw new NaxError(`callOp[${op.name}]: CALL_OP_MAX_RETRIES \u2014 exceeded MAX_COMPLETE_RETRY_ATTEMPTS (${MAX_COMPLETE_RETRY_ATTEMPTS})`, "CALL_OP_MAX_RETRIES", { stage: op.stage, storyId: ctx.storyId });
41968
+ }
41969
+ if (retryFallback !== undefined) {
41970
+ if (typeof retryFallback !== "object" || retryFallback === null) {
41971
+ throw new NaxError(`callOp[${op.name}]: exhaustedFallback returned a non-object (${typeof retryFallback}); fallback must be a plain object`, "CALL_OP_INVALID_FALLBACK", { stage: op.stage, storyId: ctx.storyId });
41972
+ }
41973
+ getSafeLogger()?.warn("callop", "Returning exhaustedFallback on empty output", {
41974
+ storyId: ctx.storyId,
41975
+ opName: op.name,
41976
+ agentName: dispatchAgent
41977
+ });
41978
+ return { ...retryFallback, estimatedCostUsd: totalCost };
41979
+ }
41980
+ if (op.recover) {
41981
+ const verifyCtx = makeVerifyCtx(buildCtx);
41982
+ const recovered = await op.recover(input, verifyCtx);
41983
+ if (recovered !== null) {
41984
+ getSafeLogger()?.warn("callop", "Recovered from empty output via op.recover", {
41985
+ storyId: ctx.storyId,
41986
+ opName: op.name,
41987
+ agentName: dispatchAgent
41988
+ });
41989
+ return recovered;
41990
+ }
41991
+ }
41872
41992
  throw new NaxError(`callOp[${op.name}]: agent returned no output`, "CALL_OP_NO_OUTPUT", {
41873
41993
  stage: op.stage,
41874
41994
  storyId: ctx.storyId,
@@ -52017,12 +52137,16 @@ function phasePassed(opName, output) {
52017
52137
  });
52018
52138
  return true;
52019
52139
  }
52140
+ function isFinding(value) {
52141
+ return typeof value === "object" && value !== null && typeof value.source === "string" && value.source.length > 0;
52142
+ }
52020
52143
  function extractPhaseFindings(output) {
52021
52144
  if (output === null || output === undefined || typeof output !== "object") {
52022
52145
  return [];
52023
52146
  }
52024
52147
  const record2 = output;
52025
- const findings = Array.isArray(record2.findings) ? record2.findings : [];
52148
+ const rawArray = Array.isArray(record2.normalizedFindings) ? record2.normalizedFindings : Array.isArray(record2.findings) ? record2.findings : [];
52149
+ const findings = rawArray.filter(isFinding);
52026
52150
  const success2 = "success" in record2 ? record2.success === true : ("passed" in record2) ? record2.passed === true : findings.length === 0;
52027
52151
  return success2 ? [] : findings;
52028
52152
  }
@@ -52650,6 +52774,7 @@ async function assemblePlanInputsFromCtx(ctx) {
52650
52774
  blockingThreshold: ctx.config.review.blockingThreshold
52651
52775
  } : undefined;
52652
52776
  const adversarialReviewInput = ctx.config.review?.enabled === true && ctx.config.review.checks?.includes("adversarial") && ctx.config.review.adversarial ? {
52777
+ workdir: ctx.workdir,
52653
52778
  story,
52654
52779
  adversarialConfig: ctx.config.review.adversarial,
52655
52780
  mode: ctx.config.review.adversarial.diffMode,
@@ -56943,7 +57068,7 @@ var package_default;
56943
57068
  var init_package = __esm(() => {
56944
57069
  package_default = {
56945
57070
  name: "@nathapp/nax",
56946
- version: "0.67.9",
57071
+ version: "0.67.10",
56947
57072
  description: "AI Coding Agent Orchestrator \u2014 loops until done",
56948
57073
  type: "module",
56949
57074
  bin: {
@@ -57038,8 +57163,8 @@ var init_version = __esm(() => {
57038
57163
  NAX_VERSION = package_default.version;
57039
57164
  NAX_COMMIT = (() => {
57040
57165
  try {
57041
- if (/^[0-9a-f]{6,10}$/.test("ab2db4bb"))
57042
- return "ab2db4bb";
57166
+ if (/^[0-9a-f]{6,10}$/.test("1d0ef5ac"))
57167
+ return "1d0ef5ac";
57043
57168
  } catch {}
57044
57169
  try {
57045
57170
  const result = Bun.spawnSync(["git", "rev-parse", "--short", "HEAD"], {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@nathapp/nax",
3
- "version": "0.67.9",
3
+ "version": "0.67.10",
4
4
  "description": "AI Coding Agent Orchestrator — loops until done",
5
5
  "type": "module",
6
6
  "bin": {