@nathapp/nax 0.67.9 → 0.67.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/nax.js +1241 -620
  2. package/package.json +1 -1
package/dist/nax.js CHANGED
@@ -16833,7 +16833,8 @@ var init_schemas_execution = __esm(() => {
16833
16833
  });
16834
16834
  RectificationConfigSchema = exports_external.object({
16835
16835
  enabled: exports_external.boolean().default(true),
16836
- maxRetries: exports_external.number().int().min(0).max(10).default(2),
16836
+ maxAttemptsTotal: exports_external.number().int().min(1).max(50).default(12),
16837
+ maxAttemptsPerStrategy: exports_external.number().int().min(1).max(20).default(3),
16837
16838
  fullSuiteTimeoutSeconds: exports_external.number().int().min(10).max(600).default(120),
16838
16839
  maxFailureSummaryChars: exports_external.number().int().min(500).max(1e4).default(2000),
16839
16840
  abortOnIncreasingFailures: exports_external.boolean().default(true),
@@ -16845,8 +16846,7 @@ var init_schemas_execution = __esm(() => {
16845
16846
  enabled: exports_external.boolean().default(true),
16846
16847
  timeoutSeconds: exports_external.number().int().min(10).max(600).default(120),
16847
16848
  acceptOnTimeout: exports_external.boolean().default(true),
16848
- mode: exports_external.enum(["deferred", "per-story", "disabled"]).default("deferred"),
16849
- maxRectificationAttempts: exports_external.number().int().min(1).default(2)
16849
+ mode: exports_external.enum(["deferred", "per-story", "disabled"]).default("deferred")
16850
16850
  });
16851
16851
  SmartTestRunnerConfigSchema = exports_external.object({
16852
16852
  enabled: exports_external.boolean().default(true),
@@ -16928,16 +16928,10 @@ var init_schemas_execution = __esm(() => {
16928
16928
  autofix: exports_external.object({
16929
16929
  enabled: exports_external.boolean().default(true),
16930
16930
  maxAttempts: exports_external.number().int().min(1).default(3),
16931
- maxTotalAttempts: exports_external.number().int().min(1).default(12),
16932
- rethinkAtAttempt: exports_external.number().int().min(1).default(2),
16933
- urgencyAtAttempt: exports_external.number().int().min(1).default(3),
16934
16931
  enforceTestWriterIsolation: exports_external.boolean().default(true)
16935
16932
  }).default({
16936
16933
  enabled: true,
16937
16934
  maxAttempts: 3,
16938
- maxTotalAttempts: 12,
16939
- rethinkAtAttempt: 2,
16940
- urgencyAtAttempt: 3,
16941
16935
  enforceTestWriterIsolation: true
16942
16936
  }),
16943
16937
  forceExit: exports_external.boolean().default(false),
@@ -17224,7 +17218,12 @@ var init_schemas_review = __esm(() => {
17224
17218
  timeoutMs: exports_external.number().int().positive().default(600000),
17225
17219
  excludePatterns: exports_external.array(exports_external.string()).optional(),
17226
17220
  parallel: exports_external.boolean().default(false),
17227
- maxConcurrentSessions: exports_external.number().int().min(1).max(4).default(2)
17221
+ maxConcurrentSessions: exports_external.number().int().min(1).max(4).default(2),
17222
+ acRegroundOnDrop: exports_external.boolean().default(true),
17223
+ substantiation: exports_external.object({
17224
+ requote: exports_external.boolean().default(true),
17225
+ maxRequotes: exports_external.number().int().min(0).default(5)
17226
+ }).optional()
17228
17227
  });
17229
17228
  ReviewConfigSchema = exports_external.object({
17230
17229
  enabled: exports_external.boolean(),
@@ -17320,7 +17319,8 @@ var init_schemas3 = __esm(() => {
17320
17319
  maxStoriesPerFeature: 500,
17321
17320
  rectification: {
17322
17321
  enabled: true,
17323
- maxRetries: 2,
17322
+ maxAttemptsTotal: 12,
17323
+ maxAttemptsPerStrategy: 3,
17324
17324
  fullSuiteTimeoutSeconds: 300,
17325
17325
  maxFailureSummaryChars: 2000,
17326
17326
  abortOnIncreasingFailures: true,
@@ -17332,8 +17332,7 @@ var init_schemas3 = __esm(() => {
17332
17332
  enabled: true,
17333
17333
  timeoutSeconds: 300,
17334
17334
  acceptOnTimeout: true,
17335
- mode: "deferred",
17336
- maxRectificationAttempts: 3
17335
+ mode: "deferred"
17337
17336
  },
17338
17337
  contextProviderTokenBudget: 2000,
17339
17338
  permissionProfile: "unrestricted",
@@ -17359,9 +17358,6 @@ var init_schemas3 = __esm(() => {
17359
17358
  autofix: {
17360
17359
  enabled: true,
17361
17360
  maxAttempts: 3,
17362
- maxTotalAttempts: 12,
17363
- rethinkAtAttempt: 2,
17364
- urgencyAtAttempt: 3,
17365
17361
  enforceTestWriterIsolation: true
17366
17362
  },
17367
17363
  forceExit: false,
@@ -17450,6 +17446,19 @@ var init_schemas3 = __esm(() => {
17450
17446
  ":!.nax/",
17451
17447
  ":!.nax-pids"
17452
17448
  ]
17449
+ },
17450
+ adversarial: {
17451
+ model: "balanced",
17452
+ diffMode: "ref",
17453
+ rules: [],
17454
+ timeoutMs: 600000,
17455
+ parallel: false,
17456
+ maxConcurrentSessions: 2,
17457
+ acRegroundOnDrop: true,
17458
+ substantiation: {
17459
+ requote: true,
17460
+ maxRequotes: 5
17461
+ }
17453
17462
  }
17454
17463
  }),
17455
17464
  plan: PlanConfigSchema.default({
@@ -18662,6 +18671,47 @@ function rejectLegacyAgentKeys(conf) {
18662
18671
  `);
18663
18672
  throw new NaxError(message, "CONFIG_LEGACY_AGENT_KEYS", { stage: "config", legacyKeys });
18664
18673
  }
18674
+ function rejectLegacyRectificationKeys(conf) {
18675
+ const legacyKeys = [];
18676
+ const migrationHints = [];
18677
+ const quality = conf.quality;
18678
+ const autofix = quality?.autofix;
18679
+ if (autofix && typeof autofix === "object") {
18680
+ if ("maxTotalAttempts" in autofix) {
18681
+ legacyKeys.push("quality.autofix.maxTotalAttempts");
18682
+ migrationHints.push("- Move `quality.autofix.maxTotalAttempts` \u2192 `execution.rectification.maxAttemptsTotal`");
18683
+ }
18684
+ if ("rethinkAtAttempt" in autofix) {
18685
+ legacyKeys.push("quality.autofix.rethinkAtAttempt");
18686
+ migrationHints.push("- Move `quality.autofix.rethinkAtAttempt` \u2192 `execution.rectification.rethinkAtAttempt`");
18687
+ }
18688
+ if ("urgencyAtAttempt" in autofix) {
18689
+ legacyKeys.push("quality.autofix.urgencyAtAttempt");
18690
+ migrationHints.push("- Move `quality.autofix.urgencyAtAttempt` \u2192 `execution.rectification.urgencyAtAttempt`");
18691
+ }
18692
+ }
18693
+ const execution = conf.execution;
18694
+ const rectification = execution?.rectification;
18695
+ if (rectification && typeof rectification === "object" && "maxRetries" in rectification) {
18696
+ legacyKeys.push("execution.rectification.maxRetries");
18697
+ migrationHints.push("- Rename `execution.rectification.maxRetries` \u2192 `execution.rectification.maxAttemptsTotal` (default changed from 2 to 12)");
18698
+ }
18699
+ const regressionGate = execution?.regressionGate;
18700
+ if (regressionGate && typeof regressionGate === "object" && "maxRectificationAttempts" in regressionGate) {
18701
+ legacyKeys.push("execution.regressionGate.maxRectificationAttempts");
18702
+ migrationHints.push("- Remove `execution.regressionGate.maxRectificationAttempts` \u2014 the regression cycle now shares `execution.rectification.maxAttemptsTotal`");
18703
+ }
18704
+ if (legacyKeys.length === 0)
18705
+ return;
18706
+ const message = [
18707
+ `Invalid configuration \u2014 legacy rectification-cap keys detected: ${legacyKeys.join(", ")}.`,
18708
+ "These were consolidated under `execution.rectification.*` so one config controls the unified",
18709
+ "fix cycle (semantic + adversarial + mechanical + regression). Migrate as follows:",
18710
+ ...migrationHints
18711
+ ].join(`
18712
+ `);
18713
+ throw new NaxError(message, "CONFIG_LEGACY_RECTIFICATION_KEYS", { stage: "config", legacyKeys });
18714
+ }
18665
18715
  function applyBatchModeCompat(conf) {
18666
18716
  const routing = conf.routing;
18667
18717
  const llm = routing?.llm;
@@ -18768,6 +18818,7 @@ async function loadConfig(startDir, cliOverrides) {
18768
18818
  return structuredClone(DEFAULT_CONFIG);
18769
18819
  }
18770
18820
  rejectLegacyAgentKeys(rawConfig);
18821
+ rejectLegacyRectificationKeys(rawConfig);
18771
18822
  const result = NaxConfigSchema.safeParse(rawConfig);
18772
18823
  if (!result.success) {
18773
18824
  const errors3 = result.error.issues.map((err) => {
@@ -18820,6 +18871,7 @@ async function loadConfigForWorkdir(rootConfigPath, packageDir, cliOverrides) {
18820
18871
  const rawMerged = deepMergeConfig(merged, profileData);
18821
18872
  rawMerged.profile = packageProfile;
18822
18873
  rejectLegacyAgentKeys(rawMerged);
18874
+ rejectLegacyRectificationKeys(rawMerged);
18823
18875
  const result = NaxConfigSchema.safeParse(rawMerged);
18824
18876
  if (result.success) {
18825
18877
  merged = result.data;
@@ -21043,6 +21095,7 @@ class DispatchEventBus {
21043
21095
  _completedListeners = new Set;
21044
21096
  _errorListeners = new Set;
21045
21097
  _reviewDecisionListeners = new Set;
21098
+ _reviewRepromptListeners = new Set;
21046
21099
  onDispatch(l) {
21047
21100
  this._dispatchListeners.add(l);
21048
21101
  return () => this._dispatchListeners.delete(l);
@@ -21059,6 +21112,10 @@ class DispatchEventBus {
21059
21112
  this._reviewDecisionListeners.add(l);
21060
21113
  return () => this._reviewDecisionListeners.delete(l);
21061
21114
  }
21115
+ onReviewReprompt(l) {
21116
+ this._reviewRepromptListeners.add(l);
21117
+ return () => this._reviewRepromptListeners.delete(l);
21118
+ }
21062
21119
  emitDispatch(event) {
21063
21120
  for (const l of this._dispatchListeners) {
21064
21121
  try {
@@ -21095,6 +21152,15 @@ class DispatchEventBus {
21095
21152
  }
21096
21153
  }
21097
21154
  }
21155
+ emitReviewReprompt(event) {
21156
+ for (const l of this._reviewRepromptListeners) {
21157
+ try {
21158
+ l(event);
21159
+ } catch (err) {
21160
+ getSafeLogger()?.warn("dispatch-bus", "review-reprompt-listener threw", { error: errorMessage(err) });
21161
+ }
21162
+ }
21163
+ }
21098
21164
  }
21099
21165
  var init_dispatch_events = __esm(() => {
21100
21166
  init_logger2();
@@ -21922,7 +21988,8 @@ function makeParseRetryStrategy(opts) {
21922
21988
  if (ctx.site === "complete") {
21923
21989
  getSafeLogger()?.warn(opts.reviewerKind, "makeParseRetryStrategy: lastOutput is not populated on complete-kind ops \u2014 retry will never fire", { storyId: ctx.storyId });
21924
21990
  }
21925
- return { retry: false };
21991
+ const fallback = opts.exhaustedFallback ? opts.exhaustedFallback("") : undefined;
21992
+ return { retry: false, ...fallback !== undefined ? { fallback } : {} };
21926
21993
  }
21927
21994
  let parsed;
21928
21995
  try {
@@ -30325,78 +30392,6 @@ function truncate(s, max) {
30325
30392
  var MAX_BLOCK_CHARS = 6000;
30326
30393
 
30327
30394
  // src/prompts/builders/review-builder.ts
30328
- class ReviewPromptBuilder {
30329
- buildSemanticReviewPrompt(story, semanticConfig, options) {
30330
- const acList = story.acceptanceCriteria.map((ac, i) => `${i + 1}. ${ac}`).join(`
30331
- `);
30332
- const customRulesBlock = semanticConfig.rules.length > 0 ? `
30333
- ## Additional Review Rules
30334
- ${semanticConfig.rules.map((r, i) => `${i + 1}. ${r}`).join(`
30335
- `)}
30336
- ` : "";
30337
- const priorIterationsBlock = buildPriorIterationsBlock(options.priorSemanticIterations ?? []);
30338
- let diffSection;
30339
- if (options.mode === "ref") {
30340
- diffSection = buildRefDiffSection(options.storyGitRef ?? "", options.stat ?? "", options.excludePatterns ?? []);
30341
- } else {
30342
- diffSection = buildEmbeddedDiffSection(options.diff ?? "");
30343
- }
30344
- const core2 = `${SEMANTIC_ROLE}
30345
-
30346
- ## Story: ${story.title}
30347
-
30348
- ### Description
30349
- ${story.description}
30350
-
30351
- ### Acceptance Criteria
30352
- ${acList}
30353
- ${customRulesBlock}${priorIterationsBlock}
30354
- ${SEMANTIC_INSTRUCTIONS}
30355
- ${SEMANTIC_OUTPUT_SCHEMA}
30356
-
30357
- ${diffSection}`;
30358
- return wrapJsonPrompt(core2);
30359
- }
30360
- static jsonRetry() {
30361
- return `Your previous response could not be parsed as valid JSON.
30362
- ` + `Output ONLY the JSON object from your review \u2014 no markdown fences, no explanation.
30363
- ` + "The object must start with { and end with }.";
30364
- }
30365
- static jsonRetryCondensed(opts) {
30366
- const threshold = opts?.blockingThreshold ?? "error";
30367
- const advisoryCap = opts?.advisoryCap ?? 3;
30368
- const blockingList = threshold === "error" ? '"error"' : threshold === "warning" ? '"error" and "warning"' : '"error", "warning", and "info"';
30369
- const blockingClause = threshold === "info" ? "Include ALL findings \u2014 do not drop any by severity." : `Include ALL findings with severity ${blockingList} (these are blocking \u2014 do not drop them).`;
30370
- const advisoryClause = threshold === "info" ? "If your response would still exceed limits, prioritize the highest-severity findings first." : `Below that, include at most ${advisoryCap} additional findings (highest severity first).`;
30371
- return `Your previous response was truncated and could not be parsed as valid JSON.
30372
- Respond with a condensed summary:
30373
- - ${blockingClause}
30374
- - ${advisoryClause}
30375
- - Keep \`verifiedBy\` for every finding. If \`verifiedBy.observed\` is long, abbreviate it to one line \u2014 never drop the field.
30376
- Output ONLY a complete, valid JSON object. It must start with { and end with }.
30377
- Schema: {"passed": boolean, "findings": [{"severity": string, "category": string, "file": string, "line": number, "issue": string, "suggestion": string, "verifiedBy": {"command": string, "file": string, "line": number, "observed": string}}]}`;
30378
- }
30379
- static requoteVerbatim(opts) {
30380
- const file3 = opts.finding.verifiedBy?.file ?? opts.finding.file;
30381
- const line = opts.finding.verifiedBy?.line ?? opts.finding.line;
30382
- return `Your previous verifiedBy.observed value did not match the referenced file on disk.
30383
-
30384
- You MUST use your file-reading tool to open ${file3} and copy the actual bytes around line ${line}. Do NOT quote from memory or from the prior conversation \u2014 the previous quote was wrong precisely because it was not read from disk. If you reply without a file-read tool call, the quote will be rejected.
30385
-
30386
- Return ONLY this JSON object:
30387
- {"file":"${file3}","line":${line},"observed":"exact 1-3 line quote"}
30388
-
30389
- Finding issue: ${opts.finding.issue}
30390
- Referenced file: ${file3}
30391
- Referenced line: ${line}
30392
-
30393
- Rules:
30394
- - Read ${file3} with your file tool first. Then copy observed verbatim from the read result.
30395
- - observed must be a 1-3 line excerpt that proves the claim, taken from at or near line ${line}.
30396
- - If after reading the file you cannot find anything that proves the claim, set observed to "".
30397
- - Do not return a full review. Do not include markdown fences or explanation.`;
30398
- }
30399
- }
30400
30395
  function buildEmbeddedDiffSection(diff) {
30401
30396
  return `## Git Diff (production code only \u2014 test files excluded)
30402
30397
 
@@ -30476,9 +30471,114 @@ Notes:
30476
30471
  - \`acIndex\` is required when severity is "error" (1-based, into the Acceptance Criteria list above).
30477
30472
  - \`acQuote\` is optional advisory metadata for human auditors \u2014 not validated.
30478
30473
  - Omit both for "warning", "info", "unverifiable".
30479
- If all ACs are correctly implemented, respond with { "passed": true, "findings": [] }.`;
30474
+ If all ACs are correctly implemented, respond with { "passed": true, "findings": [] }.`, ReviewPromptBuilder;
30480
30475
  var init_review_builder = __esm(() => {
30481
30476
  SEMANTIC_ROLE = "You are a semantic code reviewer with access to the repository files. " + "Your job is to walk each acceptance criterion (AC) and judge whether the production code fulfills it \u2014 fully, partially, or not at all. " + "Test coverage gaps and convention/lint issues are out of scope \u2014 adversarial review and lint/typecheck handle those.";
30477
+ ReviewPromptBuilder = class ReviewPromptBuilder {
30478
+ buildSemanticReviewPrompt(story, semanticConfig, options) {
30479
+ const acList = story.acceptanceCriteria.map((ac, i) => `${i + 1}. ${ac}`).join(`
30480
+ `);
30481
+ const customRulesBlock = semanticConfig.rules.length > 0 ? `
30482
+ ## Additional Review Rules
30483
+ ${semanticConfig.rules.map((r, i) => `${i + 1}. ${r}`).join(`
30484
+ `)}
30485
+ ` : "";
30486
+ const priorIterationsBlock = buildPriorIterationsBlock(options.priorSemanticIterations ?? []);
30487
+ let diffSection;
30488
+ if (options.mode === "ref") {
30489
+ diffSection = buildRefDiffSection(options.storyGitRef ?? "", options.stat ?? "", options.excludePatterns ?? []);
30490
+ } else {
30491
+ diffSection = buildEmbeddedDiffSection(options.diff ?? "");
30492
+ }
30493
+ const core2 = `${SEMANTIC_ROLE}
30494
+
30495
+ ## Story: ${story.title}
30496
+
30497
+ ### Description
30498
+ ${story.description}
30499
+
30500
+ ### Acceptance Criteria
30501
+ ${acList}
30502
+ ${customRulesBlock}${priorIterationsBlock}
30503
+ ${SEMANTIC_INSTRUCTIONS}
30504
+ ${SEMANTIC_OUTPUT_SCHEMA}
30505
+
30506
+ ${diffSection}`;
30507
+ return wrapJsonPrompt(core2);
30508
+ }
30509
+ static jsonRetry() {
30510
+ return `Your previous response could not be parsed as valid JSON.
30511
+ ` + `Output ONLY the JSON object from your review \u2014 no markdown fences, no explanation.
30512
+ ` + "The object must start with { and end with }.";
30513
+ }
30514
+ static jsonRetryCondensed(opts) {
30515
+ const threshold = opts?.blockingThreshold ?? "error";
30516
+ const advisoryCap = opts?.advisoryCap ?? 3;
30517
+ const blockingList = threshold === "error" ? '"error"' : threshold === "warning" ? '"error" and "warning"' : '"error", "warning", and "info"';
30518
+ const blockingClause = threshold === "info" ? "Include ALL findings \u2014 do not drop any by severity." : `Include ALL findings with severity ${blockingList} (these are blocking \u2014 do not drop them).`;
30519
+ const advisoryClause = threshold === "info" ? "If your response would still exceed limits, prioritize the highest-severity findings first." : `Below that, include at most ${advisoryCap} additional findings (highest severity first).`;
30520
+ return `Your previous response was truncated and could not be parsed as valid JSON.
30521
+ Respond with a condensed summary:
30522
+ - ${blockingClause}
30523
+ - ${advisoryClause}
30524
+ - Keep \`verifiedBy\` for every finding. If \`verifiedBy.observed\` is long, abbreviate it to one line \u2014 never drop the field.
30525
+ Output ONLY a complete, valid JSON object. It must start with { and end with }.
30526
+ Schema: {"passed": boolean, "findings": [{"severity": string, "category": string, "file": string, "line": number, "issue": string, "suggestion": string, "verifiedBy": {"command": string, "file": string, "line": number, "observed": string}}]}`;
30527
+ }
30528
+ static requoteVerbatim(opts) {
30529
+ const file3 = opts.finding.verifiedBy?.file ?? opts.finding.file;
30530
+ const line = opts.finding.verifiedBy?.line ?? opts.finding.line;
30531
+ return `Your previous verifiedBy.observed value did not match the referenced file on disk.
30532
+
30533
+ You MUST use your file-reading tool to open ${file3} and copy the actual bytes around line ${line}. Do NOT quote from memory or from the prior conversation \u2014 the previous quote was wrong precisely because it was not read from disk. If you reply without a file-read tool call, the quote will be rejected.
30534
+
30535
+ Return ONLY this JSON object:
30536
+ {"file":"${file3}","line":${line},"observed":"exact 1-3 line quote"}
30537
+
30538
+ Finding issue: ${opts.finding.issue}
30539
+ Referenced file: ${file3}
30540
+ Referenced line: ${line}
30541
+
30542
+ Rules:
30543
+ - Read ${file3} with your file tool first. Then copy observed verbatim from the read result.
30544
+ - observed must be a 1-3 line excerpt that proves the claim, taken from at or near line ${line}.
30545
+ - If after reading the file you cannot find anything that proves the claim, set observed to "".
30546
+ - Do not return a full review. Do not include markdown fences or explanation.`;
30547
+ }
30548
+ static DROP_CODE_MESSAGES_MINIMAL = {
30549
+ missing_ac_index: "no `acIndex` field was provided \u2014 every blocking finding must cite an AC by 1-based index",
30550
+ ac_index_out_of_range: "`acIndex` is 0 or larger than the AC list \u2014 ACs are 1-indexed; the lowest valid value is 1"
30551
+ };
30552
+ static regroundDroppedFindings(opts) {
30553
+ const { drops, acceptanceCriteria } = opts;
30554
+ if (drops.length === 0)
30555
+ return "";
30556
+ const firstDrop = drops[0];
30557
+ const codeMessage = ReviewPromptBuilder.DROP_CODE_MESSAGES_MINIMAL[firstDrop.code];
30558
+ const acList = acceptanceCriteria.map((ac, i) => `${i + 1}. ${ac}`).join(`
30559
+ `);
30560
+ return `Your previous review produced ${drops.length} finding${drops.length > 1 ? "s" : ""} that ${drops.length > 1 ? "were" : "was"} dropped because:
30561
+
30562
+ ${codeMessage}
30563
+
30564
+ The dropped finding${drops.length > 1 ? "s" : ""} ${drops.length > 1 ? "are" : "is"}:
30565
+ ${drops.map((d, i) => `${i + 1}. [${d.finding.severity}] ${d.finding.issue}`).join(`
30566
+ `)}
30567
+
30568
+ Please re-review the code and re-issue any valid findings. For each finding you re-issue:
30569
+ - You MUST include a valid \`acIndex\` (1-based index into the AC list below)
30570
+ - You MUST include a \`verifiedBy\` field with verified evidence
30571
+
30572
+ ## Acceptance Criteria
30573
+ ${acList}
30574
+
30575
+ ## Rules
30576
+ - If a finding's locus (file / symbol) is not named in any AC bullet, downgrade it to \`"info"\` or \`"warning"\`
30577
+ - Only re-issue findings that are genuinely substantiated by the code and constrained by an AC
30578
+ - Return ONLY a JSON object with the same shape as before:
30579
+ {"passed":true|false,"findings":[...]}`;
30580
+ }
30581
+ };
30482
30582
  });
30483
30583
 
30484
30584
  // src/prompts/builders/adversarial-review-builder.ts
@@ -30561,72 +30661,6 @@ ${diff}\`\`\`
30561
30661
 
30562
30662
  `;
30563
30663
  }
30564
-
30565
- class AdversarialReviewPromptBuilder {
30566
- buildAdversarialReviewPrompt(story, config2, options) {
30567
- const {
30568
- mode,
30569
- diff,
30570
- storyGitRef,
30571
- stat,
30572
- testInventory,
30573
- excludePatterns,
30574
- testGlobs,
30575
- refExcludePatterns,
30576
- priorAdversarialIterations,
30577
- blockingThreshold
30578
- } = options;
30579
- const priorFindingsBlock = buildPriorIterationsBlock(priorAdversarialIterations ?? []);
30580
- const storyBlock = `## Story Under Review
30581
-
30582
- **ID:** ${story.id}
30583
- **Title:** ${story.title}
30584
- **Description:** ${story.description || "(none)"}
30585
-
30586
- **Acceptance Criteria:**
30587
- ${story.acceptanceCriteria.map((ac, i) => `${i + 1}. ${ac}`).join(`
30588
- `)}
30589
-
30590
- `;
30591
- const customRulesBlock = config2.rules.length > 0 ? `## Project-Specific Adversarial Rules
30592
-
30593
- ${config2.rules.map((r) => `- ${r}`).join(`
30594
- `)}
30595
-
30596
- ` : "";
30597
- let diffBlock;
30598
- if (mode === "ref" && storyGitRef) {
30599
- diffBlock = buildAdversarialRefDiffSection(storyGitRef, stat, excludePatterns ?? [], testGlobs ?? [], refExcludePatterns ?? []);
30600
- } else if (mode === "embedded" && diff) {
30601
- diffBlock = buildAdversarialEmbeddedDiffSection(diff, testInventory);
30602
- } else {
30603
- diffBlock = `## Diff
30604
-
30605
- (No diff available \u2014 review based on story context only)
30606
-
30607
- `;
30608
- }
30609
- return [
30610
- ADVERSARIAL_ROLE,
30611
- `
30612
-
30613
- `,
30614
- priorFindingsBlock,
30615
- storyBlock,
30616
- ADVERSARIAL_INSTRUCTIONS,
30617
- `
30618
-
30619
- `,
30620
- customRulesBlock,
30621
- buildBlockingThresholdBlock(blockingThreshold ?? "error"),
30622
- OUTPUT_SCHEMA,
30623
- `
30624
-
30625
- `,
30626
- diffBlock
30627
- ].join("");
30628
- }
30629
- }
30630
30664
  var ADVERSARIAL_ROLE = `You are an adversarial code reviewer with full access to the repository.
30631
30665
 
30632
30666
  Your job is NOT to re-verify that the code satisfies the acceptance criteria \u2014 semantic review owns that question. Don't re-litigate AC correctness.
@@ -30735,8 +30769,130 @@ Worked example:
30735
30769
  **Scope constraints are not Acceptance Criteria:**
30736
30770
  The story description may contain a "Scope" section with "In:" and "Out:" bullets. These are implementation guidelines, not ACs. A finding about code changed outside the stated scope (e.g., a file listed under "Out:") cannot cite a scope constraint as its \`acQuote\`/\`acIndex\` because scope text is not in the numbered AC list. Emit scope-violation findings as \`"warning"\` \u2014 never \`"error"\`. Never use \`acIndex: 0\`; \`acIndex\` is 1-based (first AC bullet = 1).
30737
30771
 
30738
- If you cannot find an AC that names the **specific symbol** in your finding, downgrade to \`"info"\` or \`"warning"\`. A finding dropped by the validator is worse than one correctly classified as advisory.`;
30739
- var init_adversarial_review_builder = () => {};
30772
+ If you cannot find an AC that names the **specific symbol** in your finding, downgrade to \`"info"\` or \`"warning"\`. A finding dropped by the validator is worse than one correctly classified as advisory.`, AdversarialReviewPromptBuilder;
30773
+ var init_adversarial_review_builder = __esm(() => {
30774
+ AdversarialReviewPromptBuilder = class AdversarialReviewPromptBuilder {
30775
+ buildAdversarialReviewPrompt(story, config2, options) {
30776
+ const {
30777
+ mode,
30778
+ diff,
30779
+ storyGitRef,
30780
+ stat,
30781
+ testInventory,
30782
+ excludePatterns,
30783
+ testGlobs,
30784
+ refExcludePatterns,
30785
+ priorAdversarialIterations,
30786
+ blockingThreshold
30787
+ } = options;
30788
+ const priorFindingsBlock = buildPriorIterationsBlock(priorAdversarialIterations ?? []);
30789
+ const storyBlock = `## Story Under Review
30790
+
30791
+ **ID:** ${story.id}
30792
+ **Title:** ${story.title}
30793
+ **Description:** ${story.description || "(none)"}
30794
+
30795
+ **Acceptance Criteria:**
30796
+ ${story.acceptanceCriteria.map((ac, i) => `${i + 1}. ${ac}`).join(`
30797
+ `)}
30798
+
30799
+ `;
30800
+ const customRulesBlock = config2.rules.length > 0 ? `## Project-Specific Adversarial Rules
30801
+
30802
+ ${config2.rules.map((r) => `- ${r}`).join(`
30803
+ `)}
30804
+
30805
+ ` : "";
30806
+ let diffBlock;
30807
+ if (mode === "ref" && storyGitRef) {
30808
+ diffBlock = buildAdversarialRefDiffSection(storyGitRef, stat, excludePatterns ?? [], testGlobs ?? [], refExcludePatterns ?? []);
30809
+ } else if (mode === "embedded" && diff) {
30810
+ diffBlock = buildAdversarialEmbeddedDiffSection(diff, testInventory);
30811
+ } else {
30812
+ diffBlock = `## Diff
30813
+
30814
+ (No diff available \u2014 review based on story context only)
30815
+
30816
+ `;
30817
+ }
30818
+ return [
30819
+ ADVERSARIAL_ROLE,
30820
+ `
30821
+
30822
+ `,
30823
+ priorFindingsBlock,
30824
+ storyBlock,
30825
+ ADVERSARIAL_INSTRUCTIONS,
30826
+ `
30827
+
30828
+ `,
30829
+ customRulesBlock,
30830
+ buildBlockingThresholdBlock(blockingThreshold ?? "error"),
30831
+ OUTPUT_SCHEMA,
30832
+ `
30833
+
30834
+ `,
30835
+ diffBlock
30836
+ ].join("");
30837
+ }
30838
+ static requoteVerbatim(opts) {
30839
+ const file3 = opts.finding.verifiedBy?.file ?? opts.finding.file;
30840
+ const line = opts.finding.verifiedBy?.line ?? opts.finding.line;
30841
+ return `Your previous verifiedBy.observed value did not match the referenced file on disk.
30842
+
30843
+ You MUST use your file-reading tool to open ${file3} and copy the actual bytes around line ${line}. Do NOT quote from memory or from the prior conversation \u2014 the previous quote was wrong precisely because it was not read from disk. If you reply without a file-read tool call, the quote will be rejected.
30844
+
30845
+ Return ONLY this JSON object:
30846
+ {"file":"${file3}","line":${line},"observed":"exact 1-3 line quote"}
30847
+
30848
+ Finding issue: ${opts.finding.issue}
30849
+ Referenced file: ${file3}
30850
+ Referenced line: ${line}
30851
+
30852
+ Rules:
30853
+ - Read ${file3} with your file tool first. Then copy observed verbatim from the read result.
30854
+ - observed must be a 1-3 line excerpt that proves the claim, taken from at or near line ${line}.
30855
+ - If after reading the file you cannot find anything that proves the claim, set observed to "".
30856
+ - Do not return a full review. Do not include markdown fences or explanation.`;
30857
+ }
30858
+ static DROP_CODE_MESSAGES_QUOTE = {
30859
+ missing_ac_quote: "no `acQuote` field was provided \u2014 every blocking finding must cite an AC",
30860
+ ac_index_out_of_range: "`acIndex` is 0 or larger than the AC list \u2014 ACs are 1-indexed; the lowest valid value is 1",
30861
+ ac_quote_not_substring: "`acQuote` text does not appear verbatim in any AC bullet \u2014 copy the AC text character-for-character",
30862
+ ac_quote_does_not_constrain_locus: "the cited AC mentions the file but not the specific symbol your finding flags \u2014 pick a different AC, or downgrade to `info` / `warning`"
30863
+ };
30864
+ static regroundDroppedFindings(opts) {
30865
+ const { drops, acceptanceCriteria } = opts;
30866
+ if (drops.length === 0)
30867
+ return "";
30868
+ const firstDrop = drops[0];
30869
+ const codeMessage = AdversarialReviewPromptBuilder.DROP_CODE_MESSAGES_QUOTE[firstDrop.code] ?? `rejection code: ${firstDrop.code}`;
30870
+ const acList = acceptanceCriteria.map((ac, i) => `${i + 1}. ${ac}`).join(`
30871
+ `);
30872
+ return `Your previous review produced ${drops.length} finding${drops.length > 1 ? "s" : ""} that ${drops.length > 1 ? "were" : "was"} dropped because:
30873
+
30874
+ ${codeMessage}
30875
+
30876
+ The dropped finding${drops.length > 1 ? "s" : ""} ${drops.length > 1 ? "are" : "is"}:
30877
+ ${drops.map((d, i) => `${i + 1}. [${d.finding.severity}] ${d.finding.issue}`).join(`
30878
+ `)}
30879
+
30880
+ Please re-review the code and re-issue any valid findings. For each finding you re-issue:
30881
+ - You MUST include a valid \`acQuote\` that appears verbatim in one of the AC bullets below
30882
+ - You MUST include a valid \`acIndex\` (1-based index into the AC list)
30883
+ - The \`acQuote\` must cite the specific symbol you are flagging, not just the file
30884
+
30885
+ ## Acceptance Criteria
30886
+ ${acList}
30887
+
30888
+ ## Rules
30889
+ - If a finding's locus (file / symbol) is not named in any AC bullet, downgrade it to \`"info"\` or \`"warning"\`
30890
+ - Only re-issue findings that are genuinely substantiated by the code and constrained by an AC
30891
+ - Return ONLY a JSON object with the same shape as before:
30892
+ {"passed":true|false,"findings":[...]}`;
30893
+ }
30894
+ };
30895
+ });
30740
30896
 
30741
30897
  // src/prompts/builders/acceptance-builder-helpers.ts
30742
30898
  function formatTestOutputForFix(rawOutput) {
@@ -31249,8 +31405,420 @@ var init_adversarial_helpers = __esm(() => {
31249
31405
  init_severity();
31250
31406
  });
31251
31407
 
31408
+ // src/review/semantic-helpers.ts
31409
+ function validateLLMShape(parsed) {
31410
+ if (typeof parsed !== "object" || parsed === null)
31411
+ return null;
31412
+ const obj = parsed;
31413
+ if (typeof obj.passed !== "boolean")
31414
+ return null;
31415
+ if (!Array.isArray(obj.findings))
31416
+ return null;
31417
+ return { passed: obj.passed, findings: obj.findings };
31418
+ }
31419
+ function parseLLMResponse(raw) {
31420
+ try {
31421
+ return validateLLMShape(tryParseLLMJson(raw));
31422
+ } catch {
31423
+ return null;
31424
+ }
31425
+ }
31426
+ function formatFindings2(findings) {
31427
+ return findings.map((f) => `[${f.severity}] ${f.file}:${f.line} \u2014 ${f.issue}
31428
+ Suggestion: ${f.suggestion}`).join(`
31429
+ `);
31430
+ }
31431
+ function normalizeSeverity2(sev) {
31432
+ if (sev === "warn")
31433
+ return "warning";
31434
+ if (sev === "critical" || sev === "error" || sev === "warning" || sev === "info" || sev === "low" || sev === "unverifiable")
31435
+ return sev;
31436
+ return "info";
31437
+ }
31438
+ function sanitizeRefModeFindings(findings, diffMode, blockingThreshold = "error") {
31439
+ if (diffMode !== "ref")
31440
+ return findings;
31441
+ return findings.map((finding) => needsDowngradeForMissingEvidence(finding, blockingThreshold) ? downgradeToUnverifiable(finding) : finding);
31442
+ }
31443
+ function needsDowngradeForMissingEvidence(finding, blockingThreshold) {
31444
+ if (!isBlockingSeverity(finding.severity, blockingThreshold))
31445
+ return false;
31446
+ return mentionsUnverifiedSource(finding) || !hasVerifiedEvidence(finding);
31447
+ }
31448
+ function mentionsUnverifiedSource(finding) {
31449
+ const text = `${finding.issue} ${finding.suggestion}`.toLowerCase();
31450
+ return UNVERIFIED_FINDING_PATTERNS.some((pattern) => text.includes(pattern));
31451
+ }
31452
+ function hasVerifiedEvidence(finding) {
31453
+ const evidence = finding.verifiedBy;
31454
+ return !!evidence?.file?.trim() && !!evidence.observed?.trim();
31455
+ }
31456
+ function downgradeToUnverifiable(finding) {
31457
+ return {
31458
+ ...finding,
31459
+ severity: "unverifiable"
31460
+ };
31461
+ }
31462
+ function llmFindingToFinding(f) {
31463
+ const metaExtras = {};
31464
+ if (f.verifiedBy)
31465
+ metaExtras.verifiedBy = f.verifiedBy;
31466
+ if (f.acQuote)
31467
+ metaExtras.acQuote = f.acQuote;
31468
+ if (f.acIndex != null)
31469
+ metaExtras.acIndex = f.acIndex;
31470
+ return {
31471
+ source: "semantic-review",
31472
+ severity: normalizeSeverity2(f.severity),
31473
+ category: "",
31474
+ file: f.file,
31475
+ line: f.line,
31476
+ message: f.issue,
31477
+ suggestion: f.suggestion ?? undefined,
31478
+ fixTarget: "source",
31479
+ meta: Object.keys(metaExtras).length > 0 ? metaExtras : undefined
31480
+ };
31481
+ }
31482
+ function toReviewFindings(findings) {
31483
+ return findings.map(llmFindingToFinding);
31484
+ }
31485
+ var UNVERIFIED_FINDING_PATTERNS;
31486
+ var init_semantic_helpers = __esm(() => {
31487
+ init_severity();
31488
+ UNVERIFIED_FINDING_PATTERNS = [
31489
+ "cannot verify",
31490
+ "can't verify",
31491
+ "from diff alone",
31492
+ "missing from diff",
31493
+ "not found in diff",
31494
+ "not present in diff",
31495
+ "does not appear in diff"
31496
+ ];
31497
+ });
31498
+
31499
+ // src/review/semantic-evidence.ts
31500
+ import { isAbsolute as isAbsolute8 } from "path";
31501
+ async function substantiateSemanticEvidence(findings, diffMode, workdir, storyId, blockingThreshold = "error") {
31502
+ if (diffMode !== "ref")
31503
+ return findings;
31504
+ return Promise.all(findings.map(async (finding) => {
31505
+ if (!isBlockingSeverity(finding.severity, blockingThreshold))
31506
+ return finding;
31507
+ const evidence = await checkFindingEvidence({ finding, workdir });
31508
+ if (evidence.status !== "unmatched")
31509
+ return finding;
31510
+ return downgradeUnsubstantiatedFinding({ finding, storyId, ...evidence });
31511
+ }));
31512
+ }
31513
+ async function checkFindingEvidence(opts) {
31514
+ const observed = opts.finding.verifiedBy?.observed?.trim();
31515
+ const file3 = opts.finding.verifiedBy?.file?.trim() || opts.finding.file;
31516
+ const line = opts.finding.verifiedBy?.line ?? opts.finding.line;
31517
+ if (!observed)
31518
+ return { status: "missing-observed", file: file3, line };
31519
+ const contents = await readSafeFile(opts.workdir, file3);
31520
+ if (contents === null)
31521
+ return { status: "unreadable", file: file3, line, observed };
31522
+ return matchesEvidence(contents, observed, line) ? { status: "matched", file: file3, line, observed } : { status: "unmatched", file: file3, line, observed };
31523
+ }
31524
+ function matchesEvidence(contents, observed, line) {
31525
+ if (!line || line <= 0) {
31526
+ return normalizedIncludes(contents, observed);
31527
+ }
31528
+ const lines = contents.split(`
31529
+ `);
31530
+ const cited = Math.min(Math.max(0, line - 1), lines.length - 1);
31531
+ const start = Math.max(0, cited - EVIDENCE_LINE_WINDOW);
31532
+ const end = Math.min(lines.length, cited + EVIDENCE_LINE_WINDOW + 1);
31533
+ const windowText = lines.slice(start, end).join(`
31534
+ `);
31535
+ return normalizedIncludes(windowText, observed);
31536
+ }
31537
+ function downgradeUnsubstantiatedFinding(opts) {
31538
+ _evidenceDeps.getLogger()?.warn("review", "Downgraded unsubstantiated review finding", {
31539
+ storyId: opts.storyId,
31540
+ event: opts.event ?? SEMANTIC_FINDING_DOWNGRADED_EVENT,
31541
+ file: opts.file ?? opts.finding.verifiedBy?.file ?? opts.finding.file,
31542
+ line: opts.line ?? opts.finding.verifiedBy?.line ?? opts.finding.line,
31543
+ issue: opts.finding.issue?.slice(0, ISSUE_PREVIEW_CHARS),
31544
+ observed: opts.observed?.slice(0, OBSERVED_PREVIEW_CHARS)
31545
+ });
31546
+ return { ...opts.finding, severity: "unverifiable" };
31547
+ }
31548
+ async function readSafeFile(workdir, file3) {
31549
+ const validated = validateModulePath(file3, [workdir]);
31550
+ if (validated.valid && validated.absolutePath) {
31551
+ try {
31552
+ return await Bun.file(validated.absolutePath).text();
31553
+ } catch {
31554
+ return null;
31555
+ }
31556
+ }
31557
+ if (isAbsolute8(file3)) {
31558
+ try {
31559
+ return await Bun.file(file3).text();
31560
+ } catch {
31561
+ return null;
31562
+ }
31563
+ }
31564
+ return null;
31565
+ }
31566
+ function normalizedIncludes(contents, observed) {
31567
+ const normalizedObserved = normalizeEvidenceText(observed);
31568
+ return normalizedObserved.length > 0 && normalizeEvidenceText(contents).includes(normalizedObserved);
31569
+ }
31570
+ function normalizeEvidenceText(text) {
31571
+ return stripWrappingQuotes(text).replace(/\s+/g, " ").trim();
31572
+ }
31573
+ function stripWrappingQuotes(text) {
31574
+ let trimmed = text.trim();
31575
+ while (trimmed.length >= 2 && isMatchingWrapper(trimmed[0], trimmed[trimmed.length - 1])) {
31576
+ trimmed = trimmed.slice(1, -1).trim();
31577
+ }
31578
+ return trimmed;
31579
+ }
31580
+ function isMatchingWrapper(first, last) {
31581
+ return first === "`" && last === "`" || first === `"` && last === `"` || first === "'" && last === "'";
31582
+ }
31583
+ var OBSERVED_PREVIEW_CHARS = 160, ISSUE_PREVIEW_CHARS = 200, EVIDENCE_LINE_WINDOW = 10, SEMANTIC_FINDING_DOWNGRADED_EVENT = "review.semantic.finding.downgraded", ADVERSARIAL_FINDING_DOWNGRADED_EVENT = "review.adversarial.finding.downgraded", _evidenceDeps;
31584
+ var init_semantic_evidence = __esm(() => {
31585
+ init_logger2();
31586
+ init_path_security2();
31587
+ init_semantic_helpers();
31588
+ _evidenceDeps = {
31589
+ getLogger: getSafeLogger
31590
+ };
31591
+ });
31592
+
31593
+ // src/review/finding-filters.ts
31594
+ async function substantiateAdversarialFindings(opts) {
31595
+ const { findings, workdir, storyId, blockingThreshold } = opts;
31596
+ return Promise.all(findings.map(async (finding) => {
31597
+ if (!isBlockingSeverity(finding.severity, blockingThreshold))
31598
+ return finding;
31599
+ const evidence = await checkFindingEvidence({ finding, workdir });
31600
+ if (evidence.status !== "unmatched" && evidence.status !== "missing-observed")
31601
+ return finding;
31602
+ return downgradeUnsubstantiatedFinding({
31603
+ finding,
31604
+ storyId,
31605
+ event: ADVERSARIAL_FINDING_DOWNGRADED_EVENT,
31606
+ file: evidence.file,
31607
+ line: evidence.line,
31608
+ observed: evidence.observed
31609
+ });
31610
+ }));
31611
+ }
31612
+ var init_finding_filters = __esm(() => {
31613
+ init_adversarial_helpers();
31614
+ init_semantic_evidence();
31615
+ init_semantic_helpers();
31616
+ init_semantic_evidence();
31617
+ init_ac_quote_validator();
31618
+ });
31619
+
31620
+ // src/review/requote-response.ts
31621
+ function parseRequoteResponse(output) {
31622
+ const parsed = tryParseLLMJson(output);
31623
+ if (!isRecord(parsed))
31624
+ return null;
31625
+ const canonical = extractCanonical(parsed);
31626
+ if (canonical)
31627
+ return canonical;
31628
+ const findings = parsed.findings;
31629
+ if (!Array.isArray(findings) || findings.length !== 1)
31630
+ return null;
31631
+ const finding = findings[0];
31632
+ if (!isRecord(finding))
31633
+ return null;
31634
+ return extractCanonical(finding.verifiedBy) ?? extractCanonical(finding);
31635
+ }
31636
+ function extractCanonical(value) {
31637
+ if (!isRecord(value))
31638
+ return null;
31639
+ if (typeof value.file !== "string" || typeof value.observed !== "string")
31640
+ return null;
31641
+ const file3 = value.file.trim();
31642
+ if (!file3)
31643
+ return null;
31644
+ const line = coerceLine(value.line);
31645
+ if (line === null)
31646
+ return null;
31647
+ return {
31648
+ file: file3,
31649
+ line: line === undefined ? undefined : line,
31650
+ observed: value.observed
31651
+ };
31652
+ }
31653
+ function coerceLine(value) {
31654
+ if (value == null)
31655
+ return;
31656
+ if (typeof value === "number")
31657
+ return value;
31658
+ if (typeof value === "string" && /^\d+$/.test(value))
31659
+ return Number.parseInt(value, 10);
31660
+ return null;
31661
+ }
31662
+ function isRecord(value) {
31663
+ return typeof value === "object" && value !== null && !Array.isArray(value);
31664
+ }
31665
+ var init_requote_response = () => {};
31666
+
31252
31667
  // src/operations/adversarial-review.ts
31253
- var FAIL_OPEN, adversarialParseRetry = (input) => makeParseRetryStrategy({
31668
+ function withRepromptMarker(output, info) {
31669
+ const parsed = tryParseLLMJson(output);
31670
+ if (!parsed || typeof parsed !== "object")
31671
+ return output;
31672
+ return JSON.stringify({ ...parsed, _repromptInfo: info });
31673
+ }
31674
+ function extractRepromptInfo(raw) {
31675
+ if (!raw || typeof raw !== "object")
31676
+ return;
31677
+ const info = raw._repromptInfo;
31678
+ if (!info || typeof info !== "object")
31679
+ return;
31680
+ const i = info;
31681
+ if (typeof i.dropCount !== "number" || typeof i.costUsd !== "number" || typeof i.outcome !== "string") {
31682
+ return;
31683
+ }
31684
+ return {
31685
+ dropCount: i.dropCount,
31686
+ costUsd: i.costUsd,
31687
+ outcome: i.outcome
31688
+ };
31689
+ }
31690
+ async function requoteBlockingAdversarialFindings(findings, ctx) {
31691
+ const threshold = ctx.input.blockingThreshold ?? "error";
31692
+ const maxRequotes = ctx.input.adversarialConfig.substantiation?.maxRequotes ?? DEFAULT_MAX_REQUOTES;
31693
+ const requoteEnabled = ctx.input.adversarialConfig.substantiation?.requote ?? true;
31694
+ if (ctx.input.mode !== "ref" || !requoteEnabled || maxRequotes <= 0) {
31695
+ return { findings, changed: false, extraCostUsd: 0 };
31696
+ }
31697
+ const next = [...findings];
31698
+ let changed = false;
31699
+ let extraCostUsd = 0;
31700
+ let used = 0;
31701
+ for (const [index, finding] of next.entries()) {
31702
+ if (!isBlockingSeverity(finding.severity, threshold))
31703
+ continue;
31704
+ const initialEvidence = await checkFindingEvidence({ finding, workdir: ctx.input.workdir });
31705
+ if (initialEvidence.status !== "unmatched")
31706
+ continue;
31707
+ if (used >= maxRequotes)
31708
+ break;
31709
+ used += 1;
31710
+ const retry = await ctx.send(AdversarialReviewPromptBuilder.requoteVerbatim({ finding }));
31711
+ extraCostUsd += retry.estimatedCostUsd ?? 0;
31712
+ const requote = parseRequoteResponse(retry.output);
31713
+ if (!requote) {
31714
+ next[index] = downgradeUnsubstantiatedFinding({
31715
+ finding,
31716
+ storyId: ctx.input.story.id,
31717
+ event: ADVERSARIAL_REQUOTE_FAILED_EVENT,
31718
+ ...initialEvidence
31719
+ });
31720
+ changed = true;
31721
+ continue;
31722
+ }
31723
+ const updatedFinding = {
31724
+ ...finding,
31725
+ verifiedBy: {
31726
+ file: requote.file,
31727
+ line: requote.line,
31728
+ observed: requote.observed
31729
+ }
31730
+ };
31731
+ const requotedEvidence = await checkFindingEvidence({
31732
+ finding: updatedFinding,
31733
+ workdir: ctx.input.workdir
31734
+ });
31735
+ if (requotedEvidence.status === "matched") {
31736
+ getSafeLogger()?.info("review", "Recovered adversarial finding via same-session requote", {
31737
+ storyId: ctx.input.story.id,
31738
+ event: ADVERSARIAL_REQUOTE_RECOVERED_EVENT,
31739
+ file: requotedEvidence.file,
31740
+ line: requotedEvidence.line
31741
+ });
31742
+ next[index] = updatedFinding;
31743
+ changed = true;
31744
+ continue;
31745
+ }
31746
+ next[index] = downgradeUnsubstantiatedFinding({
31747
+ finding: updatedFinding,
31748
+ storyId: ctx.input.story.id,
31749
+ event: ADVERSARIAL_REQUOTE_FAILED_EVENT,
31750
+ file: requotedEvidence.file,
31751
+ line: requotedEvidence.line,
31752
+ observed: requotedEvidence.observed
31753
+ });
31754
+ changed = true;
31755
+ }
31756
+ return { findings: next, changed, extraCostUsd };
31757
+ }
31758
+ function evaluateRepromptTrigger(shape, input) {
31759
+ if (input.adversarialConfig.acRegroundOnDrop === false)
31760
+ return { shouldReprompt: false };
31761
+ if (shape.passed)
31762
+ return { shouldReprompt: false };
31763
+ const { accepted, dropped } = filterByAcQuote(shape.findings, input.story.acceptanceCriteria);
31764
+ const threshold = input.blockingThreshold ?? "error";
31765
+ const blockingAccepted = accepted.filter((f) => isBlockingSeverity(f.severity, threshold));
31766
+ if (blockingAccepted.length > 0)
31767
+ return { shouldReprompt: false };
31768
+ if (dropped.length === 0)
31769
+ return { shouldReprompt: false };
31770
+ return { shouldReprompt: true, acDropped: dropped };
31771
+ }
31772
+ async function performAdversarialReground(turn, firstParsed, drops, ctx) {
31773
+ const threshold = ctx.input.blockingThreshold ?? "error";
31774
+ const acceptanceCriteria = ctx.input.story.acceptanceCriteria;
31775
+ const { accepted: firstAccepted } = filterByAcQuote(firstParsed.findings, acceptanceCriteria);
31776
+ const firstAdvisory = firstAccepted.filter((f) => !isBlockingSeverity(f.severity, threshold));
31777
+ const repromptPrompt = AdversarialReviewPromptBuilder.regroundDroppedFindings({
31778
+ drops,
31779
+ acceptanceCriteria
31780
+ });
31781
+ const secondTurn = await ctx.send(repromptPrompt);
31782
+ const secondParsed = validateAdversarialShape(tryParseLLMJson(secondTurn.output));
31783
+ const costUsd = (turn.estimatedCostUsd ?? 0) + (secondTurn.estimatedCostUsd ?? 0);
31784
+ const dropCount = drops.length;
31785
+ if (!secondParsed) {
31786
+ return {
31787
+ ...turn,
31788
+ output: withRepromptMarker(turn.output, { dropCount, outcome: "parse-failed", costUsd })
31789
+ };
31790
+ }
31791
+ const { accepted: secondAccepted } = filterByAcQuote(secondParsed.findings, acceptanceCriteria);
31792
+ const secondBlocking = secondAccepted.filter((f) => isBlockingSeverity(f.severity, threshold));
31793
+ if (secondBlocking.length > 0) {
31794
+ return {
31795
+ ...turn,
31796
+ output: JSON.stringify({
31797
+ passed: false,
31798
+ findings: secondParsed.findings,
31799
+ _repromptInfo: { dropCount, outcome: "recovered-blocking", costUsd }
31800
+ }),
31801
+ estimatedCostUsd: costUsd
31802
+ };
31803
+ }
31804
+ if (secondParsed.passed) {
31805
+ const secondAdvisory = secondAccepted.filter((f) => !isBlockingSeverity(f.severity, threshold));
31806
+ return {
31807
+ ...turn,
31808
+ output: JSON.stringify({
31809
+ passed: true,
31810
+ findings: [...firstAdvisory, ...secondAdvisory],
31811
+ _repromptInfo: { dropCount, outcome: "recovered-advisory-only", costUsd }
31812
+ }),
31813
+ estimatedCostUsd: costUsd
31814
+ };
31815
+ }
31816
+ return {
31817
+ ...turn,
31818
+ output: withRepromptMarker(turn.output, { dropCount, outcome: "still-dropped", costUsd })
31819
+ };
31820
+ }
31821
+ var FAIL_OPEN, ADVERSARIAL_REQUOTE_RECOVERED_EVENT = "review.adversarial.finding.requote_recovered", ADVERSARIAL_REQUOTE_FAILED_EVENT = "review.adversarial.finding.requote_failed", DEFAULT_MAX_REQUOTES = 5, adversarialParseRetry = (input) => makeParseRetryStrategy({
31254
31822
  validate: (parsed) => validateAdversarialShape(parsed) !== null,
31255
31823
  reviewerKind: "adversarial",
31256
31824
  maxAttempts: 2,
@@ -31258,15 +31826,24 @@ var FAIL_OPEN, adversarialParseRetry = (input) => makeParseRetryStrategy({
31258
31826
  invalid: () => ReviewPromptBuilder.jsonRetry(),
31259
31827
  truncated: () => ReviewPromptBuilder.jsonRetryCondensed({ blockingThreshold: input.blockingThreshold })
31260
31828
  },
31261
- exhaustedFallback: (lastOutput) => /"passed"\s*:\s*false/.test(lastOutput) ? { passed: false, findings: [], looksLikeFail: true } : FAIL_OPEN,
31829
+ exhaustedFallback: (lastOutput) => /"passed"\s*:\s*false/.test(lastOutput) ? { passed: false, findings: [], normalizedFindings: [], acDropped: [], looksLikeFail: true } : FAIL_OPEN,
31262
31830
  logContext: { blockingThreshold: input.blockingThreshold ?? "error" }
31263
31831
  }), adversarialReviewOp;
31264
31832
  var init_adversarial_review = __esm(() => {
31265
31833
  init_retry();
31266
31834
  init_config();
31835
+ init_logger2();
31267
31836
  init_prompts();
31268
31837
  init_adversarial_helpers();
31269
- FAIL_OPEN = { passed: true, findings: [], failOpen: true };
31838
+ init_finding_filters();
31839
+ init_requote_response();
31840
+ FAIL_OPEN = {
31841
+ passed: true,
31842
+ findings: [],
31843
+ normalizedFindings: [],
31844
+ acDropped: [],
31845
+ failOpen: true
31846
+ };
31270
31847
  adversarialReviewOp = {
31271
31848
  kind: "run",
31272
31849
  name: "adversarial-review",
@@ -31276,6 +31853,36 @@ var init_adversarial_review = __esm(() => {
31276
31853
  model: (input) => input.adversarialConfig.model,
31277
31854
  timeoutMs: (input) => input.adversarialConfig.timeoutMs,
31278
31855
  retry: (input) => adversarialParseRetry(input),
31856
+ async hopBody(initialPrompt, ctx) {
31857
+ const turn = await ctx.sendWithParseRetry(initialPrompt);
31858
+ const parsed = validateAdversarialShape(tryParseLLMJson(turn.output));
31859
+ if (!parsed)
31860
+ return turn;
31861
+ if (ctx.input.mode !== "ref")
31862
+ return turn;
31863
+ const regroundEnabled = ctx.input.adversarialConfig.acRegroundOnDrop !== false;
31864
+ if (regroundEnabled) {
31865
+ const firstShape = { passed: parsed.passed, findings: parsed.findings };
31866
+ const trigger = evaluateRepromptTrigger(firstShape, ctx.input);
31867
+ if (trigger.shouldReprompt) {
31868
+ return await performAdversarialReground(turn, parsed, trigger.acDropped, ctx);
31869
+ }
31870
+ }
31871
+ const requoteEnabled = ctx.input.adversarialConfig.substantiation?.requote ?? true;
31872
+ const maxRequotes = ctx.input.adversarialConfig.substantiation?.maxRequotes ?? DEFAULT_MAX_REQUOTES;
31873
+ if (!requoteEnabled || maxRequotes <= 0)
31874
+ return turn;
31875
+ const requoted = await requoteBlockingAdversarialFindings(parsed.findings, ctx);
31876
+ if (requoted.changed) {
31877
+ const passed = !requoted.findings.some((finding) => isBlockingSeverity(finding.severity, ctx.input.blockingThreshold ?? "error"));
31878
+ return {
31879
+ ...turn,
31880
+ output: JSON.stringify({ passed, findings: requoted.findings }),
31881
+ estimatedCostUsd: (turn.estimatedCostUsd ?? 0) + requoted.extraCostUsd
31882
+ };
31883
+ }
31884
+ return turn;
31885
+ },
31279
31886
  build(input, _ctx) {
31280
31887
  const base = new AdversarialReviewPromptBuilder().buildAdversarialReviewPrompt(input.story, input.adversarialConfig, {
31281
31888
  mode: input.mode,
@@ -31298,12 +31905,51 @@ var init_adversarial_review = __esm(() => {
31298
31905
  parse(output, _input, _ctx) {
31299
31906
  const raw = tryParseLLMJson(output);
31300
31907
  const parsed = validateAdversarialShape(raw);
31301
- if (parsed)
31302
- return { passed: parsed.passed, findings: parsed.findings };
31908
+ const repromptEvent = extractRepromptInfo(raw);
31909
+ if (parsed) {
31910
+ return {
31911
+ passed: parsed.passed,
31912
+ findings: parsed.findings,
31913
+ normalizedFindings: [],
31914
+ acDropped: [],
31915
+ repromptEvent
31916
+ };
31917
+ }
31303
31918
  if (/"passed"\s*:\s*false/.test(output) && !/"findings"\s*:\s*\[\s*\{/.test(output)) {
31304
- return { passed: false, findings: [], looksLikeFail: true };
31919
+ return {
31920
+ passed: false,
31921
+ findings: [],
31922
+ normalizedFindings: [],
31923
+ acDropped: [],
31924
+ looksLikeFail: true,
31925
+ repromptEvent
31926
+ };
31305
31927
  }
31306
31928
  throw new ParseValidationError("[adversarial-review] parse failed: invalid JSON shape");
31929
+ },
31930
+ async verify(parsed, input, _verifyCtx) {
31931
+ if (parsed.failOpen || parsed.looksLikeFail)
31932
+ return parsed;
31933
+ if (parsed.findings.length === 0)
31934
+ return parsed;
31935
+ const threshold = input.blockingThreshold ?? "error";
31936
+ const findings = parsed.findings;
31937
+ const substantiated = await substantiateAdversarialFindings({
31938
+ findings,
31939
+ workdir: input.workdir,
31940
+ storyId: input.story.id,
31941
+ blockingThreshold: threshold
31942
+ });
31943
+ const { accepted, dropped } = filterByAcQuote(substantiated, input.story.acceptanceCriteria);
31944
+ const blocking = accepted.filter((f) => isBlockingSeverity(f.severity, threshold));
31945
+ const passed = parsed.passed && blocking.length === 0;
31946
+ return {
31947
+ ...parsed,
31948
+ passed,
31949
+ findings: accepted,
31950
+ normalizedFindings: toAdversarialReviewFindings(blocking),
31951
+ acDropped: dropped
31952
+ };
31307
31953
  }
31308
31954
  };
31309
31955
  });
@@ -31738,191 +32384,6 @@ var init_review_audit = __esm(() => {
31738
32384
  };
31739
32385
  });
31740
32386
 
31741
- // src/review/semantic-helpers.ts
31742
- function validateLLMShape(parsed) {
31743
- if (typeof parsed !== "object" || parsed === null)
31744
- return null;
31745
- const obj = parsed;
31746
- if (typeof obj.passed !== "boolean")
31747
- return null;
31748
- if (!Array.isArray(obj.findings))
31749
- return null;
31750
- return { passed: obj.passed, findings: obj.findings };
31751
- }
31752
- function parseLLMResponse(raw) {
31753
- try {
31754
- return validateLLMShape(tryParseLLMJson(raw));
31755
- } catch {
31756
- return null;
31757
- }
31758
- }
31759
- function formatFindings2(findings) {
31760
- return findings.map((f) => `[${f.severity}] ${f.file}:${f.line} \u2014 ${f.issue}
31761
- Suggestion: ${f.suggestion}`).join(`
31762
- `);
31763
- }
31764
- function normalizeSeverity2(sev) {
31765
- if (sev === "warn")
31766
- return "warning";
31767
- if (sev === "critical" || sev === "error" || sev === "warning" || sev === "info" || sev === "low" || sev === "unverifiable")
31768
- return sev;
31769
- return "info";
31770
- }
31771
- function sanitizeRefModeFindings(findings, diffMode, blockingThreshold = "error") {
31772
- if (diffMode !== "ref")
31773
- return findings;
31774
- return findings.map((finding) => needsDowngradeForMissingEvidence(finding, blockingThreshold) ? downgradeToUnverifiable(finding) : finding);
31775
- }
31776
- function needsDowngradeForMissingEvidence(finding, blockingThreshold) {
31777
- if (!isBlockingSeverity(finding.severity, blockingThreshold))
31778
- return false;
31779
- return mentionsUnverifiedSource(finding) || !hasVerifiedEvidence(finding);
31780
- }
31781
- function mentionsUnverifiedSource(finding) {
31782
- const text = `${finding.issue} ${finding.suggestion}`.toLowerCase();
31783
- return UNVERIFIED_FINDING_PATTERNS.some((pattern) => text.includes(pattern));
31784
- }
31785
- function hasVerifiedEvidence(finding) {
31786
- const evidence = finding.verifiedBy;
31787
- return !!evidence?.file?.trim() && !!evidence.observed?.trim();
31788
- }
31789
- function downgradeToUnverifiable(finding) {
31790
- return {
31791
- ...finding,
31792
- severity: "unverifiable"
31793
- };
31794
- }
31795
- function llmFindingToFinding(f) {
31796
- const metaExtras = {};
31797
- if (f.verifiedBy)
31798
- metaExtras.verifiedBy = f.verifiedBy;
31799
- if (f.acQuote)
31800
- metaExtras.acQuote = f.acQuote;
31801
- if (f.acIndex != null)
31802
- metaExtras.acIndex = f.acIndex;
31803
- return {
31804
- source: "semantic-review",
31805
- severity: normalizeSeverity2(f.severity),
31806
- category: "",
31807
- file: f.file,
31808
- line: f.line,
31809
- message: f.issue,
31810
- suggestion: f.suggestion ?? undefined,
31811
- fixTarget: "source",
31812
- meta: Object.keys(metaExtras).length > 0 ? metaExtras : undefined
31813
- };
31814
- }
31815
- function toReviewFindings(findings) {
31816
- return findings.map(llmFindingToFinding);
31817
- }
31818
- var UNVERIFIED_FINDING_PATTERNS;
31819
- var init_semantic_helpers = __esm(() => {
31820
- init_severity();
31821
- UNVERIFIED_FINDING_PATTERNS = [
31822
- "cannot verify",
31823
- "can't verify",
31824
- "from diff alone",
31825
- "missing from diff",
31826
- "not found in diff",
31827
- "not present in diff",
31828
- "does not appear in diff"
31829
- ];
31830
- });
31831
-
31832
- // src/review/semantic-evidence.ts
31833
- import { isAbsolute as isAbsolute8 } from "path";
31834
- async function substantiateSemanticEvidence(findings, diffMode, workdir, storyId, blockingThreshold = "error") {
31835
- if (diffMode !== "ref")
31836
- return findings;
31837
- return Promise.all(findings.map(async (finding) => {
31838
- if (!isBlockingSeverity(finding.severity, blockingThreshold))
31839
- return finding;
31840
- const evidence = await checkFindingEvidence({ finding, workdir });
31841
- if (evidence.status !== "unmatched")
31842
- return finding;
31843
- return downgradeUnsubstantiatedFinding({ finding, storyId, ...evidence });
31844
- }));
31845
- }
31846
- async function checkFindingEvidence(opts) {
31847
- const observed = opts.finding.verifiedBy?.observed?.trim();
31848
- const file3 = opts.finding.verifiedBy?.file?.trim() || opts.finding.file;
31849
- const line = opts.finding.verifiedBy?.line ?? opts.finding.line;
31850
- if (!observed)
31851
- return { status: "missing-observed", file: file3, line };
31852
- const contents = await readSafeFile(opts.workdir, file3);
31853
- if (contents === null)
31854
- return { status: "unreadable", file: file3, line, observed };
31855
- return matchesEvidence(contents, observed, line) ? { status: "matched", file: file3, line, observed } : { status: "unmatched", file: file3, line, observed };
31856
- }
31857
- function matchesEvidence(contents, observed, line) {
31858
- if (!line || line <= 0) {
31859
- return normalizedIncludes(contents, observed);
31860
- }
31861
- const lines = contents.split(`
31862
- `);
31863
- const cited = Math.min(Math.max(0, line - 1), lines.length - 1);
31864
- const start = Math.max(0, cited - EVIDENCE_LINE_WINDOW);
31865
- const end = Math.min(lines.length, cited + EVIDENCE_LINE_WINDOW + 1);
31866
- const windowText = lines.slice(start, end).join(`
31867
- `);
31868
- return normalizedIncludes(windowText, observed);
31869
- }
31870
- function downgradeUnsubstantiatedFinding(opts) {
31871
- _evidenceDeps.getLogger()?.warn("review", "Downgraded unsubstantiated review finding", {
31872
- storyId: opts.storyId,
31873
- event: opts.event ?? SEMANTIC_FINDING_DOWNGRADED_EVENT,
31874
- file: opts.file ?? opts.finding.verifiedBy?.file ?? opts.finding.file,
31875
- line: opts.line ?? opts.finding.verifiedBy?.line ?? opts.finding.line,
31876
- issue: opts.finding.issue?.slice(0, ISSUE_PREVIEW_CHARS),
31877
- observed: opts.observed?.slice(0, OBSERVED_PREVIEW_CHARS)
31878
- });
31879
- return { ...opts.finding, severity: "unverifiable" };
31880
- }
31881
- async function readSafeFile(workdir, file3) {
31882
- const validated = validateModulePath(file3, [workdir]);
31883
- if (validated.valid && validated.absolutePath) {
31884
- try {
31885
- return await Bun.file(validated.absolutePath).text();
31886
- } catch {
31887
- return null;
31888
- }
31889
- }
31890
- if (isAbsolute8(file3)) {
31891
- try {
31892
- return await Bun.file(file3).text();
31893
- } catch {
31894
- return null;
31895
- }
31896
- }
31897
- return null;
31898
- }
31899
- function normalizedIncludes(contents, observed) {
31900
- const normalizedObserved = normalizeEvidenceText(observed);
31901
- return normalizedObserved.length > 0 && normalizeEvidenceText(contents).includes(normalizedObserved);
31902
- }
31903
- function normalizeEvidenceText(text) {
31904
- return stripWrappingQuotes(text).replace(/\s+/g, " ").trim();
31905
- }
31906
- function stripWrappingQuotes(text) {
31907
- let trimmed = text.trim();
31908
- while (trimmed.length >= 2 && isMatchingWrapper(trimmed[0], trimmed[trimmed.length - 1])) {
31909
- trimmed = trimmed.slice(1, -1).trim();
31910
- }
31911
- return trimmed;
31912
- }
31913
- function isMatchingWrapper(first, last) {
31914
- return first === "`" && last === "`" || first === `"` && last === `"` || first === "'" && last === "'";
31915
- }
31916
- var OBSERVED_PREVIEW_CHARS = 160, ISSUE_PREVIEW_CHARS = 200, EVIDENCE_LINE_WINDOW = 10, SEMANTIC_FINDING_DOWNGRADED_EVENT = "review.semantic.finding.downgraded", ADVERSARIAL_FINDING_DOWNGRADED_EVENT = "review.adversarial.finding.downgraded", _evidenceDeps;
31917
- var init_semantic_evidence = __esm(() => {
31918
- init_logger2();
31919
- init_path_security2();
31920
- init_semantic_helpers();
31921
- _evidenceDeps = {
31922
- getLogger: getSafeLogger
31923
- };
31924
- });
31925
-
31926
32387
  // src/review/adversarial.ts
31927
32388
  import { relative as relative7, sep } from "path";
31928
32389
  function recordAdversarialAudit(opts) {
@@ -31955,7 +32416,6 @@ async function runAdversarialReview(opts) {
31955
32416
  agentManager,
31956
32417
  config: naxConfig,
31957
32418
  featureName,
31958
- priorFailures,
31959
32419
  blockingThreshold,
31960
32420
  featureContextMarkdown,
31961
32421
  contextBundle,
@@ -31966,7 +32426,7 @@ async function runAdversarialReview(opts) {
31966
32426
  } = opts;
31967
32427
  const startTime = Date.now();
31968
32428
  const logger = getSafeLogger();
31969
- const effectiveRef = await resolveEffectiveRef(workdir, storyGitRef, story.id);
32429
+ const effectiveRef = await _adversarialDeps.resolveEffectiveRef(workdir, storyGitRef, story.id);
31970
32430
  if (!effectiveRef) {
31971
32431
  return {
31972
32432
  check: "adversarial",
@@ -31985,7 +32445,7 @@ async function runAdversarialReview(opts) {
31985
32445
  });
31986
32446
  const repoRoot = projectDir ?? workdir;
31987
32447
  const packageDir = workdir !== repoRoot ? workdir : undefined;
31988
- const stat = await collectDiffStat(workdir, effectiveRef, { naxIgnoreIndex, packageDir });
32448
+ const stat = await _adversarialDeps.collectDiffStat(workdir, effectiveRef, { naxIgnoreIndex, packageDir });
31989
32449
  if (!stat) {
31990
32450
  return {
31991
32451
  check: "adversarial",
@@ -32076,13 +32536,13 @@ async function runAdversarialReview(opts) {
32076
32536
  let opResult;
32077
32537
  try {
32078
32538
  opResult = await _adversarialDeps.callOp(callCtx, adversarialReviewOp, {
32539
+ workdir,
32079
32540
  story,
32080
32541
  adversarialConfig,
32081
32542
  mode: diffMode,
32082
32543
  diff,
32083
32544
  storyGitRef: effectiveRef,
32084
32545
  stat,
32085
- priorFailures,
32086
32546
  testInventory,
32087
32547
  excludePatterns: adversarialConfig.excludePatterns,
32088
32548
  testGlobs: resolvedTestPatterns.globs,
@@ -32167,34 +32627,28 @@ async function runAdversarialReview(opts) {
32167
32627
  durationMs: Date.now() - startTime
32168
32628
  };
32169
32629
  }
32170
- const rawParsedRaw = {
32171
- passed: opResult.passed,
32172
- findings: opResult.findings
32173
- };
32174
- const blockingThresholdEffective = blockingThreshold ?? "error";
32175
- const substantiatedFindings = await Promise.all(rawParsedRaw.findings.map(async (finding) => {
32176
- if (!isBlockingSeverity(finding.severity, blockingThresholdEffective))
32177
- return finding;
32178
- const evidence = await checkFindingEvidence({ finding, workdir });
32179
- if (evidence.status !== "unmatched" && evidence.status !== "missing-observed")
32180
- return finding;
32181
- return downgradeUnsubstantiatedFinding({
32182
- finding,
32630
+ if (opResult.repromptEvent) {
32631
+ runtime.dispatchEvents.emitReviewReprompt({
32632
+ kind: "review-reprompt-on-drop",
32183
32633
  storyId: story.id,
32184
- event: ADVERSARIAL_FINDING_DOWNGRADED_EVENT,
32185
- file: evidence.file,
32186
- line: evidence.line,
32187
- observed: evidence.observed
32634
+ reviewer: "adversarial",
32635
+ dropCount: opResult.repromptEvent.dropCount,
32636
+ repromptOutcome: opResult.repromptEvent.outcome,
32637
+ costUsd: opResult.repromptEvent.costUsd
32188
32638
  });
32189
- }));
32190
- const rawParsed = { ...rawParsedRaw, findings: substantiatedFindings };
32639
+ }
32640
+ const threshold = blockingThreshold ?? "error";
32641
+ const allFindings = opResult.findings;
32642
+ const blockingFindings = allFindings.filter((f) => isBlockingSeverity(f.severity, threshold));
32643
+ const advisoryFindings = allFindings.filter((f) => !isBlockingSeverity(f.severity, threshold));
32644
+ const acDropped = opResult.acDropped ?? [];
32191
32645
  let diffFiles;
32192
32646
  let diffAvailable;
32193
32647
  if (diff && diff.length > 0) {
32194
32648
  diffFiles = extractDiffFiles(diff);
32195
32649
  diffAvailable = true;
32196
32650
  } else {
32197
- const list = await collectDiffFileList(workdir, effectiveRef, { naxIgnoreIndex, packageDir });
32651
+ const list = await _adversarialDeps.collectDiffFileList(workdir, effectiveRef, { naxIgnoreIndex, packageDir });
32198
32652
  if (list === undefined) {
32199
32653
  diffFiles = new Set;
32200
32654
  diffAvailable = false;
@@ -32203,13 +32657,6 @@ async function runAdversarialReview(opts) {
32203
32657
  diffAvailable = true;
32204
32658
  }
32205
32659
  }
32206
- const { accepted: acGroundedFindings, dropped: acDropped } = filterByAcQuote(rawParsed.findings, story.acceptanceCriteria);
32207
- if (acDropped.length > 0) {
32208
- logger?.warn("review", "Adversarial findings dropped: acQuote validation failed", {
32209
- storyId: story.id,
32210
- dropped: acDropped.map((d) => ({ file: d.finding.file, issue: d.finding.issue, code: d.code }))
32211
- });
32212
- }
32213
32660
  const adversarialDropAnalysis = acDropped.map((d) => ({
32214
32661
  finding: {
32215
32662
  file: d.finding.file ?? "<unknown>",
@@ -32223,10 +32670,6 @@ async function runAdversarialReview(opts) {
32223
32670
  rawCategory: d.finding.category ?? "",
32224
32671
  counterfactual: analyzeStructuralCounterfactual({ acIndex: d.finding.acIndex, category: d.finding.category, file: d.finding.file }, story.acceptanceCriteria, diffFiles)
32225
32672
  }));
32226
- const parsed = { ...rawParsed, findings: acGroundedFindings };
32227
- const threshold = blockingThresholdEffective;
32228
- const blockingFindings = parsed.findings.filter((f) => isBlockingSeverity(f.severity, threshold));
32229
- const advisoryFindings = parsed.findings.filter((f) => !isBlockingSeverity(f.severity, threshold));
32230
32673
  const adversarialAcceptAnalysis = blockingFindings.map((f) => ({
32231
32674
  finding: {
32232
32675
  file: f.file,
@@ -32249,11 +32692,11 @@ async function runAdversarialReview(opts) {
32249
32692
  }))
32250
32693
  });
32251
32694
  }
32695
+ const durationMs = Date.now() - startTime;
32252
32696
  if (blockingFindings.length > 0) {
32253
- const durationMs2 = Date.now() - startTime;
32254
32697
  logger?.warn("review", `Adversarial review failed: ${blockingFindings.length} blocking findings`, {
32255
32698
  storyId: story.id,
32256
- durationMs: durationMs2,
32699
+ durationMs,
32257
32700
  findings: blockingFindings.map((f) => ({
32258
32701
  severity: f.severity,
32259
32702
  category: f.category,
@@ -32274,72 +32717,37 @@ async function runAdversarialReview(opts) {
32274
32717
  blockingThreshold: threshold,
32275
32718
  result: {
32276
32719
  passed: false,
32277
- findings: llmFindingsToReviewFindings(parsed.findings, { source: "adversarial-review" })
32720
+ findings: llmFindingsToReviewFindings(allFindings, { source: "adversarial-review" })
32278
32721
  },
32279
32722
  advisoryFindings: advisoryFindings.length > 0 ? llmFindingsToReviewFindings(advisoryFindings, { source: "adversarial-review" }) : undefined,
32280
32723
  diffAvailable,
32281
32724
  adversarialDropAnalysis,
32282
32725
  adversarialAcceptAnalysis
32283
32726
  });
32727
+ const output = blockingFindings.length > 0 ? `Adversarial review failed:
32728
+
32729
+ ${formatFindings(blockingFindings)}` : "Adversarial review failed (no findings)";
32284
32730
  return {
32285
32731
  check: "adversarial",
32286
32732
  success: false,
32287
32733
  command: "",
32288
32734
  exitCode: 1,
32289
- output: `Adversarial review failed:
32290
-
32291
- ${formatFindings(blockingFindings)}`,
32292
- durationMs: durationMs2,
32293
- findings: toAdversarialReviewFindings(blockingFindings),
32735
+ output,
32736
+ durationMs,
32737
+ findings: blockingFindings.length > 0 ? toAdversarialReviewFindings(blockingFindings) : undefined,
32294
32738
  advisoryFindings: advisoryFindings.length > 0 ? toAdversarialReviewFindings(advisoryFindings) : undefined,
32295
32739
  cost: llmCost
32296
32740
  };
32297
32741
  }
32298
- if (!parsed.passed && blockingFindings.length === 0) {
32299
- if (acDropped.length > 0) {
32300
- const durationMs3 = Date.now() - startTime;
32301
- logger?.warn("review", "Adversarial review fail-closed: blocking findings dropped as ungrounded", {
32302
- storyId: story.id,
32303
- durationMs: durationMs3,
32304
- droppedCount: acDropped.length,
32305
- dropCodes: acDropped.map((d) => d.code)
32306
- });
32307
- const dropSummary = acDropped.map((d, i) => `${i + 1}. [${d.code}] ${d.finding.file ?? "<unknown>"}: ${d.finding.issue}`).join(`
32308
- `);
32309
- recordAdversarialAudit({
32310
- runtime,
32311
- workdir,
32312
- projectDir,
32313
- storyId: story.id,
32314
- featureName,
32315
- parsed: true,
32316
- failOpen: false,
32317
- passed: false,
32318
- blockingThreshold: threshold,
32319
- result: { passed: false, findings: [] },
32320
- advisoryFindings: advisoryFindings.length > 0 ? llmFindingsToReviewFindings(advisoryFindings, { source: "adversarial-review" }) : undefined,
32321
- diffAvailable,
32322
- adversarialDropAnalysis,
32323
- adversarialAcceptAnalysis: []
32324
- });
32325
- return {
32326
- check: "adversarial",
32327
- success: false,
32328
- command: "",
32329
- exitCode: 1,
32330
- output: `Adversarial review failed: ${acDropped.length} blocking finding(s) dropped as ungrounded \u2014 the model emitted "passed: false" with concerns it could not ground in any acceptance criterion. Either re-classify these as "info" upstream or extend the ACs. Drops:
32331
-
32332
- ${dropSummary}`,
32333
- durationMs: durationMs3,
32334
- advisoryFindings: advisoryFindings.length > 0 ? toAdversarialReviewFindings(advisoryFindings) : undefined,
32335
- cost: llmCost
32336
- };
32337
- }
32338
- const durationMs2 = Date.now() - startTime;
32339
- logger?.info("review", "Adversarial review passed (all findings below blocking threshold)", {
32742
+ if (!opResult.passed && acDropped.length > 0) {
32743
+ logger?.warn("review", "Adversarial review fail-closed: blocking findings dropped as ungrounded", {
32340
32744
  storyId: story.id,
32341
- durationMs: durationMs2
32745
+ durationMs,
32746
+ droppedCount: acDropped.length,
32747
+ dropCodes: acDropped.map((d) => d.code)
32342
32748
  });
32749
+ const dropSummary = acDropped.map((d, i) => `${i + 1}. [${d.code}] ${d.finding.file ?? "<unknown>"}: ${d.finding.issue}`).join(`
32750
+ `);
32343
32751
  recordAdversarialAudit({
32344
32752
  runtime,
32345
32753
  workdir,
@@ -32348,12 +32756,9 @@ ${dropSummary}`,
32348
32756
  featureName,
32349
32757
  parsed: true,
32350
32758
  failOpen: false,
32351
- passed: true,
32759
+ passed: false,
32352
32760
  blockingThreshold: threshold,
32353
- result: {
32354
- passed: true,
32355
- findings: llmFindingsToReviewFindings(parsed.findings, { source: "adversarial-review" })
32356
- },
32761
+ result: { passed: false, findings: [] },
32357
32762
  advisoryFindings: advisoryFindings.length > 0 ? llmFindingsToReviewFindings(advisoryFindings, { source: "adversarial-review" }) : undefined,
32358
32763
  diffAvailable,
32359
32764
  adversarialDropAnalysis,
@@ -32361,19 +32766,18 @@ ${dropSummary}`,
32361
32766
  });
32362
32767
  return {
32363
32768
  check: "adversarial",
32364
- success: true,
32769
+ success: false,
32365
32770
  command: "",
32366
- exitCode: 0,
32367
- output: "Adversarial review passed (all findings were advisory \u2014 below blocking threshold)",
32368
- durationMs: durationMs2,
32771
+ exitCode: 1,
32772
+ output: `Adversarial review failed: ${acDropped.length} blocking finding(s) dropped as ungrounded \u2014 the model emitted "passed: false" with concerns it could not ground in any acceptance criterion. Drops:
32773
+
32774
+ ${dropSummary}`,
32775
+ durationMs,
32369
32776
  advisoryFindings: advisoryFindings.length > 0 ? toAdversarialReviewFindings(advisoryFindings) : undefined,
32370
32777
  cost: llmCost
32371
32778
  };
32372
32779
  }
32373
- const durationMs = Date.now() - startTime;
32374
- if (parsed.passed) {
32375
- logger?.info("review", "Adversarial review passed", { storyId: story.id, durationMs });
32376
- }
32780
+ logger?.info("review", "Adversarial review passed", { storyId: story.id, durationMs });
32377
32781
  recordAdversarialAudit({
32378
32782
  runtime,
32379
32783
  workdir,
@@ -32382,23 +32786,23 @@ ${dropSummary}`,
32382
32786
  featureName,
32383
32787
  parsed: true,
32384
32788
  failOpen: false,
32385
- passed: parsed.passed,
32789
+ passed: true,
32386
32790
  blockingThreshold: threshold,
32387
32791
  result: {
32388
- passed: parsed.passed,
32389
- findings: llmFindingsToReviewFindings(parsed.findings, { source: "adversarial-review" })
32792
+ passed: true,
32793
+ findings: llmFindingsToReviewFindings(allFindings, { source: "adversarial-review" })
32390
32794
  },
32391
32795
  advisoryFindings: advisoryFindings.length > 0 ? llmFindingsToReviewFindings(advisoryFindings, { source: "adversarial-review" }) : undefined,
32392
32796
  diffAvailable,
32393
32797
  adversarialDropAnalysis,
32394
- adversarialAcceptAnalysis
32798
+ adversarialAcceptAnalysis: []
32395
32799
  });
32396
32800
  return {
32397
32801
  check: "adversarial",
32398
- success: parsed.passed,
32802
+ success: true,
32399
32803
  command: "",
32400
- exitCode: parsed.passed ? 0 : 1,
32401
- output: parsed.passed ? "Adversarial review passed" : "Adversarial review failed (no findings)",
32804
+ exitCode: 0,
32805
+ output: allFindings.length === 0 ? "Adversarial review passed" : "Adversarial review passed (all findings were advisory \u2014 below blocking threshold)",
32402
32806
  durationMs,
32403
32807
  advisoryFindings: advisoryFindings.length > 0 ? toAdversarialReviewFindings(advisoryFindings) : undefined,
32404
32808
  cost: llmCost
@@ -32413,16 +32817,17 @@ var init_adversarial = __esm(() => {
32413
32817
  init_adversarial_review();
32414
32818
  init_call();
32415
32819
  init_test_runners();
32416
- init_ac_quote_validator();
32417
32820
  init_ac_structural_counterfactual();
32418
32821
  init_adversarial_helpers();
32419
32822
  init_diff_utils();
32420
32823
  init_finding_projection();
32421
32824
  init_review_audit();
32422
- init_semantic_evidence();
32423
32825
  _adversarialDeps = {
32424
32826
  writeReviewAudit,
32425
- callOp
32827
+ callOp,
32828
+ resolveEffectiveRef,
32829
+ collectDiffStat,
32830
+ collectDiffFileList
32426
32831
  };
32427
32832
  });
32428
32833
 
@@ -33125,6 +33530,13 @@ class ScopedStrategy {
33125
33530
  const durationMs = Date.now() - start;
33126
33531
  if (result.success) {
33127
33532
  const parsed2 = result.output ? parseTestOutput(result.output) : { passed: 0, failed: 0, failures: [] };
33533
+ logger.info("verify[scoped]", "Scoped tests passed", {
33534
+ storyId: ctx.storyId,
33535
+ passCount: parsed2.passed,
33536
+ durationMs,
33537
+ scopeTestFallback: scopeTestFallback ?? false,
33538
+ isFullSuite
33539
+ });
33128
33540
  return makePassResult(ctx.storyId, "scoped", {
33129
33541
  rawOutput: result.output,
33130
33542
  passCount: parsed2.passed,
@@ -33133,6 +33545,12 @@ class ScopedStrategy {
33133
33545
  });
33134
33546
  }
33135
33547
  if (result.status === "TIMEOUT") {
33548
+ logger.warn("verify[scoped]", "Scoped tests timed out", {
33549
+ storyId: ctx.storyId,
33550
+ durationMs,
33551
+ scopeTestFallback: scopeTestFallback ?? false,
33552
+ isFullSuite
33553
+ });
33136
33554
  return makeFailResult(ctx.storyId, "scoped", "TIMEOUT", {
33137
33555
  rawOutput: result.output,
33138
33556
  durationMs,
@@ -33141,6 +33559,14 @@ class ScopedStrategy {
33141
33559
  });
33142
33560
  }
33143
33561
  const parsed = result.output ? parseTestOutput(result.output) : { passed: 0, failed: 0, failures: [] };
33562
+ logger.warn("verify[scoped]", "Scoped tests failed", {
33563
+ storyId: ctx.storyId,
33564
+ passCount: parsed.passed,
33565
+ failCount: parsed.failed,
33566
+ durationMs,
33567
+ scopeTestFallback: scopeTestFallback ?? false,
33568
+ isFullSuite
33569
+ });
33144
33570
  return makeFailResult(ctx.storyId, "scoped", "TEST_FAILURE", {
33145
33571
  rawOutput: result.output,
33146
33572
  passCount: parsed.passed,
@@ -35003,57 +35429,95 @@ var init_acceptance_fix = __esm(() => {
35003
35429
  };
35004
35430
  });
35005
35431
 
35006
- // src/review/requote-response.ts
35007
- function parseRequoteResponse(output) {
35432
+ // src/operations/semantic-review.ts
35433
+ function withRepromptMarker2(output, info) {
35008
35434
  const parsed = tryParseLLMJson(output);
35009
- if (!isRecord(parsed))
35010
- return null;
35011
- const canonical = extractCanonical(parsed);
35012
- if (canonical)
35013
- return canonical;
35014
- const findings = parsed.findings;
35015
- if (!Array.isArray(findings) || findings.length !== 1)
35016
- return null;
35017
- const finding = findings[0];
35018
- if (!isRecord(finding))
35019
- return null;
35020
- return extractCanonical(finding.verifiedBy) ?? extractCanonical(finding);
35435
+ if (!parsed || typeof parsed !== "object")
35436
+ return output;
35437
+ return JSON.stringify({ ...parsed, _repromptInfo: info });
35021
35438
  }
35022
- function extractCanonical(value) {
35023
- if (!isRecord(value))
35024
- return null;
35025
- if (typeof value.file !== "string" || typeof value.observed !== "string")
35026
- return null;
35027
- const file3 = value.file.trim();
35028
- if (!file3)
35029
- return null;
35030
- const line = coerceLine(value.line);
35031
- if (line === null)
35032
- return null;
35439
+ function extractRepromptInfo2(raw) {
35440
+ if (!raw || typeof raw !== "object")
35441
+ return;
35442
+ const info = raw._repromptInfo;
35443
+ if (!info || typeof info !== "object")
35444
+ return;
35445
+ const i = info;
35446
+ if (typeof i.dropCount !== "number" || typeof i.costUsd !== "number" || typeof i.outcome !== "string") {
35447
+ return;
35448
+ }
35033
35449
  return {
35034
- file: file3,
35035
- line: line === undefined ? undefined : line,
35036
- observed: value.observed
35450
+ dropCount: i.dropCount,
35451
+ costUsd: i.costUsd,
35452
+ outcome: i.outcome
35453
+ };
35454
+ }
35455
+ function evaluateRepromptTrigger2(shape, input) {
35456
+ if (input.semanticConfig.acRegroundOnDrop === false)
35457
+ return { shouldReprompt: false };
35458
+ if (shape.passed)
35459
+ return { shouldReprompt: false };
35460
+ const { accepted, dropped } = filterByAcGroundingMinimal(shape.findings, input.story.acceptanceCriteria);
35461
+ const threshold = input.blockingThreshold ?? "error";
35462
+ const blockingAccepted = accepted.filter((f) => isBlockingSeverity(f.severity, threshold));
35463
+ if (blockingAccepted.length > 0)
35464
+ return { shouldReprompt: false };
35465
+ if (dropped.length === 0)
35466
+ return { shouldReprompt: false };
35467
+ return { shouldReprompt: true, acDropped: dropped };
35468
+ }
35469
+ async function performSemanticReground(turn, firstParsed, drops, ctx) {
35470
+ const threshold = ctx.input.blockingThreshold ?? "error";
35471
+ const acceptanceCriteria = ctx.input.story.acceptanceCriteria;
35472
+ const { accepted: firstAccepted } = filterByAcGroundingMinimal(firstParsed.findings, acceptanceCriteria);
35473
+ const firstAdvisory = firstAccepted.filter((f) => !isBlockingSeverity(f.severity, threshold));
35474
+ const repromptPrompt = ReviewPromptBuilder.regroundDroppedFindings({
35475
+ drops,
35476
+ acceptanceCriteria
35477
+ });
35478
+ const secondTurn = await ctx.send(repromptPrompt);
35479
+ const secondParsed = validateLLMShape(tryParseLLMJson(secondTurn.output));
35480
+ const costUsd = (turn.estimatedCostUsd ?? 0) + (secondTurn.estimatedCostUsd ?? 0);
35481
+ const dropCount = drops.length;
35482
+ if (!secondParsed) {
35483
+ return {
35484
+ ...turn,
35485
+ output: withRepromptMarker2(turn.output, { dropCount, outcome: "parse-failed", costUsd })
35486
+ };
35487
+ }
35488
+ const { accepted: secondAccepted } = filterByAcGroundingMinimal(secondParsed.findings, acceptanceCriteria);
35489
+ const secondBlocking = secondAccepted.filter((f) => isBlockingSeverity(f.severity, threshold));
35490
+ if (secondBlocking.length > 0) {
35491
+ return {
35492
+ ...turn,
35493
+ output: JSON.stringify({
35494
+ passed: false,
35495
+ findings: secondParsed.findings,
35496
+ _repromptInfo: { dropCount, outcome: "recovered-blocking", costUsd }
35497
+ }),
35498
+ estimatedCostUsd: costUsd
35499
+ };
35500
+ }
35501
+ if (secondParsed.passed) {
35502
+ const secondAdvisory = secondAccepted.filter((f) => !isBlockingSeverity(f.severity, threshold));
35503
+ return {
35504
+ ...turn,
35505
+ output: JSON.stringify({
35506
+ passed: true,
35507
+ findings: [...firstAdvisory, ...secondAdvisory],
35508
+ _repromptInfo: { dropCount, outcome: "recovered-advisory-only", costUsd }
35509
+ }),
35510
+ estimatedCostUsd: costUsd
35511
+ };
35512
+ }
35513
+ return {
35514
+ ...turn,
35515
+ output: withRepromptMarker2(turn.output, { dropCount, outcome: "still-dropped", costUsd })
35037
35516
  };
35038
35517
  }
35039
- function coerceLine(value) {
35040
- if (value == null)
35041
- return;
35042
- if (typeof value === "number")
35043
- return value;
35044
- if (typeof value === "string" && /^\d+$/.test(value))
35045
- return Number.parseInt(value, 10);
35046
- return null;
35047
- }
35048
- function isRecord(value) {
35049
- return typeof value === "object" && value !== null && !Array.isArray(value);
35050
- }
35051
- var init_requote_response = () => {};
35052
-
35053
- // src/operations/semantic-review.ts
35054
35518
  async function requoteBlockingFindings(findings, ctx) {
35055
35519
  const threshold = ctx.input.blockingThreshold ?? "error";
35056
- const maxRequotes = ctx.input.semanticConfig.substantiation?.maxRequotes ?? DEFAULT_MAX_REQUOTES;
35520
+ const maxRequotes = ctx.input.semanticConfig.substantiation?.maxRequotes ?? DEFAULT_MAX_REQUOTES2;
35057
35521
  const requoteEnabled = ctx.input.semanticConfig.substantiation?.requote ?? true;
35058
35522
  if (ctx.input.mode !== "ref" || !requoteEnabled || maxRequotes <= 0) {
35059
35523
  return { findings, changed: false, extraCostUsd: 0 };
@@ -35120,30 +35584,45 @@ async function requoteBlockingFindings(findings, ctx) {
35120
35584
  }
35121
35585
  return { findings: next, changed, extraCostUsd };
35122
35586
  }
35123
- var FAIL_OPEN2, SEMANTIC_REQUOTE_RECOVERED_EVENT = "review.semantic.finding.requote_recovered", SEMANTIC_REQUOTE_FAILED_EVENT = "review.semantic.finding.requote_failed", DEFAULT_MAX_REQUOTES = 5, semanticReviewHopBody = async (initialPrompt, ctx) => {
35587
+ var FAIL_OPEN2, SEMANTIC_REQUOTE_RECOVERED_EVENT = "review.semantic.finding.requote_recovered", SEMANTIC_REQUOTE_FAILED_EVENT = "review.semantic.finding.requote_failed", DEFAULT_MAX_REQUOTES2 = 5, semanticReviewHopBody = async (initialPrompt, ctx) => {
35124
35588
  const turn = await ctx.sendWithParseRetry(initialPrompt);
35125
35589
  const parsed = validateLLMShape(tryParseLLMJson(turn.output));
35126
35590
  if (!parsed)
35127
35591
  return turn;
35128
35592
  const requoted = await requoteBlockingFindings(parsed.findings, ctx);
35129
- if (!requoted.changed)
35593
+ if (requoted.changed) {
35594
+ const passed = !requoted.findings.some((finding) => isBlockingSeverity(finding.severity, ctx.input.blockingThreshold ?? "error"));
35595
+ return {
35596
+ ...turn,
35597
+ output: JSON.stringify({ passed, findings: requoted.findings }),
35598
+ estimatedCostUsd: (turn.estimatedCostUsd ?? 0) + requoted.extraCostUsd
35599
+ };
35600
+ }
35601
+ if (ctx.input.mode !== "ref")
35130
35602
  return turn;
35131
- const passed = !requoted.findings.some((finding) => isBlockingSeverity(finding.severity, ctx.input.blockingThreshold ?? "error"));
35132
- return {
35133
- ...turn,
35134
- output: JSON.stringify({ passed, findings: requoted.findings }),
35135
- estimatedCostUsd: (turn.estimatedCostUsd ?? 0) + requoted.extraCostUsd
35136
- };
35603
+ const regroundEnabled = ctx.input.semanticConfig.acRegroundOnDrop !== false;
35604
+ if (!regroundEnabled)
35605
+ return turn;
35606
+ const firstShape = { passed: parsed.passed, findings: requoted.findings };
35607
+ const trigger = evaluateRepromptTrigger2(firstShape, ctx.input);
35608
+ if (!trigger.shouldReprompt)
35609
+ return turn;
35610
+ return performSemanticReground(turn, firstShape, trigger.acDropped, ctx);
35137
35611
  }, semanticReviewOp;
35138
35612
  var init_semantic_review = __esm(() => {
35139
35613
  init_retry();
35140
35614
  init_config();
35141
35615
  init_logger2();
35142
35616
  init_prompts();
35617
+ init_finding_filters();
35143
35618
  init_requote_response();
35144
- init_semantic_evidence();
35145
- init_semantic_helpers();
35146
- FAIL_OPEN2 = { passed: true, findings: [], failOpen: true };
35619
+ FAIL_OPEN2 = {
35620
+ passed: true,
35621
+ findings: [],
35622
+ normalizedFindings: [],
35623
+ acDropped: [],
35624
+ failOpen: true
35625
+ };
35147
35626
  semanticReviewOp = {
35148
35627
  kind: "run",
35149
35628
  name: "semantic-review",
@@ -35160,6 +35639,7 @@ var init_semantic_review = __esm(() => {
35160
35639
  invalid: () => ReviewPromptBuilder.jsonRetry(),
35161
35640
  truncated: () => ReviewPromptBuilder.jsonRetryCondensed({ blockingThreshold: input.blockingThreshold })
35162
35641
  },
35642
+ exhaustedFallback: (lastOutput) => /"passed"\s*:\s*false/.test(lastOutput) ? { passed: false, findings: [], normalizedFindings: [], acDropped: [], looksLikeFail: true } : FAIL_OPEN2,
35163
35643
  logContext: { blockingThreshold: input.blockingThreshold ?? "error" }
35164
35644
  }),
35165
35645
  hopBody: semanticReviewHopBody,
@@ -35181,11 +35661,47 @@ var init_semantic_review = __esm(() => {
35181
35661
  parse(output, _input, _ctx) {
35182
35662
  const raw = tryParseLLMJson(output);
35183
35663
  const parsed = validateLLMShape(raw);
35184
- if (parsed)
35185
- return { passed: parsed.passed, findings: parsed.findings };
35186
- if (/"passed"\s*:\s*false/.test(output))
35187
- return { passed: false, findings: [], looksLikeFail: true };
35664
+ const repromptEvent = extractRepromptInfo2(raw);
35665
+ if (parsed) {
35666
+ return {
35667
+ passed: parsed.passed,
35668
+ findings: parsed.findings,
35669
+ normalizedFindings: [],
35670
+ acDropped: [],
35671
+ repromptEvent
35672
+ };
35673
+ }
35674
+ if (/"passed"\s*:\s*false/.test(output)) {
35675
+ return {
35676
+ passed: false,
35677
+ findings: [],
35678
+ normalizedFindings: [],
35679
+ acDropped: [],
35680
+ looksLikeFail: true,
35681
+ repromptEvent
35682
+ };
35683
+ }
35188
35684
  return FAIL_OPEN2;
35685
+ },
35686
+ async verify(parsed, input, _verifyCtx) {
35687
+ if (parsed.failOpen || parsed.looksLikeFail)
35688
+ return parsed;
35689
+ if (parsed.findings.length === 0)
35690
+ return parsed;
35691
+ const threshold = input.blockingThreshold ?? "error";
35692
+ const findings = parsed.findings;
35693
+ const sanitized = sanitizeRefModeFindings(findings, input.mode, threshold);
35694
+ const substantiated = await substantiateSemanticEvidence(sanitized, input.mode, input.workdir, input.story.id, threshold);
35695
+ const { accepted, dropped } = filterByAcGroundingMinimal(substantiated, input.story.acceptanceCriteria);
35696
+ const blocking = accepted.filter((f) => isBlockingSeverity(f.severity, threshold));
35697
+ const passed = parsed.passed && blocking.length === 0;
35698
+ return {
35699
+ ...parsed,
35700
+ passed,
35701
+ findings: accepted,
35702
+ normalizedFindings: toReviewFindings(blocking),
35703
+ acDropped: dropped
35704
+ };
35189
35705
  }
35190
35706
  };
35191
35707
  });
@@ -37226,7 +37742,7 @@ var init_greenfield_gate = __esm(() => {
37226
37742
  });
37227
37743
  // src/verification/rectification.ts
37228
37744
  function shouldRetryRectification(state, config2) {
37229
- if (state.attempt >= config2.maxRetries) {
37745
+ if (state.attempt >= config2.maxAttemptsTotal) {
37230
37746
  return false;
37231
37747
  }
37232
37748
  if (state.lastExitCode !== undefined && state.lastExitCode !== 0 && state.currentFailures === 0) {
@@ -37329,7 +37845,7 @@ var init_full_suite_gate = __esm(() => {
37329
37845
  });
37330
37846
 
37331
37847
  // src/operations/full-suite-rectify.ts
37332
- function makeFullSuiteRectifyStrategy(story) {
37848
+ function makeFullSuiteRectifyStrategy(story, config2) {
37333
37849
  return {
37334
37850
  name: "full-suite-rectify",
37335
37851
  appliesTo: (finding) => finding.source === "test-runner" && finding.category === "failed-test",
@@ -37339,7 +37855,7 @@ function makeFullSuiteRectifyStrategy(story) {
37339
37855
  contextMarkdown: RectifierPromptBuilder.failingTestContext(findings)
37340
37856
  }),
37341
37857
  extractApplied: () => ({ targetFiles: [], summary: "Fixed failing tests" }),
37342
- maxAttempts: 3,
37858
+ maxAttempts: config2.execution.rectification.maxAttemptsPerStrategy,
37343
37859
  coRun: "exclusive"
37344
37860
  };
37345
37861
  }
@@ -37380,7 +37896,7 @@ var init__finding_to_check = __esm(() => {
37380
37896
  });
37381
37897
 
37382
37898
  // src/operations/autofix-implementer-strategy.ts
37383
- function makeAutofixImplementerStrategy(story) {
37899
+ function makeAutofixImplementerStrategy(story, config2) {
37384
37900
  return {
37385
37901
  name: "autofix-implementer",
37386
37902
  appliesTo: (f) => f.fixTarget === "source" && IMPLEMENTER_SOURCES.has(f.source),
@@ -37393,7 +37909,7 @@ function makeAutofixImplementerStrategy(story) {
37393
37909
  summary: output.unresolvedReason ?? "",
37394
37910
  unresolved: output.unresolvedReason
37395
37911
  }),
37396
- maxAttempts: 3,
37912
+ maxAttempts: config2.execution.rectification.maxAttemptsPerStrategy,
37397
37913
  coRun: "co-run-sequential"
37398
37914
  };
37399
37915
  }
@@ -37415,7 +37931,7 @@ function makeAutofixTestWriterStrategy(story, config2) {
37415
37931
  story,
37416
37932
  blockingThreshold: config2.review?.blockingThreshold
37417
37933
  }),
37418
- maxAttempts: 2,
37934
+ maxAttempts: config2.execution.rectification.maxAttemptsPerStrategy,
37419
37935
  coRun: "co-run-sequential"
37420
37936
  };
37421
37937
  }
@@ -39360,30 +39876,57 @@ async function runSemanticReview(opts) {
39360
39876
  durationMs: Date.now() - startTime
39361
39877
  };
39362
39878
  }
39363
- const parsed = { passed: opResult.passed, findings: opResult.findings };
39364
- const sanitizedFindings = await substantiateSemanticEvidence(sanitizeRefModeFindings(parsed.findings, diffMode, blockingThreshold ?? "error"), diffMode, workdir, story.id, blockingThreshold ?? "error");
39365
- const { accepted: acGroundedFindings, dropped: acDropped } = filterByAcGroundingMinimal(sanitizedFindings, story.acceptanceCriteria);
39366
- if (acDropped.length > 0) {
39367
- logger?.warn("review", "Semantic findings dropped: acIndex missing or out of range", {
39879
+ if (opResult.looksLikeFail) {
39880
+ logger?.warn("semantic", "LLM returned truncated JSON with passed:false \u2014 treating as failure", {
39881
+ storyId: story.id
39882
+ });
39883
+ recordSemanticAudit({
39884
+ runtime,
39885
+ workdir,
39886
+ projectDir,
39887
+ storyId: story.id,
39888
+ featureName,
39889
+ parsed: false,
39890
+ looksLikeFail: true,
39891
+ failOpen: false,
39892
+ passed: false,
39893
+ blockingThreshold,
39894
+ result: null
39895
+ });
39896
+ return {
39897
+ check: "semantic",
39898
+ success: false,
39899
+ command: "",
39900
+ exitCode: 1,
39901
+ output: "semantic review: LLM response truncated but indicated failure (passed:false found in partial response)",
39902
+ durationMs: Date.now() - startTime
39903
+ };
39904
+ }
39905
+ if (opResult.repromptEvent) {
39906
+ runtime.dispatchEvents.emitReviewReprompt({
39907
+ kind: "review-reprompt-on-drop",
39368
39908
  storyId: story.id,
39369
- dropped: acDropped.map((d) => ({ file: d.finding.file, issue: d.finding.issue, code: d.code }))
39909
+ reviewer: "semantic",
39910
+ dropCount: opResult.repromptEvent.dropCount,
39911
+ repromptOutcome: opResult.repromptEvent.outcome,
39912
+ costUsd: opResult.repromptEvent.costUsd
39370
39913
  });
39371
39914
  }
39372
- const sanitizedParsed = { ...parsed, findings: acGroundedFindings };
39373
39915
  const threshold = blockingThreshold ?? "error";
39374
- const blockingFindings = sanitizedParsed.findings.filter((f) => isBlockingSeverity(f.severity, threshold));
39375
- const advisoryFindings = sanitizedParsed.findings.filter((f) => !isBlockingSeverity(f.severity, threshold));
39916
+ const allFindings = opResult.findings;
39917
+ const blockingFindings = allFindings.filter((f) => isBlockingSeverity(f.severity, threshold));
39918
+ const advisoryFindings = allFindings.filter((f) => !isBlockingSeverity(f.severity, threshold));
39376
39919
  if (advisoryFindings.length > 0) {
39377
39920
  logger?.debug("review", `Semantic review: ${advisoryFindings.length} advisory findings (below threshold '${threshold}')`, {
39378
39921
  storyId: story.id,
39379
39922
  findings: advisoryFindings.map((f) => ({ severity: f.severity, file: f.file, issue: f.issue }))
39380
39923
  });
39381
39924
  }
39382
- if (!sanitizedParsed.passed && blockingFindings.length > 0) {
39383
- const durationMs2 = Date.now() - startTime;
39925
+ const durationMs = Date.now() - startTime;
39926
+ if (blockingFindings.length > 0) {
39384
39927
  logger?.warn("review", `Semantic review failed: ${blockingFindings.length} blocking findings`, {
39385
39928
  storyId: story.id,
39386
- durationMs: durationMs2
39929
+ durationMs
39387
39930
  });
39388
39931
  logger?.debug("review", "Semantic review findings", {
39389
39932
  storyId: story.id,
@@ -39410,7 +39953,7 @@ ${formatFindings2(blockingFindings)}`;
39410
39953
  blockingThreshold: threshold,
39411
39954
  result: {
39412
39955
  passed: false,
39413
- findings: llmFindingsToReviewFindings(sanitizedParsed.findings, { source: "semantic-review" })
39956
+ findings: llmFindingsToReviewFindings(allFindings, { source: "semantic-review" })
39414
39957
  },
39415
39958
  advisoryFindings: advisoryFindings.length > 0 ? llmFindingsToReviewFindings(advisoryFindings, { source: "semantic-review" }) : undefined
39416
39959
  });
@@ -39420,53 +39963,16 @@ ${formatFindings2(blockingFindings)}`;
39420
39963
  command: "",
39421
39964
  exitCode: 1,
39422
39965
  output,
39423
- durationMs: durationMs2,
39966
+ durationMs,
39424
39967
  findings: toReviewFindings(blockingFindings),
39425
39968
  advisoryFindings: advisoryFindings.length > 0 ? toReviewFindings(advisoryFindings) : undefined,
39426
39969
  cost: llmCost
39427
39970
  };
39428
39971
  }
39429
- if (!sanitizedParsed.passed && blockingFindings.length === 0) {
39430
- if (acDropped.length > 0) {
39431
- const durationMs3 = Date.now() - startTime;
39432
- logger?.warn("review", "Semantic review fail-closed: blocking findings dropped (acIndex invalid)", {
39433
- storyId: story.id,
39434
- durationMs: durationMs3,
39435
- droppedCount: acDropped.length,
39436
- dropCodes: acDropped.map((d) => d.code)
39437
- });
39438
- const dropSummary = acDropped.map((d, i) => `${i + 1}. [${d.code}] ${d.finding.file ?? "<unknown>"}: ${d.finding.issue}`).join(`
39439
- `);
39440
- recordSemanticAudit({
39441
- runtime,
39442
- workdir,
39443
- projectDir,
39444
- storyId: story.id,
39445
- featureName,
39446
- parsed: true,
39447
- failOpen: false,
39448
- passed: false,
39449
- blockingThreshold: threshold,
39450
- result: { passed: false, findings: [] },
39451
- advisoryFindings: advisoryFindings.length > 0 ? llmFindingsToReviewFindings(advisoryFindings, { source: "semantic-review" }) : undefined
39452
- });
39453
- return {
39454
- check: "semantic",
39455
- success: false,
39456
- command: "",
39457
- exitCode: 1,
39458
- output: `Semantic review failed: ${acDropped.length} blocking finding(s) dropped \u2014 acIndex was missing or out of range. The model emitted "passed: false" without valid AC attribution. Either re-classify these as "info" or ensure each error finding includes a valid acIndex. Drops:
39459
-
39460
- ${dropSummary}`,
39461
- durationMs: durationMs3,
39462
- advisoryFindings: advisoryFindings.length > 0 ? toReviewFindings(advisoryFindings) : undefined,
39463
- cost: llmCost
39464
- };
39465
- }
39466
- const durationMs2 = Date.now() - startTime;
39467
- logger?.info("review", "Semantic review passed (all findings below blocking threshold)", {
39972
+ if (!opResult.passed && allFindings.length === 0) {
39973
+ logger?.warn("review", "Semantic review fail-closed: blocking findings dropped (acIndex invalid)", {
39468
39974
  storyId: story.id,
39469
- durationMs: durationMs2
39975
+ durationMs
39470
39976
  });
39471
39977
  recordSemanticAudit({
39472
39978
  runtime,
@@ -39476,29 +39982,23 @@ ${dropSummary}`,
39476
39982
  featureName,
39477
39983
  parsed: true,
39478
39984
  failOpen: false,
39479
- passed: true,
39985
+ passed: false,
39480
39986
  blockingThreshold: threshold,
39481
- result: {
39482
- passed: true,
39483
- findings: llmFindingsToReviewFindings(sanitizedParsed.findings, { source: "semantic-review" })
39484
- },
39987
+ result: { passed: false, findings: [] },
39485
39988
  advisoryFindings: advisoryFindings.length > 0 ? llmFindingsToReviewFindings(advisoryFindings, { source: "semantic-review" }) : undefined
39486
39989
  });
39487
39990
  return {
39488
39991
  check: "semantic",
39489
- success: true,
39992
+ success: false,
39490
39993
  command: "",
39491
- exitCode: 0,
39492
- output: "Semantic review passed (all findings were advisory \u2014 below blocking threshold)",
39493
- durationMs: durationMs2,
39994
+ exitCode: 1,
39995
+ output: 'Semantic review failed: blocking finding(s) were dropped \u2014 acIndex was missing or out of range. The model emitted "passed: false" without valid AC attribution.',
39996
+ durationMs,
39494
39997
  advisoryFindings: advisoryFindings.length > 0 ? toReviewFindings(advisoryFindings) : undefined,
39495
39998
  cost: llmCost
39496
39999
  };
39497
40000
  }
39498
- const durationMs = Date.now() - startTime;
39499
- if (sanitizedParsed.passed) {
39500
- logger?.info("review", "Semantic review passed", { storyId: story.id, durationMs });
39501
- }
40001
+ logger?.info("review", "Semantic review passed", { storyId: story.id, durationMs });
39502
40002
  recordSemanticAudit({
39503
40003
  runtime,
39504
40004
  workdir,
@@ -39507,20 +40007,20 @@ ${dropSummary}`,
39507
40007
  featureName,
39508
40008
  parsed: true,
39509
40009
  failOpen: false,
39510
- passed: sanitizedParsed.passed,
40010
+ passed: true,
39511
40011
  blockingThreshold: threshold,
39512
40012
  result: {
39513
- passed: sanitizedParsed.passed,
39514
- findings: llmFindingsToReviewFindings(sanitizedParsed.findings, { source: "semantic-review" })
40013
+ passed: true,
40014
+ findings: llmFindingsToReviewFindings(allFindings, { source: "semantic-review" })
39515
40015
  },
39516
40016
  advisoryFindings: advisoryFindings.length > 0 ? llmFindingsToReviewFindings(advisoryFindings, { source: "semantic-review" }) : undefined
39517
40017
  });
39518
40018
  return {
39519
40019
  check: "semantic",
39520
- success: sanitizedParsed.passed,
40020
+ success: true,
39521
40021
  command: "",
39522
- exitCode: sanitizedParsed.passed ? 0 : 1,
39523
- output: sanitizedParsed.passed ? "Semantic review passed" : "Semantic review failed (no findings)",
40022
+ exitCode: 0,
40023
+ output: allFindings.length === 0 ? "Semantic review passed" : "Semantic review passed (all findings were advisory \u2014 below blocking threshold)",
39524
40024
  durationMs,
39525
40025
  advisoryFindings: advisoryFindings.length > 0 ? toReviewFindings(advisoryFindings) : undefined,
39526
40026
  cost: llmCost
@@ -39537,12 +40037,10 @@ var init_semantic = __esm(() => {
39537
40037
  init_semantic_review();
39538
40038
  init_prompts();
39539
40039
  init_test_runners();
39540
- init_ac_quote_validator();
39541
40040
  init_diff_utils();
39542
40041
  init_finding_projection();
39543
40042
  init_review_audit();
39544
40043
  init_semantic_debate();
39545
- init_semantic_evidence();
39546
40044
  init_semantic_helpers();
39547
40045
  _semanticDeps = {
39548
40046
  createDebateRunner: (opts) => new DebateRunner(opts),
@@ -39815,6 +40313,18 @@ async function runReview(opts) {
39815
40313
  naxIgnoreIndex
39816
40314
  }) : normalizeMechanicalFindings(checkName, await runCheck(checkName, command, workdir, storyId, env2), workdir);
39817
40315
  checks3.push(result);
40316
+ if (result.success) {
40317
+ logger?.info("review", `${checkName} passed`, {
40318
+ storyId,
40319
+ durationMs: result.durationMs
40320
+ });
40321
+ } else {
40322
+ logger?.warn("review", `${checkName} failed`, {
40323
+ storyId,
40324
+ exitCode: result.exitCode,
40325
+ durationMs: result.durationMs
40326
+ });
40327
+ }
39818
40328
  if (!result.success && !firstFailure) {
39819
40329
  firstFailure = `${checkName} failed (exit code ${result.exitCode})`;
39820
40330
  }
@@ -41869,6 +42379,38 @@ async function callOp(ctx, op, input) {
41869
42379
  const rawOutput = outcome.result.output;
41870
42380
  const totalCost = outcome.result.estimatedCostUsd ?? 0;
41871
42381
  if (!rawOutput) {
42382
+ if (maxRetriesExceeded) {
42383
+ getSafeLogger()?.error("callop", "Op retry budget exhausted (empty output)", {
42384
+ storyId: ctx.storyId,
42385
+ opName: op.name,
42386
+ site: "run",
42387
+ totalAttempts: MAX_COMPLETE_RETRY_ATTEMPTS + 1
42388
+ });
42389
+ throw new NaxError(`callOp[${op.name}]: CALL_OP_MAX_RETRIES \u2014 exceeded MAX_COMPLETE_RETRY_ATTEMPTS (${MAX_COMPLETE_RETRY_ATTEMPTS})`, "CALL_OP_MAX_RETRIES", { stage: op.stage, storyId: ctx.storyId });
42390
+ }
42391
+ if (retryFallback !== undefined) {
42392
+ if (typeof retryFallback !== "object" || retryFallback === null) {
42393
+ throw new NaxError(`callOp[${op.name}]: exhaustedFallback returned a non-object (${typeof retryFallback}); fallback must be a plain object`, "CALL_OP_INVALID_FALLBACK", { stage: op.stage, storyId: ctx.storyId });
42394
+ }
42395
+ getSafeLogger()?.warn("callop", "Returning exhaustedFallback on empty output", {
42396
+ storyId: ctx.storyId,
42397
+ opName: op.name,
42398
+ agentName: dispatchAgent
42399
+ });
42400
+ return { ...retryFallback, estimatedCostUsd: totalCost };
42401
+ }
42402
+ if (op.recover) {
42403
+ const verifyCtx = makeVerifyCtx(buildCtx);
42404
+ const recovered = await op.recover(input, verifyCtx);
42405
+ if (recovered !== null) {
42406
+ getSafeLogger()?.warn("callop", "Recovered from empty output via op.recover", {
42407
+ storyId: ctx.storyId,
42408
+ opName: op.name,
42409
+ agentName: dispatchAgent
42410
+ });
42411
+ return recovered;
42412
+ }
42413
+ }
41872
42414
  throw new NaxError(`callOp[${op.name}]: agent returned no output`, "CALL_OP_NO_OUTPUT", {
41873
42415
  stage: op.stage,
41874
42416
  storyId: ctx.storyId,
@@ -52017,12 +52559,16 @@ function phasePassed(opName, output) {
52017
52559
  });
52018
52560
  return true;
52019
52561
  }
52562
+ function isFinding(value) {
52563
+ return typeof value === "object" && value !== null && typeof value.source === "string" && value.source.length > 0;
52564
+ }
52020
52565
  function extractPhaseFindings(output) {
52021
52566
  if (output === null || output === undefined || typeof output !== "object") {
52022
52567
  return [];
52023
52568
  }
52024
52569
  const record2 = output;
52025
- const findings = Array.isArray(record2.findings) ? record2.findings : [];
52570
+ const rawArray = Array.isArray(record2.normalizedFindings) ? record2.normalizedFindings : Array.isArray(record2.findings) ? record2.findings : [];
52571
+ const findings = rawArray.filter(isFinding);
52026
52572
  const success2 = "success" in record2 ? record2.success === true : ("passed" in record2) ? record2.passed === true : findings.length === 0;
52027
52573
  return success2 ? [] : findings;
52028
52574
  }
@@ -52085,11 +52631,24 @@ function toReviewDecisionPayload(opName, output) {
52085
52631
  if (typeof record2.passed !== "boolean" || !Array.isArray(record2.findings)) {
52086
52632
  return null;
52087
52633
  }
52634
+ const acDropped = Array.isArray(record2.acDropped) ? record2.acDropped.map((d) => {
52635
+ const entry = d ?? {};
52636
+ const finding = entry.finding ?? {};
52637
+ return {
52638
+ code: typeof entry.code === "string" ? entry.code : undefined,
52639
+ severity: typeof finding.severity === "string" ? finding.severity : undefined,
52640
+ file: typeof finding.file === "string" ? finding.file : undefined,
52641
+ line: typeof finding.line === "number" ? finding.line : undefined,
52642
+ issue: typeof finding.issue === "string" ? finding.issue : undefined,
52643
+ acIndex: typeof finding.acIndex === "number" ? finding.acIndex : undefined
52644
+ };
52645
+ }) : undefined;
52088
52646
  return {
52089
52647
  reviewer,
52090
52648
  parsed: true,
52091
52649
  passed: record2.passed,
52092
- result: { passed: record2.passed, findings: record2.findings }
52650
+ result: { passed: record2.passed, findings: record2.findings },
52651
+ acDropped
52093
52652
  };
52094
52653
  }
52095
52654
  function emitReviewDecision(ctx, opName, output) {
@@ -52138,12 +52697,38 @@ function logUnifiedReviewPhaseResult(storyId, opName, output) {
52138
52697
  const title = payload.reviewer === "semantic" ? "Semantic review" : "Adversarial review";
52139
52698
  if (payload.passed) {
52140
52699
  logger?.info("review", `${title} passed`, { storyId });
52141
- } else {
52142
- logger?.warn("review", `${title} failed: ${findingsCount} findings`, {
52700
+ return;
52701
+ }
52702
+ if (findingsCount === 0) {
52703
+ const dropped = payload.acDropped ?? [];
52704
+ const droppedSummary = dropped.slice(0, 5);
52705
+ logger?.warn("review", `${title} failed: 0 findings \u2014 ${dropped.length > 0 ? `${dropped.length} blocking finding(s) dropped as ungrounded by AC-grounding filter` : "model emitted passed:false but produced no findings (likely empty output)"}`, {
52143
52706
  storyId,
52144
- findingsCount
52707
+ findingsCount,
52708
+ reason: dropped.length > 0 ? "ac-grounding-drop" : "passed-false-no-findings",
52709
+ droppedCount: dropped.length || undefined,
52710
+ droppedFindings: droppedSummary.length > 0 ? droppedSummary : undefined,
52711
+ droppedTruncated: dropped.length > droppedSummary.length || undefined
52145
52712
  });
52713
+ return;
52146
52714
  }
52715
+ const findingsSummary = payload.result.findings.slice(0, 5).map((f) => {
52716
+ const r = f ?? {};
52717
+ return {
52718
+ severity: typeof r.severity === "string" ? r.severity : undefined,
52719
+ file: typeof r.file === "string" ? r.file : undefined,
52720
+ line: typeof r.line === "number" ? r.line : undefined,
52721
+ rule: typeof r.rule === "string" ? r.rule : undefined,
52722
+ issue: typeof r.issue === "string" ? r.issue : typeof r.message === "string" ? r.message : undefined,
52723
+ acIndex: typeof r.acIndex === "number" ? r.acIndex : undefined
52724
+ };
52725
+ });
52726
+ logger?.warn("review", `${title} failed: ${findingsCount} findings`, {
52727
+ storyId,
52728
+ findingsCount,
52729
+ findings: findingsSummary,
52730
+ truncated: findingsCount > findingsSummary.length
52731
+ });
52147
52732
  }
52148
52733
  async function runPhase(ctx, slot, phaseCosts, phaseOutputs, isThreeSession = false) {
52149
52734
  const logger = getSafeLogger();
@@ -52515,10 +53100,10 @@ function buildPlanForStrategy(ctx, story, config2, testStrategy, inputs) {
52515
53100
  strategies.push(makeMechanicalFormatFixStrategy());
52516
53101
  }
52517
53102
  if (isThreeSession && inputs.fullSuiteGate) {
52518
- strategies.push(makeFullSuiteRectifyStrategy(story));
53103
+ strategies.push(makeFullSuiteRectifyStrategy(story, config2));
52519
53104
  }
52520
53105
  if (config2.quality.autofix?.enabled !== false) {
52521
- strategies.push(makeAutofixImplementerStrategy(story));
53106
+ strategies.push(makeAutofixImplementerStrategy(story, config2));
52522
53107
  strategies.push(makeAutofixTestWriterStrategy(story, config2));
52523
53108
  }
52524
53109
  const rectOpts = {
@@ -52650,6 +53235,7 @@ async function assemblePlanInputsFromCtx(ctx) {
52650
53235
  blockingThreshold: ctx.config.review.blockingThreshold
52651
53236
  } : undefined;
52652
53237
  const adversarialReviewInput = ctx.config.review?.enabled === true && ctx.config.review.checks?.includes("adversarial") && ctx.config.review.adversarial ? {
53238
+ workdir: ctx.workdir,
52653
53239
  story,
52654
53240
  adversarialConfig: ctx.config.review.adversarial,
52655
53241
  mode: ctx.config.review.adversarial.diffMode,
@@ -52658,9 +53244,9 @@ async function assemblePlanInputsFromCtx(ctx) {
52658
53244
  blockingThreshold: ctx.config.review.blockingThreshold
52659
53245
  } : undefined;
52660
53246
  const rectificationInput = ctx.config.execution?.rectification?.enabled === true ? {
52661
- maxAttempts: ctx.config.execution.rectification.maxRetries ?? 2,
53247
+ maxAttempts: ctx.config.execution.rectification.maxAttemptsTotal,
52662
53248
  strategies: [],
52663
- abortOnIncreasingFailures: ctx.config.execution.rectification.abortOnIncreasingFailures ?? true
53249
+ abortOnIncreasingFailures: ctx.config.execution.rectification.abortOnIncreasingFailures
52664
53250
  } : undefined;
52665
53251
  return {
52666
53252
  story,
@@ -53124,10 +53710,29 @@ Category: ${failureCategory ?? "unknown"}`,
53124
53710
  }
53125
53711
  }
53126
53712
  if (!planResult.success) {
53713
+ const failedPhases = {};
53714
+ for (const [name, output] of Object.entries(planResult.phaseOutputs)) {
53715
+ if (!output || typeof output !== "object")
53716
+ continue;
53717
+ const r = output;
53718
+ const passed = typeof r.passed === "boolean" ? r.passed : undefined;
53719
+ const success2 = typeof r.success === "boolean" ? r.success : undefined;
53720
+ const explicitFail = passed === false || success2 === false;
53721
+ if (!explicitFail)
53722
+ continue;
53723
+ const findings = Array.isArray(r.findings) ? r.findings.length : undefined;
53724
+ failedPhases[name] = { passed, success: success2, findingsCount: findings };
53725
+ }
53726
+ const stderrTail = (agentResult.stderr ?? "").slice(-500);
53727
+ const outputTail = (agentResult.output ?? "").slice(-500);
53127
53728
  logger.error("execution", "Agent session failed", {
53128
53729
  storyId: ctx.story.id,
53129
53730
  exitCode: agentResult.exitCode,
53130
- rateLimited: agentResult.rateLimited
53731
+ rateLimited: agentResult.rateLimited,
53732
+ failureCategory: failureCategory ?? "unknown",
53733
+ failedPhases: Object.keys(failedPhases).length > 0 ? failedPhases : undefined,
53734
+ stderrTail: stderrTail || undefined,
53735
+ outputTail: outputTail || undefined
53131
53736
  });
53132
53737
  if (agentResult.rateLimited) {
53133
53738
  logger.warn("execution", "Rate limited \u2014 will retry", { storyId: ctx.story.id });
@@ -53851,6 +54456,11 @@ class RegressionStrategy {
53851
54456
  const durationMs = Date.now() - start;
53852
54457
  if (result.success) {
53853
54458
  const parsed2 = result.output ? parseTestOutput(result.output) : { passed: 0, failed: 0, failures: [] };
54459
+ logger?.info("verify[regression]", "Full-suite regression gate passed", {
54460
+ storyId: ctx.storyId,
54461
+ passCount: parsed2.passed,
54462
+ durationMs
54463
+ });
53854
54464
  return makePassResult(ctx.storyId, "regression", {
53855
54465
  rawOutput: result.output,
53856
54466
  passCount: parsed2.passed,
@@ -53864,9 +54474,19 @@ class RegressionStrategy {
53864
54474
  return makePassResult(ctx.storyId, "regression", { durationMs });
53865
54475
  }
53866
54476
  if (result.status === "TIMEOUT") {
54477
+ logger?.warn("verify[regression]", "Full-suite regression gate timed out", {
54478
+ storyId: ctx.storyId,
54479
+ durationMs
54480
+ });
53867
54481
  return makeFailResult(ctx.storyId, "regression", "TIMEOUT", { rawOutput: result.output, durationMs });
53868
54482
  }
53869
54483
  const parsed = result.output ? parseTestOutput(result.output) : { passed: 0, failed: 0, failures: [] };
54484
+ logger?.warn("verify[regression]", "Full-suite regression gate failed", {
54485
+ storyId: ctx.storyId,
54486
+ passCount: parsed.passed,
54487
+ failCount: parsed.failed,
54488
+ durationMs
54489
+ });
53870
54490
  return makeFailResult(ctx.storyId, "regression", "TEST_FAILURE", {
53871
54491
  rawOutput: result.output,
53872
54492
  passCount: parsed.passed,
@@ -56943,7 +57563,7 @@ var package_default;
56943
57563
  var init_package = __esm(() => {
56944
57564
  package_default = {
56945
57565
  name: "@nathapp/nax",
56946
- version: "0.67.9",
57566
+ version: "0.67.11",
56947
57567
  description: "AI Coding Agent Orchestrator \u2014 loops until done",
56948
57568
  type: "module",
56949
57569
  bin: {
@@ -57038,8 +57658,8 @@ var init_version = __esm(() => {
57038
57658
  NAX_VERSION = package_default.version;
57039
57659
  NAX_COMMIT = (() => {
57040
57660
  try {
57041
- if (/^[0-9a-f]{6,10}$/.test("ab2db4bb"))
57042
- return "ab2db4bb";
57661
+ if (/^[0-9a-f]{6,10}$/.test("0db5c72e"))
57662
+ return "0db5c72e";
57043
57663
  } catch {}
57044
57664
  try {
57045
57665
  const result = Bun.spawnSync(["git", "rev-parse", "--short", "HEAD"], {
@@ -58013,7 +58633,7 @@ async function runDeferredRegression(options) {
58013
58633
  }
58014
58634
  const testCommand = config2.quality.commands.test ?? "bun test";
58015
58635
  const timeoutSeconds = config2.execution.regressionGate?.timeoutSeconds ?? 120;
58016
- const maxRectificationAttempts = config2.execution.regressionGate?.maxRectificationAttempts ?? 2;
58636
+ const maxRectificationAttempts = config2.execution.rectification.maxAttemptsTotal;
58017
58637
  const acceptOnTimeout = config2.execution.regressionGate?.acceptOnTimeout ?? true;
58018
58638
  const verifyOpts = {
58019
58639
  workdir,
@@ -58177,7 +58797,7 @@ async function runDeferredRegression(options) {
58177
58797
  const cycle = {
58178
58798
  findings: initialFindings,
58179
58799
  iterations: [],
58180
- strategies: [makeFullSuiteRectifyStrategy(story)],
58800
+ strategies: [makeFullSuiteRectifyStrategy(story, config2)],
58181
58801
  config: { maxAttemptsTotal: maxRectificationAttempts, validatorRetries: 1 },
58182
58802
  validate: async (_cycleCtx, _opts) => {
58183
58803
  const verification = await _regressionDeps.runVerification(verifyOpts);
@@ -60223,7 +60843,7 @@ async function handlePipelineFailure(ctx, pipelineResult) {
60223
60843
  feature: ctx.feature,
60224
60844
  attempts: ctx.story.attempts
60225
60845
  });
60226
- if (ctx.story.attempts !== undefined && ctx.story.attempts >= ctx.config.execution.rectification.maxRetries) {
60846
+ if (ctx.story.attempts !== undefined && ctx.story.attempts >= ctx.config.execution.rectification.maxAttemptsTotal) {
60227
60847
  await pipelineEventBus.emitAsync({
60228
60848
  type: "human-review:requested",
60229
60849
  storyId: ctx.story.id,
@@ -60506,7 +61126,7 @@ function selectNextStories(prd, config2, batchPlan, currentBatchIndex, lastStory
60506
61126
  nextBatchIndex: currentBatchIndex + 1
60507
61127
  };
60508
61128
  }
60509
- const story = getNextStory(prd, lastStoryId, config2.execution.rectification?.maxRetries ?? 2);
61129
+ const story = getNextStory(prd, lastStoryId, config2.execution.rectification?.maxAttemptsTotal ?? 12);
60510
61130
  if (!story)
60511
61131
  return null;
60512
61132
  return {
@@ -95426,15 +96046,16 @@ var FIELD_DESCRIPTIONS = {
95426
96046
  "execution.contextProviderTokenBudget": "Token budget for plugin context providers",
95427
96047
  "execution.lintCommand": "Lint command override (null=disabled, undefined=auto-detect)",
95428
96048
  "execution.typecheckCommand": "Typecheck command override (null=disabled, undefined=auto-detect)",
95429
- "execution.rectification": "Rectification loop settings (retry failed tests)",
96049
+ "execution.rectification": "Unified fix-cycle settings \u2014 shared by story-orchestrator (semantic + adversarial + mechanical) and post-run regression cycles",
95430
96050
  "execution.rectification.enabled": "Enable rectification loop",
95431
- "execution.rectification.maxRetries": "Max retry attempts per story",
96051
+ "execution.rectification.maxAttemptsTotal": "Total iteration cap for the unified fix cycle (default: 12). Per-strategy caps are the granular bound.",
96052
+ "execution.rectification.maxAttemptsPerStrategy": "Default per-strategy cap for LLM-driven strategies \u2014 autofix-implementer / autofix-test-writer / full-suite-rectify (default: 3). Mechanical strategies stay at 1.",
95432
96053
  "execution.rectification.fullSuiteTimeoutSeconds": "Timeout for full test suite run in seconds",
95433
96054
  "execution.rectification.maxFailureSummaryChars": "Max characters in failure summary",
95434
96055
  "execution.rectification.abortOnIncreasingFailures": "Abort if failure count increases",
95435
- "execution.rectification.escalateOnExhaustion": "Enable model tier escalation when retries are exhausted with remaining failures",
95436
- "execution.rectification.rethinkAtAttempt": "Attempt number at which 'rethink your approach' language is injected into the prompt (default: 2, set >= maxRetries to disable)",
95437
- "execution.rectification.urgencyAtAttempt": "Attempt number at which 'final chance before escalation' urgency is added to the prompt (default: 3, set >= maxRetries to disable)",
96056
+ "execution.rectification.escalateOnExhaustion": "Enable model tier escalation when attempts are exhausted with remaining failures",
96057
+ "execution.rectification.rethinkAtAttempt": "Attempt number at which 'rethink your approach' language is injected into the prompt (default: 2)",
96058
+ "execution.rectification.urgencyAtAttempt": "Attempt number at which 'final chance before escalation' urgency is added (default: 3)",
95438
96059
  "execution.regressionGate": "Regression gate settings (full suite after scoped tests)",
95439
96060
  "execution.regressionGate.enabled": "Enable full-suite regression gate",
95440
96061
  "execution.regressionGate.timeoutSeconds": "Timeout for regression run in seconds",