nexus-agents 2.47.0 → 2.48.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,7 @@ import {
4
4
  import {
5
5
  VERSION,
6
6
  initDataDirectories
7
- } from "./chunk-3YYS74BL.js";
7
+ } from "./chunk-M53BBBCB.js";
8
8
  import {
9
9
  CLI_SUBPROCESS_TIMEOUTS,
10
10
  createLogger,
@@ -1580,4 +1580,4 @@ export {
1580
1580
  setupCommand,
1581
1581
  setupCommandAsync
1582
1582
  };
1583
- //# sourceMappingURL=chunk-HCFJ7NMT.js.map
1583
+ //# sourceMappingURL=chunk-CH722DBX.js.map
@@ -24,7 +24,7 @@ import {
24
24
  } from "./chunk-CLYZ7FWP.js";
25
25
 
26
26
  // src/version.ts
27
- var VERSION = true ? "2.47.0" : "dev";
27
+ var VERSION = true ? "2.48.0" : "dev";
28
28
 
29
29
  // src/cli/setup-data-dir.ts
30
30
  import { mkdirSync, existsSync as existsSync2 } from "fs";
@@ -758,7 +758,7 @@ async function runDoctorFix(result) {
758
758
  writeLine2("\u2500".repeat(40));
759
759
  let fixCount = 0;
760
760
  if (!result.dataDirectory.rootExists || result.dataDirectory.subdirectories.some((d) => !d.exists || !d.writable)) {
761
- const { runSetup } = await import("./setup-command-CRBMRREJ.js");
761
+ const { runSetup } = await import("./setup-command-PLGFVKLM.js");
762
762
  const setupResult = runSetup({
763
763
  skipMcp: true,
764
764
  skipRules: true,
@@ -836,4 +836,4 @@ export {
836
836
  startStdioServer,
837
837
  closeServer
838
838
  };
839
- //# sourceMappingURL=chunk-3YYS74BL.js.map
839
+ //# sourceMappingURL=chunk-M53BBBCB.js.map
@@ -66,7 +66,7 @@ import {
66
66
  import {
67
67
  DEFAULT_TASK_TTL_MS,
68
68
  clampTaskTtl
69
- } from "./chunk-3YYS74BL.js";
69
+ } from "./chunk-M53BBBCB.js";
70
70
  import {
71
71
  createSessionMemory
72
72
  } from "./chunk-ULDKSIS7.js";
@@ -49454,6 +49454,124 @@ function buildSuccessResult(instance2, patch, modelName, startTime, state) {
49454
49454
  };
49455
49455
  }
49456
49456
 
49457
+ // src/swe-bench/verify-loop.ts
49458
// Retry budget used by the verify-loop helpers when the caller supplies none.
var DEFAULT_MAX_VERIFY_RETRIES = 2;

/**
 * Ordered failure classifiers for verification output.
 *
 * Each entry carries:
 *  - `category`:   label reported for the failure,
 *  - `regex`:      pattern run against stderr and stdout joined by a newline,
 *  - `summarizer`: `(match, stderr, stdout) => string`, a one-line summary.
 *
 * Order matters: `classifyPatchFailure` walks the array and uses the FIRST
 * entry whose regex matches, so specific patterns must precede generic ones.
 * None of the regexes is global/sticky, so `exec` keeps no state between calls.
 */
var FAILURE_PATTERNS = [
  {
    category: "patch_not_applicable",
    regex: /patch .*?does not apply|hunk #\d+ FAILED|Reversed .*patch detected/i,
    summarizer: (m) => `Patch did not apply cleanly: ${m[0]}`
  },
  {
    category: "syntax_error",
    regex: /SyntaxError: (.*?)(?:\n|$)|IndentationError: (.*?)(?:\n|$)/,
    summarizer: (m) => `Syntax error in generated patch: ${m[1] ?? m[2] ?? m[0]}`.trim()
  },
  {
    // NOTE(review): with the `i` flag, `Timeout\b` matches the bare word
    // "timeout" anywhere in the output, and this category is in
    // NEVER_RETRYABLE. Confirm that breadth is intended.
    category: "timeout",
    regex: /Timeout\b|timed out after \d+\s?s\b|TIMEOUT_EXCEEDED/i,
    summarizer: (m) => `Test run exceeded timeout: ${m[0]}`
  },
  {
    category: "missing_dependency",
    regex: /ModuleNotFoundError: No module named '([^']+)'|ImportError: cannot import name '([^']+)'|No module named "([^"]+)"/,
    summarizer: (m) => `Missing dependency: ${m[1] ?? m[2] ?? m[3] ?? "unknown"}. Patch may need an import.`
  },
  {
    category: "runtime_error",
    // AssertionError is excluded on purpose: it is listed in the test_failure
    // pattern below, which this generic "<Name>Error: message" pattern would
    // otherwise shadow whenever the assertion carries a message.
    regex: /((?!AssertionError\b)[A-Z][a-zA-Z]+Error): (.*?)(?:\n|$)/,
    summarizer: (m) => `Runtime error ${m[1] ?? ""}: ${m[2] ?? ""}`.trim()
  },
  {
    category: "test_failure",
    regex: /FAILED .*?::(\S+)|AssertionError|FAIL: (\S+)/,
    summarizer: (_m, stderr, stdout) => {
      const failed = extractFailedTests(stderr, stdout);
      // Cap the list at five names to keep the summary to one short line.
      return failed.length > 0 ? `Tests still failing: ${failed.slice(0, 5).join(", ")}` : "One or more tests failed after patch";
    }
  }
];
49494
/**
 * Classifies the output of a failed verification run.
 *
 * The two streams are joined with a newline and tested against
 * FAILURE_PATTERNS in order; the first matching entry decides the category
 * and produces the summary. When nothing matches, the category is "unknown"
 * and the summary is the first 200 characters of the trimmed output.
 *
 * @param {string | null | undefined} stderr - captured standard error.
 * @param {string | null | undefined} stdout - captured standard output.
 * @returns {{ category: string, summary: string, affectedTests: string[] }}
 */
function classifyPatchFailure(stderr, stdout) {
  // Normalise absent streams first. Interpolating undefined/null would put
  // the literal words "undefined"/"null" into the haystack, and the
  // "No failure details captured" fallback below could then never trigger.
  const errText = stderr ?? "";
  const outText = stdout ?? "";
  const haystack = `${errText}\n${outText}`;
  // Every return path reports the failed tests, so extract them once.
  const affectedTests = extractFailedTests(errText, outText);
  for (const pattern of FAILURE_PATTERNS) {
    const match = pattern.regex.exec(haystack);
    if (match !== null) {
      return {
        category: pattern.category,
        summary: pattern.summarizer(match, errText, outText),
        affectedTests
      };
    }
  }
  return {
    category: "unknown",
    summary: haystack.trim().slice(0, 200) || "No failure details captured",
    affectedTests
  };
}
49513
/**
 * Collects the identifiers of failing tests mentioned in the captured output.
 *
 * Two shapes are recognised in stderr and stdout joined by a newline:
 *  - `FAILED <path>::<name>`  -> the whole `<path>::<name>` token,
 *  - `FAIL: <name> (`         -> the `<name>` token.
 *
 * @param {string} stderr - captured standard error.
 * @param {string} stdout - captured standard output.
 * @returns {string[]} unique identifiers, `FAILED` hits first and then
 *   `FAIL:` hits, each group in order of first appearance.
 */
function extractFailedTests(stderr, stdout) {
  const text = `${stderr}\n${stdout}`;
  const pytestIds = Array.from(text.matchAll(/FAILED (\S+::\S+)/g), (hit) => hit[1]);
  const unittestIds = Array.from(text.matchAll(/FAIL: (\S+) \(/g), (hit) => hit[1]);
  // A Set keeps insertion order, so de-duplicating preserves first occurrence.
  return [...new Set([...pytestIds, ...unittestIds])];
}
49528
// Categories that get another attempt for as long as the retry budget lasts.
var ALWAYS_RETRYABLE = /* @__PURE__ */ new Set([
  "patch_not_applicable",
  "syntax_error",
  "missing_dependency",
  "test_failure",
  "runtime_error",
  "incomplete_fix"
]);

// Categories that are never retried, whatever budget remains.
var NEVER_RETRYABLE = /* @__PURE__ */ new Set(["timeout"]);

/**
 * Decides whether a failed verification should trigger another attempt.
 *
 * @param {string} category - failure category label.
 * @param {number} iteration - zero-based index of the attempt that just failed.
 * @param {number} [maxRetries] - retry budget; defaults to DEFAULT_MAX_VERIFY_RETRIES.
 * @returns {boolean} false once the budget is used up or for a NEVER_RETRYABLE
 *   category; true for an ALWAYS_RETRYABLE category; any other category is
 *   retried only after the very first attempt.
 */
function shouldRetry(category, iteration, maxRetries = DEFAULT_MAX_VERIFY_RETRIES) {
  const budgetExhausted = iteration >= maxRetries;
  if (budgetExhausted || NEVER_RETRYABLE.has(category)) {
    return false;
  }
  return ALWAYS_RETRYABLE.has(category) || iteration < 1;
}
49543
/**
 * Renders the multi-line hint describing a failed verification attempt.
 *
 * @param {{ category: string, summary: string, affectedTests: string[] }} classification
 *   failure details to report.
 * @param {number} iteration - zero-based attempt index; shown one-based.
 * @param {number} [maxRetries] - retry budget; the displayed total is
 *   `maxRetries + 1`. Defaults to DEFAULT_MAX_VERIFY_RETRIES.
 * @returns {string} newline-joined hint text.
 */
function buildRetryHint(classification, iteration, maxRetries = DEFAULT_MAX_VERIFY_RETRIES) {
  const { category, summary, affectedTests } = classification;
  const lines = [
    `Verification attempt ${String(iteration + 1)}/${String(maxRetries + 1)} failed.`,
    `Category: ${category}`,
    `Summary: ${summary}`
  ];
  const total = affectedTests.length;
  if (total > 0) {
    // List at most five names; an ellipsis marks that more were omitted.
    const shown = affectedTests.slice(0, 5).join(", ");
    const ellipsis = total > 5 ? ", ..." : "";
    lines.push(`Affected tests (${String(total)}): ${shown}${ellipsis}`);
  }
  lines.push("Fix the root cause, not the symptom. Re-emit the full patch.");
  return lines.join("\n");
}
49559
/**
 * Turns the raw result of one verification attempt into an outcome record.
 *
 * @param {{ passed: boolean, iteration: number, stderr: string, stdout: string, maxRetries?: number }} params
 *   raw attempt data; `maxRetries` falls back to DEFAULT_MAX_VERIFY_RETRIES.
 * @returns {object} `{ ok: true, iteration, willRetry: false }` when the
 *   attempt passed; otherwise `{ ok: false, iteration, classification,
 *   retryHint, willRetry }` built from the classified output.
 */
function buildVerifyOutcome(params) {
  const { passed, iteration, stderr, stdout } = params;
  if (passed) {
    return { ok: true, iteration, willRetry: false };
  }
  const retryBudget = params.maxRetries ?? DEFAULT_MAX_VERIFY_RETRIES;
  const classification = classifyPatchFailure(stderr, stdout);
  const willRetry = shouldRetry(classification.category, iteration, retryBudget);
  const retryHint = buildRetryHint(classification, iteration, retryBudget);
  return { ok: false, iteration, classification, retryHint, willRetry };
}
49574
+
49457
49575
  // src/swe-bench/agent-runner.ts
49458
49576
  async function runIteration(opts) {
49459
49577
  const { executor, context, previousError, previousPatch, systemPromptOverride, contextSummary } = opts;
@@ -49558,7 +49676,10 @@ async function runAgentOnInstance(instance2, options) {
49558
49676
  startTime,
49559
49677
  onMessage,
49560
49678
  systemPrompt: options.systemPrompt,
49561
- iterationContext: createEmptyContext()
49679
+ iterationContext: createEmptyContext(),
49680
+ verifyAttempts: 0,
49681
+ ...options.verifyAdapter !== void 0 ? { verifyAdapter: options.verifyAdapter } : {},
49682
+ ...options.maxVerifyRetries !== void 0 ? { maxVerifyRetries: options.maxVerifyRetries } : {}
49562
49683
  };
49563
49684
  const result = await runIterationLoop(executor, context, state, loopOptions);
49564
49685
  return { ok: true, value: result };
@@ -49609,6 +49730,54 @@ function buildDuplicateResult(instanceId, startTime, state, onMessage) {
49609
49730
  onMessage?.("Duplicate patch detected, terminating early");
49610
49731
  return buildFailedResult(instanceId, "Duplicate patch \u2014 agent is stuck", startTime, state);
49611
49732
  }
49733
/**
 * Runs the verify adapter on a patch and converts its result into an outcome.
 *
 * @param {{ verify: Function }} adapter - called as
 *   `verify(context.instance, patch, context.workDir)`; its awaited result is
 *   read for `passed`, `stderr` and `stdout`.
 * @param {string} patch - the patch to verify.
 * @param {{ instance: object, workDir: string }} context - run context.
 * @param {{ verifyAttempts: number, maxVerifyRetries?: number }} options -
 *   loop options; `verifyAttempts` is a one-based count that is passed on
 *   zero-based.
 * @returns {Promise<object>} whatever `buildVerifyOutcome` returns.
 */
async function invokeVerifyAdapter(adapter, patch, context, options) {
  const verdict = await adapter.verify(context.instance, patch, context.workDir);
  const outcomeParams = {
    passed: verdict.passed,
    iteration: options.verifyAttempts - 1,
    stderr: verdict.stderr,
    stdout: verdict.stdout
  };
  // Forward the retry budget only when configured, so the default still applies.
  if (options.maxVerifyRetries !== void 0) {
    outcomeParams.maxRetries = options.maxVerifyRetries;
  }
  return buildVerifyOutcome(outcomeParams);
}
49743
/**
 * Prepares the loop state for another attempt after a failed verification.
 *
 * Reports the failure through `onMessage`, stores the retry hint (or a generic
 * fallback) as `state.lastError`, moves the rejected patch from
 * `state.finalPatch` to `state.lastPatch`, and clears `state.finalPatch`.
 *
 * @param {{ classification?: { category: string }, retryHint?: string }} outcome
 *   failed verification outcome.
 * @param {object} state - mutable loop state; updated in place.
 * @param {(message: string) => void} [onMessage] - optional progress callback.
 * @returns {void}
 */
function applyVerifyRetry(outcome, state, onMessage) {
  const failedCategory = outcome.classification?.category ?? "unknown";
  const notice = `Verify failed (${failedCategory}); retrying with hint`;
  onMessage?.(notice);
  const rejectedPatch = state.finalPatch;
  state.lastError = outcome.retryHint ?? "Verification failed; re-emit the patch";
  state.lastPatch = rejectedPatch;
  state.finalPatch = void 0;
}
49750
/**
 * Verifies the current final patch, if a verify adapter is configured.
 *
 * Each verification increments `options.verifyAttempts` before running.
 *
 * @param {object} context - run context handed through to the adapter call.
 * @param {object} state - mutable loop state holding `finalPatch`.
 * @param {object} options - loop options: `verifyAdapter`, `verifyAttempts`
 *   and optionally `onMessage`.
 * @returns {Promise<boolean>} true when the loop should stop: nothing to
 *   verify, verification passed, or it failed with no retry allowed. False
 *   when the state has been reset for a retry.
 */
async function runPostPatchVerify(context, state, options) {
  const { verifyAdapter } = options;
  const nothingToVerify = verifyAdapter === void 0 || state.finalPatch === void 0;
  if (nothingToVerify) {
    return true;
  }

  options.verifyAttempts += 1;
  options.onMessage?.(`Verifying patch (attempt ${String(options.verifyAttempts)})`);

  const outcome = await invokeVerifyAdapter(verifyAdapter, state.finalPatch, context, options);
  if (outcome.ok) {
    return true;
  }
  if (outcome.willRetry) {
    applyVerifyRetry(outcome, state, options.onMessage);
    return false;
  }

  // Out of retries: report the failure; the caller still stops the loop.
  const failedCategory = outcome.classification?.category ?? "unknown";
  options.onMessage?.(`Verify failed (${failedCategory}); no more retries`);
  return true;
}
49765
/**
 * Decides what the iteration loop does once an iteration reports it is done.
 *
 * @param {object} context - run context; `context.instance.instance_id` is
 *   used for the duplicate result.
 * @param {object} state - mutable loop state holding `finalPatch`.
 * @param {object} options - loop options (`startTime`, optional `onMessage`,
 *   plus whatever `runPostPatchVerify` reads).
 * @param {Set<string>} seenPatches - patches already produced in this run.
 * @returns {Promise<"break" | "continue" | { result: object }>} `{ result }`
 *   for a duplicate patch (after clearing `state.finalPatch`); otherwise
 *   "break" when post-patch verification says to stop and "continue" when a
 *   retry was scheduled.
 */
async function handleIterationDone(context, state, options, seenPatches) {
  if (!isDuplicatePatch(state.finalPatch, seenPatches)) {
    options.onMessage?.("Patch applies successfully");
    const shouldStop = await runPostPatchVerify(context, state, options);
    return shouldStop ? "break" : "continue";
  }

  // Duplicate patch: drop it before building the terminal result.
  state.finalPatch = void 0;
  const result = buildDuplicateResult(
    context.instance.instance_id,
    options.startTime,
    state,
    options.onMessage
  );
  return { result };
}
49612
49781
  async function runIterationLoop(executor, context, state, options) {
49613
49782
  const { config, startTime, onMessage } = options;
49614
49783
  const seenPatches = /* @__PURE__ */ new Set();
@@ -49619,12 +49788,10 @@ async function runIterationLoop(executor, context, state, options) {
49619
49788
  onMessage?.(`Iteration ${state.iterations.toString()}/${config.max_iterations.toString()}`);
49620
49789
  const done = await executeOneIteration(executor, context, state, options);
49621
49790
  if (done) {
49622
- if (isDuplicatePatch(state.finalPatch, seenPatches)) {
49623
- state.finalPatch = void 0;
49624
- return buildDuplicateResult(context.instance.instance_id, startTime, state, onMessage);
49625
- }
49626
- onMessage?.("Patch applies successfully");
49627
- break;
49791
+ const control = await handleIterationDone(context, state, options, seenPatches);
49792
+ if (control === "break") break;
49793
+ if (control === "continue") continue;
49794
+ return control.result;
49628
49795
  }
49629
49796
  if (isDuplicatePatch(state.lastPatch, seenPatches)) {
49630
49797
  return buildDuplicateResult(context.instance.instance_id, startTime, state, onMessage);
@@ -54260,4 +54427,4 @@ export {
54260
54427
  detectBackend,
54261
54428
  createTaskTracker
54262
54429
  };
54263
- //# sourceMappingURL=chunk-ZGKZIEUM.js.map
54430
+ //# sourceMappingURL=chunk-UXRR7M6E.js.map