npm - open-agents-ai - Versions diffs - 0.187.532 → 0.187.533 - Mend

open-agents-ai 0.187.532 → 0.187.533

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/index.js CHANGED Viewed

@@ -526818,6 +526818,85 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
           }
         }
       }
+      /**
+       * REG-66 Debug-Loop Detection (root-cause from batch535-midi, 2026-05-04).
+       *
+       * Empirical: midi run had 11x `npm run build 2>&1` + same 5 files re-read
+       * 5-6 times each + 22 BFC-61.G coercion BLOCKS — and ZERO of those blocks
+       * resulted in a creative edit. The agent was rationally stuck: it
+       * believed it needed to read more to debug, the build command kept
+       * giving the same error, and the standard "issue an edit" directive
+       * gave no traction because the agent had no concrete edit hypothesis.
+       *
+       * This method analyzes toolCallLog for the debug-loop signature:
+       *   - Same shell command stem repeated ≥5 times in the trailing window, OR
+       *   - Same file_read / file_explore path re-read ≥4 times in the trailing window.
+       * Both indicate the agent is reading/running the same things hoping for
+       * different output. Without this signal we'd just keep telling the
+       * agent to "make an edit" — which is exactly what it can't think of.
+       *
+       * When detected, the BFC-61.G block message swaps to a PERTURB-strategy
+       * directive: stop reading, change ONE thing in the most-likely-culprit
+       * file even if you're uncertain, and let the new error signal guide
+       * the next iteration. This is a real human debugging strategy ("perturb
+       * to disambiguate"), NOT reward-hacking — the agent still has to
+       * produce a real edit and the success criteria (todos done + build
+       * passing) are unchanged.
+       *
+       * @returns Detection result. `detected=false` → use standard message.
+       *          `detected=true` → use REG-66 perturb-strategy message;
+       *          `repeatedSample` (offending command/path), `count` (its repeats) and
+       *          `kind` ("shell" | "read") let the message call out the specific pattern.
+       */
+      _detectDebugLoop(toolCallLog) {
+        if (process.env["OA_DISABLE_REG66"] === "1")
+          return { detected: false };
+        const WINDOW = 20;
+        const SHELL_REPEAT_THRESHOLD = 5;
+        const READ_REPEAT_THRESHOLD = 4;
+        const window2 = toolCallLog.slice(-WINDOW);
+        if (window2.length < SHELL_REPEAT_THRESHOLD)
+          return { detected: false };
+        const _editClasses = /* @__PURE__ */ new Set(["file_write", "file_edit", "batch_edit", "file_patch"]);
+        for (const c9 of window2) {
+          if (_editClasses.has(c9.name) && c9.success !== false)
+            return { detected: false };
+        }
+        const shellCounts = /* @__PURE__ */ new Map();
+        const readCounts = /* @__PURE__ */ new Map();
+        for (const c9 of window2) {
+          if (c9.name === "shell") {
+            const m2 = c9.argsKey.match(/(?:^|,)command=([^,]+)/);
+            if (m2 && m2[1]) {
+              const stem = m2[1].trim();
+              shellCounts.set(stem, (shellCounts.get(stem) ?? 0) + 1);
+            }
+          } else if (c9.name === "file_read" || c9.name === "file_explore") {
+            const m2 = c9.argsKey.match(/(?:^|,)path=([^,]+)/);
+            if (m2 && m2[1]) {
+              const stem = m2[1].trim();
+              readCounts.set(stem, (readCounts.get(stem) ?? 0) + 1);
+            }
+          }
+        }
+        let bestShell = null;
+        for (const [k, n2] of shellCounts) {
+          if (n2 >= SHELL_REPEAT_THRESHOLD && (!bestShell || n2 > bestShell[1]))
+            bestShell = [k, n2];
+        }
+        let bestRead = null;
+        for (const [k, n2] of readCounts) {
+          if (n2 >= READ_REPEAT_THRESHOLD && (!bestRead || n2 > bestRead[1]))
+            bestRead = [k, n2];
+        }
+        if (bestShell) {
+          return { detected: true, repeatedSample: bestShell[0], count: bestShell[1], kind: "shell" };
+        }
+        if (bestRead) {
+          return { detected: true, repeatedSample: bestRead[0], count: bestRead[1], kind: "read" };
+        }
+        return { detected: false };
+      }
       readSessionTodos() {
         try {
           const sid = process.env["OA_SESSION_ID"] || this._sessionId || "default";
@@ -530749,7 +530828,32 @@ ${memoryLines.join("\n")}`
                   turn,
                   timestamp: (/* @__PURE__ */ new Date()).toISOString()
                 });
-                const reg61BlockMsg = [
+                const _dbgLoop = this._detectDebugLoop(toolCallLog);
+                const _debugLoopSampleSafe = (_dbgLoop.repeatedSample ?? "").slice(0, 120);
+                const reg61BlockMsg = _dbgLoop.detected ? [
+                  `[BLOCKED — REG-61 directive in effect — REG-66 DEBUG-LOOP detected]`,
+                  ``,
+                  `Pattern: ${_dbgLoop.kind === "shell" ? "shell command" : "file"} "${_debugLoopSampleSafe}" was used ${_dbgLoop.count}× in the trailing window with ZERO creative edits landing. You are stuck in a debug loop where re-running / re-reading is producing no new information.`,
+                  ``,
+                  `STOP DEBUGGING. PERTURB.`,
+                  ``,
+                  `Strategy when stuck like this (real human debuggers do this):`,
+                  `  1. Pick the source file most likely implicated by the recurring failure (probably in src/, the one most-imported by failing tests).`,
+                  `  2. Pick ONE plausible cause — most-recently-modified line, most-complex function, most-likely-misnamed import, most-likely off-by-one.`,
+                  `  3. Make a SPECULATIVE edit that changes that thing — even if you are NOT certain it'll fix the bug. The point is to get a NEW error signal that disambiguates.`,
+                  `  4. Re-run the failing command. If the error CHANGED, you've learned something. If it's identical, you've ruled out one hypothesis.`,
+                  ``,
+                  `This is NOT random guessing — it's targeted hypothesis falsification. Reading the same files 5+ times has already proven uninformative; only a state change will move the system.`,
+                  ``,
+                  `Issue EXACTLY ONE of: file_write / file_edit / batch_edit / file_patch on a single concrete change. The exact CHOICE of edit matters less than NOT continuing to re-read.`,
+                  ``,
+                  `Allowed bypasses (will not be blocked but will not clear the directive either):`,
+                  `  • web_search    — search the EXACT recurring error string`,
+                  `  • task_complete — exit if you genuinely cannot identify any plausible perturbation`,
+                  `  • ask_user      — escalate to human (if available)`,
+                  ``,
+                  `Once you make a real edit, the directive clears and you'll see the new test result.`
+                ].join("\n") : [
                   `[BLOCKED — REG-61 directive in effect]`,
                   ``,
                   `A REG-61 FIRST-EDIT NUDGE was issued earlier and has not yet been satisfied. The directive: your next tool call MUST be a creative edit. You issued '${tc.name}' instead, which is a read/explore/shell call. This call has been BLOCKED.`,
@@ -530777,12 +530881,12 @@ ${memoryLines.join("\n")}`
                 });
                 this.emit({
                   type: "status",
-                  content: `REG-61 COERCION BLOCK — rejected '${tc.name}' at turn ${turn}; gate stays active until creative edit dispatches`,
+                  content: `REG-61 COERCION BLOCK — rejected '${tc.name}' at turn ${turn}; gate stays active until creative edit dispatches${_dbgLoop.detected ? `; REG-66 debug-loop variant (${_dbgLoop.kind} "${_debugLoopSampleSafe.slice(0, 60)}" ${_dbgLoop.count}×)` : ""}`,
                   timestamp: (/* @__PURE__ */ new Date()).toISOString()
                 });
                 this._tagSyntheticFailure({
                   mode: "step_repetition",
-                  rationale: `REG-61 perpetual coercion block on '${tc.name}' — agent ignored FIRST-EDIT NUDGE`
+                  rationale: `REG-61 perpetual coercion block on '${tc.name}' — agent ignored FIRST-EDIT NUDGE${_dbgLoop.detected ? " (debug-loop variant)" : ""}`
                 });
                 return { tc, output: reg61BlockMsg };
               }
@@ -599758,6 +599862,15 @@ async function handleRequest(req2, res, ollamaUrl, verbose) {
       });
       return;
     }
+    if (pathname === "/favicon.ico" && method === "GET") {
+      const svg = '<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><rect width="16" height="16" rx="3" fill="#b2920a"/><text x="50%" y="55%" font-size="11" font-family="monospace" font-weight="700" text-anchor="middle" dominant-baseline="middle" fill="#0b0b0b">oa</text></svg>';
+      res.writeHead(200, {
+        "Content-Type": "image/svg+xml",
+        "Cache-Control": "public, max-age=86400"
+      });
+      res.end(svg);
+      return;
+    }
     if (pathname === "/" && method === "GET" && req2.headers.accept?.includes("text/html")) {
       res.writeHead(200, {
         "Content-Type": "text/html; charset=utf-8",

package/npm-shrinkwrap.json CHANGED Viewed

@@ -1,12 +1,12 @@
 {
   "name": "open-agents-ai",
-  "version": "0.187.532",
+  "version": "0.187.533",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "open-agents-ai",
-      "version": "0.187.532",
+      "version": "0.187.533",
       "hasInstallScript": true,
       "license": "CC-BY-NC-4.0",
       "dependencies": {
@@ -3132,9 +3132,9 @@
       }
     },
     "node_modules/express-rate-limit": {
-      "version": "8.4.1",
-      "resolved": "https://registry.npmjs.org/express-rate-limit/-/express-rate-limit-8.4.1.tgz",
-      "integrity": "sha512-NGVYwQSAyEQgzxX1iCM978PP9AdO/hW93gMcF6ZwQCm+rFvLsBH6w4xcXWTcliS8La5EPRN3p9wzItqBwJrfNw==",
+      "version": "8.5.0",
+      "resolved": "https://registry.npmjs.org/express-rate-limit/-/express-rate-limit-8.5.0.tgz",
+      "integrity": "sha512-XKhFohWaSBdVJNTi5TaHziqnPkv04I9UQV6q1Wy7Ui6GGQZVW12ojDFwqer14EvCXxjvPG0CyWXx7cAXpALB4Q==",
       "license": "MIT",
       "dependencies": {
         "ip-address": "10.1.0"

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "open-agents-ai",
-  "version": "0.187.532",
+  "version": "0.187.533",
   "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
   "type": "module",
   "main": "./dist/index.js",