npm - open-agents-ai - Versions diffs - 0.187.463 → 0.187.465 - Mend

open-agents-ai 0.187.463 → 0.187.465

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/index.js CHANGED Viewed

@@ -517722,6 +517722,11 @@ var init_agenticRunner = __esm({
       // Observer world-model and cohort stats
       _observerMode = "both";
       _worldFacts = { files: /* @__PURE__ */ new Map(), lastTest: {}, lastLists: /* @__PURE__ */ new Map() };
+      // REG-5: Rolling buffer of recent tool failures with their error output.
+      // Surfaced before every LLM call so the agent can't ignore "I just ran this
+      // and it errored". Detects same-fingerprint failure repetition and escalates
+      // the warning. Keeps last 8 to bound memory + prompt cost.
+      _recentFailures = [];
       _argCohorts = /* @__PURE__ */ new Map();
       // ── WO-NC-07: Error pattern learning → pre-action guidance injection ──
       // Records error patterns (tool + error signature → learned guidance).
@@ -518414,6 +518419,228 @@ ${body}`;
        * Returns null when the disable knob is set or the backend is missing the
        * chatCompletion method.
        */
+      /**
+       * REG-6: Heuristic — does this shell command perform side effects, or is it
+       * purely a read? Read-only commands are safe to dedup-cache the same way
+       * file_read and list_directory are.
+       *
+       * Conservative: whenever the command cannot be positively identified as a
+       * read, the answer is `false`. Checks run in this order:
+       *   1. Non-string, empty, or longer than 1500 characters → false.
+       *   2. An output redirect (`>` / `>>`) → false.
+       *   3. A deny-listed binary or a mutating subcommand anywhere in the text
+       *      (rm, sudo, `sed -i`, `npm install`, `docker run`, ...) → false.
+       *   4. The command is split on `||`, `&&`, `;` and newlines, and each piece
+       *      is further split on unquoted `|`. Every resulting stage must start
+       *      with an allow-listed binary; `git` and `ollama` must additionally be
+       *      followed directly by a read-only subcommand.
+       *
+       * Matching is textual, not a real shell parse, so harmless commands can be
+       * rejected (e.g. `grep cp file`, `git -C dir status`). That direction of
+       * error only costs a cache hit.
+       *
+       * NOTE(review): `node`, `python`, `ruby`, `perl`, `awk`, `env` and `command`
+       * stay allow-listed as segment leaders even though they can run arbitrary
+       * code; only the inline `-e`/`-c` filter below guards node/python. Confirm
+       * this is an accepted trade-off.
+       *
+       * @param {string} rawCmd Shell command line as supplied in the tool call.
+       * @returns {boolean} true only when every check above passes.
+       */
+      _isShellCommandReadOnly(rawCmd) {
+        if (!rawCmd || typeof rawCmd !== "string")
+          return false;
+        const cmd = rawCmd.trim();
+        if (cmd.length === 0 || cmd.length > 1500)
+          return false;
+        // Output redirection. A `>` immediately preceded by a digit or `&`
+        // (e.g. `2>/dev/null`, `2>&1`) does not match this pattern.
+        if (/(^|[^&\d])(>|>>)\s*\S/.test(cmd))
+          return false;
+        // Binaries that are treated as mutating wherever they appear as a word.
+        const MUTATE_BINS = [
+          "rm", "mv", "cp", "mkdir", "rmdir", "chmod", "chown", "touch", "tee",
+          "dd", "truncate", "ln", "kill", "pkill", "killall", "reboot", "shutdown",
+          "fakeroot", "sudo", "nohup", "setsid", "make", "gradle", "mvn", "ansible",
+          "systemd-run"
+        ];
+        const mutateBinsRe = new RegExp(`\\b(${MUTATE_BINS.join("|")})\\b`, "i");
+        if (mutateBinsRe.test(cmd))
+          return false;
+        // Tools that are read-only for some subcommands/flags and mutating for others.
+        if (/\bsed\s+(-i|--in-place)\b/.test(cmd))
+          return false;
+        if (/\bsystemctl\s+(?!status\b|show\b|is-)/i.test(cmd))
+          return false;
+        if (/\bservice\s+\S+\s+(?!status\b)/i.test(cmd))
+          return false;
+        if (/\bcrontab\s+-(e|d|r)\b/.test(cmd))
+          return false;
+        if (/\bnpm\s+(install|uninstall|update|run|test|exec|publish|init|link|unlink|version|cache\s+clean|ci|audit\s+fix)\b/i.test(cmd))
+          return false;
+        if (/\bpnpm\s+(install|update|add|remove|run|test|exec|publish|init|link|unlink|version)\b/i.test(cmd))
+          return false;
+        if (/\byarn\s+(install|add|remove|upgrade|run|test|exec|publish|init|link|unlink|version)\b/i.test(cmd))
+          return false;
+        if (/\bpip\s+(install|uninstall|wheel)\b/i.test(cmd))
+          return false;
+        if (/\bnpx\b/.test(cmd))
+          return false;
+        if (/\bcargo\s+(build|run|test|update|publish|install|uninstall|fmt|fix)\b/i.test(cmd))
+          return false;
+        if (/\bgo\s+(build|run|test|get|install)\b/i.test(cmd))
+          return false;
+        if (/\bdocker\s+(build|run|push|pull|exec|kill|stop|rm|rmi|tag)\b/i.test(cmd))
+          return false;
+        if (/\bkubectl\s+(apply|delete|create|edit|patch|scale|rollout|exec)\b/i.test(cmd))
+          return false;
+        if (/\bterraform\s+(apply|destroy|init|plan|import)\b/i.test(cmd))
+          return false;
+        // Formatters/linters rewrite files in place when asked to fix or write.
+        if (/\bprettier\b[\s\S]*\s(--write|-w)\b/.test(cmd))
+          return false;
+        if (/\beslint\b[\s\S]*\s--fix\b/.test(cmd))
+          return false;
+        // `find` with a deleting or mutating action.
+        if (/\bfind\b[\s\S]*?(-delete|-exec\s+(rm|mv|cp|chmod|chown|sed\s+-i)|--?ok\s+(rm|mv))/i.test(cmd))
+          return false;
+        // Inline node/python code that mentions an obviously mutating call.
+        if (/\b(node|python\d?)\b\s+-(e|c)\b[\s\S]*\b(rm|writeFileSync|unlinkSync|mkdir|process\.exit|exec|spawn|require\(\s*['"]child_process)/i.test(cmd))
+          return false;
+        // Binaries allowed to lead a pipeline stage. Several are only conditionally
+        // read-only and depend on the deny patterns above (find, cargo, go, node,
+        // python, prettier, eslint) or on the subcommand allow-list below (git, ollama).
+        const READ_ONLY_BINS = /* @__PURE__ */ new Set([
+          "cd", // shell builtin: changes pwd, doesn't write — common segment leader
+          "grep", "egrep", "fgrep", "rg", "ag",
+          "cat", "head", "tail", "less", "more",
+          "ls", "ll", "la", "find",
+          "wc", "awk", "gawk", "sort", "uniq", "tr", "cut", "paste", "join", "comm", "diff", "cmp",
+          "echo", "printf", "pwd", "which", "type", "command",
+          "node", "python", "python3", "ruby", "perl",
+          "git", "ollama", "cargo", "go",
+          "stat", "file", "du", "df", "date", "uname", "id", "whoami", "hostname", "uptime",
+          "env", "printenv", "test", "[", "true", "false",
+          "tsc", "eslint", "prettier", // these emit but mostly read
+          "jq", "yq", "xq", "base64", "md5sum", "sha256sum", "sha1sum",
+          "tldr", "man", "info"
+        ]);
+        // For these binaries the token right after the binary must be one of the
+        // listed read-only subcommands; anything else (commit, push, pull, run,
+        // create, a leading option, or nothing at all) is treated as mutating.
+        const READ_ONLY_SUBCOMMANDS = /* @__PURE__ */ new Map([
+          ["git", /* @__PURE__ */ new Set(["status", "log", "show", "diff", "blame", "shortlog", "describe", "rev-parse", "ls-files", "ls-tree", "cat-file", "grep"])],
+          ["ollama", /* @__PURE__ */ new Set(["list", "ls", "show", "ps"])]
+        ]);
+        // Split one segment into pipeline stages on `|` characters that sit
+        // outside single/double quotes, so `grep -E "a|b"` stays a single stage.
+        const splitPipeline = (segment) => {
+          const stages = [];
+          let stage = "";
+          let quote = "";
+          for (let i = 0; i < segment.length; i++) {
+            const ch = segment[i];
+            if (ch === "\\" && quote !== "'" && i + 1 < segment.length) {
+              // Backslash escapes the next character (not inside single quotes).
+              stage += ch + segment[++i];
+            } else if (quote) {
+              if (ch === quote)
+                quote = "";
+              stage += ch;
+            } else if (ch === '"' || ch === "'") {
+              quote = ch;
+              stage += ch;
+            } else if (ch === "|") {
+              stages.push(stage);
+              stage = "";
+            } else {
+              stage += ch;
+            }
+          }
+          stages.push(stage);
+          return stages;
+        };
+        const segments = cmd.split(/(?:\|\||&&|;|\n)/).map((s2) => s2.trim()).filter(Boolean);
+        if (segments.length === 0)
+          return false;
+        for (const seg of segments) {
+          for (const stage of splitPipeline(seg)) {
+            // Drop a leading `!` (negation) together with the whitespace after it
+            // so the real command name becomes the first token.
+            const tokens = stage.trim().replace(/^!\s*/, "").split(/\s+/);
+            // Strip any directory prefix: `/usr/bin/grep` → `grep`.
+            const firstTok = tokens[0]?.replace(/^.*\//, "") || "";
+            if (!firstTok)
+              continue;
+            if (!READ_ONLY_BINS.has(firstTok))
+              return false;
+            const allowedSubs = READ_ONLY_SUBCOMMANDS.get(firstTok);
+            if (allowedSubs && !allowedSubs.has(tokens[1] ?? ""))
+              return false;
+          }
+        }
+        return true;
+      }
+      /**
+       * REG-5: Render the recent-failures block so the agent SEES its own error
+       * output before deciding what to do next. Without this, the agent runs
+       * `npx next build`, gets a 200-line TypeScript error, ignores the specific
+       * error and blindly retries with `npm install --force`. Caching the failure
+       * + injecting it pre-LLM forces the model to confront what actually broke.
+       *
+       * Only entries of `this._recentFailures` recorded within the last 10 turns
+       * are considered. If any fingerprint occurs at least twice within the last
+       * 5 turns, the header escalates to a retry-loop warning and the repeating
+       * fingerprints are listed at the end. At most the 5 newest failures are
+       * printed, newest first, each with its first error line and up to 6 lines
+       * of the (600-character-capped) error text.
+       *
+       * @param {number} turn Current turn number; compared with each entry's `turn`.
+       * @returns {string|null} The newline-joined block, or null when there is no
+       *   failure recent enough to report.
+       */
+      _renderRecentFailuresBlock(turn) {
+        const fails = this._recentFailures;
+        if (!fails || fails.length === 0)
+          return null;
+        const fresh = fails.filter((f2) => turn - f2.turn <= 10);
+        if (fresh.length === 0)
+          return null;
+        // Count failures per fingerprint, looking only at the last 5 turns.
+        const fpCount = /* @__PURE__ */ new Map();
+        for (const f2 of fresh) {
+          if (turn - f2.turn <= 5)
+            fpCount.set(f2.fingerprint, (fpCount.get(f2.fingerprint) ?? 0) + 1);
+        }
+        const repeating = [...fpCount.entries()].filter(([, n2]) => n2 >= 2);
+        const lines = [];
+        if (repeating.length > 0) {
+          lines.push("[STOP — RETRY LOOP DETECTED]");
+          lines.push("You are re-issuing the SAME failing tool call(s) without changing anything that would fix the underlying error. If you cannot diagnose the error from the messages below, mark the current todo phase as `blocked` (with the blocker text) and either move to a different phase or call task_complete with what you have. DO NOT just retry the same command again — the error will not magically disappear.");
+        } else {
+          lines.push("[RECENT TOOL FAILURES — read these errors carefully BEFORE deciding your next action]");
+        }
+        const shown = fresh.slice(-5).reverse();
+        for (const f2 of shown) {
+          // JSON.stringify returns undefined for undefined input and throws on
+          // circular structures or BigInt; neither may abort prompt construction.
+          let argsRepr;
+          try {
+            argsRepr = (JSON.stringify(f2.args) ?? "").slice(0, 120);
+          } catch {
+            argsRepr = "[unserializable arguments]";
+          }
+          const errText = f2.error || f2.output || "";
+          const errFirst = errText.split(/\n/)[0]?.slice(0, 200) || "(no error message)";
+          const errFull = errText.slice(0, 600);
+          lines.push(`• turn ${f2.turn} — ${f2.tool}(${argsRepr})`);
+          lines.push(`  first line: ${errFirst}`);
+          // Add the detail lines only when they carry more than the first line.
+          if (errFull && errFull !== errFirst) {
+            const indented = errFull.split(/\n/).slice(0, 6).map((l2) => `    ${l2}`).join("\n");
+            lines.push(indented);
+          }
+        }
+        if (repeating.length > 0) {
+          // String() guards against a fingerprint that is not already a string.
+          const repeatingDesc = repeating.map(([fp, n2]) => `${n2}× ${String(fp).slice(0, 80)}`).join("; ");
+          lines.push(`Repeating fingerprints: ${repeatingDesc}`);
+        }
+        lines.push(`(turn ${turn} — failures auto-expire after 10 turns; cleared on success or successful retry)`);
+        return lines.join("\n");
+      }
       /**
        * REG-3: Render the current todo list as a compact transient block so the
        * agent can read its own plan without calling todo_read or re-emitting
@@ -519597,6 +519824,9 @@ ${memoryLines.join("\n")}`
           const todoBlock = this._renderTodoStateBlock(turn);
           if (todoBlock)
             _injections.push(todoBlock);
+          const failBlock = this._renderRecentFailuresBlock(turn);
+          if (failBlock)
+            _injections.push(failBlock);
           if (_injections.length > 0) {
             const reqMsgs = chatRequest.messages;
             if (Array.isArray(reqMsgs)) {
@@ -519967,7 +520197,7 @@ ${cachedEntry2.result.slice(0, 500)}` : `[BLOCKED — the observer confirmed thi
                 this.emit({ type: "tool_result", toolName: tc.name, success: true, content: blockMsg.slice(0, 100), turn, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
                 return { tc, output: blockMsg };
               }
-              const isReadLike = ![
+              const baseIsReadLike = ![
                 "file_write",
                 "file_edit",
                 "shell",
@@ -519996,6 +520226,7 @@ ${cachedEntry2.result.slice(0, 500)}` : `[BLOCKED — the observer confirmed thi
                 // tool see every call and return the cached state itself.
                 "nexus"
               ].includes(tc.name);
+              const isReadLike = baseIsReadLike || tc.name === "shell" && this._isShellCommandReadOnly(tc.arguments?.["command"] ?? tc.arguments?.["cmd"] ?? "");
               const cachedEntry = recentToolResults.get(toolFingerprint);
               if (isReadLike && cachedEntry !== void 0) {
                 this.emit({
@@ -520385,6 +520616,22 @@ ${cachedEntry2.result.slice(0, 500)}` : `[BLOCKED — the observer confirmed thi
                     recentToolResults.delete(firstKey);
                 }
               }
+              if (result.success) {
+                this._recentFailures = this._recentFailures.filter((f2) => f2.fingerprint !== toolFingerprint);
+              }
+              if (!result.success) {
+                this._recentFailures.push({
+                  tool: tc.name,
+                  fingerprint: toolFingerprint,
+                  args: tc.arguments,
+                  error: (result.error ?? "").slice(0, 600),
+                  output: (result.output ?? "").slice(0, 1500),
+                  turn
+                });
+                if (this._recentFailures.length > 8) {
+                  this._recentFailures = this._recentFailures.slice(-8);
+                }
+              }
               if (!result.success && tc.name === "shell" && /\[PERMISSION_ERROR\]/.test(result.error ?? "")) {
                 this.emit({
                   type: "sudo_request",

package/npm-shrinkwrap.json CHANGED Viewed

@@ -1,12 +1,12 @@
 {
   "name": "open-agents-ai",
-  "version": "0.187.463",
+  "version": "0.187.465",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "open-agents-ai",
-      "version": "0.187.463",
+      "version": "0.187.465",
       "hasInstallScript": true,
       "license": "CC-BY-NC-4.0",
       "dependencies": {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "open-agents-ai",
-  "version": "0.187.463",
+  "version": "0.187.465",
   "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
   "type": "module",
   "main": "./dist/index.js",