open-agents-ai 0.187.463 → 0.187.465

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -517722,6 +517722,11 @@ var init_agenticRunner = __esm({
517722
517722
  // Observer world-model and cohort stats
517723
517723
  _observerMode = "both";
517724
517724
  _worldFacts = { files: /* @__PURE__ */ new Map(), lastTest: {}, lastLists: /* @__PURE__ */ new Map() };
517725
+ // REG-5: Rolling buffer of recent tool failures with their error output.
517726
+ // Surfaced before every LLM call so the agent can't ignore "I just ran this
517727
+ // and it errored". Detects same-fingerprint failure repetition and escalates
517728
+ // the warning. Keeps last 8 to bound memory + prompt cost.
517729
+ _recentFailures = [];
517725
517730
  _argCohorts = /* @__PURE__ */ new Map();
517726
517731
  // ── WO-NC-07: Error pattern learning → pre-action guidance injection ──
517727
517732
  // Records error patterns (tool + error signature → learned guidance).
@@ -518414,6 +518419,228 @@ ${body}`;
518414
518419
  * Returns null when the disable knob is set or the backend is missing the
518415
518420
  * chatCompletion method.
518416
518421
  */
518422
+ /**
518423
+ * REG-6: Heuristic — does this shell command perform side effects, or is it
518424
+ * purely a read? Read-only commands are safe to dedup-cache the same way
518425
+ * file_read and list_directory are.
518426
+ *
518427
+ * Conservative: if any token looks like it mutates state (write redirects,
518428
+ * piped-to-write, mutating subcommands), return false. Otherwise check that
518429
+ * every command segment starts with a known read-only binary.
518430
+ */
518431
+ _isShellCommandReadOnly(rawCmd) {
518432
+ if (!rawCmd || typeof rawCmd !== "string")
518433
+ return false;
518434
+ const cmd = rawCmd.trim();
518435
+ if (cmd.length === 0 || cmd.length > 1500)
518436
+ return false;
518437
+ if (/(^|[^&\d])(>|>>)\s*\S/.test(cmd))
518438
+ return false;
518439
+ const MUTATE_BINS = [
518440
+ "rm",
518441
+ "mv",
518442
+ "cp",
518443
+ "mkdir",
518444
+ "rmdir",
518445
+ "chmod",
518446
+ "chown",
518447
+ "touch",
518448
+ "tee",
518449
+ "dd",
518450
+ "truncate",
518451
+ "ln",
518452
+ "kill",
518453
+ "pkill",
518454
+ "killall",
518455
+ "reboot",
518456
+ "shutdown",
518457
+ "fakeroot",
518458
+ "sudo",
518459
+ "nohup",
518460
+ "setsid",
518461
+ "make",
518462
+ "gradle",
518463
+ "mvn",
518464
+ "ansible",
518465
+ "systemd-run"
518466
+ ];
518467
+ const mutateBinsRe = new RegExp(`\\b(${MUTATE_BINS.join("|")})\\b`, "i");
518468
+ if (mutateBinsRe.test(cmd))
518469
+ return false;
518470
+ if (/\bsed\s+(-i|--in-place)\b/.test(cmd))
518471
+ return false;
518472
+ if (/\bsystemctl\s+(?!status\b|show\b|is-)/i.test(cmd))
518473
+ return false;
518474
+ if (/\bservice\s+\S+\s+(?!status\b)/i.test(cmd))
518475
+ return false;
518476
+ if (/\bcrontab\s+-(e|d|r)\b/.test(cmd))
518477
+ return false;
518478
+ if (/\bnpm\s+(install|uninstall|update|run|test|exec|publish|init|link|unlink|version|cache\s+clean|ci|audit\s+fix)\b/i.test(cmd))
518479
+ return false;
518480
+ if (/\bpnpm\s+(install|update|add|remove|run|test|exec|publish|init|link|unlink|version)\b/i.test(cmd))
518481
+ return false;
518482
+ if (/\byarn\s+(install|add|remove|upgrade|run|test|exec|publish|init|link|unlink|version)\b/i.test(cmd))
518483
+ return false;
518484
+ if (/\bpip\s+(install|uninstall|wheel)\b/i.test(cmd))
518485
+ return false;
518486
+ if (/\bnpx\b/.test(cmd))
518487
+ return false;
518488
+ if (/\bcargo\s+(build|run|test|update|publish|install|uninstall|fmt|fix)\b/i.test(cmd))
518489
+ return false;
518490
+ if (/\bgo\s+(build|run|test|get|install)\b/i.test(cmd))
518491
+ return false;
518492
+ if (/\bdocker\s+(build|run|push|pull|exec|kill|stop|rm|rmi|tag)\b/i.test(cmd))
518493
+ return false;
518494
+ if (/\bkubectl\s+(apply|delete|create|edit|patch|scale|rollout|exec)\b/i.test(cmd))
518495
+ return false;
518496
+ if (/\bterraform\s+(apply|destroy|init|plan|import)\b/i.test(cmd))
518497
+ return false;
518498
+ const READ_ONLY_BINS = /* @__PURE__ */ new Set([
518499
+ "cd",
518500
+ // shell builtin: changes pwd, doesn't write — common segment leader
518501
+ "grep",
518502
+ "egrep",
518503
+ "fgrep",
518504
+ "rg",
518505
+ "ag",
518506
+ "cat",
518507
+ "head",
518508
+ "tail",
518509
+ "less",
518510
+ "more",
518511
+ "ls",
518512
+ "ll",
518513
+ "la",
518514
+ "find",
518515
+ // ALLOWED only if no -delete/-exec mutating action — pre-filtered above
518516
+ "wc",
518517
+ "awk",
518518
+ "gawk",
518519
+ "sort",
518520
+ "uniq",
518521
+ "tr",
518522
+ "cut",
518523
+ "paste",
518524
+ "join",
518525
+ "comm",
518526
+ "diff",
518527
+ "cmp",
518528
+ "echo",
518529
+ "printf",
518530
+ "pwd",
518531
+ "which",
518532
+ "type",
518533
+ "command",
518534
+ "node",
518535
+ "python",
518536
+ "python3",
518537
+ "ruby",
518538
+ "perl",
518539
+ "git",
518540
+ // git log/show/diff/status are read; but git add/commit/push/pull are writes — pre-filtered above
518541
+ "ollama",
518542
+ // ollama show/list are read; ollama pull/run/create are writes — pre-filtered above
518543
+ "cargo",
518544
+ // pre-filtered above for build/run/etc.
518545
+ "go",
518546
+ // pre-filtered above for build/run/etc.
518547
+ "stat",
518548
+ "file",
518549
+ "du",
518550
+ "df",
518551
+ "date",
518552
+ "uname",
518553
+ "id",
518554
+ "whoami",
518555
+ "hostname",
518556
+ "uptime",
518557
+ "env",
518558
+ "printenv",
518559
+ "test",
518560
+ "[",
518561
+ "true",
518562
+ "false",
518563
+ "tsc",
518564
+ "eslint",
518565
+ "prettier",
518566
+ // these emit but mostly read
518567
+ "head",
518568
+ "tail",
518569
+ "jq",
518570
+ "yq",
518571
+ "xq",
518572
+ "base64",
518573
+ "md5sum",
518574
+ "sha256sum",
518575
+ "sha1sum",
518576
+ "tldr",
518577
+ "man",
518578
+ "info"
518579
+ ]);
518580
+ if (/\bfind\b[\s\S]*?(-delete|-exec\s+(rm|mv|cp|chmod|chown|sed\s+-i)|--?ok\s+(rm|mv))/i.test(cmd))
518581
+ return false;
518582
+ if (/\b(node|python\d?)\b\s+-(e|c)\b[\s\S]*\b(rm|writeFileSync|unlinkSync|mkdir|process\.exit|exec|spawn|require\(\s*['"]child_process)/i.test(cmd))
518583
+ return false;
518584
+ const segments = cmd.split(/(?:\|\||&&|;)/).map((s2) => s2.trim()).filter(Boolean);
518585
+ if (segments.length === 0)
518586
+ return false;
518587
+ for (const seg of segments) {
518588
+ const stripped = seg.replace(/^cd\s+\S+\s*$/i, "true").replace(/^!/, "");
518589
+ const firstTok = stripped.split(/\s+/)[0]?.replace(/^.*\//, "") || "";
518590
+ if (!firstTok)
518591
+ continue;
518592
+ if (!READ_ONLY_BINS.has(firstTok))
518593
+ return false;
518594
+ }
518595
+ return true;
518596
+ }
518597
+ /**
518598
+ * REG-5: Render the recent-failures block so the agent SEES its own error
518599
+ * output before deciding what to do next. Detects same-fingerprint failure
518600
+ * repetition and escalates the warning. Without this, the agent runs
518601
+ * `npx next build`, gets a 200-line TypeScript error, ignores the specific
518602
+ * error and blindly retries with `npm install --force`. Caching the failure
518603
+ * + injecting it pre-LLM forces the model to confront what actually broke.
518604
+ */
518605
+ _renderRecentFailuresBlock(turn) {
518606
+ const fails = this._recentFailures;
518607
+ if (!fails || fails.length === 0)
518608
+ return null;
518609
+ const fresh = fails.filter((f2) => turn - f2.turn <= 10);
518610
+ if (fresh.length === 0)
518611
+ return null;
518612
+ const fpCount = /* @__PURE__ */ new Map();
518613
+ for (const f2 of fresh) {
518614
+ if (turn - f2.turn <= 5)
518615
+ fpCount.set(f2.fingerprint, (fpCount.get(f2.fingerprint) ?? 0) + 1);
518616
+ }
518617
+ const repeating = [...fpCount.entries()].filter(([, n2]) => n2 >= 2);
518618
+ const lines = [];
518619
+ if (repeating.length > 0) {
518620
+ lines.push("[STOP — RETRY LOOP DETECTED]");
518621
+ lines.push("You are re-issuing the SAME failing tool call(s) without changing anything that would fix the underlying error. If you cannot diagnose the error from the messages below, mark the current todo phase as `blocked` (with the blocker text) and either move to a different phase or call task_complete with what you have. DO NOT just retry the same command again — the error will not magically disappear.");
518622
+ } else {
518623
+ lines.push("[RECENT TOOL FAILURES — read these errors carefully BEFORE deciding your next action]");
518624
+ }
518625
+ const shown = fresh.slice(-5).reverse();
518626
+ for (const f2 of shown) {
518627
+ const argsRepr = JSON.stringify(f2.args).slice(0, 120);
518628
+ const errFirst = (f2.error || f2.output || "").split(/\n/)[0]?.slice(0, 200) || "(no error message)";
518629
+ const errFull = (f2.error || f2.output || "").slice(0, 600);
518630
+ lines.push(`• turn ${f2.turn} — ${f2.tool}(${argsRepr})`);
518631
+ lines.push(` first line: ${errFirst}`);
518632
+ if (errFull && errFull !== errFirst) {
518633
+ const indented = errFull.split(/\n/).slice(0, 6).map((l2) => ` ${l2}`).join("\n");
518634
+ lines.push(indented);
518635
+ }
518636
+ }
518637
+ if (repeating.length > 0) {
518638
+ const repeatingDesc = repeating.map(([fp, n2]) => `${n2}× ${fp.slice(0, 80)}`).join("; ");
518639
+ lines.push(`Repeating fingerprints: ${repeatingDesc}`);
518640
+ }
518641
+ lines.push(`(turn ${turn} — failures auto-expire after 10 turns; cleared on success or successful retry)`);
518642
+ return lines.join("\n");
518643
+ }
518417
518644
  /**
518418
518645
  * REG-3: Render the current todo list as a compact transient block so the
518419
518646
  * agent can read its own plan without calling todo_read or re-emitting
@@ -519597,6 +519824,9 @@ ${memoryLines.join("\n")}`
519597
519824
  const todoBlock = this._renderTodoStateBlock(turn);
519598
519825
  if (todoBlock)
519599
519826
  _injections.push(todoBlock);
519827
+ const failBlock = this._renderRecentFailuresBlock(turn);
519828
+ if (failBlock)
519829
+ _injections.push(failBlock);
519600
519830
  if (_injections.length > 0) {
519601
519831
  const reqMsgs = chatRequest.messages;
519602
519832
  if (Array.isArray(reqMsgs)) {
@@ -519967,7 +520197,7 @@ ${cachedEntry2.result.slice(0, 500)}` : `[BLOCKED — the observer confirmed thi
519967
520197
  this.emit({ type: "tool_result", toolName: tc.name, success: true, content: blockMsg.slice(0, 100), turn, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
519968
520198
  return { tc, output: blockMsg };
519969
520199
  }
519970
- const isReadLike = ![
520200
+ const baseIsReadLike = ![
519971
520201
  "file_write",
519972
520202
  "file_edit",
519973
520203
  "shell",
@@ -519996,6 +520226,7 @@ ${cachedEntry2.result.slice(0, 500)}` : `[BLOCKED — the observer confirmed thi
519996
520226
  // tool see every call and return the cached state itself.
519997
520227
  "nexus"
519998
520228
  ].includes(tc.name);
520229
+ const isReadLike = baseIsReadLike || tc.name === "shell" && this._isShellCommandReadOnly(tc.arguments?.["command"] ?? tc.arguments?.["cmd"] ?? "");
519999
520230
  const cachedEntry = recentToolResults.get(toolFingerprint);
520000
520231
  if (isReadLike && cachedEntry !== void 0) {
520001
520232
  this.emit({
@@ -520385,6 +520616,22 @@ ${cachedEntry2.result.slice(0, 500)}` : `[BLOCKED — the observer confirmed thi
520385
520616
  recentToolResults.delete(firstKey);
520386
520617
  }
520387
520618
  }
520619
+ if (result.success) {
520620
+ this._recentFailures = this._recentFailures.filter((f2) => f2.fingerprint !== toolFingerprint);
520621
+ }
520622
+ if (!result.success) {
520623
+ this._recentFailures.push({
520624
+ tool: tc.name,
520625
+ fingerprint: toolFingerprint,
520626
+ args: tc.arguments,
520627
+ error: (result.error ?? "").slice(0, 600),
520628
+ output: (result.output ?? "").slice(0, 1500),
520629
+ turn
520630
+ });
520631
+ if (this._recentFailures.length > 8) {
520632
+ this._recentFailures = this._recentFailures.slice(-8);
520633
+ }
520634
+ }
520388
520635
  if (!result.success && tc.name === "shell" && /\[PERMISSION_ERROR\]/.test(result.error ?? "")) {
520389
520636
  this.emit({
520390
520637
  type: "sudo_request",
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "open-agents-ai",
3
- "version": "0.187.463",
3
+ "version": "0.187.465",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "open-agents-ai",
9
- "version": "0.187.463",
9
+ "version": "0.187.465",
10
10
  "hasInstallScript": true,
11
11
  "license": "CC-BY-NC-4.0",
12
12
  "dependencies": {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "open-agents-ai",
3
- "version": "0.187.463",
3
+ "version": "0.187.465",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",