open-agents-ai 0.187.463 → 0.187.465
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +248 -1
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -517722,6 +517722,11 @@ var init_agenticRunner = __esm({
|
|
|
517722
517722
|
// Observer world-model and cohort stats
|
|
517723
517723
|
_observerMode = "both";
|
|
517724
517724
|
_worldFacts = { files: /* @__PURE__ */ new Map(), lastTest: {}, lastLists: /* @__PURE__ */ new Map() };
|
|
517725
|
+
// REG-5: Rolling buffer of recent tool failures with their error output.
|
|
517726
|
+
// Surfaced before every LLM call so the agent can't ignore "I just ran this
|
|
517727
|
+
// and it errored". Detects same-fingerprint failure repetition and escalates
|
|
517728
|
+
// the warning. Keeps last 8 to bound memory + prompt cost.
|
|
517729
|
+
_recentFailures = [];
|
|
517725
517730
|
_argCohorts = /* @__PURE__ */ new Map();
|
|
517726
517731
|
// ── WO-NC-07: Error pattern learning → pre-action guidance injection ──
|
|
517727
517732
|
// Records error patterns (tool + error signature → learned guidance).
|
|
@@ -518414,6 +518419,228 @@ ${body}`;
|
|
|
518414
518419
|
* Returns null when the disable knob is set or the backend is missing the
|
|
518415
518420
|
* chatCompletion method.
|
|
518416
518421
|
*/
|
|
518422
|
+
/**
|
|
518423
|
+
* REG-6: Heuristic — does this shell command perform side effects, or is it
|
|
518424
|
+
* purely a read? Read-only commands are safe to dedup-cache the same way
|
|
518425
|
+
* file_read and list_directory are.
|
|
518426
|
+
*
|
|
518427
|
+
* Conservative: if any token looks like it mutates state (write redirects,
|
|
518428
|
+
* piped-to-write, mutating subcommands), return false. Otherwise check that
|
|
518429
|
+
* every command segment starts with a known read-only binary.
|
|
518430
|
+
*/
|
|
518431
|
+
_isShellCommandReadOnly(rawCmd) {
|
|
518432
|
+
if (!rawCmd || typeof rawCmd !== "string")
|
|
518433
|
+
return false;
|
|
518434
|
+
const cmd = rawCmd.trim();
|
|
518435
|
+
if (cmd.length === 0 || cmd.length > 1500)
|
|
518436
|
+
return false;
|
|
518437
|
+
if (/(^|[^&\d])(>|>>)\s*\S/.test(cmd))
|
|
518438
|
+
return false;
|
|
518439
|
+
const MUTATE_BINS = [
|
|
518440
|
+
"rm",
|
|
518441
|
+
"mv",
|
|
518442
|
+
"cp",
|
|
518443
|
+
"mkdir",
|
|
518444
|
+
"rmdir",
|
|
518445
|
+
"chmod",
|
|
518446
|
+
"chown",
|
|
518447
|
+
"touch",
|
|
518448
|
+
"tee",
|
|
518449
|
+
"dd",
|
|
518450
|
+
"truncate",
|
|
518451
|
+
"ln",
|
|
518452
|
+
"kill",
|
|
518453
|
+
"pkill",
|
|
518454
|
+
"killall",
|
|
518455
|
+
"reboot",
|
|
518456
|
+
"shutdown",
|
|
518457
|
+
"fakeroot",
|
|
518458
|
+
"sudo",
|
|
518459
|
+
"nohup",
|
|
518460
|
+
"setsid",
|
|
518461
|
+
"make",
|
|
518462
|
+
"gradle",
|
|
518463
|
+
"mvn",
|
|
518464
|
+
"ansible",
|
|
518465
|
+
"systemd-run"
|
|
518466
|
+
];
|
|
518467
|
+
const mutateBinsRe = new RegExp(`\\b(${MUTATE_BINS.join("|")})\\b`, "i");
|
|
518468
|
+
if (mutateBinsRe.test(cmd))
|
|
518469
|
+
return false;
|
|
518470
|
+
if (/\bsed\s+(-i|--in-place)\b/.test(cmd))
|
|
518471
|
+
return false;
|
|
518472
|
+
if (/\bsystemctl\s+(?!status\b|show\b|is-)/i.test(cmd))
|
|
518473
|
+
return false;
|
|
518474
|
+
if (/\bservice\s+\S+\s+(?!status\b)/i.test(cmd))
|
|
518475
|
+
return false;
|
|
518476
|
+
if (/\bcrontab\s+-(e|d|r)\b/.test(cmd))
|
|
518477
|
+
return false;
|
|
518478
|
+
if (/\bnpm\s+(install|uninstall|update|run|test|exec|publish|init|link|unlink|version|cache\s+clean|ci|audit\s+fix)\b/i.test(cmd))
|
|
518479
|
+
return false;
|
|
518480
|
+
if (/\bpnpm\s+(install|update|add|remove|run|test|exec|publish|init|link|unlink|version)\b/i.test(cmd))
|
|
518481
|
+
return false;
|
|
518482
|
+
if (/\byarn\s+(install|add|remove|upgrade|run|test|exec|publish|init|link|unlink|version)\b/i.test(cmd))
|
|
518483
|
+
return false;
|
|
518484
|
+
if (/\bpip\s+(install|uninstall|wheel)\b/i.test(cmd))
|
|
518485
|
+
return false;
|
|
518486
|
+
if (/\bnpx\b/.test(cmd))
|
|
518487
|
+
return false;
|
|
518488
|
+
if (/\bcargo\s+(build|run|test|update|publish|install|uninstall|fmt|fix)\b/i.test(cmd))
|
|
518489
|
+
return false;
|
|
518490
|
+
if (/\bgo\s+(build|run|test|get|install)\b/i.test(cmd))
|
|
518491
|
+
return false;
|
|
518492
|
+
if (/\bdocker\s+(build|run|push|pull|exec|kill|stop|rm|rmi|tag)\b/i.test(cmd))
|
|
518493
|
+
return false;
|
|
518494
|
+
if (/\bkubectl\s+(apply|delete|create|edit|patch|scale|rollout|exec)\b/i.test(cmd))
|
|
518495
|
+
return false;
|
|
518496
|
+
if (/\bterraform\s+(apply|destroy|init|plan|import)\b/i.test(cmd))
|
|
518497
|
+
return false;
|
|
518498
|
+
const READ_ONLY_BINS = /* @__PURE__ */ new Set([
|
|
518499
|
+
"cd",
|
|
518500
|
+
// shell builtin: changes pwd, doesn't write — common segment leader
|
|
518501
|
+
"grep",
|
|
518502
|
+
"egrep",
|
|
518503
|
+
"fgrep",
|
|
518504
|
+
"rg",
|
|
518505
|
+
"ag",
|
|
518506
|
+
"cat",
|
|
518507
|
+
"head",
|
|
518508
|
+
"tail",
|
|
518509
|
+
"less",
|
|
518510
|
+
"more",
|
|
518511
|
+
"ls",
|
|
518512
|
+
"ll",
|
|
518513
|
+
"la",
|
|
518514
|
+
"find",
|
|
518515
|
+
// ALLOWED only if no -delete/-exec mutating action — pre-filtered above
|
|
518516
|
+
"wc",
|
|
518517
|
+
"awk",
|
|
518518
|
+
"gawk",
|
|
518519
|
+
"sort",
|
|
518520
|
+
"uniq",
|
|
518521
|
+
"tr",
|
|
518522
|
+
"cut",
|
|
518523
|
+
"paste",
|
|
518524
|
+
"join",
|
|
518525
|
+
"comm",
|
|
518526
|
+
"diff",
|
|
518527
|
+
"cmp",
|
|
518528
|
+
"echo",
|
|
518529
|
+
"printf",
|
|
518530
|
+
"pwd",
|
|
518531
|
+
"which",
|
|
518532
|
+
"type",
|
|
518533
|
+
"command",
|
|
518534
|
+
"node",
|
|
518535
|
+
"python",
|
|
518536
|
+
"python3",
|
|
518537
|
+
"ruby",
|
|
518538
|
+
"perl",
|
|
518539
|
+
"git",
|
|
518540
|
+
// git log/show/diff/status are read; but git add/commit/push/pull are writes — pre-filtered above
|
|
518541
|
+
"ollama",
|
|
518542
|
+
// ollama show/list are read; ollama pull/run/create are writes — pre-filtered above
|
|
518543
|
+
"cargo",
|
|
518544
|
+
// pre-filtered above for build/run/etc.
|
|
518545
|
+
"go",
|
|
518546
|
+
// pre-filtered above for build/run/etc.
|
|
518547
|
+
"stat",
|
|
518548
|
+
"file",
|
|
518549
|
+
"du",
|
|
518550
|
+
"df",
|
|
518551
|
+
"date",
|
|
518552
|
+
"uname",
|
|
518553
|
+
"id",
|
|
518554
|
+
"whoami",
|
|
518555
|
+
"hostname",
|
|
518556
|
+
"uptime",
|
|
518557
|
+
"env",
|
|
518558
|
+
"printenv",
|
|
518559
|
+
"test",
|
|
518560
|
+
"[",
|
|
518561
|
+
"true",
|
|
518562
|
+
"false",
|
|
518563
|
+
"tsc",
|
|
518564
|
+
"eslint",
|
|
518565
|
+
"prettier",
|
|
518566
|
+
// these emit but mostly read
|
|
518567
|
+
"head",
|
|
518568
|
+
"tail",
|
|
518569
|
+
"jq",
|
|
518570
|
+
"yq",
|
|
518571
|
+
"xq",
|
|
518572
|
+
"base64",
|
|
518573
|
+
"md5sum",
|
|
518574
|
+
"sha256sum",
|
|
518575
|
+
"sha1sum",
|
|
518576
|
+
"tldr",
|
|
518577
|
+
"man",
|
|
518578
|
+
"info"
|
|
518579
|
+
]);
|
|
518580
|
+
if (/\bfind\b[\s\S]*?(-delete|-exec\s+(rm|mv|cp|chmod|chown|sed\s+-i)|--?ok\s+(rm|mv))/i.test(cmd))
|
|
518581
|
+
return false;
|
|
518582
|
+
if (/\b(node|python\d?)\b\s+-(e|c)\b[\s\S]*\b(rm|writeFileSync|unlinkSync|mkdir|process\.exit|exec|spawn|require\(\s*['"]child_process)/i.test(cmd))
|
|
518583
|
+
return false;
|
|
518584
|
+
const segments = cmd.split(/(?:\|\||&&|;)/).map((s2) => s2.trim()).filter(Boolean);
|
|
518585
|
+
if (segments.length === 0)
|
|
518586
|
+
return false;
|
|
518587
|
+
for (const seg of segments) {
|
|
518588
|
+
const stripped = seg.replace(/^cd\s+\S+\s*$/i, "true").replace(/^!/, "");
|
|
518589
|
+
const firstTok = stripped.split(/\s+/)[0]?.replace(/^.*\//, "") || "";
|
|
518590
|
+
if (!firstTok)
|
|
518591
|
+
continue;
|
|
518592
|
+
if (!READ_ONLY_BINS.has(firstTok))
|
|
518593
|
+
return false;
|
|
518594
|
+
}
|
|
518595
|
+
return true;
|
|
518596
|
+
}
|
|
518597
|
+
/**
|
|
518598
|
+
* REG-5: Render the recent-failures block so the agent SEES its own error
|
|
518599
|
+
* output before deciding what to do next. Detects same-fingerprint failure
|
|
518600
|
+
* repetition and escalates the warning. Without this, the agent runs
|
|
518601
|
+
* `npx next build`, gets a 200-line TypeScript error, ignores the specific
|
|
518602
|
+
* error and blindly retries with `npm install --force`. Caching the failure
|
|
518603
|
+
* + injecting it pre-LLM forces the model to confront what actually broke.
|
|
518604
|
+
*/
|
|
518605
|
+
_renderRecentFailuresBlock(turn) {
|
|
518606
|
+
const fails = this._recentFailures;
|
|
518607
|
+
if (!fails || fails.length === 0)
|
|
518608
|
+
return null;
|
|
518609
|
+
const fresh = fails.filter((f2) => turn - f2.turn <= 10);
|
|
518610
|
+
if (fresh.length === 0)
|
|
518611
|
+
return null;
|
|
518612
|
+
const fpCount = /* @__PURE__ */ new Map();
|
|
518613
|
+
for (const f2 of fresh) {
|
|
518614
|
+
if (turn - f2.turn <= 5)
|
|
518615
|
+
fpCount.set(f2.fingerprint, (fpCount.get(f2.fingerprint) ?? 0) + 1);
|
|
518616
|
+
}
|
|
518617
|
+
const repeating = [...fpCount.entries()].filter(([, n2]) => n2 >= 2);
|
|
518618
|
+
const lines = [];
|
|
518619
|
+
if (repeating.length > 0) {
|
|
518620
|
+
lines.push("[STOP — RETRY LOOP DETECTED]");
|
|
518621
|
+
lines.push("You are re-issuing the SAME failing tool call(s) without changing anything that would fix the underlying error. If you cannot diagnose the error from the messages below, mark the current todo phase as `blocked` (with the blocker text) and either move to a different phase or call task_complete with what you have. DO NOT just retry the same command again — the error will not magically disappear.");
|
|
518622
|
+
} else {
|
|
518623
|
+
lines.push("[RECENT TOOL FAILURES — read these errors carefully BEFORE deciding your next action]");
|
|
518624
|
+
}
|
|
518625
|
+
const shown = fresh.slice(-5).reverse();
|
|
518626
|
+
for (const f2 of shown) {
|
|
518627
|
+
const argsRepr = JSON.stringify(f2.args).slice(0, 120);
|
|
518628
|
+
const errFirst = (f2.error || f2.output || "").split(/\n/)[0]?.slice(0, 200) || "(no error message)";
|
|
518629
|
+
const errFull = (f2.error || f2.output || "").slice(0, 600);
|
|
518630
|
+
lines.push(`• turn ${f2.turn} — ${f2.tool}(${argsRepr})`);
|
|
518631
|
+
lines.push(` first line: ${errFirst}`);
|
|
518632
|
+
if (errFull && errFull !== errFirst) {
|
|
518633
|
+
const indented = errFull.split(/\n/).slice(0, 6).map((l2) => ` ${l2}`).join("\n");
|
|
518634
|
+
lines.push(indented);
|
|
518635
|
+
}
|
|
518636
|
+
}
|
|
518637
|
+
if (repeating.length > 0) {
|
|
518638
|
+
const repeatingDesc = repeating.map(([fp, n2]) => `${n2}× ${fp.slice(0, 80)}`).join("; ");
|
|
518639
|
+
lines.push(`Repeating fingerprints: ${repeatingDesc}`);
|
|
518640
|
+
}
|
|
518641
|
+
lines.push(`(turn ${turn} — failures auto-expire after 10 turns; cleared on success or successful retry)`);
|
|
518642
|
+
return lines.join("\n");
|
|
518643
|
+
}
|
|
518417
518644
|
/**
|
|
518418
518645
|
* REG-3: Render the current todo list as a compact transient block so the
|
|
518419
518646
|
* agent can read its own plan without calling todo_read or re-emitting
|
|
@@ -519597,6 +519824,9 @@ ${memoryLines.join("\n")}`
|
|
|
519597
519824
|
const todoBlock = this._renderTodoStateBlock(turn);
|
|
519598
519825
|
if (todoBlock)
|
|
519599
519826
|
_injections.push(todoBlock);
|
|
519827
|
+
const failBlock = this._renderRecentFailuresBlock(turn);
|
|
519828
|
+
if (failBlock)
|
|
519829
|
+
_injections.push(failBlock);
|
|
519600
519830
|
if (_injections.length > 0) {
|
|
519601
519831
|
const reqMsgs = chatRequest.messages;
|
|
519602
519832
|
if (Array.isArray(reqMsgs)) {
|
|
@@ -519967,7 +520197,7 @@ ${cachedEntry2.result.slice(0, 500)}` : `[BLOCKED — the observer confirmed thi
|
|
|
519967
520197
|
this.emit({ type: "tool_result", toolName: tc.name, success: true, content: blockMsg.slice(0, 100), turn, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
519968
520198
|
return { tc, output: blockMsg };
|
|
519969
520199
|
}
|
|
519970
|
-
const
|
|
520200
|
+
const baseIsReadLike = ![
|
|
519971
520201
|
"file_write",
|
|
519972
520202
|
"file_edit",
|
|
519973
520203
|
"shell",
|
|
@@ -519996,6 +520226,7 @@ ${cachedEntry2.result.slice(0, 500)}` : `[BLOCKED — the observer confirmed thi
|
|
|
519996
520226
|
// tool see every call and return the cached state itself.
|
|
519997
520227
|
"nexus"
|
|
519998
520228
|
].includes(tc.name);
|
|
520229
|
+
const isReadLike = baseIsReadLike || tc.name === "shell" && this._isShellCommandReadOnly(tc.arguments?.["command"] ?? tc.arguments?.["cmd"] ?? "");
|
|
519999
520230
|
const cachedEntry = recentToolResults.get(toolFingerprint);
|
|
520000
520231
|
if (isReadLike && cachedEntry !== void 0) {
|
|
520001
520232
|
this.emit({
|
|
@@ -520385,6 +520616,22 @@ ${cachedEntry2.result.slice(0, 500)}` : `[BLOCKED — the observer confirmed thi
|
|
|
520385
520616
|
recentToolResults.delete(firstKey);
|
|
520386
520617
|
}
|
|
520387
520618
|
}
|
|
520619
|
+
if (result.success) {
|
|
520620
|
+
this._recentFailures = this._recentFailures.filter((f2) => f2.fingerprint !== toolFingerprint);
|
|
520621
|
+
}
|
|
520622
|
+
if (!result.success) {
|
|
520623
|
+
this._recentFailures.push({
|
|
520624
|
+
tool: tc.name,
|
|
520625
|
+
fingerprint: toolFingerprint,
|
|
520626
|
+
args: tc.arguments,
|
|
520627
|
+
error: (result.error ?? "").slice(0, 600),
|
|
520628
|
+
output: (result.output ?? "").slice(0, 1500),
|
|
520629
|
+
turn
|
|
520630
|
+
});
|
|
520631
|
+
if (this._recentFailures.length > 8) {
|
|
520632
|
+
this._recentFailures = this._recentFailures.slice(-8);
|
|
520633
|
+
}
|
|
520634
|
+
}
|
|
520388
520635
|
if (!result.success && tc.name === "shell" && /\[PERMISSION_ERROR\]/.test(result.error ?? "")) {
|
|
520389
520636
|
this.emit({
|
|
520390
520637
|
type: "sudo_request",
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "open-agents-ai",
|
|
3
|
-
"version": "0.187.
|
|
3
|
+
"version": "0.187.465",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "open-agents-ai",
|
|
9
|
-
"version": "0.187.
|
|
9
|
+
"version": "0.187.465",
|
|
10
10
|
"hasInstallScript": true,
|
|
11
11
|
"license": "CC-BY-NC-4.0",
|
|
12
12
|
"dependencies": {
|
package/package.json
CHANGED