open-agents-ai 0.187.532 → 0.187.533
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +116 -3
- package/npm-shrinkwrap.json +5 -5
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -526818,6 +526818,85 @@ Pick the SMALLEST concrete deliverable from the spec — typically the project e
|
|
|
526818
526818
|
}
|
|
526819
526819
|
}
|
|
526820
526820
|
}
|
|
526821
|
+
/**
|
|
526822
|
+
* REG-66 Debug-Loop Detection (root-cause from batch535-midi, 2026-05-04).
|
|
526823
|
+
*
|
|
526824
|
+
* Empirical: midi run had 11x `npm run build 2>&1` + same 5 files re-read
|
|
526825
|
+
* 5-6 times each + 22 BFC-61.G coercion BLOCKS — and ZERO of those blocks
|
|
526826
|
+
* resulted in a creative edit. The agent was rationally stuck: it
|
|
526827
|
+
* believed it needed to read more to debug, the build command kept
|
|
526828
|
+
* giving the same error, and the standard "issue an edit" directive
|
|
526829
|
+
* gave no traction because the agent had no concrete edit hypothesis.
|
|
526830
|
+
*
|
|
526831
|
+
* This method analyzes toolCallLog for the debug-loop signature:
|
|
526832
|
+
* - Same shell command stem repeated ≥5 times in the trailing window, OR
|
|
526833
|
+
* - Same file_read path re-read ≥4 times in the trailing window.
|
|
526834
|
+
* Both indicate the agent is reading/running the same things hoping for
|
|
526835
|
+
* different output. Without this signal we'd just keep telling the
|
|
526836
|
+
* agent to "make an edit" — which is exactly what it can't think of.
|
|
526837
|
+
*
|
|
526838
|
+
* When detected, the BFC-61.G block message swaps to a PERTURB-strategy
|
|
526839
|
+
* directive: stop reading, change ONE thing in the most-likely-culprit
|
|
526840
|
+
* file even if you're uncertain, and let the new error signal guide
|
|
526841
|
+
* the next iteration. This is real human debugging strategy ("perturb
|
|
526842
|
+
* to disambiguate"), NOT reward-hacking — the agent still has to
|
|
526843
|
+
* produce a real edit and the success criteria (todos done + build
|
|
526844
|
+
* passing) are unchanged.
|
|
526845
|
+
*
|
|
526846
|
+
* @returns Detection result. `detected=false` → use standard message.
|
|
526847
|
+
* `detected=true` → use REG-66 perturb-strategy message;
|
|
526848
|
+
* `repeatedSample` carries the offending command/path for the
|
|
526849
|
+
* message body so the agent sees the specific pattern called out.
|
|
526850
|
+
*/
|
|
526851
|
+
_detectDebugLoop(toolCallLog) {
|
|
526852
|
+
if (process.env["OA_DISABLE_REG66"] === "1")
|
|
526853
|
+
return { detected: false };
|
|
526854
|
+
const WINDOW = 20;
|
|
526855
|
+
const SHELL_REPEAT_THRESHOLD = 5;
|
|
526856
|
+
const READ_REPEAT_THRESHOLD = 4;
|
|
526857
|
+
const window2 = toolCallLog.slice(-WINDOW);
|
|
526858
|
+
if (window2.length < SHELL_REPEAT_THRESHOLD)
|
|
526859
|
+
return { detected: false };
|
|
526860
|
+
const _editClasses = /* @__PURE__ */ new Set(["file_write", "file_edit", "batch_edit", "file_patch"]);
|
|
526861
|
+
for (const c9 of window2) {
|
|
526862
|
+
if (_editClasses.has(c9.name) && c9.success !== false)
|
|
526863
|
+
return { detected: false };
|
|
526864
|
+
}
|
|
526865
|
+
const shellCounts = /* @__PURE__ */ new Map();
|
|
526866
|
+
const readCounts = /* @__PURE__ */ new Map();
|
|
526867
|
+
for (const c9 of window2) {
|
|
526868
|
+
if (c9.name === "shell") {
|
|
526869
|
+
const m2 = c9.argsKey.match(/(?:^|,)command=([^,]+)/);
|
|
526870
|
+
if (m2 && m2[1]) {
|
|
526871
|
+
const stem = m2[1].trim();
|
|
526872
|
+
shellCounts.set(stem, (shellCounts.get(stem) ?? 0) + 1);
|
|
526873
|
+
}
|
|
526874
|
+
} else if (c9.name === "file_read" || c9.name === "file_explore") {
|
|
526875
|
+
const m2 = c9.argsKey.match(/(?:^|,)path=([^,]+)/);
|
|
526876
|
+
if (m2 && m2[1]) {
|
|
526877
|
+
const stem = m2[1].trim();
|
|
526878
|
+
readCounts.set(stem, (readCounts.get(stem) ?? 0) + 1);
|
|
526879
|
+
}
|
|
526880
|
+
}
|
|
526881
|
+
}
|
|
526882
|
+
let bestShell = null;
|
|
526883
|
+
for (const [k, n2] of shellCounts) {
|
|
526884
|
+
if (n2 >= SHELL_REPEAT_THRESHOLD && (!bestShell || n2 > bestShell[1]))
|
|
526885
|
+
bestShell = [k, n2];
|
|
526886
|
+
}
|
|
526887
|
+
let bestRead = null;
|
|
526888
|
+
for (const [k, n2] of readCounts) {
|
|
526889
|
+
if (n2 >= READ_REPEAT_THRESHOLD && (!bestRead || n2 > bestRead[1]))
|
|
526890
|
+
bestRead = [k, n2];
|
|
526891
|
+
}
|
|
526892
|
+
if (bestShell) {
|
|
526893
|
+
return { detected: true, repeatedSample: bestShell[0], count: bestShell[1], kind: "shell" };
|
|
526894
|
+
}
|
|
526895
|
+
if (bestRead) {
|
|
526896
|
+
return { detected: true, repeatedSample: bestRead[0], count: bestRead[1], kind: "read" };
|
|
526897
|
+
}
|
|
526898
|
+
return { detected: false };
|
|
526899
|
+
}
|
|
526821
526900
|
readSessionTodos() {
|
|
526822
526901
|
try {
|
|
526823
526902
|
const sid = process.env["OA_SESSION_ID"] || this._sessionId || "default";
|
|
@@ -530749,7 +530828,32 @@ ${memoryLines.join("\n")}`
|
|
|
530749
530828
|
turn,
|
|
530750
530829
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
530751
530830
|
});
|
|
530752
|
-
const reg61BlockMsg = [
|
|
530831
|
+
const _dbgLoop = this._detectDebugLoop(toolCallLog);
|
|
530832
|
+
const _debugLoopSampleSafe = (_dbgLoop.repeatedSample ?? "").slice(0, 120);
|
|
530833
|
+
const reg61BlockMsg = _dbgLoop.detected ? [
|
|
530834
|
+
`[BLOCKED — REG-61 directive in effect — REG-66 DEBUG-LOOP detected]`,
|
|
530835
|
+
``,
|
|
530836
|
+
`Pattern: ${_dbgLoop.kind === "shell" ? "shell command" : "file"} "${_debugLoopSampleSafe}" was used ${_dbgLoop.count}× in the trailing window with ZERO creative edits landing. You are stuck in a debug loop where re-running / re-reading is producing no new information.`,
|
|
530837
|
+
``,
|
|
530838
|
+
`STOP DEBUGGING. PERTURB.`,
|
|
530839
|
+
``,
|
|
530840
|
+
`Strategy when stuck like this (real human debuggers do this):`,
|
|
530841
|
+
` 1. Pick the source file most likely implicated by the recurring failure (probably in src/, the one most-imported by failing tests).`,
|
|
530842
|
+
` 2. Pick ONE plausible cause — most-recently-modified line, most-complex function, most-likely-misnamed import, most-likely off-by-one.`,
|
|
530843
|
+
` 3. Make a SPECULATIVE edit that changes that thing — even if you are NOT certain it'll fix the bug. The point is to get a NEW error signal that disambiguates.`,
|
|
530844
|
+
` 4. Re-run the failing command. If the error CHANGED, you've learned something. If it's identical, you've ruled out one hypothesis.`,
|
|
530845
|
+
``,
|
|
530846
|
+
`This is NOT random guessing — it's targeted hypothesis falsification. Reading the same files 5+ times has already proven uninformative; only a state change will move the system.`,
|
|
530847
|
+
``,
|
|
530848
|
+
`Issue EXACTLY ONE of: file_write / file_edit / batch_edit / file_patch on a single concrete change. The exact CHOICE of edit matters less than NOT continuing to re-read.`,
|
|
530849
|
+
``,
|
|
530850
|
+
`Allowed bypasses (will not be blocked but will not clear the directive either):`,
|
|
530851
|
+
` • web_search — search the EXACT recurring error string`,
|
|
530852
|
+
` • task_complete — exit if you genuinely cannot identify any plausible perturbation`,
|
|
530853
|
+
` • ask_user — escalate to human (if available)`,
|
|
530854
|
+
``,
|
|
530855
|
+
`Once you make a real edit, the directive clears and you'll see the new test result.`
|
|
530856
|
+
].join("\n") : [
|
|
530753
530857
|
`[BLOCKED — REG-61 directive in effect]`,
|
|
530754
530858
|
``,
|
|
530755
530859
|
`A REG-61 FIRST-EDIT NUDGE was issued earlier and has not yet been satisfied. The directive: your next tool call MUST be a creative edit. You issued '${tc.name}' instead, which is a read/explore/shell call. This call has been BLOCKED.`,
|
|
@@ -530777,12 +530881,12 @@ ${memoryLines.join("\n")}`
|
|
|
530777
530881
|
});
|
|
530778
530882
|
this.emit({
|
|
530779
530883
|
type: "status",
|
|
530780
|
-
content: `REG-61 COERCION BLOCK — rejected '${tc.name}' at turn ${turn}; gate stays active until creative edit dispatches`,
|
|
530884
|
+
content: `REG-61 COERCION BLOCK — rejected '${tc.name}' at turn ${turn}; gate stays active until creative edit dispatches${_dbgLoop.detected ? `; REG-66 debug-loop variant (${_dbgLoop.kind} "${_debugLoopSampleSafe.slice(0, 60)}" ${_dbgLoop.count}×)` : ""}`,
|
|
530781
530885
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
530782
530886
|
});
|
|
530783
530887
|
this._tagSyntheticFailure({
|
|
530784
530888
|
mode: "step_repetition",
|
|
530785
|
-
rationale: `REG-61 perpetual coercion block on '${tc.name}' — agent ignored FIRST-EDIT NUDGE`
|
|
530889
|
+
rationale: `REG-61 perpetual coercion block on '${tc.name}' — agent ignored FIRST-EDIT NUDGE${_dbgLoop.detected ? " (debug-loop variant)" : ""}`
|
|
530786
530890
|
});
|
|
530787
530891
|
return { tc, output: reg61BlockMsg };
|
|
530788
530892
|
}
|
|
@@ -599758,6 +599862,15 @@ async function handleRequest(req2, res, ollamaUrl, verbose) {
|
|
|
599758
599862
|
});
|
|
599759
599863
|
return;
|
|
599760
599864
|
}
|
|
599865
|
+
if (pathname === "/favicon.ico" && method === "GET") {
|
|
599866
|
+
const svg = '<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><rect width="16" height="16" rx="3" fill="#b2920a"/><text x="50%" y="55%" font-size="11" font-family="monospace" font-weight="700" text-anchor="middle" dominant-baseline="middle" fill="#0b0b0b">oa</text></svg>';
|
|
599867
|
+
res.writeHead(200, {
|
|
599868
|
+
"Content-Type": "image/svg+xml",
|
|
599869
|
+
"Cache-Control": "public, max-age=86400"
|
|
599870
|
+
});
|
|
599871
|
+
res.end(svg);
|
|
599872
|
+
return;
|
|
599873
|
+
}
|
|
599761
599874
|
if (pathname === "/" && method === "GET" && req2.headers.accept?.includes("text/html")) {
|
|
599762
599875
|
res.writeHead(200, {
|
|
599763
599876
|
"Content-Type": "text/html; charset=utf-8",
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "open-agents-ai",
|
|
3
|
-
"version": "0.187.532",
|
|
3
|
+
"version": "0.187.533",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "open-agents-ai",
|
|
9
|
-
"version": "0.187.532",
|
|
9
|
+
"version": "0.187.533",
|
|
10
10
|
"hasInstallScript": true,
|
|
11
11
|
"license": "CC-BY-NC-4.0",
|
|
12
12
|
"dependencies": {
|
|
@@ -3132,9 +3132,9 @@
|
|
|
3132
3132
|
}
|
|
3133
3133
|
},
|
|
3134
3134
|
"node_modules/express-rate-limit": {
|
|
3135
|
-
"version": "8.
|
|
3136
|
-
"resolved": "https://registry.npmjs.org/express-rate-limit/-/express-rate-limit-8.
|
|
3137
|
-
"integrity": "sha512-
|
|
3135
|
+
"version": "8.5.0",
|
|
3136
|
+
"resolved": "https://registry.npmjs.org/express-rate-limit/-/express-rate-limit-8.5.0.tgz",
|
|
3137
|
+
"integrity": "sha512-XKhFohWaSBdVJNTi5TaHziqnPkv04I9UQV6q1Wy7Ui6GGQZVW12ojDFwqer14EvCXxjvPG0CyWXx7cAXpALB4Q==",
|
|
3138
3138
|
"license": "MIT",
|
|
3139
3139
|
"dependencies": {
|
|
3140
3140
|
"ip-address": "10.1.0"
|
package/package.json
CHANGED