open-agents-ai 0.187.236 → 0.187.237
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +150 -11
- package/dist/scripts/.env +14 -0
- package/dist/scripts/.scrape_setup_complete +1 -0
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -271575,6 +271575,7 @@ ${sr.result.output}`;
|
|
|
271575
271575
|
}
|
|
271576
271576
|
if (completed)
|
|
271577
271577
|
break;
|
|
271578
|
+
this.littlemanObserve(messages2, turn);
|
|
271578
271579
|
const currentRepScore = this.detectRepetition(toolCallLog);
|
|
271579
271580
|
if (currentRepScore > 0.4 && toolCallLog.length >= 4) {
|
|
271580
271581
|
const { repetitionWindow } = this.contextLimits();
|
|
@@ -271957,7 +271958,7 @@ ${result.output}`;
|
|
|
271957
271958
|
turn,
|
|
271958
271959
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
271959
271960
|
});
|
|
271960
|
-
output = `[
|
|
271961
|
+
output = `[${tc.name} succeeded — output externalized: ${result.output.length} chars, ${lineCount} lines]
|
|
271961
271962
|
Handle: ${handleId}
|
|
271962
271963
|
Preview: ${preview}...
|
|
271963
271964
|
Full content available via: repl_exec(code="data = retrieve('${handleId}')") or memex_retrieve(id="${handleId}")`;
|
|
@@ -272380,7 +272381,7 @@ ${errOutput}`;
|
|
|
272380
272381
|
${result.output}`, "utf-8");
|
|
272381
272382
|
} catch {
|
|
272382
272383
|
}
|
|
272383
|
-
return `[
|
|
272384
|
+
return `[${toolName} succeeded — output externalized: ${result.output.length} chars, ${lineCount} lines]
|
|
272384
272385
|
Handle: ${handleId}
|
|
272385
272386
|
Preview: ${preview}...
|
|
272386
272387
|
Full content available via: repl_exec(code="data = retrieve('${handleId}')") or memex_retrieve(id="${handleId}")`;
|
|
@@ -272882,6 +272883,99 @@ ${newerSummary}`;
|
|
|
272882
272883
|
|
|
272883
272884
|
${trimmedNew}`;
|
|
272884
272885
|
}
|
|
272886
|
+
// -------------------------------------------------------------------------
|
|
272887
|
+
// Littleman Observer — parallel meta-analysis of the main loop
|
|
272888
|
+
// -------------------------------------------------------------------------
|
|
272889
|
+
// Inspired by Hannover's fireCompanionObserver (src/buddy/observer.ts).
|
|
272890
|
+
// Runs after each tool turn to detect when the model has lost track of
|
|
272891
|
+
// what happened and inject corrections before the next inference.
|
|
272892
|
+
//
|
|
272893
|
+
// This is the architectural fix for the "I see both tools have been failing"
|
|
272894
|
+
// regression: instead of only fixing the data the model sees (mask/summary),
|
|
272895
|
+
// we add a second analysis path that catches mismatches in real-time.
|
|
272896
|
+
/** Track recent tool outcomes for the littleman observer */
|
|
272897
|
+
_littlemanToolOutcomes = [];
|
|
272898
|
+
/**
|
|
272899
|
+
* Littleman observer: post-turn meta-analysis.
|
|
272900
|
+
*
|
|
272901
|
+
* Examines the last few messages looking for contradictions between
|
|
272902
|
+
* actual tool outcomes and the model's stated understanding. When it
|
|
272903
|
+
* detects the model claiming failure after success (or vice versa),
|
|
272904
|
+
* it injects a corrective message.
|
|
272905
|
+
*
|
|
272906
|
+
* Also detects repeated actions — when the model re-does something
|
|
272907
|
+
* that already succeeded, the littleman nudges it to move on.
|
|
272908
|
+
*/
|
|
272909
|
+
littlemanObserve(messages2, turn) {
|
|
272910
|
+
if (this.options.modelTier === "large")
|
|
272911
|
+
return;
|
|
272912
|
+
const recent = messages2.slice(-6);
|
|
272913
|
+
for (const msg of recent) {
|
|
272914
|
+
if (msg.role === "tool" && typeof msg.content === "string") {
|
|
272915
|
+
const isError2 = msg.content.startsWith("Error:") || /^(FAIL|ERR!|TypeError)/i.test(msg.content);
|
|
272916
|
+
const succeeded = !isError2;
|
|
272917
|
+
const preview = msg.content.slice(0, 80);
|
|
272918
|
+
let toolName = "unknown";
|
|
272919
|
+
if (msg.tool_call_id) {
|
|
272920
|
+
for (const m2 of recent) {
|
|
272921
|
+
if (m2.tool_calls) {
|
|
272922
|
+
for (const tc of m2.tool_calls) {
|
|
272923
|
+
if (tc.id === msg.tool_call_id) {
|
|
272924
|
+
toolName = tc.function.name;
|
|
272925
|
+
}
|
|
272926
|
+
}
|
|
272927
|
+
}
|
|
272928
|
+
}
|
|
272929
|
+
}
|
|
272930
|
+
if (!this._littlemanToolOutcomes.some((o2) => o2.turn === turn && o2.tool === toolName)) {
|
|
272931
|
+
this._littlemanToolOutcomes.push({ turn, tool: toolName, succeeded, preview });
|
|
272932
|
+
}
|
|
272933
|
+
}
|
|
272934
|
+
}
|
|
272935
|
+
while (this._littlemanToolOutcomes.length > 20)
|
|
272936
|
+
this._littlemanToolOutcomes.shift();
|
|
272937
|
+
const lastAssistant = [...recent].reverse().find((m2) => m2.role === "assistant" && typeof m2.content === "string");
|
|
272938
|
+
if (lastAssistant && typeof lastAssistant.content === "string") {
|
|
272939
|
+
const text = lastAssistant.content.toLowerCase();
|
|
272940
|
+
const claimsFailure = /(?:fail|error|didn't work|not working|unable to|cannot|couldn't|both .* fail|tools? (?:have |has )?been fail)/i.test(text);
|
|
272941
|
+
if (claimsFailure) {
|
|
272942
|
+
const recentOutcomes = this._littlemanToolOutcomes.slice(-4);
|
|
272943
|
+
const successes = recentOutcomes.filter((o2) => o2.succeeded);
|
|
272944
|
+
if (successes.length >= 1) {
|
|
272945
|
+
const successList = successes.map((o2) => `${o2.tool}: ${o2.preview.slice(0, 60)}`).join("; ");
|
|
272946
|
+
this.pendingUserMessages.push(`[LITTLEMAN] Correction: your recent tools DID succeed. Do not retry them.
|
|
272947
|
+
Successful results: ${successList}
|
|
272948
|
+
Build on these results instead of retrying. What is your NEXT step toward the goal?`);
|
|
272949
|
+
this.emit({
|
|
272950
|
+
type: "status",
|
|
272951
|
+
content: `Littleman: corrected false failure claim (${successes.length} tools succeeded)`,
|
|
272952
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
272953
|
+
});
|
|
272954
|
+
}
|
|
272955
|
+
}
|
|
272956
|
+
}
|
|
272957
|
+
const lastToolCalls = recent.filter((m2) => m2.role === "assistant" && m2.tool_calls?.length).flatMap((m2) => m2.tool_calls ?? []);
|
|
272958
|
+
for (const tc of lastToolCalls) {
|
|
272959
|
+
const name10 = tc.function.name;
|
|
272960
|
+
let args = {};
|
|
272961
|
+
try {
|
|
272962
|
+
args = JSON.parse(tc.function.arguments);
|
|
272963
|
+
} catch {
|
|
272964
|
+
}
|
|
272965
|
+
const argsKey = name10 === "shell" ? String(args.command ?? "").slice(0, 60) : name10 === "web_fetch" ? String(args.url ?? "").slice(0, 80) : String(args.path ?? args.url ?? args.query ?? "").slice(0, 60);
|
|
272966
|
+
const prior = this._littlemanToolOutcomes.find((o2) => o2.succeeded && o2.tool === name10 && o2.preview.includes(argsKey.slice(0, 30)) && o2.turn < turn);
|
|
272967
|
+
if (prior) {
|
|
272968
|
+
this.pendingUserMessages.push(`[LITTLEMAN] You already ran ${name10} successfully on turn ${prior.turn} with similar arguments. Result was: ${prior.preview.slice(0, 100)}
|
|
272969
|
+
Do NOT re-run it. Use the result you already have and proceed to the next step.`);
|
|
272970
|
+
this.emit({
|
|
272971
|
+
type: "status",
|
|
272972
|
+
content: `Littleman: prevented redundant ${name10} call (succeeded on turn ${prior.turn})`,
|
|
272973
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
272974
|
+
});
|
|
272975
|
+
break;
|
|
272976
|
+
}
|
|
272977
|
+
}
|
|
272978
|
+
}
|
|
272885
272979
|
/**
|
|
272886
272980
|
* Infer what the model should do next from the most recent messages.
|
|
272887
272981
|
* Analyzes the last few tool calls, errors, and assistant text to produce
|
|
@@ -273030,18 +273124,37 @@ ${pathKeep}${omitted}` };
|
|
|
273030
273124
|
return { ...msg, content: `[directory listing: ${lines} entries — top entries preserved]
|
|
273031
273125
|
${dirKeep}${omitted}` };
|
|
273032
273126
|
}
|
|
273033
|
-
case "web_fetch":
|
|
273034
|
-
|
|
273035
|
-
|
|
273036
|
-
|
|
273127
|
+
case "web_fetch": {
|
|
273128
|
+
const webPreview = contentLines.slice(0, 5).join("\n");
|
|
273129
|
+
const webOmitted = lines > 5 ? `
|
|
273130
|
+
[... ${lines - 5} more lines omitted for compaction]` : "";
|
|
273131
|
+
return { ...msg, content: `[web_fetch succeeded: ${content.length} chars, ${lines} lines — preview preserved]
|
|
273132
|
+
${webPreview}${webOmitted}` };
|
|
273133
|
+
}
|
|
273134
|
+
case "web_search": {
|
|
273135
|
+
const searchPreview = contentLines.slice(0, 5).join("\n");
|
|
273136
|
+
const searchOmitted = lines > 5 ? `
|
|
273137
|
+
[... ${lines - 5} more results omitted]` : "";
|
|
273138
|
+
return { ...msg, content: `[web_search succeeded: ${lines} results — top results preserved]
|
|
273139
|
+
${searchPreview}${searchOmitted}` };
|
|
273140
|
+
}
|
|
273037
273141
|
case "shell":
|
|
273038
|
-
case "background_run":
|
|
273142
|
+
case "background_run": {
|
|
273039
273143
|
if (/PASS|FAIL|error|warning/i.test(content))
|
|
273040
273144
|
return msg;
|
|
273041
|
-
|
|
273145
|
+
const cmdPreview = contentLines.slice(0, 5).join("\n");
|
|
273146
|
+
const cmdOmitted = lines > 5 ? `
|
|
273147
|
+
[... ${lines - 5} more lines omitted for compaction]` : "";
|
|
273148
|
+
return { ...msg, content: `[shell succeeded: ${lines} lines, ${content.length} chars — preview preserved]
|
|
273149
|
+
${cmdPreview}${cmdOmitted}` };
|
|
273150
|
+
}
|
|
273042
273151
|
default:
|
|
273043
273152
|
if (content.length > 2e3) {
|
|
273044
|
-
|
|
273153
|
+
const genPreview = contentLines.slice(0, 3).join("\n");
|
|
273154
|
+
const genOmitted = lines > 3 ? `
|
|
273155
|
+
[... ${lines - 3} more lines omitted]` : "";
|
|
273156
|
+
return { ...msg, content: `[${toolName ?? "tool"} succeeded: ${content.length} chars — preview preserved]
|
|
273157
|
+
${genPreview}${genOmitted}` };
|
|
273045
273158
|
}
|
|
273046
273159
|
return msg;
|
|
273047
273160
|
}
|
|
@@ -273141,19 +273254,45 @@ ${headContent}${sigLines ? "\n[key signatures]: " + sigLines : ""}`;
|
|
|
273141
273254
|
const cmd = String(tc.args.command || "").slice(0, 100);
|
|
273142
273255
|
const hasError = content.startsWith("Error:") || /FAIL|ERR!/i.test(content);
|
|
273143
273256
|
const hasPass = /PASS|passed|✓|success/i.test(content);
|
|
273257
|
+
const hasMaskedSuccess = /^\[shell succeeded:|^\[.+ succeeded —/.test(content);
|
|
273144
273258
|
let outcome;
|
|
273145
273259
|
if (hasError) {
|
|
273146
273260
|
const errorLines = content.split("\n").filter((l2) => /error|FAIL|✗|×|ERR!/i.test(l2)).slice(0, 3);
|
|
273147
273261
|
outcome = errorLines.length > 0 ? errorLines.join("; ").slice(0, 200) : content.slice(0, 200);
|
|
273148
273262
|
errors.push(`\`${cmd}\`: ${outcome.slice(0, 150)}`);
|
|
273149
|
-
} else if (hasPass) {
|
|
273150
|
-
|
|
273263
|
+
} else if (hasPass || hasMaskedSuccess) {
|
|
273264
|
+
const previewLines = content.split("\n").slice(1, 4).join(" ").trim();
|
|
273265
|
+
outcome = previewLines ? `succeeded: ${previewLines.slice(0, 120)}` : "succeeded";
|
|
273151
273266
|
} else {
|
|
273152
273267
|
outcome = content.slice(0, 150);
|
|
273153
273268
|
}
|
|
273154
273269
|
commandResults.push({ cmd, outcome });
|
|
273155
273270
|
break;
|
|
273156
273271
|
}
|
|
273272
|
+
case "web_fetch": {
|
|
273273
|
+
const url = String(tc.args.url || "").slice(0, 120);
|
|
273274
|
+
if (content.startsWith("Error:")) {
|
|
273275
|
+
const errMsg = content.slice(0, 200);
|
|
273276
|
+
errors.push(`web_fetch \`${url}\`: ${errMsg}`);
|
|
273277
|
+
commandResults.push({ cmd: `web_fetch ${url}`, outcome: errMsg });
|
|
273278
|
+
} else {
|
|
273279
|
+
const preview = content.split("\n").slice(0, 3).join(" ").trim().slice(0, 150);
|
|
273280
|
+
commandResults.push({ cmd: `web_fetch ${url}`, outcome: `succeeded: ${preview || `${content.length} chars`}` });
|
|
273281
|
+
}
|
|
273282
|
+
break;
|
|
273283
|
+
}
|
|
273284
|
+
case "web_search": {
|
|
273285
|
+
const query = String(tc.args.query || "").slice(0, 80);
|
|
273286
|
+
if (content.startsWith("Error:")) {
|
|
273287
|
+
errors.push(`web_search "${query}": ${content.slice(0, 200)}`);
|
|
273288
|
+
commandResults.push({ cmd: `web_search "${query}"`, outcome: content.slice(0, 200) });
|
|
273289
|
+
} else {
|
|
273290
|
+
const resultCount = (content.match(/\n/g) || []).length;
|
|
273291
|
+
const preview = content.split("\n").slice(0, 2).join(" ").trim().slice(0, 100);
|
|
273292
|
+
commandResults.push({ cmd: `web_search "${query}"`, outcome: `${resultCount} results: ${preview}` });
|
|
273293
|
+
}
|
|
273294
|
+
break;
|
|
273295
|
+
}
|
|
273157
273296
|
case "grep_search": {
|
|
273158
273297
|
const pattern = String(tc.args.pattern || "");
|
|
273159
273298
|
const matchCount = (content.match(/\n/g) || []).length;
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
SCRAPE_API_KEY=948a46e9c7b94025aba48cf43f823950
|
|
2
|
+
SCRAPE_BIND=0.0.0.0
|
|
3
|
+
SCRAPE_PORT=8130
|
|
4
|
+
SCRAPE_REQUIRE_AUTH=0
|
|
5
|
+
SCRAPE_MAX_CONCURRENCY=4
|
|
6
|
+
SCRAPE_QUEUE_TIMEOUT_S=0
|
|
7
|
+
SCRAPE_RATE_LIMIT_RPS=60
|
|
8
|
+
SCRAPE_RATE_LIMIT_BURST=180
|
|
9
|
+
SCRAPE_RATE_LIMIT_LOCAL_BYPASS=1
|
|
10
|
+
SCRAPE_RATE_LIMIT_DISABLED=0
|
|
11
|
+
SCRAPE_RATE_LIMIT_WHITELIST=
|
|
12
|
+
SCRAPE_FILE_TTL_S=900
|
|
13
|
+
SCRAPE_FRAME_KEEPALIVE_S=45
|
|
14
|
+
SCRAPE_HEADLESS_DEFAULT=1
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
ok
|
package/package.json
CHANGED