open-agents-ai 0.187.236 → 0.187.238
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +210 -11
- package/dist/scripts/.env +14 -0
- package/dist/scripts/.scrape_setup_complete +1 -0
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -270794,6 +270794,32 @@ ${memoryLines.join("\n")}`
|
|
|
270794
270794
|
maxTokens: effectiveMaxTokens,
|
|
270795
270795
|
timeoutMs: this.options.requestTimeoutMs
|
|
270796
270796
|
};
|
|
270797
|
+
{
|
|
270798
|
+
const ctxChars = compacted.reduce((s2, m2) => {
|
|
270799
|
+
let c7 = typeof m2.content === "string" ? m2.content.length : 100;
|
|
270800
|
+
if (m2.tool_calls)
|
|
270801
|
+
for (const tc of m2.tool_calls)
|
|
270802
|
+
c7 += tc.function.arguments?.length ?? 0;
|
|
270803
|
+
return s2 + c7;
|
|
270804
|
+
}, 0);
|
|
270805
|
+
const estTokens = Math.ceil(ctxChars / 4);
|
|
270806
|
+
const limits = this.contextLimits();
|
|
270807
|
+
this.emit({
|
|
270808
|
+
type: "debug_context",
|
|
270809
|
+
content: `Turn ${turn}: ${compacted.length} msgs, ~${estTokens} tokens (threshold: ${limits.compactionThreshold}), headroom: ${limits.compactionThreshold - estTokens}`,
|
|
270810
|
+
turn,
|
|
270811
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
270812
|
+
contextSnapshot: {
|
|
270813
|
+
messageCount: compacted.length,
|
|
270814
|
+
estimatedTokens: estTokens,
|
|
270815
|
+
compactionThreshold: limits.compactionThreshold,
|
|
270816
|
+
toolCallCount,
|
|
270817
|
+
keepRecent: limits.keepRecent,
|
|
270818
|
+
littlemanOutcomes: this._littlemanToolOutcomes.length,
|
|
270819
|
+
headroom: limits.compactionThreshold - estTokens
|
|
270820
|
+
}
|
|
270821
|
+
});
|
|
270822
|
+
}
|
|
270797
270823
|
let response;
|
|
270798
270824
|
try {
|
|
270799
270825
|
response = this.options.streamEnabled && this.hasStreamingSupport() ? await this.streamingRequest(chatRequest, turn) : await this.backend.chatCompletion(chatRequest);
|
|
@@ -271575,6 +271601,7 @@ ${sr.result.output}`;
|
|
|
271575
271601
|
}
|
|
271576
271602
|
if (completed)
|
|
271577
271603
|
break;
|
|
271604
|
+
this.littlemanObserve(messages2, turn);
|
|
271578
271605
|
const currentRepScore = this.detectRepetition(toolCallLog);
|
|
271579
271606
|
if (currentRepScore > 0.4 && toolCallLog.length >= 4) {
|
|
271580
271607
|
const { repetitionWindow } = this.contextLimits();
|
|
@@ -271957,7 +271984,7 @@ ${result.output}`;
|
|
|
271957
271984
|
turn,
|
|
271958
271985
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
271959
271986
|
});
|
|
271960
|
-
output = `[
|
|
271987
|
+
output = `[${tc.name} succeeded — output externalized: ${result.output.length} chars, ${lineCount} lines]
|
|
271961
271988
|
Handle: ${handleId}
|
|
271962
271989
|
Preview: ${preview}...
|
|
271963
271990
|
Full content available via: repl_exec(code="data = retrieve('${handleId}')") or memex_retrieve(id="${handleId}")`;
|
|
@@ -272380,7 +272407,7 @@ ${errOutput}`;
|
|
|
272380
272407
|
${result.output}`, "utf-8");
|
|
272381
272408
|
} catch {
|
|
272382
272409
|
}
|
|
272383
|
-
return `[
|
|
272410
|
+
return `[${toolName} succeeded — output externalized: ${result.output.length} chars, ${lineCount} lines]
|
|
272384
272411
|
Handle: ${handleId}
|
|
272385
272412
|
Preview: ${preview}...
|
|
272386
272413
|
Full content available via: repl_exec(code="data = retrieve('${handleId}')") or memex_retrieve(id="${handleId}")`;
|
|
@@ -272882,6 +272909,113 @@ ${newerSummary}`;
|
|
|
272882
272909
|
|
|
272883
272910
|
${trimmedNew}`;
|
|
272884
272911
|
}
|
|
272912
|
+
// -------------------------------------------------------------------------
|
|
272913
|
+
// Littleman Observer — parallel meta-analysis of the main loop
|
|
272914
|
+
// -------------------------------------------------------------------------
|
|
272915
|
+
// Inspired by Hannover's fireCompanionObserver (src/buddy/observer.ts).
|
|
272916
|
+
// Runs after each tool turn to detect when the model has lost track of
|
|
272917
|
+
// what happened and inject corrections before the next inference.
|
|
272918
|
+
//
|
|
272919
|
+
// This is the architectural fix for the "I see both tools have been failing"
|
|
272920
|
+
// regression: instead of only fixing the data the model sees (mask/summary),
|
|
272921
|
+
// we add a second analysis path that catches mismatches in real-time.
|
|
272922
|
+
/** Track recent tool outcomes for the littleman observer */
|
|
272923
|
+
_littlemanToolOutcomes = [];
|
|
272924
|
+
/**
|
|
272925
|
+
* Littleman observer: post-turn meta-analysis.
|
|
272926
|
+
*
|
|
272927
|
+
* Examines the last few messages looking for contradictions between
|
|
272928
|
+
* actual tool outcomes and the model's stated understanding. When it
|
|
272929
|
+
* detects the model claiming failure after success (or vice versa),
|
|
272930
|
+
* it injects a corrective message.
|
|
272931
|
+
*
|
|
272932
|
+
* Also detects repeated actions — when the model re-does something
|
|
272933
|
+
* that already succeeded, the littleman nudges it to move on.
|
|
272934
|
+
*/
|
|
272935
|
+
littlemanObserve(messages2, turn) {
|
|
272936
|
+
if (this.options.modelTier === "large")
|
|
272937
|
+
return;
|
|
272938
|
+
const recent = messages2.slice(-6);
|
|
272939
|
+
for (const msg of recent) {
|
|
272940
|
+
if (msg.role === "tool" && typeof msg.content === "string") {
|
|
272941
|
+
const isError2 = msg.content.startsWith("Error:") || /^(FAIL|ERR!|TypeError)/i.test(msg.content);
|
|
272942
|
+
const succeeded = !isError2;
|
|
272943
|
+
const preview = msg.content.slice(0, 80);
|
|
272944
|
+
let toolName = "unknown";
|
|
272945
|
+
if (msg.tool_call_id) {
|
|
272946
|
+
for (const m2 of recent) {
|
|
272947
|
+
if (m2.tool_calls) {
|
|
272948
|
+
for (const tc of m2.tool_calls) {
|
|
272949
|
+
if (tc.id === msg.tool_call_id) {
|
|
272950
|
+
toolName = tc.function.name;
|
|
272951
|
+
}
|
|
272952
|
+
}
|
|
272953
|
+
}
|
|
272954
|
+
}
|
|
272955
|
+
}
|
|
272956
|
+
if (!this._littlemanToolOutcomes.some((o2) => o2.turn === turn && o2.tool === toolName)) {
|
|
272957
|
+
this._littlemanToolOutcomes.push({ turn, tool: toolName, succeeded, preview });
|
|
272958
|
+
}
|
|
272959
|
+
}
|
|
272960
|
+
}
|
|
272961
|
+
while (this._littlemanToolOutcomes.length > 20)
|
|
272962
|
+
this._littlemanToolOutcomes.shift();
|
|
272963
|
+
const lastAssistant = [...recent].reverse().find((m2) => m2.role === "assistant" && typeof m2.content === "string");
|
|
272964
|
+
if (lastAssistant && typeof lastAssistant.content === "string") {
|
|
272965
|
+
const text = lastAssistant.content.toLowerCase();
|
|
272966
|
+
const claimsFailure = /(?:fail|error|didn't work|not working|unable to|cannot|couldn't|both .* fail|tools? (?:have |has )?been fail)/i.test(text);
|
|
272967
|
+
if (claimsFailure) {
|
|
272968
|
+
const recentOutcomes = this._littlemanToolOutcomes.slice(-4);
|
|
272969
|
+
const successes = recentOutcomes.filter((o2) => o2.succeeded);
|
|
272970
|
+
if (successes.length >= 1) {
|
|
272971
|
+
const successList = successes.map((o2) => `${o2.tool}: ${o2.preview.slice(0, 60)}`).join("; ");
|
|
272972
|
+
this.pendingUserMessages.push(`[LITTLEMAN] Correction: your recent tools DID succeed. Do not retry them.
|
|
272973
|
+
Successful results: ${successList}
|
|
272974
|
+
Build on these results instead of retrying. What is your NEXT step toward the goal?`);
|
|
272975
|
+
this.emit({
|
|
272976
|
+
type: "status",
|
|
272977
|
+
content: `Littleman: corrected false failure claim (${successes.length} tools succeeded)`,
|
|
272978
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
272979
|
+
});
|
|
272980
|
+
}
|
|
272981
|
+
}
|
|
272982
|
+
}
|
|
272983
|
+
const lastToolCalls = recent.filter((m2) => m2.role === "assistant" && m2.tool_calls?.length).flatMap((m2) => m2.tool_calls ?? []);
|
|
272984
|
+
for (const tc of lastToolCalls) {
|
|
272985
|
+
const name10 = tc.function.name;
|
|
272986
|
+
let args = {};
|
|
272987
|
+
try {
|
|
272988
|
+
args = JSON.parse(tc.function.arguments);
|
|
272989
|
+
} catch {
|
|
272990
|
+
}
|
|
272991
|
+
const argsKey = name10 === "shell" ? String(args.command ?? "").slice(0, 60) : name10 === "web_fetch" ? String(args.url ?? "").slice(0, 80) : String(args.path ?? args.url ?? args.query ?? "").slice(0, 60);
|
|
272992
|
+
const prior = this._littlemanToolOutcomes.find((o2) => o2.succeeded && o2.tool === name10 && o2.preview.includes(argsKey.slice(0, 30)) && o2.turn < turn);
|
|
272993
|
+
if (prior) {
|
|
272994
|
+
this.pendingUserMessages.push(`[LITTLEMAN] You already ran ${name10} successfully on turn ${prior.turn} with similar arguments. Result was: ${prior.preview.slice(0, 100)}
|
|
272995
|
+
Do NOT re-run it. Use the result you already have and proceed to the next step.`);
|
|
272996
|
+
this.emit({
|
|
272997
|
+
type: "status",
|
|
272998
|
+
content: `Littleman: prevented redundant ${name10} call (succeeded on turn ${prior.turn})`,
|
|
272999
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
273000
|
+
});
|
|
273001
|
+
break;
|
|
273002
|
+
}
|
|
273003
|
+
}
|
|
273004
|
+
const succCount = this._littlemanToolOutcomes.filter((o2) => o2.succeeded).length;
|
|
273005
|
+
const failCount = this._littlemanToolOutcomes.filter((o2) => !o2.succeeded).length;
|
|
273006
|
+
this.emit({
|
|
273007
|
+
type: "debug_littleman",
|
|
273008
|
+
turn,
|
|
273009
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
273010
|
+
content: `Littleman: ${this._littlemanToolOutcomes.length} tracked outcomes (${succCount} ok, ${failCount} err)`,
|
|
273011
|
+
littlemanAction: {
|
|
273012
|
+
detection: "none",
|
|
273013
|
+
recentSuccesses: succCount,
|
|
273014
|
+
recentFailures: failCount,
|
|
273015
|
+
intervention: this.pendingUserMessages.length > 0 ? this.pendingUserMessages[this.pendingUserMessages.length - 1]?.slice(0, 120) ?? null : null
|
|
273016
|
+
}
|
|
273017
|
+
});
|
|
273018
|
+
}
|
|
272885
273019
|
/**
|
|
272886
273020
|
* Infer what the model should do next from the most recent messages.
|
|
272887
273021
|
* Analyzes the last few tool calls, errors, and assistant text to produce
|
|
@@ -273030,18 +273164,37 @@ ${pathKeep}${omitted}` };
|
|
|
273030
273164
|
return { ...msg, content: `[directory listing: ${lines} entries — top entries preserved]
|
|
273031
273165
|
${dirKeep}${omitted}` };
|
|
273032
273166
|
}
|
|
273033
|
-
case "web_fetch":
|
|
273034
|
-
|
|
273035
|
-
|
|
273036
|
-
|
|
273167
|
+
case "web_fetch": {
|
|
273168
|
+
const webPreview = contentLines.slice(0, 5).join("\n");
|
|
273169
|
+
const webOmitted = lines > 5 ? `
|
|
273170
|
+
[... ${lines - 5} more lines omitted for compaction]` : "";
|
|
273171
|
+
return { ...msg, content: `[web_fetch succeeded: ${content.length} chars, ${lines} lines — preview preserved]
|
|
273172
|
+
${webPreview}${webOmitted}` };
|
|
273173
|
+
}
|
|
273174
|
+
case "web_search": {
|
|
273175
|
+
const searchPreview = contentLines.slice(0, 5).join("\n");
|
|
273176
|
+
const searchOmitted = lines > 5 ? `
|
|
273177
|
+
[... ${lines - 5} more results omitted]` : "";
|
|
273178
|
+
return { ...msg, content: `[web_search succeeded: ${lines} results — top results preserved]
|
|
273179
|
+
${searchPreview}${searchOmitted}` };
|
|
273180
|
+
}
|
|
273037
273181
|
case "shell":
|
|
273038
|
-
case "background_run":
|
|
273182
|
+
case "background_run": {
|
|
273039
273183
|
if (/PASS|FAIL|error|warning/i.test(content))
|
|
273040
273184
|
return msg;
|
|
273041
|
-
|
|
273185
|
+
const cmdPreview = contentLines.slice(0, 5).join("\n");
|
|
273186
|
+
const cmdOmitted = lines > 5 ? `
|
|
273187
|
+
[... ${lines - 5} more lines omitted for compaction]` : "";
|
|
273188
|
+
return { ...msg, content: `[shell succeeded: ${lines} lines, ${content.length} chars — preview preserved]
|
|
273189
|
+
${cmdPreview}${cmdOmitted}` };
|
|
273190
|
+
}
|
|
273042
273191
|
default:
|
|
273043
273192
|
if (content.length > 2e3) {
|
|
273044
|
-
|
|
273193
|
+
const genPreview = contentLines.slice(0, 3).join("\n");
|
|
273194
|
+
const genOmitted = lines > 3 ? `
|
|
273195
|
+
[... ${lines - 3} more lines omitted]` : "";
|
|
273196
|
+
return { ...msg, content: `[${toolName ?? "tool"} succeeded: ${content.length} chars — preview preserved]
|
|
273197
|
+
${genPreview}${genOmitted}` };
|
|
273045
273198
|
}
|
|
273046
273199
|
return msg;
|
|
273047
273200
|
}
|
|
@@ -273141,19 +273294,45 @@ ${headContent}${sigLines ? "\n[key signatures]: " + sigLines : ""}`;
|
|
|
273141
273294
|
const cmd = String(tc.args.command || "").slice(0, 100);
|
|
273142
273295
|
const hasError = content.startsWith("Error:") || /FAIL|ERR!/i.test(content);
|
|
273143
273296
|
const hasPass = /PASS|passed|✓|success/i.test(content);
|
|
273297
|
+
const hasMaskedSuccess = /^\[shell succeeded:|^\[.+ succeeded —/.test(content);
|
|
273144
273298
|
let outcome;
|
|
273145
273299
|
if (hasError) {
|
|
273146
273300
|
const errorLines = content.split("\n").filter((l2) => /error|FAIL|✗|×|ERR!/i.test(l2)).slice(0, 3);
|
|
273147
273301
|
outcome = errorLines.length > 0 ? errorLines.join("; ").slice(0, 200) : content.slice(0, 200);
|
|
273148
273302
|
errors.push(`\`${cmd}\`: ${outcome.slice(0, 150)}`);
|
|
273149
|
-
} else if (hasPass) {
|
|
273150
|
-
|
|
273303
|
+
} else if (hasPass || hasMaskedSuccess) {
|
|
273304
|
+
const previewLines = content.split("\n").slice(1, 4).join(" ").trim();
|
|
273305
|
+
outcome = previewLines ? `succeeded: ${previewLines.slice(0, 120)}` : "succeeded";
|
|
273151
273306
|
} else {
|
|
273152
273307
|
outcome = content.slice(0, 150);
|
|
273153
273308
|
}
|
|
273154
273309
|
commandResults.push({ cmd, outcome });
|
|
273155
273310
|
break;
|
|
273156
273311
|
}
|
|
273312
|
+
case "web_fetch": {
|
|
273313
|
+
const url = String(tc.args.url || "").slice(0, 120);
|
|
273314
|
+
if (content.startsWith("Error:")) {
|
|
273315
|
+
const errMsg = content.slice(0, 200);
|
|
273316
|
+
errors.push(`web_fetch \`${url}\`: ${errMsg}`);
|
|
273317
|
+
commandResults.push({ cmd: `web_fetch ${url}`, outcome: errMsg });
|
|
273318
|
+
} else {
|
|
273319
|
+
const preview = content.split("\n").slice(0, 3).join(" ").trim().slice(0, 150);
|
|
273320
|
+
commandResults.push({ cmd: `web_fetch ${url}`, outcome: `succeeded: ${preview || `${content.length} chars`}` });
|
|
273321
|
+
}
|
|
273322
|
+
break;
|
|
273323
|
+
}
|
|
273324
|
+
case "web_search": {
|
|
273325
|
+
const query = String(tc.args.query || "").slice(0, 80);
|
|
273326
|
+
if (content.startsWith("Error:")) {
|
|
273327
|
+
errors.push(`web_search "${query}": ${content.slice(0, 200)}`);
|
|
273328
|
+
commandResults.push({ cmd: `web_search "${query}"`, outcome: content.slice(0, 200) });
|
|
273329
|
+
} else {
|
|
273330
|
+
const resultCount = (content.match(/\n/g) || []).length;
|
|
273331
|
+
const preview = content.split("\n").slice(0, 2).join(" ").trim().slice(0, 100);
|
|
273332
|
+
commandResults.push({ cmd: `web_search "${query}"`, outcome: `${resultCount} results: ${preview}` });
|
|
273333
|
+
}
|
|
273334
|
+
break;
|
|
273335
|
+
}
|
|
273157
273336
|
case "grep_search": {
|
|
273158
273337
|
const pattern = String(tc.args.pattern || "");
|
|
273159
273338
|
const matchCount = (content.match(/\n/g) || []).length;
|
|
@@ -328450,6 +328629,26 @@ ${entry.fullContent}`
|
|
|
328450
328629
|
break;
|
|
328451
328630
|
case "complete":
|
|
328452
328631
|
break;
|
|
328632
|
+
// -- Live observability hooks --
|
|
328633
|
+
case "debug_context":
|
|
328634
|
+
if (config.verbose) {
|
|
328635
|
+
const snap = event.contextSnapshot;
|
|
328636
|
+
if (snap) {
|
|
328637
|
+
contentWrite(() => renderInfo(
|
|
328638
|
+
`\x1B[38;5;243m[ctx] ${snap.messageCount} msgs | ~${snap.estimatedTokens} tok | headroom: ${snap.headroom} | tools: ${snap.toolCallCount} | littleman: ${snap.littlemanOutcomes} tracked\x1B[0m`
|
|
328639
|
+
));
|
|
328640
|
+
}
|
|
328641
|
+
}
|
|
328642
|
+
break;
|
|
328643
|
+
case "debug_littleman":
|
|
328644
|
+
if (config.verbose && event.littlemanAction) {
|
|
328645
|
+
const lm = event.littlemanAction;
|
|
328646
|
+
const intervention = lm.intervention ? ` | INTERVENTION: ${lm.intervention}` : "";
|
|
328647
|
+
contentWrite(() => renderInfo(
|
|
328648
|
+
`\x1B[38;5;178m[littleman] ${lm.recentSuccesses} ok, ${lm.recentFailures} err${intervention}\x1B[0m`
|
|
328649
|
+
));
|
|
328650
|
+
}
|
|
328651
|
+
break;
|
|
328453
328652
|
}
|
|
328454
328653
|
});
|
|
328455
328654
|
const sessionId = `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
SCRAPE_API_KEY=948a46e9c7b94025aba48cf43f823950
|
|
2
|
+
SCRAPE_BIND=0.0.0.0
|
|
3
|
+
SCRAPE_PORT=8130
|
|
4
|
+
SCRAPE_REQUIRE_AUTH=0
|
|
5
|
+
SCRAPE_MAX_CONCURRENCY=4
|
|
6
|
+
SCRAPE_QUEUE_TIMEOUT_S=0
|
|
7
|
+
SCRAPE_RATE_LIMIT_RPS=60
|
|
8
|
+
SCRAPE_RATE_LIMIT_BURST=180
|
|
9
|
+
SCRAPE_RATE_LIMIT_LOCAL_BYPASS=1
|
|
10
|
+
SCRAPE_RATE_LIMIT_DISABLED=0
|
|
11
|
+
SCRAPE_RATE_LIMIT_WHITELIST=
|
|
12
|
+
SCRAPE_FILE_TTL_S=900
|
|
13
|
+
SCRAPE_FRAME_KEEPALIVE_S=45
|
|
14
|
+
SCRAPE_HEADLESS_DEFAULT=1
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
ok
|
package/package.json
CHANGED