open-agents-ai 0.187.348 → 0.187.349
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +311 -0
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -269809,6 +269809,268 @@ var init_dist7 = __esm({
|
|
|
269809
269809
|
}
|
|
269810
269810
|
});
|
|
269811
269811
|
|
|
269812
|
+
// packages/orchestrator/dist/reflectionBuffer.js
|
|
269813
|
+
var MAX_REFLECTIONS, MAX_TOTAL, TaskReflectionBuffer;
|
|
269814
|
+
var init_reflectionBuffer = __esm({
|
|
269815
|
+
"packages/orchestrator/dist/reflectionBuffer.js"() {
|
|
269816
|
+
"use strict";
|
|
269817
|
+
MAX_REFLECTIONS = 5;
|
|
269818
|
+
MAX_TOTAL = 50;
|
|
269819
|
+
TaskReflectionBuffer = class {
|
|
269820
|
+
state;
|
|
269821
|
+
persistPath;
|
|
269822
|
+
constructor(persistPath) {
|
|
269823
|
+
this.persistPath = persistPath ?? null;
|
|
269824
|
+
if (this.persistPath) {
|
|
269825
|
+
try {
|
|
269826
|
+
const { readFileSync: readFileSync69, existsSync: existsSync89 } = __require("node:fs");
|
|
269827
|
+
if (existsSync89(this.persistPath)) {
|
|
269828
|
+
this.state = JSON.parse(readFileSync69(this.persistPath, "utf-8"));
|
|
269829
|
+
return;
|
|
269830
|
+
}
|
|
269831
|
+
} catch {
|
|
269832
|
+
}
|
|
269833
|
+
}
|
|
269834
|
+
this.state = {
|
|
269835
|
+
reflections: [],
|
|
269836
|
+
maxReflections: MAX_REFLECTIONS,
|
|
269837
|
+
totalFailures: 0,
|
|
269838
|
+
totalConsumed: 0
|
|
269839
|
+
};
|
|
269840
|
+
}
|
|
269841
|
+
/** Get the current number of stored reflections */
|
|
269842
|
+
get count() {
|
|
269843
|
+
return this.state.reflections.length;
|
|
269844
|
+
}
|
|
269845
|
+
get totalFailures() {
|
|
269846
|
+
return this.state.totalFailures;
|
|
269847
|
+
}
|
|
269848
|
+
get totalConsumed() {
|
|
269849
|
+
return this.state.totalConsumed;
|
|
269850
|
+
}
|
|
269851
|
+
/**
|
|
269852
|
+
* Generate and store a reflection from a failed task.
|
|
269853
|
+
*
|
|
269854
|
+
* This is the Self-Reflector from Reflexion's three-model architecture.
|
|
269855
|
+
* The reflection is deterministic (no LLM call needed) — it analyzes
|
|
269856
|
+
* the failure trajectory and generates typed guidance.
|
|
269857
|
+
*
|
|
269858
|
+
* @param taskGoal The original task prompt
|
|
269859
|
+
* @param sessionId Current session ID
|
|
269860
|
+
* @param turnsSpent How many turns were used
|
|
269861
|
+
* @param failedApproaches List of failed approaches from _taskState
|
|
269862
|
+
* @param toolCallLog Recent tool call history
|
|
269863
|
+
* @param lastError The final error or failure reason
|
|
269864
|
+
*/
|
|
269865
|
+
addReflection(params) {
|
|
269866
|
+
const { taskGoal, sessionId, turnsSpent, failedApproaches, toolCallLog, lastError, failedPaths } = params;
|
|
269867
|
+
const taskFingerprint = this.computeFingerprint(taskGoal);
|
|
269868
|
+
const errorType = this.classifyError(toolCallLog, failedApproaches, lastError, turnsSpent);
|
|
269869
|
+
const failedTools = [...new Set(toolCallLog.filter((t2) => !t2.success).map((t2) => t2.tool))].slice(0, 5);
|
|
269870
|
+
const { whatFailed, whatToDoDifferently, confidence } = this.generateGuidance(errorType, failedApproaches, toolCallLog, lastError, turnsSpent);
|
|
269871
|
+
const reflection = {
|
|
269872
|
+
timestamp: Date.now(),
|
|
269873
|
+
sessionId,
|
|
269874
|
+
taskGoal: taskGoal.slice(0, 200),
|
|
269875
|
+
taskFingerprint,
|
|
269876
|
+
whatFailed,
|
|
269877
|
+
whatToDoDifferently,
|
|
269878
|
+
errorType,
|
|
269879
|
+
failedTools,
|
|
269880
|
+
failedPaths: (failedPaths ?? []).slice(0, 5),
|
|
269881
|
+
turnsSpent,
|
|
269882
|
+
confidence
|
|
269883
|
+
};
|
|
269884
|
+
this.state.reflections.unshift(reflection);
|
|
269885
|
+
this.state.totalFailures++;
|
|
269886
|
+
const byFingerprint = /* @__PURE__ */ new Map();
|
|
269887
|
+
this.state.reflections = this.state.reflections.filter((r2) => {
|
|
269888
|
+
const count = (byFingerprint.get(r2.taskFingerprint) ?? 0) + 1;
|
|
269889
|
+
byFingerprint.set(r2.taskFingerprint, count);
|
|
269890
|
+
return count <= this.state.maxReflections;
|
|
269891
|
+
});
|
|
269892
|
+
if (this.state.reflections.length > MAX_TOTAL) {
|
|
269893
|
+
this.state.reflections = this.state.reflections.slice(0, MAX_TOTAL);
|
|
269894
|
+
}
|
|
269895
|
+
this.persist();
|
|
269896
|
+
return reflection;
|
|
269897
|
+
}
|
|
269898
|
+
/**
|
|
269899
|
+
* Retrieve relevant reflections for a new task attempt.
|
|
269900
|
+
*
|
|
269901
|
+
* Returns reflections from similar past failures, sorted by relevance.
|
|
269902
|
+
* These should be prepended to the system prompt for the next attempt.
|
|
269903
|
+
*
|
|
269904
|
+
* @param taskGoal The current task goal
|
|
269905
|
+
* @param maxResults Max reflections to return (default: 3)
|
|
269906
|
+
*/
|
|
269907
|
+
getRelevantReflections(taskGoal, maxResults = 3) {
|
|
269908
|
+
if (this.state.reflections.length === 0)
|
|
269909
|
+
return [];
|
|
269910
|
+
const fingerprint = this.computeFingerprint(taskGoal);
|
|
269911
|
+
const goalLower = taskGoal.toLowerCase();
|
|
269912
|
+
const goalWords = new Set(goalLower.split(/\s+/).filter((w) => w.length > 3));
|
|
269913
|
+
const scored = this.state.reflections.map((r2) => {
|
|
269914
|
+
let score = 0;
|
|
269915
|
+
if (r2.taskFingerprint === fingerprint)
|
|
269916
|
+
score += 5;
|
|
269917
|
+
const rWords = new Set(r2.taskGoal.toLowerCase().split(/\s+/).filter((w) => w.length > 3));
|
|
269918
|
+
let overlap = 0;
|
|
269919
|
+
for (const w of goalWords)
|
|
269920
|
+
if (rWords.has(w))
|
|
269921
|
+
overlap++;
|
|
269922
|
+
score += overlap;
|
|
269923
|
+
const hoursAgo = (Date.now() - r2.timestamp) / 36e5;
|
|
269924
|
+
score += Math.max(0, 2 - hoursAgo * 0.1);
|
|
269925
|
+
score += r2.confidence * 2;
|
|
269926
|
+
return { reflection: r2, score };
|
|
269927
|
+
});
|
|
269928
|
+
scored.sort((a2, b) => b.score - a2.score);
|
|
269929
|
+
const results = scored.slice(0, maxResults).filter((s2) => s2.score > 1).map((s2) => s2.reflection);
|
|
269930
|
+
this.state.totalConsumed += results.length;
|
|
269931
|
+
this.persist();
|
|
269932
|
+
return results;
|
|
269933
|
+
}
|
|
269934
|
+
/**
|
|
269935
|
+
* Format reflections as a system prompt injection.
|
|
269936
|
+
* Returns the text to prepend to the task context.
|
|
269937
|
+
*/
|
|
269938
|
+
formatForContext(reflections) {
|
|
269939
|
+
if (reflections.length === 0)
|
|
269940
|
+
return "";
|
|
269941
|
+
const lines = [
|
|
269942
|
+
"[Prior Failure Reflections — learn from these mistakes]",
|
|
269943
|
+
""
|
|
269944
|
+
];
|
|
269945
|
+
for (let i2 = 0; i2 < reflections.length; i2++) {
|
|
269946
|
+
const r2 = reflections[i2];
|
|
269947
|
+
lines.push(`Reflection ${i2 + 1} (${r2.errorType}):`);
|
|
269948
|
+
lines.push(` What failed: ${r2.whatFailed}`);
|
|
269949
|
+
lines.push(` Do instead: ${r2.whatToDoDifferently}`);
|
|
269950
|
+
if (r2.failedTools.length > 0) {
|
|
269951
|
+
lines.push(` Avoid: ${r2.failedTools.join(", ")} with the same approach`);
|
|
269952
|
+
}
|
|
269953
|
+
lines.push("");
|
|
269954
|
+
}
|
|
269955
|
+
lines.push("Apply these lessons. Do NOT repeat the same mistakes.");
|
|
269956
|
+
return lines.join("\n");
|
|
269957
|
+
}
|
|
269958
|
+
// ─── Internal ──────────────────────────────────────────────────────────
|
|
269959
|
+
/** Compute a fingerprint for task similarity matching */
|
|
269960
|
+
computeFingerprint(taskGoal) {
|
|
269961
|
+
const lower = taskGoal.toLowerCase();
|
|
269962
|
+
const significant = lower.split(/\s+/).filter((w) => w.length > 4).filter((w) => !["please", "could", "would", "should", "about", "these", "those", "their", "there", "which"].includes(w)).sort().slice(0, 8).join("_");
|
|
269963
|
+
return significant || "generic";
|
|
269964
|
+
}
|
|
269965
|
+
/** Classify the error type from evidence */
|
|
269966
|
+
classifyError(toolCallLog, failedApproaches, lastError, turnsSpent) {
|
|
269967
|
+
const errorLower = lastError.toLowerCase();
|
|
269968
|
+
const allErrors = toolCallLog.filter((t2) => !t2.success).map((t2) => (t2.error ?? "").toLowerCase());
|
|
269969
|
+
if (allErrors.some((e2) => e2.includes("enoent") || e2.includes("not found")))
|
|
269970
|
+
return "search_fail";
|
|
269971
|
+
if (allErrors.some((e2) => e2.includes("permission") || e2.includes("eacces")))
|
|
269972
|
+
return "permission";
|
|
269973
|
+
if (allErrors.some((e2) => e2.includes("module") || e2.includes("package") || e2.includes("dependency")))
|
|
269974
|
+
return "dependency";
|
|
269975
|
+
if (turnsSpent >= 15 && failedApproaches.length >= 3)
|
|
269976
|
+
return "repetition";
|
|
269977
|
+
if (errorLower.includes("timeout") || errorLower.includes("turn limit"))
|
|
269978
|
+
return "timeout";
|
|
269979
|
+
if (errorLower.includes("incomplete") || errorLower.includes("partial"))
|
|
269980
|
+
return "incomplete";
|
|
269981
|
+
const uniqueTools = new Set(toolCallLog.map((t2) => t2.tool));
|
|
269982
|
+
const failRate = toolCallLog.filter((t2) => !t2.success).length / Math.max(1, toolCallLog.length);
|
|
269983
|
+
if (failRate > 0.5 && uniqueTools.size <= 2)
|
|
269984
|
+
return "tool_misuse";
|
|
269985
|
+
if (failRate > 0.3)
|
|
269986
|
+
return "logic";
|
|
269987
|
+
return "other";
|
|
269988
|
+
}
|
|
269989
|
+
/** Generate actionable guidance following Self-Refine's criterion */
|
|
269990
|
+
generateGuidance(errorType, failedApproaches, toolCallLog, lastError, turnsSpent) {
|
|
269991
|
+
const failedTools = toolCallLog.filter((t2) => !t2.success);
|
|
269992
|
+
const lastFailedTool = failedTools[failedTools.length - 1];
|
|
269993
|
+
switch (errorType) {
|
|
269994
|
+
case "search_fail":
|
|
269995
|
+
return {
|
|
269996
|
+
whatFailed: `Could not find the target file/function. Tried: ${failedApproaches.slice(0, 2).join(", ") || lastError.slice(0, 80)}`,
|
|
269997
|
+
whatToDoDifferently: "Use grep_search with broader patterns first. Try list_directory to verify paths. Check for typos in file names. Search parent directories.",
|
|
269998
|
+
confidence: 0.85
|
|
269999
|
+
};
|
|
270000
|
+
case "tool_misuse":
|
|
270001
|
+
return {
|
|
270002
|
+
whatFailed: `Wrong tool or arguments for the task. Tool ${lastFailedTool?.tool ?? "unknown"} failed: ${lastFailedTool?.error?.slice(0, 60) ?? lastError.slice(0, 60)}`,
|
|
270003
|
+
whatToDoDifferently: `Try a different tool. If file_edit failed, try file_write. If shell failed with a complex command, break it into simpler steps. Read the file first before editing.`,
|
|
270004
|
+
confidence: 0.8
|
|
270005
|
+
};
|
|
270006
|
+
case "repetition":
|
|
270007
|
+
return {
|
|
270008
|
+
whatFailed: `Got stuck in a loop after ${turnsSpent} turns trying ${failedApproaches.length} approaches. The same tools kept failing with similar errors.`,
|
|
270009
|
+
whatToDoDifferently: "Stop and try a completely different strategy. If you were editing, try rewriting from scratch. If searching failed, try a broader or narrower query. Ask yourself: what assumption am I making that might be wrong?",
|
|
270010
|
+
confidence: 0.9
|
|
270011
|
+
};
|
|
270012
|
+
case "timeout":
|
|
270013
|
+
return {
|
|
270014
|
+
whatFailed: `Ran out of turns (${turnsSpent}). The task was not completed in the allocated budget.`,
|
|
270015
|
+
whatToDoDifferently: "Start with the most critical action immediately — skip planning. Do fewer tool calls. Focus on the single most important sub-task first.",
|
|
270016
|
+
confidence: 0.75
|
|
270017
|
+
};
|
|
270018
|
+
case "permission":
|
|
270019
|
+
return {
|
|
270020
|
+
whatFailed: `Permission denied: ${lastError.slice(0, 80)}`,
|
|
270021
|
+
whatToDoDifferently: "Check file permissions first. Use sudo if allowed. Try writing to /tmp/ instead. Avoid modifying system files.",
|
|
270022
|
+
confidence: 0.9
|
|
270023
|
+
};
|
|
270024
|
+
case "dependency":
|
|
270025
|
+
return {
|
|
270026
|
+
whatFailed: `Missing dependency: ${lastError.slice(0, 80)}`,
|
|
270027
|
+
whatToDoDifferently: "Install the dependency first (npm install, pip install, apt install). Check if a virtual environment is needed. Verify the package name is correct.",
|
|
270028
|
+
confidence: 0.85
|
|
270029
|
+
};
|
|
270030
|
+
case "incomplete":
|
|
270031
|
+
return {
|
|
270032
|
+
whatFailed: `Task was only partially completed. ${failedApproaches.length > 0 ? `Approaches tried: ${failedApproaches[0]}` : ""}`,
|
|
270033
|
+
whatToDoDifferently: "Complete ALL steps before calling task_complete. Check your todo list. Verify each file was actually modified. Run tests to confirm.",
|
|
270034
|
+
confidence: 0.7
|
|
270035
|
+
};
|
|
270036
|
+
case "logic":
|
|
270037
|
+
return {
|
|
270038
|
+
whatFailed: `The approach was logically flawed. Multiple tools failed (${failedTools.length}/${toolCallLog.length} calls).`,
|
|
270039
|
+
whatToDoDifferently: "Rethink the approach from scratch. Read the relevant code before making changes. Test your understanding by reading the file first, then planning the edit.",
|
|
270040
|
+
confidence: 0.6
|
|
270041
|
+
};
|
|
270042
|
+
case "semantic":
|
|
270043
|
+
return {
|
|
270044
|
+
whatFailed: `Misunderstood the task requirement. ${lastError.slice(0, 80)}`,
|
|
270045
|
+
whatToDoDifferently: "Re-read the task prompt carefully. Identify exactly what output is expected. If ambiguous, focus on the most literal interpretation.",
|
|
270046
|
+
confidence: 0.5
|
|
270047
|
+
};
|
|
270048
|
+
default:
|
|
270049
|
+
return {
|
|
270050
|
+
whatFailed: `Task failed: ${lastError.slice(0, 100)}`,
|
|
270051
|
+
whatToDoDifferently: "Try a different approach. Read relevant files first. Break the task into smaller steps.",
|
|
270052
|
+
confidence: 0.4
|
|
270053
|
+
};
|
|
270054
|
+
}
|
|
270055
|
+
}
|
|
270056
|
+
/** Persist to disk */
|
|
270057
|
+
persist() {
|
|
270058
|
+
if (!this.persistPath)
|
|
270059
|
+
return;
|
|
270060
|
+
try {
|
|
270061
|
+
const { writeFileSync: writeFileSync50, mkdirSync: mkdirSync56, existsSync: existsSync89 } = __require("node:fs");
|
|
270062
|
+
const { join: join108 } = __require("node:path");
|
|
270063
|
+
const dir = join108(this.persistPath, "..");
|
|
270064
|
+
if (!existsSync89(dir))
|
|
270065
|
+
mkdirSync56(dir, { recursive: true });
|
|
270066
|
+
writeFileSync50(this.persistPath, JSON.stringify(this.state, null, 2));
|
|
270067
|
+
} catch {
|
|
270068
|
+
}
|
|
270069
|
+
}
|
|
270070
|
+
};
|
|
270071
|
+
}
|
|
270072
|
+
});
|
|
270073
|
+
|
|
269812
270074
|
// packages/orchestrator/dist/tool-batching.js
|
|
269813
270075
|
function isConcurrencySafe(toolName, readOnlyHints) {
|
|
269814
270076
|
if (CONCURRENT_SAFE_TOOLS.has(toolName))
|
|
@@ -270538,6 +270800,7 @@ var init_agenticRunner = __esm({
|
|
|
270538
270800
|
init_pressure_gate();
|
|
270539
270801
|
init_dist4();
|
|
270540
270802
|
init_dist7();
|
|
270803
|
+
init_reflectionBuffer();
|
|
270541
270804
|
init_tool_batching();
|
|
270542
270805
|
init_hooks();
|
|
270543
270806
|
init_app_state();
|
|
@@ -271486,6 +271749,27 @@ TASK: ${task}` : task;
|
|
|
271486
271749
|
{ role: "system", content: systemPrompt },
|
|
271487
271750
|
{ role: "user", content: userContent }
|
|
271488
271751
|
];
|
|
271752
|
+
try {
|
|
271753
|
+
if (!this._reflectionBuffer) {
|
|
271754
|
+
const oaDir = this._workingDirectory ? _pathJoin(this._workingDirectory, ".oa", "memory") : null;
|
|
271755
|
+
if (oaDir) {
|
|
271756
|
+
this._reflectionBuffer = new TaskReflectionBuffer(_pathJoin(oaDir, "reflections.json"));
|
|
271757
|
+
}
|
|
271758
|
+
}
|
|
271759
|
+
if (this._reflectionBuffer) {
|
|
271760
|
+
const reflections = this._reflectionBuffer.getRelevantReflections(cleanedTask, 3);
|
|
271761
|
+
if (reflections.length > 0) {
|
|
271762
|
+
const reflectionCtx = this._reflectionBuffer.formatForContext(reflections);
|
|
271763
|
+
messages2.push({ role: "system", content: reflectionCtx });
|
|
271764
|
+
this.emit({
|
|
271765
|
+
type: "status",
|
|
271766
|
+
content: `Reflexion: injected ${reflections.length} prior failure reflection(s) for this task type`,
|
|
271767
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
271768
|
+
});
|
|
271769
|
+
}
|
|
271770
|
+
}
|
|
271771
|
+
} catch {
|
|
271772
|
+
}
|
|
271489
271773
|
let toolDefs = await this.buildToolDefinitions();
|
|
271490
271774
|
const baseInstructions = getSystemPromptForTier(this.options.modelTier);
|
|
271491
271775
|
this.checkPromptToolParity(baseInstructions, toolDefs);
|
|
@@ -273410,6 +273694,29 @@ Full content available via: repl_exec(code="data = retrieve('${handleId}')") or
|
|
|
273410
273694
|
});
|
|
273411
273695
|
} catch {
|
|
273412
273696
|
}
|
|
273697
|
+
if (this._reflectionBuffer && !completed) {
|
|
273698
|
+
try {
|
|
273699
|
+
const reflection = this._reflectionBuffer.addReflection({
|
|
273700
|
+
taskGoal: cleanedTask,
|
|
273701
|
+
sessionId: this._sessionId,
|
|
273702
|
+
turnsSpent: this._taskState.toolCallCount,
|
|
273703
|
+
failedApproaches: this._taskState.failedApproaches,
|
|
273704
|
+
toolCallLog: toolCallLog.map((t2) => ({
|
|
273705
|
+
tool: t2.name,
|
|
273706
|
+
success: t2.success ?? false,
|
|
273707
|
+
error: t2.outputPreview?.slice(0, 100)
|
|
273708
|
+
})),
|
|
273709
|
+
lastError: summary || "Task did not complete",
|
|
273710
|
+
failedPaths: [...this._taskState.modifiedFiles.keys()].slice(0, 5)
|
|
273711
|
+
});
|
|
273712
|
+
this.emit({
|
|
273713
|
+
type: "status",
|
|
273714
|
+
content: `Reflexion: stored ${reflection.errorType} reflection — "${reflection.whatToDoDifferently.slice(0, 80)}"`,
|
|
273715
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
273716
|
+
});
|
|
273717
|
+
} catch {
|
|
273718
|
+
}
|
|
273719
|
+
}
|
|
273413
273720
|
if (this._episodeStore) {
|
|
273414
273721
|
try {
|
|
273415
273722
|
this._episodeStore.insert({
|
|
@@ -274188,6 +274495,10 @@ ${trimmedNew}`;
|
|
|
274188
274495
|
/** WO-FIX-C: Tool fingerprints the littleman has flagged as redundant.
|
|
274189
274496
|
* Checked in executeSingle to block re-execution and return cached data. */
|
|
274190
274497
|
_littlemanRedundantBlocks = /* @__PURE__ */ new Set();
|
|
274498
|
+
/** Reflexion pattern: task-local failure-indexed reflection buffer.
|
|
274499
|
+
* Generates typed self-reflections on task failure and injects them
|
|
274500
|
+
* into the next attempt's context for active learning. */
|
|
274501
|
+
_reflectionBuffer = null;
|
|
274191
274502
|
/**
|
|
274192
274503
|
* Littleman observer: post-turn meta-analysis.
|
|
274193
274504
|
*
|
package/package.json
CHANGED