npm - open-agents-ai - Versions diffs - 0.187.348 → 0.187.349 - Mend

open-agents-ai 0.187.348 → 0.187.349

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2) hide show

package/dist/index.js +311 -0
package/package.json +1 -1

package/dist/index.js CHANGED Viewed

@@ -269809,6 +269809,268 @@ var init_dist7 = __esm({
   }
 });
+// packages/orchestrator/dist/reflectionBuffer.js
+var MAX_REFLECTIONS, MAX_TOTAL, TaskReflectionBuffer;
+var init_reflectionBuffer = __esm({
+  "packages/orchestrator/dist/reflectionBuffer.js"() {
+    "use strict";
+    MAX_REFLECTIONS = 5;
+    MAX_TOTAL = 50;
+    TaskReflectionBuffer = class {
+      state;
+      persistPath;
+      constructor(persistPath) {
+        this.persistPath = persistPath ?? null;
+        if (this.persistPath) {
+          try {
+            const { readFileSync: readFileSync69, existsSync: existsSync89 } = __require("node:fs");
+            if (existsSync89(this.persistPath)) {
+              this.state = JSON.parse(readFileSync69(this.persistPath, "utf-8"));
+              return;
+            }
+          } catch {
+          }
+        }
+        this.state = {
+          reflections: [],
+          maxReflections: MAX_REFLECTIONS,
+          totalFailures: 0,
+          totalConsumed: 0
+        };
+      }
+      /** Get the current number of stored reflections */
+      get count() {
+        return this.state.reflections.length;
+      }
+      get totalFailures() {
+        return this.state.totalFailures;
+      }
+      get totalConsumed() {
+        return this.state.totalConsumed;
+      }
+      /**
+       * Generate and store a reflection from a failed task.
+       *
+       * This is the Self-Reflector from Reflexion's three-model architecture.
+       * The reflection is deterministic (no LLM call needed) — it analyzes
+       * the failure trajectory and generates typed guidance.
+       *
+       * @param taskGoal The original task prompt
+       * @param sessionId Current session ID
+       * @param turnsSpent How many turns were used
+       * @param failedApproaches List of failed approaches from _taskState
+       * @param toolCallLog Recent tool call history
+       * @param lastError The final error or failure reason
+       */
+      addReflection(params) {
+        const { taskGoal, sessionId, turnsSpent, failedApproaches, toolCallLog, lastError, failedPaths } = params;
+        const taskFingerprint = this.computeFingerprint(taskGoal);
+        const errorType = this.classifyError(toolCallLog, failedApproaches, lastError, turnsSpent);
+        const failedTools = [...new Set(toolCallLog.filter((t2) => !t2.success).map((t2) => t2.tool))].slice(0, 5);
+        const { whatFailed, whatToDoDifferently, confidence } = this.generateGuidance(errorType, failedApproaches, toolCallLog, lastError, turnsSpent);
+        const reflection = {
+          timestamp: Date.now(),
+          sessionId,
+          taskGoal: taskGoal.slice(0, 200),
+          taskFingerprint,
+          whatFailed,
+          whatToDoDifferently,
+          errorType,
+          failedTools,
+          failedPaths: (failedPaths ?? []).slice(0, 5),
+          turnsSpent,
+          confidence
+        };
+        this.state.reflections.unshift(reflection);
+        this.state.totalFailures++;
+        const byFingerprint = /* @__PURE__ */ new Map();
+        this.state.reflections = this.state.reflections.filter((r2) => {
+          const count = (byFingerprint.get(r2.taskFingerprint) ?? 0) + 1;
+          byFingerprint.set(r2.taskFingerprint, count);
+          return count <= this.state.maxReflections;
+        });
+        if (this.state.reflections.length > MAX_TOTAL) {
+          this.state.reflections = this.state.reflections.slice(0, MAX_TOTAL);
+        }
+        this.persist();
+        return reflection;
+      }
+      /**
+       * Retrieve relevant reflections for a new task attempt.
+       *
+       * Returns reflections from similar past failures, sorted by relevance.
+       * These should be prepended to the system prompt for the next attempt.
+       *
+       * @param taskGoal The current task goal
+       * @param maxResults Max reflections to return (default: 3)
+       */
+      getRelevantReflections(taskGoal, maxResults = 3) {
+        if (this.state.reflections.length === 0)
+          return [];
+        const fingerprint = this.computeFingerprint(taskGoal);
+        const goalLower = taskGoal.toLowerCase();
+        const goalWords = new Set(goalLower.split(/\s+/).filter((w) => w.length > 3));
+        const scored = this.state.reflections.map((r2) => {
+          let score = 0;
+          if (r2.taskFingerprint === fingerprint)
+            score += 5;
+          const rWords = new Set(r2.taskGoal.toLowerCase().split(/\s+/).filter((w) => w.length > 3));
+          let overlap = 0;
+          for (const w of goalWords)
+            if (rWords.has(w))
+              overlap++;
+          score += overlap;
+          const hoursAgo = (Date.now() - r2.timestamp) / 36e5;
+          score += Math.max(0, 2 - hoursAgo * 0.1);
+          score += r2.confidence * 2;
+          return { reflection: r2, score };
+        });
+        scored.sort((a2, b) => b.score - a2.score);
+        const results = scored.slice(0, maxResults).filter((s2) => s2.score > 1).map((s2) => s2.reflection);
+        this.state.totalConsumed += results.length;
+        this.persist();
+        return results;
+      }
+      /**
+       * Format reflections as a system prompt injection.
+       * Returns the text to prepend to the task context.
+       */
+      formatForContext(reflections) {
+        if (reflections.length === 0)
+          return "";
+        const lines = [
+          "[Prior Failure Reflections — learn from these mistakes]",
+          ""
+        ];
+        for (let i2 = 0; i2 < reflections.length; i2++) {
+          const r2 = reflections[i2];
+          lines.push(`Reflection ${i2 + 1} (${r2.errorType}):`);
+          lines.push(`  What failed: ${r2.whatFailed}`);
+          lines.push(`  Do instead: ${r2.whatToDoDifferently}`);
+          if (r2.failedTools.length > 0) {
+            lines.push(`  Avoid: ${r2.failedTools.join(", ")} with the same approach`);
+          }
+          lines.push("");
+        }
+        lines.push("Apply these lessons. Do NOT repeat the same mistakes.");
+        return lines.join("\n");
+      }
+      // ─── Internal ──────────────────────────────────────────────────────────
+      /** Compute a fingerprint for task similarity matching */
+      computeFingerprint(taskGoal) {
+        const lower = taskGoal.toLowerCase();
+        const significant = lower.split(/\s+/).filter((w) => w.length > 4).filter((w) => !["please", "could", "would", "should", "about", "these", "those", "their", "there", "which"].includes(w)).sort().slice(0, 8).join("_");
+        return significant || "generic";
+      }
+      /** Classify the error type from evidence */
+      classifyError(toolCallLog, failedApproaches, lastError, turnsSpent) {
+        const errorLower = lastError.toLowerCase();
+        const allErrors = toolCallLog.filter((t2) => !t2.success).map((t2) => (t2.error ?? "").toLowerCase());
+        if (allErrors.some((e2) => e2.includes("enoent") || e2.includes("not found")))
+          return "search_fail";
+        if (allErrors.some((e2) => e2.includes("permission") || e2.includes("eacces")))
+          return "permission";
+        if (allErrors.some((e2) => e2.includes("module") || e2.includes("package") || e2.includes("dependency")))
+          return "dependency";
+        if (turnsSpent >= 15 && failedApproaches.length >= 3)
+          return "repetition";
+        if (errorLower.includes("timeout") || errorLower.includes("turn limit"))
+          return "timeout";
+        if (errorLower.includes("incomplete") || errorLower.includes("partial"))
+          return "incomplete";
+        const uniqueTools = new Set(toolCallLog.map((t2) => t2.tool));
+        const failRate = toolCallLog.filter((t2) => !t2.success).length / Math.max(1, toolCallLog.length);
+        if (failRate > 0.5 && uniqueTools.size <= 2)
+          return "tool_misuse";
+        if (failRate > 0.3)
+          return "logic";
+        return "other";
+      }
+      /** Generate actionable guidance following Self-Refine's criterion */
+      generateGuidance(errorType, failedApproaches, toolCallLog, lastError, turnsSpent) {
+        const failedTools = toolCallLog.filter((t2) => !t2.success);
+        const lastFailedTool = failedTools[failedTools.length - 1];
+        switch (errorType) {
+          case "search_fail":
+            return {
+              whatFailed: `Could not find the target file/function. Tried: ${failedApproaches.slice(0, 2).join(", ") || lastError.slice(0, 80)}`,
+              whatToDoDifferently: "Use grep_search with broader patterns first. Try list_directory to verify paths. Check for typos in file names. Search parent directories.",
+              confidence: 0.85
+            };
+          case "tool_misuse":
+            return {
+              whatFailed: `Wrong tool or arguments for the task. Tool ${lastFailedTool?.tool ?? "unknown"} failed: ${lastFailedTool?.error?.slice(0, 60) ?? lastError.slice(0, 60)}`,
+              whatToDoDifferently: `Try a different tool. If file_edit failed, try file_write. If shell failed with a complex command, break it into simpler steps. Read the file first before editing.`,
+              confidence: 0.8
+            };
+          case "repetition":
+            return {
+              whatFailed: `Got stuck in a loop after ${turnsSpent} turns trying ${failedApproaches.length} approaches. The same tools kept failing with similar errors.`,
+              whatToDoDifferently: "Stop and try a completely different strategy. If you were editing, try rewriting from scratch. If searching failed, try a broader or narrower query. Ask yourself: what assumption am I making that might be wrong?",
+              confidence: 0.9
+            };
+          case "timeout":
+            return {
+              whatFailed: `Ran out of turns (${turnsSpent}). The task was not completed in the allocated budget.`,
+              whatToDoDifferently: "Start with the most critical action immediately — skip planning. Do fewer tool calls. Focus on the single most important sub-task first.",
+              confidence: 0.75
+            };
+          case "permission":
+            return {
+              whatFailed: `Permission denied: ${lastError.slice(0, 80)}`,
+              whatToDoDifferently: "Check file permissions first. Use sudo if allowed. Try writing to /tmp/ instead. Avoid modifying system files.",
+              confidence: 0.9
+            };
+          case "dependency":
+            return {
+              whatFailed: `Missing dependency: ${lastError.slice(0, 80)}`,
+              whatToDoDifferently: "Install the dependency first (npm install, pip install, apt install). Check if a virtual environment is needed. Verify the package name is correct.",
+              confidence: 0.85
+            };
+          case "incomplete":
+            return {
+              whatFailed: `Task was only partially completed. ${failedApproaches.length > 0 ? `Approaches tried: ${failedApproaches[0]}` : ""}`,
+              whatToDoDifferently: "Complete ALL steps before calling task_complete. Check your todo list. Verify each file was actually modified. Run tests to confirm.",
+              confidence: 0.7
+            };
+          case "logic":
+            return {
+              whatFailed: `The approach was logically flawed. Multiple tools failed (${failedTools.length}/${toolCallLog.length} calls).`,
+              whatToDoDifferently: "Rethink the approach from scratch. Read the relevant code before making changes. Test your understanding by reading the file first, then planning the edit.",
+              confidence: 0.6
+            };
+          case "semantic":
+            return {
+              whatFailed: `Misunderstood the task requirement. ${lastError.slice(0, 80)}`,
+              whatToDoDifferently: "Re-read the task prompt carefully. Identify exactly what output is expected. If ambiguous, focus on the most literal interpretation.",
+              confidence: 0.5
+            };
+          default:
+            return {
+              whatFailed: `Task failed: ${lastError.slice(0, 100)}`,
+              whatToDoDifferently: "Try a different approach. Read relevant files first. Break the task into smaller steps.",
+              confidence: 0.4
+            };
+        }
+      }
+      /** Persist to disk */
+      persist() {
+        if (!this.persistPath)
+          return;
+        try {
+          const { writeFileSync: writeFileSync50, mkdirSync: mkdirSync56, existsSync: existsSync89 } = __require("node:fs");
+          const { join: join108 } = __require("node:path");
+          const dir = join108(this.persistPath, "..");
+          if (!existsSync89(dir))
+            mkdirSync56(dir, { recursive: true });
+          writeFileSync50(this.persistPath, JSON.stringify(this.state, null, 2));
+        } catch {
+        }
+      }
+    };
+  }
+});
 // packages/orchestrator/dist/tool-batching.js
 function isConcurrencySafe(toolName, readOnlyHints) {
   if (CONCURRENT_SAFE_TOOLS.has(toolName))
@@ -270538,6 +270800,7 @@ var init_agenticRunner = __esm({
     init_pressure_gate();
     init_dist4();
     init_dist7();
+    init_reflectionBuffer();
     init_tool_batching();
     init_hooks();
     init_app_state();
@@ -271486,6 +271749,27 @@ TASK: ${task}` : task;
           { role: "system", content: systemPrompt },
           { role: "user", content: userContent }
         ];
+        try {
+          if (!this._reflectionBuffer) {
+            const oaDir = this._workingDirectory ? _pathJoin(this._workingDirectory, ".oa", "memory") : null;
+            if (oaDir) {
+              this._reflectionBuffer = new TaskReflectionBuffer(_pathJoin(oaDir, "reflections.json"));
+            }
+          }
+          if (this._reflectionBuffer) {
+            const reflections = this._reflectionBuffer.getRelevantReflections(cleanedTask, 3);
+            if (reflections.length > 0) {
+              const reflectionCtx = this._reflectionBuffer.formatForContext(reflections);
+              messages2.push({ role: "system", content: reflectionCtx });
+              this.emit({
+                type: "status",
+                content: `Reflexion: injected ${reflections.length} prior failure reflection(s) for this task type`,
+                timestamp: (/* @__PURE__ */ new Date()).toISOString()
+              });
+            }
+          }
+        } catch {
+        }
         let toolDefs = await this.buildToolDefinitions();
         const baseInstructions = getSystemPromptForTier(this.options.modelTier);
         this.checkPromptToolParity(baseInstructions, toolDefs);
@@ -273410,6 +273694,29 @@ Full content available via: repl_exec(code="data = retrieve('${handleId}')")  or
           });
         } catch {
         }
+        if (this._reflectionBuffer && !completed) {
+          try {
+            const reflection = this._reflectionBuffer.addReflection({
+              taskGoal: cleanedTask,
+              sessionId: this._sessionId,
+              turnsSpent: this._taskState.toolCallCount,
+              failedApproaches: this._taskState.failedApproaches,
+              toolCallLog: toolCallLog.map((t2) => ({
+                tool: t2.name,
+                success: t2.success ?? false,
+                error: t2.outputPreview?.slice(0, 100)
+              })),
+              lastError: summary || "Task did not complete",
+              failedPaths: [...this._taskState.modifiedFiles.keys()].slice(0, 5)
+            });
+            this.emit({
+              type: "status",
+              content: `Reflexion: stored ${reflection.errorType} reflection — "${reflection.whatToDoDifferently.slice(0, 80)}"`,
+              timestamp: (/* @__PURE__ */ new Date()).toISOString()
+            });
+          } catch {
+          }
+        }
         if (this._episodeStore) {
           try {
             this._episodeStore.insert({
@@ -274188,6 +274495,10 @@ ${trimmedNew}`;
       /** WO-FIX-C: Tool fingerprints the littleman has flagged as redundant.
        *  Checked in executeSingle to block re-execution and return cached data. */
       _littlemanRedundantBlocks = /* @__PURE__ */ new Set();
+      /** Reflexion pattern: task-local failure-indexed reflection buffer.
+       *  Generates typed self-reflections on task failure and injects them
+       *  into the next attempt's context for active learning. */
+      _reflectionBuffer = null;
       /**
        * Littleman observer: post-turn meta-analysis.
        *

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "open-agents-ai",
-  "version": "0.187.348",
+  "version": "0.187.349",
   "description": "AI coding agent powered by open-source models (Ollama/vLLM) \u2014 interactive TUI with agentic tool-calling loop",
   "type": "module",
   "main": "./dist/index.js",