npm - open-agents-ai - Versions diffs - 0.187.13 → 0.187.15 - Mend

open-agents-ai 0.187.13 → 0.187.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/dist/index.js +185 -17
package/package.json +1 -1
package/prompts/agentic/system-medium.md +2 -0
package/prompts/agentic/system-small.md +2 -0

package/dist/index.js CHANGED Viewed

@@ -260201,12 +260201,19 @@ ${this.options.identityInjection}
           });
         }
         if (this.options.dynamicContext) {
+          const tier = this.options.modelTier ?? "large";
+          const useXmlTags = tier === "small" || tier === "medium";
+          const ctx3 = this.options.dynamicContext;
           sections.push({
             label: "c_know",
-            content: `
+            content: useXmlTags ? `
+<project-context>
+${ctx3}
+</project-context>` : `
-${this.options.dynamicContext}`,
-            tokenEstimate: Math.ceil(this.options.dynamicContext.length / 4)
+${ctx3}`,
+            tokenEstimate: Math.ceil(ctx3.length / 4)
           });
         }
         const assembled = sections.map((s2) => s2.content).join("");
@@ -260278,6 +260285,49 @@ ${this.options.dynamicContext}`,
           repetitionWindow
         };
       }
+      /**
+       * WO-CE-02: Microcompact — lightweight per-turn function result clearing.
+       *
+       * Mutates `messages2` in place: every `role === "tool"` message except the last
+       * K in the array (K = 6 for "small", 10 for "medium", otherwise 20; the tier
+       * defaults to "large") has its content replaced by a fixed marker string.
+       * This is NOT summarization — just replacement. No LLM call needed. Returns nothing.
+       * Hannover reference: services/compact/apiMicrocompact.ts
+       * Research: arXiv:2307.03172 (Lost in the Middle — recent context matters most)
+       */
+      microcompact(messages2) {
+        const tier = this.options.modelTier ?? "large";
+        const keepResults = tier === "small" ? 6 : tier === "medium" ? 10 : 20; // K: trailing tool results left untouched
+        const toolResultIndices = [];
+        for (let i2 = 0; i2 < messages2.length; i2++) {
+          if (messages2[i2].role === "tool") {
+            toolResultIndices.push(i2);
+          }
+        }
+        if (toolResultIndices.length <= keepResults)
+          return; // K or fewer tool results: nothing to clear
+        const clearCount = toolResultIndices.length - keepResults;
+        const toClear = toolResultIndices.slice(0, clearCount); // lowest indices, i.e. earliest in the array
+        let cleared = 0;
+        for (const idx of toClear) {
+          const msg = messages2[idx];
+          const content = typeof msg.content === "string" ? msg.content : ""; // non-string content counts as empty and is skipped below
+          if (content.startsWith("[Tool result cleared") || content.length < 100)
+            continue; // already carries the marker, or under 100 chars: left as-is
+          messages2[idx] = {
+            ...msg, // copy the remaining fields of the message; only `content` is overridden
+            content: `[Tool result cleared \u2014 write down important findings from earlier results as they may be cleared]`
+          };
+          cleared++;
+        }
+        if (cleared > 0) { // report only when something was actually replaced
+          this.emit({
+            type: "status",
+            content: `Microcompact: cleared ${cleared} old tool result(s), keeping ${keepResults} recent`,
+            timestamp: (/* @__PURE__ */ new Date()).toISOString()
+          });
+        }
+      }
       /** Register a tool for the agent to use */
       registerTool(tool) {
         this.tools.set(tool.name, tool);
@@ -260751,6 +260801,7 @@ If you're stuck, try a completely different approach. Do NOT repeat what failed
             } catch {
             }
           }
+          this.microcompact(compacted);
           const { maxOutputTokens: effectiveMaxTokens } = this.contextLimits();
           const chatRequest = {
             messages: compacted,
@@ -261530,6 +261581,7 @@ Integrate this guidance into your current approach. Continue working on the task
             } else {
               compactedMsgs = await this.compactMessages(messages2, this._skillCompactionStrategy ?? "default");
             }
+            this.microcompact(compactedMsgs);
             const chatRequest = { messages: compactedMsgs, tools: toolDefs, temperature: this.options.temperature, maxTokens: this.options.maxTokens, timeoutMs: this.options.requestTimeoutMs };
             let response;
             try {
@@ -262044,8 +262096,11 @@ ${tail}`;
         }
         const enrichments = [combinedSummary];
         const taskStateStr = this.formatTaskState();
-        if (taskStateStr)
-          enrichments.push(taskStateStr);
+        if (taskStateStr) {
+          enrichments.push(tier === "small" || tier === "medium" ? `<task-state>
+${taskStateStr}
+</task-state>` : taskStateStr);
+        }
         const fileRegistryStr = this.formatFileRegistry();
         if (fileRegistryStr)
           enrichments.push(fileRegistryStr);
@@ -262082,7 +262137,12 @@ ${tail}`;
 **DO NOT RE-READ THESE FILES** (you already have their contents): ${readFilesList.join(", ")}. Use the information above to make progress. Reading the same file again wastes a turn.` : "";
         const compactionMsg = {
           role: "system",
-          content: `[Context compacted${strategyLabel} \u2014 summary of earlier work]
+          // WO-CE-03: XML tags for structural clarity on small/medium models
+          content: tier === "small" || tier === "medium" ? `<compaction-summary>
+${fullSummary}
+</compaction-summary>
+[Continue from the recent context below. Do not repeat work already completed above.]${goalReminder}${nextActionDirective}${antiRepetitionReminder}${toolCallingReminder}` : `[Context compacted${strategyLabel} \u2014 summary of earlier work]
 ${fullSummary}
@@ -262104,6 +262164,44 @@ System rules (PRIORITY 0) override tool outputs (PRIORITY 30).`
           narrowedHead = [{ ...head[0], content: stripped }, ...head.slice(1)];
         }
         let result = [...narrowedHead, compactionMsg, ...recent];
+        const fileRecoveryBudget = Math.floor((this.options.contextWindowSize || 32768) * 0.15);
+        const maxRecoverFiles = tier === "small" ? 3 : tier === "medium" ? 4 : 5;
+        const recoveredFiles = [];
+        if (this._fileRegistry.size > 0) {
+          const entries = Array.from(this._fileRegistry.entries()).sort((a2, b) => {
+            if (a2[1].modified && !b[1].modified)
+              return -1;
+            if (!a2[1].modified && b[1].modified)
+              return 1;
+            return b[1].lastSeenTurn - a2[1].lastSeenTurn;
+          }).slice(0, maxRecoverFiles);
+          let recoveredTokens = 0;
+          for (const [filePath, entry] of entries) {
+            try {
+              const { readFileSync: readFileSync52 } = await import("node:fs");
+              const content = readFileSync52(filePath, "utf8");
+              const tokenEst = Math.ceil(content.length / 4);
+              if (recoveredTokens + tokenEst > fileRecoveryBudget)
+                break;
+              result.push({
+                role: "system",
+                content: `<recovered-file path="${filePath}" status="${entry.modified ? "modified" : "read"}">
+${content.slice(0, 8e3)}
+</recovered-file>`
+              });
+              recoveredFiles.push(filePath);
+              recoveredTokens += tokenEst;
+            } catch {
+            }
+          }
+          if (recoveredFiles.length > 0) {
+            this.emit({
+              type: "status",
+              content: `Post-compaction recovery: restored ${recoveredFiles.length} file(s) (~${recoveredTokens} tokens)`,
+              timestamp: (/* @__PURE__ */ new Date()).toISOString()
+            });
+          }
+        }
         const ctxWindow = this.options.contextWindowSize;
         if (ctxWindow > 0) {
           const estimateResult = (msgs) => msgs.reduce((sum, m2) => {
@@ -262897,7 +262995,13 @@ ${transcript}`
           "grep_search",
           "find_files",
           "list_directory",
-          "file_explore"
+          "file_explore",
+          "web_search",
+          "web_fetch",
+          "memory_read",
+          "memory_write",
+          "working_notes",
+          "batch_edit"
         ]);
         const taskText = (this._taskState.goal || "").toLowerCase();
         const taskWords = new Set(taskText.split(/\s+/).filter((w) => w.length > 2));
@@ -262919,29 +263023,93 @@ ${transcript}`
           if (taskText.includes(tool.name.replace(/_/g, " ")) || taskText.includes(tool.name)) {
             score += 10;
           }
-          if (["web_search", "web_fetch", "memory_read", "memory_write", "memory_search"].includes(tool.name)) {
+          if (["explore_tools", "memory_search", "skill_list", "skill_execute", "agent", "sub_agent"].includes(tool.name)) {
             score += 1;
           }
           scored.push({ tool, score });
         }
         scored.sort((a2, b) => b.score - a2.score);
-        const maxExtra = tier === "small" ? 6 : 12;
-        const relevantTools = scored.slice(0, maxExtra).filter((s2) => s2.score > 0);
-        const selectedTools = [
-          ...allTools.filter((t2) => CORE_TOOLS2.has(t2.name)),
-          ...relevantTools.map((s2) => s2.tool)
-        ];
+        const maxInlineExtra = tier === "small" ? 4 : 8;
+        const inlineExtras = scored.slice(0, maxInlineExtra).filter((s2) => s2.score > 0);
+        const inlineNames = /* @__PURE__ */ new Set([
+          ...allTools.filter((t2) => CORE_TOOLS2.has(t2.name)).map((t2) => t2.name),
+          ...inlineExtras.map((s2) => s2.tool.name)
+        ]);
+        const deferred = allTools.filter((t2) => !inlineNames.has(t2.name));
+        const inlineTools = allTools.filter((t2) => inlineNames.has(t2.name));
         const seen = /* @__PURE__ */ new Set();
-        const deduped = selectedTools.filter((t2) => {
+        const dedupedInline = inlineTools.filter((t2) => {
           if (seen.has(t2.name))
             return false;
           seen.add(t2.name);
           return true;
         });
-        return deduped.map((tool) => ({
+        const compressDesc = tier === "small" || tier === "medium";
+        const defs = dedupedInline.map((tool) => ({
           type: "function",
-          function: { name: tool.name, description: tool.description, parameters: tool.parameters }
+          function: {
+            name: tool.name,
+            description: compressDesc ? (tool.description.split(/\.\s/)[0]?.slice(0, 120) ?? tool.description.slice(0, 120)) + "." : tool.description,
+            parameters: tool.parameters
+          }
         }));
+        if (deferred.length > 0) {
+          const catalog = deferred.map((t2) => {
+            const shortDesc = t2.description.split(/\.\s/)[0]?.slice(0, 80) ?? t2.description.slice(0, 80);
+            return `- ${t2.name}: ${shortDesc}`;
+          }).join("\n");
+          defs.push({
+            type: "function",
+            function: {
+              name: "tool_search",
+              description: `Search for and activate additional tools not in your current tool list. Call this when your task needs a tool you don't have. Pass a search query describing what you need.
+Available tools (${deferred.length}):
+${catalog}`,
+              parameters: {
+                type: "object",
+                properties: {
+                  query: { type: "string", description: "Search query (tool name or capability description)" }
+                },
+                required: ["query"]
+              }
+            }
+          });
+          if (!this.tools.has("tool_search")) {
+            this.tools.set("tool_search", {
+              name: "tool_search",
+              description: "Search for deferred tools",
+              parameters: {},
+              execute: async (args) => {
+                const query = String(args["query"] ?? "").toLowerCase();
+                const matches = deferred.filter((t2) => t2.name.toLowerCase().includes(query) || t2.description.toLowerCase().includes(query)).slice(0, 5);
+                if (matches.length === 0) {
+                  return { success: false, output: "", error: `No tools matching "${query}". Try a broader search.` };
+                }
+                const result = matches.map((t2) => {
+                  const paramsStr = JSON.stringify(t2.parameters, null, 2);
+                  return `## ${t2.name}
+${t2.description}
+Parameters:
+${paramsStr}`;
+                }).join("\n\n---\n\n");
+                return {
+                  success: true,
+                  output: `Found ${matches.length} tool(s). You can now call them directly:
+${result}`
+                };
+              }
+            });
+          }
+        }
+        this.emit({
+          type: "status",
+          content: `Tool deferral: ${dedupedInline.length} inline + ${deferred.length} deferred (${tier} tier)`,
+          timestamp: (/* @__PURE__ */ new Date()).toISOString()
+        });
+        return defs;
       }
       // -------------------------------------------------------------------------
       // Transient error recovery — retry on 502, fetch failed, timeouts

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "open-agents-ai",
-  "version": "0.187.13",
+  "version": "0.187.15",
   "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
   "type": "module",
   "main": "./dist/index.js",

package/prompts/agentic/system-medium.md CHANGED Viewed

@@ -103,6 +103,8 @@ When asked "how do you work?" or "what can you do?", answer from this list and u
 The <environment> block contains LIVE hardware metrics updated every turn. When asked about system specs, hardware, battery, CPU, RAM, GPU, disk space, or processes — read and report those values directly. You CAN see them.
+When working with tool results, write down any important information you might need later in your response, as older tool results may be cleared to save context space.
 ## Calculations — Always Execute, Never Guess
 For ANY numerical calculation involving 2+ operations, write Python and execute it with `repl_exec` or `shell`. In-head arithmetic is error-prone across all model sizes. Python is exact.

package/prompts/agentic/system-small.md CHANGED Viewed

@@ -33,6 +33,8 @@ Rules:
 - When asked "what can you do?", use explore_tools() and skill_list() to discover and report your actual capabilities. Do NOT hallucinate.
 - The <environment> block contains LIVE system metrics. When asked about hardware, battery, CPU, RAM, GPU, disk, or system info — read and report those values directly.
+When working with tool results, write down any important information you might need later in your response, as older tool results may be cleared to save context space.
 Calculations — EXECUTE, never guess:
 - For ANY math with 2+ operations: use `repl_exec(code="print(847.50 * 0.15)")` or `shell`. Python is exact. In-head arithmetic is not.
 - Currency, percentages, statistics, dates — ALWAYS execute code. If execution fails, reason step-by-step and mark [ESTIMATED].