npm - open-agents-ai - Versions diffs - 0.187.474 → 0.187.476 - Mend

open-agents-ai 0.187.474 → 0.187.476

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/dist/index.js +198 -26
package/npm-shrinkwrap.json +2 -2
package/package.json +1 -1
package/prompts/agentic/system-large.md +20 -0
package/prompts/agentic/system-medium.md +30 -0
package/prompts/agentic/system-small.md +3 -1

package/dist/index.js CHANGED Viewed

@@ -518229,6 +518229,52 @@ function getSystemPromptForTier(tier) {
       return SYSTEM_PROMPT;
   }
 }
/**
 * Heuristically decide whether a user request is a build/implementation
 * task (TASK MODE) rather than a conversational message.
 *
 * A request counts as a task when any of the following holds:
 *   1. it is longer than 2000 characters;
 *   2. it contains a path-like token with two or more `/segment` parts
 *      (for example `src/lib/utils.ts`);
 *   3. it contains an action verb (implement, build, fix, ...) AND a
 *      technical noun (file, api, schema, react, ...), matched
 *      case-insensitively on whole words.
 *
 * @param {unknown} task - Raw user request. Anything that is not a
 *   non-empty string yields `false`.
 * @returns {boolean} `true` when the request looks like a task.
 */
function detectTaskMode(task) {
  const LONG_TASK_CHARS = 2e3;
  const PATH_LIKE = /(\/[\w.-]+){2,}/;
  const ACTION_VERB = /\b(implement|build|create|refactor|write|fix|migrate|deploy|generate|setup|set up|develop|design|integrate)\b/;
  const TECH_NOUN = /\b(spec|file|module|component|api|endpoint|database|schema|test|build|next\.js|typescript|react|prisma|tailwind|sql|python|rust|go)\b/;

  // Guard against non-string input: calling .slice()/.toLowerCase() on a
  // number, array or object would throw instead of answering "not a task".
  if (typeof task !== "string" || task.length === 0) {
    return false;
  }
  if (task.length > LONG_TASK_CHARS) {
    return true;
  }
  // From here on the task is at most LONG_TASK_CHARS long, so the whole
  // string can be scanned without slicing.
  if (PATH_LIKE.test(task)) {
    return true;
  }
  const lowered = task.toLowerCase();
  return ACTION_VERB.test(lowered) && TECH_NOUN.test(lowered);
}
/**
 * Produce a slimmer system prompt for TASK MODE by deleting material that
 * only matters for conversational use.
 *
 * Removed, in order:
 *   1. whole `## ...` sections whose header matches one of
 *      SECTION_HEADERS_TO_REMOVE, up to (not including) the next `## `
 *      header or the end of the prompt; `###` sub-headers do not end a
 *      section, so they are removed with it;
 *   2. every single line matching one of TOOL_LINES_TO_REMOVE;
 *   3. the `**CHAT MODE**` block up to the `**TASK MODE**` marker, and then
 *      the `**TASK MODE**` marker line itself.
 * Finally, runs of three or more newlines are collapsed to one blank line
 * and the result is trimmed and terminated with a single newline.
 *
 * @param {string} prompt - Full system prompt text.
 * @returns {string} The slimmed prompt, ending in exactly one "\n".
 */
function slimSystemPromptForTaskMode(prompt) {
  const SECTION_HEADERS_TO_REMOVE = [
    /^##\s*Interactive\s*\/\s*Long-?Running Sessions\s*$/im,
    /^##\s*Document Generation Strategy\s*$/im,
    /^##\s*Calculations\s*[—-]\s*Always Execute, Never Guess\s*$/im,
    /^##\s*Knowledge Gaps\s*[—-]\s*Search, Don't Hallucinate\s*$/im,
    /^##\s*Self-Awareness( & Introspection)?\s*$/im,
    /^##\s*Debugging\s*[—-]\s*Observe Before Reasoning\s*$/im
  ];
  // Global flag: several tool lines can match one pattern (e.g. the
  // asr_listen|audio_capture|... alternation) and all of them must go.
  // A non-global regex would make replace() delete only the first match.
  const TOOL_LINES_TO_REMOVE = [
    /^- nexus:.*$/gim,
    /^- background_run.*task_status.*task_output.*task_stop:.*$/gim,
    /^- (asr_listen|audio_capture|audio_playback|audio_analyze|camera_capture|desktop_click|bluetooth_scan|browser_action):.*$/gim,
    /^Voice\/TTS:.*$/gim,
    /^- Voice\/TTS:.*$/gim,
    /^- Desktop\/Vision:.*$/gim,
    /^- P2P:.*$/gim
  ];
  const CHAT_MODE_BLOCK = /^\*\*CHAT MODE\*\*[\s\S]*?(?=\*\*TASK MODE\*\*)/im;
  // A section ends at the next level-2 header or at end of input.
  // JavaScript has no \Z anchor (it would match a literal "Z"/"z" here),
  // and `$` matches at every line end under the `m` flag, so end-of-input
  // is expressed as "no character follows": (?![\s\S]).
  const SECTION_END = "(?=^##\\s|(?![\\s\\S]))";

  let out = prompt;
  for (const header of SECTION_HEADERS_TO_REMOVE) {
    const wholeSection = new RegExp(header.source + "[\\s\\S]*?" + SECTION_END, "im");
    out = out.replace(wholeSection, "");
  }
  for (const toolLine of TOOL_LINES_TO_REMOVE) {
    out = out.replace(toolLine, "");
  }
  out = out.replace(CHAT_MODE_BLOCK, "");
  out = out.replace(/^\*\*TASK MODE\*\*[^\n]*\n/im, "");
  // Deletions leave empty lines behind; squeeze them to one blank line.
  out = out.replace(/\n{3,}/g, "\n\n");
  return out.trim() + "\n";
}
 function computeTodoReminder(input) {
   const turnsSinceWriteThreshold = input.turnsSinceWriteThreshold ?? 10;
   const turnsBetweenReminders = input.turnsBetweenReminders ?? 10;
@@ -518638,7 +518684,17 @@ var init_agenticRunner = __esm({
       async assembleContext(task, context2) {
         const sections = [];
         const pressureCue = pressureCheck(task);
-        const basePrompt = getSystemPromptForTier(this.options.modelTier) + pressureCue;
+        const rawPrompt = getSystemPromptForTier(this.options.modelTier);
+        const taskModeOn = detectTaskMode(task);
+        const slimmedPrompt = taskModeOn ? slimSystemPromptForTaskMode(rawPrompt) : rawPrompt;
+        const basePrompt = slimmedPrompt + pressureCue;
+        if (taskModeOn) {
+          this.emit({
+            type: "status",
+            content: `REG-19: TASK MODE detected — system prompt slimmed ${rawPrompt.length}→${slimmedPrompt.length} bytes`,
+            timestamp: (/* @__PURE__ */ new Date()).toISOString()
+          });
+        }
         const _BATCH_GUIDANCE = {
           small: "\n\n## Response batching\n\nEmit AT MOST 2 tool calls per response. After observing their results, plan the next 2 in your following response. Smaller batches let the orchestrator deliver cache/failure/progress signals to you between actions. Tool calls beyond the cap are dropped. Use todo_write between batches to mark progress.",
           medium: "\n\n## Response batching\n\nEmit AT MOST 4 tool calls per response. After observing their results, plan the next batch in your following response. Smaller batches let the orchestrator deliver cache/failure/progress signals to you between actions. Tool calls beyond the cap are dropped. Use todo_write between batches to mark progress.",
@@ -520548,6 +520604,28 @@ TASK: ${task}` : task;
         for (const [tool, budget] of Object.entries(toolBudgets)) {
           toolCallBudget.set(tool, budget);
         }
+        const stagnationWindow = [];
+        let stagnationCooldownUntilTurn = -1;
+        const STAG_WINDOW_TURNS = 40;
+        const STAG_WINDOW_MS = 10 * 60 * 1e3;
+        const STAG_MIN_SAMPLES = 30;
+        const STAG_FAILURE_THRESHOLD = 5;
+        const STAG_VARIANT_THRESHOLD = 4;
+        const STAG_FILES_DELTA_MIN = 3;
+        let injectionsThisTurn = 0;
+        const INJECTION_BUDGET_SOFT = 2;
+        const deferredSoftInjections = [];
+        const pushSoftInjection = (role, content) => {
+          if (injectionsThisTurn < INJECTION_BUDGET_SOFT) {
+            messages2.push({ role, content });
+            injectionsThisTurn++;
+            return true;
+          }
+          if (deferredSoftInjections.length < 6) {
+            deferredSoftInjections.push({ role, content });
+          }
+          return false;
+        };
         for (let turn = 0; turn < this.options.maxTurns; turn++) {
           clearTurnState(this._appState);
           this._maybeApplyThinkGuard();
@@ -520562,6 +520640,76 @@ TASK: ${task}` : task;
             this.emit({ type: "error", content: "Task aborted by user", timestamp: (/* @__PURE__ */ new Date()).toISOString() });
             break;
           }
+          injectionsThisTurn = 0;
+          while (deferredSoftInjections.length > 0 && injectionsThisTurn < INJECTION_BUDGET_SOFT) {
+            const next = deferredSoftInjections.shift();
+            messages2.push({ role: next.role, content: next.content });
+            injectionsThisTurn++;
+          }
+          if (turn > stagnationCooldownUntilTurn && stagnationWindow.length >= STAG_MIN_SAMPLES) {
+            const cutoffTurn = turn - STAG_WINDOW_TURNS;
+            const cutoffTs = Date.now() - STAG_WINDOW_MS;
+            while (stagnationWindow.length && (stagnationWindow[0].turn < cutoffTurn || stagnationWindow[0].ts < cutoffTs)) {
+              stagnationWindow.shift();
+            }
+            if (stagnationWindow.length >= STAG_MIN_SAMPLES) {
+              const completedDelta = stagnationWindow[stagnationWindow.length - 1].completedTodos - stagnationWindow[0].completedTodos;
+              const fileSet = /* @__PURE__ */ new Set();
+              for (const s2 of stagnationWindow)
+                for (const p2 of s2.filesTouchedThisTurn)
+                  fileSet.add(p2);
+              const filesDelta = fileSet.size;
+              const failureSum = stagnationWindow.reduce((a2, s2) => a2 + s2.failuresThisTurn, 0);
+              const variantSet = /* @__PURE__ */ new Set();
+              for (const s2 of stagnationWindow)
+                for (const p2 of s2.shellPrefixesThisTurn)
+                  variantSet.add(p2);
+              const variantCount = variantSet.size;
+              if (completedDelta === 0 && filesDelta < STAG_FILES_DELTA_MIN && failureSum >= STAG_FAILURE_THRESHOLD && variantCount >= STAG_VARIANT_THRESHOLD) {
+                const variantList = [...variantSet].slice(0, 8).map((v) => `  • ${v}`).join("\n");
+                const stagMsg = [
+                  `[STAGNATION DETECTED — DIAGNOSTIC MODE REQUIRED]`,
+                  ``,
+                  `Over the last ${stagnationWindow.length} turns you have:`,
+                  `  • Completed 0 new todos`,
+                  `  • Written/edited only ${filesDelta} unique file(s) (need ≥${STAG_FILES_DELTA_MIN} for healthy progress)`,
+                  `  • Accumulated ${failureSum} failures`,
+                  `  • Tried ${variantCount} different shell-command variants:`,
+                  variantList,
+                  ``,
+                  `You are not making progress — you are trying surface-level variants of the same approach without diagnosing root cause. This is the failure mode that prevents real completion.`,
+                  ``,
+                  `MANDATORY NEXT ACTIONS (do NOT call task_complete; do NOT try another variant):`,
+                  ``,
+                  `1. READ THE FULL ERROR — re-read your most recent failure output ENTIRELY. If it's in a log packet, call log_explore({op:"errors"}) then log_explore({op:"lines", start:..., end:...}) for context. Do not skim.`,
+                  ``,
+                  `2. STATE A HYPOTHESIS in writing — what specifically is wrong? "I think X is failing because Y." Be concrete. Do NOT propose a fix yet.`,
+                  ``,
+                  `3. VERIFY ONE ASSUMPTION — pick the ONE thing you most BELIEVE to be true and test it with the smallest possible command:`,
+                  `     • If you think a package is installed: ls node_modules/<name>/package.json`,
+                  `     • If you think an env var is set: printenv <NAME>`,
+                  `     • If you think a file imports correctly: head -5 <file>`,
+                  `     • If you don't know what an error means: web_search("<exact error string>")`,
+                  ``,
+                  `4. CHECK SILENT FAILURES — npm install reporting "added N packages" does NOT mean ALL declared deps installed; npm sometimes drops packages with peer-dep conflicts without erroring. Verify each expected dep individually.`,
+                  ``,
+                  `DO NOT in your next response:`,
+                  `  • Try another version, flag, or variant of any command in the list above`,
+                  `  • Wipe node_modules / re-install — that hides the original error`,
+                  `  • Call task_complete — being stuck on a debug problem is NEVER grounds for task_complete`,
+                  ``,
+                  `task_complete is ONLY for actual completion or unrecoverable hardware/permission errors. You are stuck on a fixable problem; diagnose it.`
+                ].join("\n");
+                messages2.push({ role: "system", content: stagMsg });
+                stagnationCooldownUntilTurn = turn + 5;
+                this.emit({
+                  type: "status",
+                  content: `STAGNATION DETECTED — injected diagnostic mode at turn ${turn} (${variantCount} variants, ${failureSum} failures, ${filesDelta} files in window)`,
+                  timestamp: (/* @__PURE__ */ new Date()).toISOString()
+                });
+              }
+            }
+          }
           if (pendingConstraintWarnings.length > 0) {
             const warningMsg = "<constraint-recall>\n" + pendingConstraintWarnings.join("\n") + "\n</constraint-recall>";
             messages2.push({ role: "system", content: warningMsg });
@@ -520745,11 +520893,8 @@ Now call file_write with YOUR skeleton for this task.`
             if (toolHints.length > 0) {
               toolHints.sort((a2, b) => b.score - a2.score);
               const top = toolHints.slice(0, 5);
-              messages2.push({
-                role: "system",
-                content: `[Relevant tools for this task]
-${top.map((t2) => `- ${t2.name}: ${t2.desc}`).join("\n")}`
-              });
+              pushSoftInjection("system", `[Relevant tools for this task]
+${top.map((t2) => `- ${t2.name}: ${t2.desc}`).join("\n")}`);
             }
           }
           if (turn === 0 && (turnTier === "small" || turnTier === "medium")) {
@@ -520773,11 +520918,8 @@ ${top.map((t2) => `- ${t2.name}: ${t2.desc}`).join("\n")}`
             }
             hints.push("EFFICIENCY: Aim for 3-5 tool calls total. Each call should make measurable progress. Do not repeat a tool call with the same arguments.");
             if (hints.length > 0) {
-              messages2.push({
-                role: "system",
-                content: `[Efficiency Guide]
-${hints.join("\n")}`
-              });
+              pushSoftInjection("system", `[Efficiency Guide]
+${hints.join("\n")}`);
             }
           }
           if (turn === 0 && (turnTier === "small" || turnTier === "medium")) {
@@ -520787,21 +520929,18 @@ ${hints.join("\n")}`
             const hasMultiStepRequirement = taskGoal.length > 200 && (taskGoal.match(/\d\./g) || []).length >= 2;
             const isAnalysisTask = (taskGoal.match(/\banalyze\b|\baudit\b|\breview\b|\bdiagnose\b|\binvestigate\b|\bcompare\b|\bevaluate\b/gi) || []).length >= 1;
             if (hasMultiplePremises || hasConditionalLogic || hasMultiStepRequirement || isAnalysisTask) {
-              messages2.push({
-                role: "system",
-                content: [
-                  "[Structured Reasoning Guide]",
-                  "This task requires multi-step reasoning. Follow this structure:",
-                  "",
-                  "1. DECOMPOSE: List the sub-questions this task requires, from simplest to most complex.",
-                  "2. For each sub-question:",
-                  "   a. State what you KNOW (verified from evidence/tool output)",
-                  "   b. State what you ASSUME (hypotheses not yet confirmed)",
-                  "   c. Derive your conclusion using ONLY verified facts",
-                  "3. If a tool result contradicts your earlier reasoning, UPDATE your conclusions — don't ignore new evidence.",
-                  "4. Before your final answer, verify: does each conclusion follow from the evidence?"
-                ].join("\n")
-              });
+              pushSoftInjection("system", [
+                "[Structured Reasoning Guide]",
+                "This task requires multi-step reasoning. Follow this structure:",
+                "",
+                "1. DECOMPOSE: List the sub-questions this task requires, from simplest to most complex.",
+                "2. For each sub-question:",
+                "   a. State what you KNOW (verified from evidence/tool output)",
+                "   b. State what you ASSUME (hypotheses not yet confirmed)",
+                "   c. Derive your conclusion using ONLY verified facts",
+                "3. If a tool result contradicts your earlier reasoning, UPDATE your conclusions — don't ignore new evidence.",
+                "4. Before your final answer, verify: does each conclusion follow from the evidence?"
+              ].join("\n"));
             }
           }
           const turnBudget = turnTier === "small" ? 5 : turnTier === "medium" ? 8 : 0;
@@ -522354,6 +522493,39 @@ Your most recent tool calls SUCCEEDED. If the task is complete, call task_comple
               });
             }
           }
+          try {
+            const turnLogTail = toolCallLog.filter((t2) => t2.turn === turn || t2.turn === void 0);
+            const filesTouched = /* @__PURE__ */ new Set();
+            const shellPrefixes = /* @__PURE__ */ new Set();
+            let failuresThisTurn = 0;
+            for (const tc of turnLogTail) {
+              if (tc.success === false)
+                failuresThisTurn++;
+              if (["file_write", "file_edit", "batch_edit", "file_patch"].includes(tc.name)) {
+                const m2 = tc.argsKey?.match(/path=([^,]+)/);
+                if (m2 && m2[1])
+                  filesTouched.add(m2[1]);
+              }
+              if (tc.name === "shell") {
+                const cmdMatch = tc.argsKey?.match(/command=([^,]{0,200})/);
+                const cmd = cmdMatch?.[1] ?? "";
+                const prefix = cmd.replace(/^cd\s+\S+\s*&&\s*/, "").split(/\s+/).slice(0, 3).join(" ");
+                if (prefix)
+                  shellPrefixes.add(prefix);
+              }
+            }
+            const todosNow = this.readSessionTodos() || [];
+            const completedNow = todosNow.filter((t2) => t2.status === "completed").length;
+            stagnationWindow.push({
+              turn,
+              ts: Date.now(),
+              completedTodos: completedNow,
+              filesTouchedThisTurn: filesTouched,
+              failuresThisTurn,
+              shellPrefixesThisTurn: shellPrefixes
+            });
+          } catch {
+          }
         }
         let prevCycleToolCalls = toolCallCount;
         while (!completed && !this.aborted && this.options.bruteForce && bruteForceCycle < this.options.bruteForceMaxCycles) {

package/npm-shrinkwrap.json CHANGED Viewed

@@ -1,12 +1,12 @@
 {
   "name": "open-agents-ai",
-  "version": "0.187.474",
+  "version": "0.187.476",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "open-agents-ai",
-      "version": "0.187.474",
+      "version": "0.187.476",
       "hasInstallScript": true,
       "license": "CC-BY-NC-4.0",
       "dependencies": {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "open-agents-ai",
-  "version": "0.187.474",
+  "version": "0.187.476",
   "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
   "type": "module",
   "main": "./dist/index.js",

package/prompts/agentic/system-large.md CHANGED Viewed

@@ -161,6 +161,26 @@ When you discover image files (png, jpg, gif, svg, webp, bmp) during codebase ex
 - ALWAYS run validation (tests, build, lint) after making changes
 - If tests fail, read the FULL error output. Fix the exact failing assertion or error.
 - Do NOT give up after a failure. Iterate: fix → test → fix → test until it passes.
+- task_complete is ONLY for actual completion or unrecoverable hardware/permission errors. Being stuck on a code/config problem is NEVER grounds for task_complete — use DIAGNOSTIC MODE below.
+### DIAGNOSTIC MODE — When You ARE Stuck, Slow Down and Investigate
+If you have tried 2+ approaches to the same blocker and both failed, **STOP attempting fixes** and enter diagnostic mode. Repeating fix-attempts on a misunderstood problem just wastes turns. Diagnose ROOT CAUSE first.
+**The diagnostic loop (one cycle per turn, NOT batched):**
+1. **READ THE FULL ERROR** — re-read the most recent failure output ENTIRELY. Don't skim the first 200 chars. If output is in a log packet, use `log_explore` with `op="errors"`, then `op="lines"` for context.
+2. **VERIFY ONE ASSUMPTION** — pick ONE thing you BELIEVE to be true and test it with the smallest possible command (e.g. "I think tailwindcss is installed" → `ls node_modules/tailwindcss/package.json`).
+3. **STATE A HYPOTHESIS in writing** before your next action. Then design ONE experiment that would CONFIRM or REFUTE it (not fix it — verify it first).
+4. **WEB SEARCH the exact error message** if you don't know what it means. A 30-second lookup beats 10 retry attempts.
+5. **CHECK THE OBVIOUS** — silent failures are common. `npm install` reporting "added 141 packages" doesn't mean ALL declared deps installed; npm sometimes drops packages with peer-dep conflicts without erroring. Verify each expected dep with `ls node_modules/<name>/package.json`.
+6. Only AFTER root cause is verified, attempt ONE fix targeting that cause. If the fix fails, return to step 1 with the new error.
+**What diagnostic mode is NOT:**
+- Trying another version (`tailwindcss@3.4.19` after `tailwindcss@4.0.0`) — that's variant-fatigue, not diagnosis.
+- Adding `--force` or `--legacy-peer-deps` — those mask root causes.
+- Wiping node_modules and re-installing — hides the original error.
+- Calling task_complete to escape — task_complete is NEVER the answer to a stuck debugging session.
 - Use grep_search and find_files for efficient exploration (don't dump entire directories)
 - Use file_edit for small changes instead of rewriting entire files
 - Keep tool calls focused — read only what you need

package/prompts/agentic/system-medium.md CHANGED Viewed

@@ -94,6 +94,36 @@ NEVER write the entire document in ONE file_write call. DECOMPOSE:
 - Do NOT give up after failure. Iterate until it passes.
 - Use file_edit for small changes, not full file rewrites
 - You MUST call task_complete when done — when you have enough information from web tools, STOP fetching and call task_complete with a summary. Do not keep browsing after you have the answer.
+- task_complete is ONLY for actual completion or unrecoverable hardware/permission errors. Being stuck on a code/config problem is NEVER grounds for task_complete — use DIAGNOSTIC MODE below.
+### DIAGNOSTIC MODE — When You ARE Stuck, Slow Down and Investigate
+If you have tried 2+ approaches to the same blocker and both failed, **STOP attempting fixes** and enter diagnostic mode. Repeating fix-attempts on a misunderstood problem just wastes turns. Diagnose ROOT CAUSE first.
+**The diagnostic loop (one cycle per turn, NOT batched):**
+1. **READ THE FULL ERROR** — re-read the most recent failure output ENTIRELY. Don't skim the first 200 chars. If the output is in a log packet, use `log_explore` with `op="errors"` to see every marker, then `op="lines"` for surrounding context.
+2. **VERIFY ONE ASSUMPTION** — pick ONE thing you BELIEVE to be true and test it with the smallest possible command:
+   - "I think tailwindcss is installed" → `ls node_modules/tailwindcss/package.json` (one line)
+   - "I think the import path is right" → `cat src/lib/x.ts | head -5`
+   - "I think the env var is set" → `printenv VAR_NAME`
+3. **STATE A HYPOTHESIS in writing** before your next action:
+   - "Hypothesis: tailwindcss didn't install because @tailwindcss/postcss has a peer-dep conflict with autoprefixer."
+   - Then design ONE experiment that would CONFIRM or REFUTE it (not fix it — verify it first).
+4. **WEB SEARCH the exact error message** if you don't know what it means. `web_search("exact error string from terminal")`. A 30-second lookup beats 10 retry attempts.
+5. **CHECK THE OBVIOUS** — silent failures are common. `npm install` saying "added 141 packages" doesn't mean ALL declared deps installed; npm sometimes drops packages with peer-dep conflicts without erroring. Verify each expected dep with `ls node_modules/<name>/package.json`.
+6. Only AFTER root cause is verified, attempt ONE fix targeting that cause. If the fix fails, return to step 1 with the new error.
+**What diagnostic mode is NOT:**
+- Trying another version (`tailwindcss@3.4.19` after `tailwindcss@4.0.0` failed) — that's variant-fatigue, not diagnosis.
+- Adding `--force` or `--legacy-peer-deps` — those mask root causes, they don't reveal them.
+- Wiping node_modules and re-installing — that just hides the original error.
+- Calling task_complete to escape — task_complete is NEVER the answer to a stuck debugging session.
 - Do NOT output long explanations. Focus on tool calls.
 - If file_read/list_directory returns ENOENT, use list_directory on the project root — do NOT guess parent paths
 - Directory listing entries are RELATIVE to the listed directory. If you list "parent/" and see "child", the full path is "parent/child" — NOT ".child" or just "child"

package/prompts/agentic/system-small.md CHANGED Viewed

@@ -99,10 +99,12 @@ Complex tasks (5+ steps) — DECOMPOSE before acting:
 1. Call todo_write with the checklist. Mark item 1 "in_progress".
 2. Execute ONE STEP AT A TIME. After each, update todo_write status.
 3. After each file edit, VERIFY: file_read or shell test.
-4. If stuck after 2 attempts, try a DIFFERENT approach — do not repeat the same tool call.
+4. If stuck after 2 attempts: STOP. Enter DIAGNOSTIC MODE — read the FULL error output, state a hypothesis in writing, verify ONE assumption with the smallest test command, web_search the exact error string. Only fix AFTER you've confirmed root cause. Do NOT keep trying variants of the same approach.
 5. For multi-file changes: read ALL relevant files first, then edit in dependency order.
 6. Final todo_write marks all items "completed", then call task_complete.
+task_complete is ONLY for ACTUAL completion. Being stuck on a code/config problem is NEVER grounds for task_complete — diagnose, do not exit.
 CRITICAL — NEVER repeat a tool call with the same arguments. If you already read a file, use the data you have. If you already ran a command, use the output. Calling the same tool twice with identical arguments wastes turns and produces the same result.
 Long document generation (reports, SOWs, proposals, contracts):