npm - open-agents-ai - Versions diffs - 0.187.475 → 0.187.477 - Mend

open-agents-ai 0.187.475 → 0.187.477

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/index.js CHANGED Viewed

@@ -282094,14 +282094,14 @@ ${lanes.join("\n")}
         return { value: value2, isSyntacticallyString, resolvedOtherFiles, hasExternalReferences };
       }
       function createEvaluator({ evaluateElementAccessExpression, evaluateEntityNameExpression }) {
-        function evaluate2(expr, location) {
+        function evaluate3(expr, location) {
           let isSyntacticallyString = false;
           let resolvedOtherFiles = false;
           let hasExternalReferences = false;
           expr = skipParentheses(expr);
           switch (expr.kind) {
             case 225:
-              const result = evaluate2(expr.operand, location);
+              const result = evaluate3(expr.operand, location);
               resolvedOtherFiles = result.resolvedOtherFiles;
               hasExternalReferences = result.hasExternalReferences;
               if (typeof result.value === "number") {
@@ -282116,8 +282116,8 @@ ${lanes.join("\n")}
               }
               break;
             case 227: {
-              const left = evaluate2(expr.left, location);
-              const right = evaluate2(expr.right, location);
+              const left = evaluate3(expr.left, location);
+              const right = evaluate3(expr.right, location);
               isSyntacticallyString = (left.isSyntacticallyString || right.isSyntacticallyString) && expr.operatorToken.kind === 40;
               resolvedOtherFiles = left.resolvedOtherFiles || right.resolvedOtherFiles;
               hasExternalReferences = left.hasExternalReferences || right.hasExternalReferences;
@@ -282192,7 +282192,7 @@ ${lanes.join("\n")}
           let resolvedOtherFiles = false;
           let hasExternalReferences = false;
           for (const span of expr.templateSpans) {
-            const spanResult = evaluate2(span.expression, location);
+            const spanResult = evaluate3(span.expression, location);
             if (spanResult.value === void 0) {
               return evaluatorResult(
                 /*value*/
@@ -282214,7 +282214,7 @@ ${lanes.join("\n")}
             hasExternalReferences
           );
         }
-        return evaluate2;
+        return evaluate3;
       }
       function isConstAssertion(location) {
         return isAssertionExpression(location) && isConstTypeReference(location.type) || isJSDocTypeTag(location) && isConstTypeReference(location.typeExpression);
@@ -312752,7 +312752,7 @@ ${lanes.join("\n")}
         var emitResolver = createResolver();
         var nodeBuilder = createNodeBuilder();
         var syntacticNodeBuilder = createSyntacticTypeNodeBuilder(compilerOptions, nodeBuilder.syntacticBuilderResolver);
-        var evaluate2 = createEvaluator({
+        var evaluate3 = createEvaluator({
           evaluateElementAccessExpression,
           evaluateEntityNameExpression
         });
@@ -348722,7 +348722,7 @@ ${lanes.join("\n")}
                     case 72:
                     case 50:
                     case 73:
-                      const rhsEval = evaluate2(right);
+                      const rhsEval = evaluate3(right);
                       if (typeof rhsEval.value === "number" && Math.abs(rhsEval.value) >= 32) {
                         errorOrSuggestion(
                           isEnumMember(walkUpParenthesizedExpressions(right.parent.parent)),
@@ -349225,7 +349225,7 @@ ${lanes.join("\n")}
             texts.push(span.literal.text);
             types2.push(isTypeAssignableTo(type, templateConstraintType) ? type : stringType);
           }
-          const evaluated = node.parent.kind !== 216 && evaluate2(node).value;
+          const evaluated = node.parent.kind !== 216 && evaluate3(node).value;
           if (evaluated) {
             return getFreshTypeOfLiteralType(getStringLiteralType(evaluated));
           }
@@ -354706,7 +354706,7 @@ ${lanes.join("\n")}
         function computeConstantEnumMemberValue(member) {
           const isConstEnum = isEnumConst(member.parent);
           const initializer = member.initializer;
-          const result = evaluate2(initializer, member);
+          const result = evaluate3(initializer, member);
           if (result.value !== void 0) {
             if (isConstEnum && typeof result.value === "number" && !isFinite(result.value)) {
               error2(
@@ -354761,7 +354761,7 @@ ${lanes.join("\n")}
           if (isConstantVariable(symbol3)) {
             const declaration = symbol3.valueDeclaration;
             if (declaration && isVariableDeclaration(declaration) && !declaration.type && declaration.initializer && (!location || declaration !== location && isBlockScopedNameDeclaredBeforeUse(declaration, location))) {
-              const result = evaluate2(declaration.initializer, declaration);
+              const result = evaluate3(declaration.initializer, declaration);
               if (location && getSourceFileOfNode(location) !== getSourceFileOfNode(declaration)) {
                 return evaluatorResult(
                   result.value,
@@ -512128,6 +512128,106 @@ var init_personality = __esm({
   }
 });
+// packages/orchestrator/dist/critic.js
+/**
+ * Compose the refusal text handed back to the model when it keeps repeating
+ * the same tool call with the same arguments.
+ *
+ * @param {{tool: string, args?: object}} call - The repeated call; `args` defaults to `{}`.
+ * @param {number} hits - How many times this exact call has been seen.
+ * @returns {string} A six-line bracketed message naming the call and the actions
+ *   required before it may run again.
+ */
+function buildForceProgressBlockMessage(call, hits) {
+  // Only the first 200 characters of the serialized arguments are shown.
+  const serializedArgs = JSON.stringify(call.args ?? {});
+  const preview = serializedArgs.slice(0, 200);
+  const lines = [
+    `[FORCED PROGRESS BLOCK — you have called ${call.tool}(${preview}) ${hits} times with identical arguments and received the cached result each time. The data is not changing. You are stuck in a read-only loop instead of advancing the plan.`,
+    `REQUIRED before this tool will run again with these arguments:`,
+    `  • file_write or file_edit, OR`,
+    `  • todo_write that advances the plan, OR`,
+    `  • task_complete (if all phases are done).`,
+    `If you genuinely need this same data again, call a DIFFERENT tool first (one of the three above). Until then, refer to your conversation history — the result of this exact call is already there.]`
+  ];
+  return lines.join("\n");
+}
+/**
+ * Decide how a proposed tool call should be handled before it is executed.
+ *
+ * Reads `recentToolResults` and `dedupHitCount` but never writes to them;
+ * recording the returned `hitNumber` is left to the caller.
+ *
+ * @param {object} inputs
+ * @param {{tool: string, args: object}} inputs.proposedCall - The call under review.
+ * @param {string} inputs.fingerprint - Key identifying this tool + arguments pair.
+ * @param {boolean} inputs.isReadLike - Whether the call is eligible for cache dedup.
+ * @param {Map} inputs.recentToolResults - fingerprint -> `{ result, compacted }`.
+ * @param {Map} inputs.dedupHitCount - fingerprint -> number of earlier cache hits.
+ * @param {boolean} inputs.observerRedundantBlock - Observer veto for this fingerprint.
+ * @returns {object} An object whose `decision` is "observer_block",
+ *   "force_progress_block", "serve_cached" or "pass".
+ */
+function evaluate(inputs) {
+  const { proposedCall, fingerprint, recentToolResults, dedupHitCount } = inputs;
+
+  // The observer veto is checked first and applies to read-like and other calls alike.
+  if (inputs.observerRedundantBlock) {
+    const prior = recentToolResults.get(fingerprint);
+    return {
+      decision: "observer_block",
+      reason: "Littleman observer flagged this fingerprint as redundant",
+      cachedResult: prior ? prior.result : null
+    };
+  }
+
+  // Only read-like calls with a cached result are candidates for dedup.
+  if (!inputs.isReadLike) {
+    return { decision: "pass" };
+  }
+  const prior = recentToolResults.get(fingerprint);
+  if (prior === undefined) {
+    return { decision: "pass" };
+  }
+
+  const hitNumber = (dedupHitCount.get(fingerprint) ?? 0) + 1;
+  const gate = proposedCall.tool === "shell" ? SHELL_THRESHOLD : FS_THRESHOLD;
+  if (hitNumber >= gate) {
+    return {
+      decision: "force_progress_block",
+      reason: `${proposedCall.tool} fingerprint hit count ${hitNumber} >= ${gate}`,
+      hitNumber,
+      blockMessage: buildForceProgressBlockMessage(proposedCall, hitNumber)
+    };
+  }
+
+  let reason;
+  if (prior.compacted) {
+    reason = "post-compaction cache re-serve";
+  } else {
+    reason = `duplicate call #${hitNumber} (still under ${gate}-hit gate)`;
+  }
+  return {
+    decision: "serve_cached",
+    reason,
+    cachedResult: prior.result,
+    compacted: prior.compacted,
+    hitNumber
+  };
+}
+/**
+ * Render the "diagnostic mode" system message injected when the agent is stagnating.
+ *
+ * @param {object} signals - Aggregated stagnation-window measurements.
+ * @param {number} signals.windowSamples - Number of turns in the window.
+ * @param {number} signals.filesDelta - Unique files written/edited in the window.
+ * @param {number} signals.failureSum - Failures accumulated in the window.
+ * @param {number} signals.variantCount - Distinct shell-command variants tried.
+ * @param {string[]} [signals.variantList] - The variants; only the first 8 are listed.
+ * @param {object} [opts]
+ * @param {number} [opts.filesDeltaMin=3] - Healthy-progress file count quoted in the
+ *   message. Pass the same value given to isStagnant so the text and the gate agree.
+ * @returns {string} The multi-line diagnostic message.
+ */
+function buildStagnationDiagnostic(signals, opts) {
+  // Keep the quoted threshold in step with the one isStagnant was configured with.
+  const filesDeltaMin = opts?.filesDeltaMin ?? 3;
+  // A missing variant list renders as an empty line instead of throwing.
+  const variantList = (signals.variantList ?? []).slice(0, 8).map((v) => `  • ${v}`).join("\n");
+  return [
+    `[STAGNATION DETECTED — DIAGNOSTIC MODE REQUIRED]`,
+    ``,
+    `Over the last ${signals.windowSamples} turns you have:`,
+    `  • Completed 0 new todos`,
+    `  • Written/edited only ${signals.filesDelta} unique file(s) (need ≥${filesDeltaMin} for healthy progress)`,
+    `  • Accumulated ${signals.failureSum} failures`,
+    `  • Tried ${signals.variantCount} different shell-command variants:`,
+    variantList,
+    ``,
+    `You are not making progress — you are trying surface-level variants of the same approach without diagnosing root cause. This is the failure mode that prevents real completion.`,
+    ``,
+    `MANDATORY NEXT ACTIONS (do NOT call task_complete; do NOT try another variant):`,
+    ``,
+    `1. READ THE FULL ERROR — re-read your most recent failure output ENTIRELY. If it's in a log packet, call log_explore({op:"errors"}) then log_explore({op:"lines", start:..., end:...}) for context. Do not skim.`,
+    ``,
+    `2. STATE A HYPOTHESIS in writing — what specifically is wrong? "I think X is failing because Y." Be concrete. Do NOT propose a fix yet.`,
+    ``,
+    `3. VERIFY ONE ASSUMPTION — pick the ONE thing you most BELIEVE to be true and test it with the smallest possible command:`,
+    `     • If you think a package is installed: ls node_modules/<name>/package.json`,
+    `     • If you think an env var is set: printenv <NAME>`,
+    `     • If you think a file imports correctly: head -5 <file>`,
+    `     • If you don't know what an error means: web_search("<exact error string>")`,
+    ``,
+    `4. CHECK SILENT FAILURES — npm install reporting "added N packages" does NOT mean ALL declared deps installed; npm sometimes drops packages with peer-dep conflicts without erroring. Verify each expected dep individually.`,
+    ``,
+    `DO NOT in your next response:`,
+    `  • Try another version, flag, or variant of any command in the list above`,
+    `  • Wipe node_modules / re-install — that hides the original error`,
+    `  • Call task_complete — being stuck on a debug problem is NEVER grounds for task_complete`,
+    ``,
+    `task_complete is ONLY for actual completion or unrecoverable hardware/permission errors. You are stuck on a fixable problem; diagnose it.`
+  ].join("\n");
+}
+/**
+ * Report whether the stagnation window shows a stuck agent.
+ *
+ * All four conditions must hold once the window has enough samples: no todo
+ * completed, too few files touched, enough failures, enough command variants.
+ *
+ * @param {object} signals - Uses `windowSamples`, `completedDelta`, `filesDelta`,
+ *   `failureSum` and `variantCount`.
+ * @param {object} [opts] - Optional overrides: `failureThreshold` (5),
+ *   `variantThreshold` (4), `filesDeltaMin` (3), `minSamples` (30).
+ * @returns {boolean} True when the window qualifies as stagnant.
+ */
+function isStagnant(signals, opts) {
+  const limits = {
+    failures: opts?.failureThreshold ?? 5,
+    variants: opts?.variantThreshold ?? 4,
+    files: opts?.filesDeltaMin ?? 3,
+    samples: opts?.minSamples ?? 30
+  };
+  // Too little history to judge.
+  if (signals.windowSamples < limits.samples) {
+    return false;
+  }
+  const noTodoProgress = signals.completedDelta === 0;
+  const fewFilesTouched = signals.filesDelta < limits.files;
+  const manyFailures = signals.failureSum >= limits.failures;
+  const manyVariants = signals.variantCount >= limits.variants;
+  return noTodoProgress && fewFilesTouched && manyFailures && manyVariants;
+}
+// Hit-count gates read by evaluate(): a cached read-like call is answered with a
+// "force_progress_block" once its per-fingerprint hit count reaches the gate.
+// Both stay undefined until init_critic() has run.
+var SHELL_THRESHOLD, FS_THRESHOLD;
+// NOTE(review): __esm is defined outside this view — presumably the bundler's
+// lazy module initializer; confirm before relying on run-once semantics.
+var init_critic = __esm({
+  "packages/orchestrator/dist/critic.js"() {
+    "use strict";
+    // Gate used when the proposed tool is "shell".
+    SHELL_THRESHOLD = 2;
+    // Gate used for every other tool that reaches the dedup check.
+    FS_THRESHOLD = 3;
+  }
+});
 // packages/orchestrator/dist/pressure-gate.js
 function detectPressure(message2) {
   const hasProfanity = PRESSURE_SIGNALS.test(message2);
@@ -514455,7 +514555,7 @@ function h10(t2) {
 function h11(t2) {
   return t2 * t2 * (t2 - 1);
 }
-function evaluate(controls, r2) {
+function evaluate2(controls, r2) {
   const { K: K2, N, P: P2, E: Eb, Pd, Ed, expansion, epsSigma, deltaMax } = controls;
   const eps2 = epsSigma * epsSigma;
   const u = new Float64Array(K2);
@@ -514731,7 +514831,7 @@ function deserialize(obj) {
 function rmse(controls, samples) {
   let sumSq = 0;
   for (const s2 of samples) {
-    const pred = evaluate(controls, s2.input);
+    const pred = evaluate2(controls, s2.input);
     for (let n2 = 0; n2 < controls.N; n2++) {
       const diff = pred[n2] - s2.output[n2];
       sumSq += diff * diff;
@@ -514906,7 +515006,7 @@ var init_embeddingAligner = __esm({
           const range = max - min;
           normalized[k] = range > 1e-10 ? Math.max(0, Math.min(1, (projected[k] - min) / range)) : 0.5;
         }
-        const aligned64 = evaluate(this.state.controls, normalized);
+        const aligned64 = evaluate2(this.state.controls, normalized);
         const result = new Float32Array(this.state.dstDim);
         for (let n2 = 0; n2 < this.state.dstDim; n2++)
           result[n2] = aligned64[n2];
@@ -514993,7 +515093,7 @@ var init_embeddingAligner = __esm({
         let avgCosine = 0;
         if (testSamples.length > 0) {
           for (const s2 of testSamples) {
-            const pred = evaluate(controls, s2.input);
+            const pred = evaluate2(controls, s2.input);
             avgCosine += cosine(pred, s2.output);
           }
           avgCosine /= testSamples.length;
@@ -516500,7 +516600,7 @@ __export(dist_exports2, {
   retrieveByPPR: () => retrieveByPPR,
   splanifoldCosine: () => cosine,
   splanifoldDeserialize: () => deserialize,
-  splanifoldEvaluate: () => evaluate,
+  splanifoldEvaluate: () => evaluate2,
   splanifoldFit: () => fit,
   splanifoldRmse: () => rmse,
   splanifoldSerialize: () => serialize
@@ -518229,6 +518329,52 @@ function getSystemPromptForTier(tier) {
       return SYSTEM_PROMPT;
   }
 }
+/**
+ * Heuristically decide whether a user message is a build/engineering task
+ * rather than a conversational request.
+ *
+ * Returns true when any of these hold:
+ *   - the message is longer than 2000 characters;
+ *   - its first 2000 characters contain a path with two or more `/segment` parts;
+ *   - its first 4000 characters (lower-cased) contain both an action verb
+ *     (implement, build, fix, ...) and a technical noun (file, api, react, ...).
+ *
+ * @param {string} task - The raw task text; empty or missing yields false.
+ * @returns {boolean}
+ */
+function detectTaskMode(task) {
+  const PATH_LIKE = /(\/[\w.-]+){2,}/;
+  const ACTION_VERB = /\b(implement|build|create|refactor|write|fix|migrate|deploy|generate|setup|set up|develop|design|integrate)\b/;
+  const TECH_NOUN = /\b(spec|file|module|component|api|endpoint|database|schema|test|build|next\.js|typescript|react|prisma|tailwind|sql|python|rust|go)\b/;
+
+  if (!task) {
+    return false;
+  }
+  const loweredHead = task.slice(0, 4e3).toLowerCase();
+  if (task.length > 2e3) {
+    return true;
+  }
+  if (PATH_LIKE.test(task.slice(0, 2e3))) {
+    return true;
+  }
+  // Keyword route: a verb alone is not enough, it must be paired with a technical noun.
+  return ACTION_VERB.test(loweredHead) && TECH_NOUN.test(loweredHead);
+}
+/**
+ * Strip chat-oriented material from a system prompt for task-mode runs.
+ *
+ * Removes, in order: the listed "## ..." sections (heading plus body up to the
+ * next "## " heading or the end of the prompt), every tool bullet line matching
+ * the listed patterns, the "**CHAT MODE**" block up to "**TASK MODE**", and the
+ * "**TASK MODE**" marker line. Runs of three or more newlines are then
+ * collapsed to two and the result is trimmed and newline-terminated.
+ *
+ * @param {string} prompt - The full system prompt.
+ * @returns {string} The slimmed prompt, ending in a single "\n".
+ */
+function slimSystemPromptForTaskMode(prompt) {
+  const SECTION_HEADERS_TO_REMOVE = [
+    /^##\s*Interactive\s*\/\s*Long-?Running Sessions\s*$/im,
+    /^##\s*Document Generation Strategy\s*$/im,
+    /^##\s*Calculations\s*[—-]\s*Always Execute, Never Guess\s*$/im,
+    /^##\s*Knowledge Gaps\s*[—-]\s*Search, Don't Hallucinate\s*$/im,
+    /^##\s*Self-Awareness( & Introspection)?\s*$/im,
+    /^##\s*Debugging\s*[—-]\s*Observe Before Reasoning\s*$/im
+  ];
+  const TOOL_LINES_TO_REMOVE = [
+    /^- nexus:.*$/im,
+    /^- background_run.*task_status.*task_output.*task_stop:.*$/im,
+    /^- (asr_listen|audio_capture|audio_playback|audio_analyze|camera_capture|desktop_click|bluetooth_scan|browser_action):.*$/im,
+    /^Voice\/TTS:.*$/im,
+    /^- Voice\/TTS:.*$/im,
+    /^- Desktop\/Vision:.*$/im,
+    /^- P2P:.*$/im
+  ];
+  const CHAT_MODE_BLOCK = /^\*\*CHAT MODE\*\*[\s\S]*?(?=\*\*TASK MODE\*\*)/im;
+  // A section body runs to the next "## " heading or to the end of the input.
+  // JavaScript has no \Z anchor (without the u flag it matches a literal "Z"),
+  // and $ means end-of-line under the m flag, so end-of-input is (?![\s\S]).
+  const SECTION_BODY = "[\\s\\S]*?(?=^##\\s|(?![\\s\\S]))";
+  let out = prompt;
+  for (const re of SECTION_HEADERS_TO_REMOVE) {
+    out = out.replace(new RegExp(re.source + SECTION_BODY, "im"), "");
+  }
+  for (const re of TOOL_LINES_TO_REMOVE) {
+    // Global: one pattern can match several lines (e.g. the device-tool
+    // alternation), and all of them must go, not just the first.
+    out = out.replace(new RegExp(re.source, "gim"), "");
+  }
+  out = out.replace(CHAT_MODE_BLOCK, "");
+  out = out.replace(/^\*\*TASK MODE\*\*[^\n]*\n/im, "");
+  out = out.replace(/\n{3,}/g, "\n\n");
+  return out.trim() + "\n";
+}
 function computeTodoReminder(input) {
   const turnsSinceWriteThreshold = input.turnsSinceWriteThreshold ?? 10;
   const turnsBetweenReminders = input.turnsBetweenReminders ?? 10;
@@ -518326,6 +518472,7 @@ var init_agenticRunner = __esm({
     init_dist();
     init_personality();
     init_promptLoader();
+    init_critic();
     init_pressure_gate();
     init_dist5();
     init_dist7();
@@ -518638,7 +518785,17 @@ var init_agenticRunner = __esm({
       async assembleContext(task, context2) {
         const sections = [];
         const pressureCue = pressureCheck(task);
-        const basePrompt = getSystemPromptForTier(this.options.modelTier) + pressureCue;
+        const rawPrompt = getSystemPromptForTier(this.options.modelTier);
+        const taskModeOn = detectTaskMode(task);
+        const slimmedPrompt = taskModeOn ? slimSystemPromptForTaskMode(rawPrompt) : rawPrompt;
+        const basePrompt = slimmedPrompt + pressureCue;
+        if (taskModeOn) {
+          this.emit({
+            type: "status",
+            content: `REG-19: TASK MODE detected — system prompt slimmed ${rawPrompt.length}→${slimmedPrompt.length} bytes`,
+            timestamp: (/* @__PURE__ */ new Date()).toISOString()
+          });
+        }
         const _BATCH_GUIDANCE = {
           small: "\n\n## Response batching\n\nEmit AT MOST 2 tool calls per response. After observing their results, plan the next 2 in your following response. Smaller batches let the orchestrator deliver cache/failure/progress signals to you between actions. Tool calls beyond the cap are dropped. Use todo_write between batches to mark progress.",
           medium: "\n\n## Response batching\n\nEmit AT MOST 4 tool calls per response. After observing their results, plan the next batch in your following response. Smaller batches let the orchestrator deliver cache/failure/progress signals to you between actions. Tool calls beyond the cap are dropped. Use todo_write between batches to mark progress.",
@@ -520556,6 +520713,20 @@ TASK: ${task}` : task;
         const STAG_FAILURE_THRESHOLD = 5;
         const STAG_VARIANT_THRESHOLD = 4;
         const STAG_FILES_DELTA_MIN = 3;
+        let injectionsThisTurn = 0;
+        const INJECTION_BUDGET_SOFT = 2;
+        const deferredSoftInjections = [];
+        const pushSoftInjection = (role, content) => {
+          if (injectionsThisTurn < INJECTION_BUDGET_SOFT) {
+            messages2.push({ role, content });
+            injectionsThisTurn++;
+            return true;
+          }
+          if (deferredSoftInjections.length < 6) {
+            deferredSoftInjections.push({ role, content });
+          }
+          return false;
+        };
         for (let turn = 0; turn < this.options.maxTurns; turn++) {
           clearTurnState(this._appState);
           this._maybeApplyThinkGuard();
@@ -520570,6 +520741,12 @@ TASK: ${task}` : task;
             this.emit({ type: "error", content: "Task aborted by user", timestamp: (/* @__PURE__ */ new Date()).toISOString() });
             break;
           }
+          injectionsThisTurn = 0;
+          while (deferredSoftInjections.length > 0 && injectionsThisTurn < INJECTION_BUDGET_SOFT) {
+            const next = deferredSoftInjections.shift();
+            messages2.push({ role: next.role, content: next.content });
+            injectionsThisTurn++;
+          }
           if (turn > stagnationCooldownUntilTurn && stagnationWindow.length >= STAG_MIN_SAMPLES) {
             const cutoffTurn = turn - STAG_WINDOW_TURNS;
             const cutoffTs = Date.now() - STAG_WINDOW_MS;
@@ -520582,53 +520759,30 @@ TASK: ${task}` : task;
               for (const s2 of stagnationWindow)
                 for (const p2 of s2.filesTouchedThisTurn)
                   fileSet.add(p2);
-              const filesDelta = fileSet.size;
-              const failureSum = stagnationWindow.reduce((a2, s2) => a2 + s2.failuresThisTurn, 0);
               const variantSet = /* @__PURE__ */ new Set();
               for (const s2 of stagnationWindow)
                 for (const p2 of s2.shellPrefixesThisTurn)
                   variantSet.add(p2);
-              const variantCount = variantSet.size;
-              if (completedDelta === 0 && filesDelta < STAG_FILES_DELTA_MIN && failureSum >= STAG_FAILURE_THRESHOLD && variantCount >= STAG_VARIANT_THRESHOLD) {
-                const variantList = [...variantSet].slice(0, 8).map((v) => `  • ${v}`).join("\n");
-                const stagMsg = [
-                  `[STAGNATION DETECTED — DIAGNOSTIC MODE REQUIRED]`,
-                  ``,
-                  `Over the last ${stagnationWindow.length} turns you have:`,
-                  `  • Completed 0 new todos`,
-                  `  • Written/edited only ${filesDelta} unique file(s) (need ≥${STAG_FILES_DELTA_MIN} for healthy progress)`,
-                  `  • Accumulated ${failureSum} failures`,
-                  `  • Tried ${variantCount} different shell-command variants:`,
-                  variantList,
-                  ``,
-                  `You are not making progress — you are trying surface-level variants of the same approach without diagnosing root cause. This is the failure mode that prevents real completion.`,
-                  ``,
-                  `MANDATORY NEXT ACTIONS (do NOT call task_complete; do NOT try another variant):`,
-                  ``,
-                  `1. READ THE FULL ERROR — re-read your most recent failure output ENTIRELY. If it's in a log packet, call log_explore({op:"errors"}) then log_explore({op:"lines", start:..., end:...}) for context. Do not skim.`,
-                  ``,
-                  `2. STATE A HYPOTHESIS in writing — what specifically is wrong? "I think X is failing because Y." Be concrete. Do NOT propose a fix yet.`,
-                  ``,
-                  `3. VERIFY ONE ASSUMPTION — pick the ONE thing you most BELIEVE to be true and test it with the smallest possible command:`,
-                  `     • If you think a package is installed: ls node_modules/<name>/package.json`,
-                  `     • If you think an env var is set: printenv <NAME>`,
-                  `     • If you think a file imports correctly: head -5 <file>`,
-                  `     • If you don't know what an error means: web_search("<exact error string>")`,
-                  ``,
-                  `4. CHECK SILENT FAILURES — npm install reporting "added N packages" does NOT mean ALL declared deps installed; npm sometimes drops packages with peer-dep conflicts without erroring. Verify each expected dep individually.`,
-                  ``,
-                  `DO NOT in your next response:`,
-                  `  • Try another version, flag, or variant of any command in the list above`,
-                  `  • Wipe node_modules / re-install — that hides the original error`,
-                  `  • Call task_complete — being stuck on a debug problem is NEVER grounds for task_complete`,
-                  ``,
-                  `task_complete is ONLY for actual completion or unrecoverable hardware/permission errors. You are stuck on a fixable problem; diagnose it.`
-                ].join("\n");
-                messages2.push({ role: "system", content: stagMsg });
+              const failureSum = stagnationWindow.reduce((a2, s2) => a2 + s2.failuresThisTurn, 0);
+              const signals = {
+                completedDelta,
+                filesDelta: fileSet.size,
+                failureSum,
+                variantCount: variantSet.size,
+                windowSamples: stagnationWindow.length,
+                variantList: [...variantSet]
+              };
+              if (isStagnant(signals, {
+                failureThreshold: STAG_FAILURE_THRESHOLD,
+                variantThreshold: STAG_VARIANT_THRESHOLD,
+                filesDeltaMin: STAG_FILES_DELTA_MIN,
+                minSamples: STAG_MIN_SAMPLES
+              })) {
+                messages2.push({ role: "system", content: buildStagnationDiagnostic(signals) });
                 stagnationCooldownUntilTurn = turn + 5;
                 this.emit({
                   type: "status",
-                  content: `STAGNATION DETECTED — injected diagnostic mode at turn ${turn} (${variantCount} variants, ${failureSum} failures, ${filesDelta} files in window)`,
+                  content: `STAGNATION DETECTED — injected diagnostic mode at turn ${turn} (${signals.variantCount} variants, ${signals.failureSum} failures, ${signals.filesDelta} files in window)`,
                   timestamp: (/* @__PURE__ */ new Date()).toISOString()
                 });
               }
@@ -520817,11 +520971,8 @@ Now call file_write with YOUR skeleton for this task.`
             if (toolHints.length > 0) {
               toolHints.sort((a2, b) => b.score - a2.score);
               const top = toolHints.slice(0, 5);
-              messages2.push({
-                role: "system",
-                content: `[Relevant tools for this task]
-${top.map((t2) => `- ${t2.name}: ${t2.desc}`).join("\n")}`
-              });
+              pushSoftInjection("system", `[Relevant tools for this task]
+${top.map((t2) => `- ${t2.name}: ${t2.desc}`).join("\n")}`);
             }
           }
           if (turn === 0 && (turnTier === "small" || turnTier === "medium")) {
@@ -520845,11 +520996,8 @@ ${top.map((t2) => `- ${t2.name}: ${t2.desc}`).join("\n")}`
             }
             hints.push("EFFICIENCY: Aim for 3-5 tool calls total. Each call should make measurable progress. Do not repeat a tool call with the same arguments.");
             if (hints.length > 0) {
-              messages2.push({
-                role: "system",
-                content: `[Efficiency Guide]
-${hints.join("\n")}`
-              });
+              pushSoftInjection("system", `[Efficiency Guide]
+${hints.join("\n")}`);
             }
           }
           if (turn === 0 && (turnTier === "small" || turnTier === "medium")) {
@@ -520859,21 +521007,18 @@ ${hints.join("\n")}`
             const hasMultiStepRequirement = taskGoal.length > 200 && (taskGoal.match(/\d\./g) || []).length >= 2;
             const isAnalysisTask = (taskGoal.match(/\banalyze\b|\baudit\b|\breview\b|\bdiagnose\b|\binvestigate\b|\bcompare\b|\bevaluate\b/gi) || []).length >= 1;
             if (hasMultiplePremises || hasConditionalLogic || hasMultiStepRequirement || isAnalysisTask) {
-              messages2.push({
-                role: "system",
-                content: [
-                  "[Structured Reasoning Guide]",
-                  "This task requires multi-step reasoning. Follow this structure:",
-                  "",
-                  "1. DECOMPOSE: List the sub-questions this task requires, from simplest to most complex.",
-                  "2. For each sub-question:",
-                  "   a. State what you KNOW (verified from evidence/tool output)",
-                  "   b. State what you ASSUME (hypotheses not yet confirmed)",
-                  "   c. Derive your conclusion using ONLY verified facts",
-                  "3. If a tool result contradicts your earlier reasoning, UPDATE your conclusions — don't ignore new evidence.",
-                  "4. Before your final answer, verify: does each conclusion follow from the evidence?"
-                ].join("\n")
-              });
+              pushSoftInjection("system", [
+                "[Structured Reasoning Guide]",
+                "This task requires multi-step reasoning. Follow this structure:",
+                "",
+                "1. DECOMPOSE: List the sub-questions this task requires, from simplest to most complex.",
+                "2. For each sub-question:",
+                "   a. State what you KNOW (verified from evidence/tool output)",
+                "   b. State what you ASSUME (hypotheses not yet confirmed)",
+                "   c. Derive your conclusion using ONLY verified facts",
+                "3. If a tool result contradicts your earlier reasoning, UPDATE your conclusions — don't ignore new evidence.",
+                "4. Before your final answer, verify: does each conclusion follow from the evidence?"
+              ].join("\n"));
             }
           }
           const turnBudget = turnTier === "small" ? 5 : turnTier === "medium" ? 8 : 0;
@@ -521442,16 +521587,6 @@ ${memoryLines.join("\n")}`
                 toolCallBudget.set(tc.name, budgetRemaining - 1);
               }
               const toolFingerprint = `${tc.name}:${argsKey}`;
-              if (this._littlemanRedundantBlocks.has(toolFingerprint)) {
-                this._littlemanRedundantBlocks.delete(toolFingerprint);
-                const cachedEntry2 = recentToolResults.get(toolFingerprint);
-                this.emit({ type: "tool_call", toolName: tc.name, toolArgs: tc.arguments, turn, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
-                const blockMsg = cachedEntry2 ? `[BLOCKED — this tool+args already succeeded. Re-served from cache:]
-${cachedEntry2.result.slice(0, 500)}` : `[BLOCKED — the observer confirmed this tool already succeeded with these arguments on a prior turn. Do NOT re-run. Use your prior findings to proceed.]`;
-                this.emit({ type: "tool_result", toolName: tc.name, success: true, content: blockMsg.slice(0, 100), turn, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
-                return { tc, output: blockMsg };
-              }
               const baseIsReadLike = ![
                 "file_write",
                 "file_edit",
@@ -521468,22 +521603,53 @@ ${cachedEntry2.result.slice(0, 500)}` : `[BLOCKED — the observer confirmed thi
                 "sub_agent",
                 "priority_delegate",
                 "ask_user",
-                // WO-TASK-02 — todo_write is a state-write tool. Calling it twice
-                // with the same args is idempotent (it just re-stores the same
-                // list) but the dedup detector was flagging it as a wasted call
-                // and blocking the planning workflow. The agent uses todo_write
-                // as its primary checkpoint mechanism so it MUST always execute.
                 "todo_write",
-                // nexus is also a state tool — connect is idempotent but the
-                // dedup warning was causing confused agents to bail out to
-                // shell workarounds (npm install, find /bin, etc.) when they
-                // saw "DUPLICATE CALL" after their first connect. Let the
-                // tool see every call and return the cached state itself.
                 "nexus"
               ].includes(tc.name);
               const isReadLike = baseIsReadLike || tc.name === "shell" && this._isShellCommandReadOnly(tc.arguments?.["command"] ?? tc.arguments?.["cmd"] ?? "");
-              const cachedEntry = recentToolResults.get(toolFingerprint);
-              if (isReadLike && cachedEntry !== void 0) {
+              const observerRedundantBlock = this._littlemanRedundantBlocks.has(toolFingerprint);
+              if (observerRedundantBlock) {
+                this._littlemanRedundantBlocks.delete(toolFingerprint);
+              }
+              const criticDecision = evaluate({
+                proposedCall: { tool: tc.name, args: tc.arguments ?? {} },
+                fingerprint: toolFingerprint,
+                isReadLike,
+                recentToolResults,
+                dedupHitCount,
+                recentFailures: this._recentFailures.map((f2) => ({
+                  fingerprint: f2.fingerprint,
+                  toolName: f2.tool,
+                  errorPreview: (f2.error || f2.output || "").slice(0, 200)
+                })),
+                stagnationSignals: null,
+                // stagnation gate handled at top-of-turn
+                stagnationGateActive: false,
+                observerRedundantBlock
+              });
+              if (criticDecision.decision === "observer_block") {
+                this.emit({ type: "tool_call", toolName: tc.name, toolArgs: tc.arguments, turn, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
+                const blockMsg = criticDecision.cachedResult ? `[BLOCKED — this tool+args already succeeded. Re-served from cache:]
+${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confirmed this tool already succeeded with these arguments on a prior turn. Do NOT re-run. Use your prior findings to proceed.]`;
+                this.emit({ type: "tool_result", toolName: tc.name, success: true, content: blockMsg.slice(0, 100), turn, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
+                return { tc, output: blockMsg };
+              }
+              if (criticDecision.decision === "force_progress_block") {
+                dedupHitCount.set(toolFingerprint, criticDecision.hitNumber);
+                this.emit({ type: "tool_call", toolName: tc.name, toolArgs: tc.arguments, turn, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
+                this.emit({
+                  type: "tool_result",
+                  toolName: tc.name,
+                  success: false,
+                  content: criticDecision.blockMessage.slice(0, 120),
+                  turn,
+                  timestamp: (/* @__PURE__ */ new Date()).toISOString()
+                });
+                return { tc, output: criticDecision.blockMessage };
+              }
+              if (criticDecision.decision === "serve_cached") {
+                dedupHitCount.set(toolFingerprint, criticDecision.hitNumber);
                 this.emit({
                   type: "tool_call",
                   toolName: tc.name,
@@ -521491,36 +521657,13 @@ ${cachedEntry2.result.slice(0, 500)}` : `[BLOCKED — the observer confirmed thi
                   turn,
                   timestamp: (/* @__PURE__ */ new Date()).toISOString()
                 });
-                const hits = (dedupHitCount.get(toolFingerprint) ?? 0) + 1;
-                dedupHitCount.set(toolFingerprint, hits);
-                const threshold = tc.name === "shell" ? 2 : DEDUP_ESCALATION_THRESHOLD;
-                if (hits >= threshold) {
-                  const argPreview = JSON.stringify(tc.arguments ?? {}).slice(0, 200);
-                  const blockMsg = `[FORCED PROGRESS BLOCK — you have called ${tc.name}(${argPreview}) ${hits} times with identical arguments and received the cached result each time. The data is not changing. You are stuck in a read-only loop instead of advancing the plan.
-REQUIRED before this tool will run again with these arguments:
-  • file_write or file_edit, OR
-  • todo_write that advances the plan, OR
-  • task_complete (if all phases are done).
-If you genuinely need this same data again, call a DIFFERENT tool first (one of the three above). Until then, refer to your conversation history — the result of this exact call is already there.]`;
-                  this.emit({
-                    type: "tool_result",
-                    toolName: tc.name,
-                    success: false,
-                    content: blockMsg.slice(0, 120),
-                    turn,
-                    timestamp: (/* @__PURE__ */ new Date()).toISOString()
-                  });
-                  return { tc, output: blockMsg };
-                }
-                const header = cachedEntry.compacted ? `[RE-SERVED FROM CACHE — the original result was compacted from context. Here is the data again. No need to call this tool again.]
+                const header = criticDecision.compacted ? `[RE-SERVED FROM CACHE — the original result was compacted from context. Here is the data again. No need to call this tool again.]
-` : `[DUPLICATE CALL #${hits} — you already called ${tc.name} with these exact arguments. The result is identical. Do NOT call this again. Use the data you already have to make progress. One more identical call will trigger a hard progress block.]
+` : `[DUPLICATE CALL #${criticDecision.hitNumber} — you already called ${tc.name} with these exact arguments. The result is identical. Do NOT call this again. Use the data you already have to make progress. One more identical call will trigger a hard progress block.]
 `;
-                const truncatedCache = cachedEntry.result.length > 500 ? cachedEntry.result.slice(0, 500) + `
-... [${cachedEntry.result.length - 500} chars omitted — same as before]` : cachedEntry.result;
+                const truncatedCache = criticDecision.cachedResult.length > 500 ? criticDecision.cachedResult.slice(0, 500) + `
+... [${criticDecision.cachedResult.length - 500} chars omitted — same as before]` : criticDecision.cachedResult;
                 const dedupOutput = header + truncatedCache;
                 this.emit({
                   type: "tool_result",

package/npm-shrinkwrap.json CHANGED Viewed

@@ -1,12 +1,12 @@
 {
   "name": "open-agents-ai",
-  "version": "0.187.475",
+  "version": "0.187.477",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "open-agents-ai",
-      "version": "0.187.475",
+      "version": "0.187.477",
       "hasInstallScript": true,
       "license": "CC-BY-NC-4.0",
       "dependencies": {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "open-agents-ai",
-  "version": "0.187.475",
+  "version": "0.187.477",
   "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
   "type": "module",
   "main": "./dist/index.js",