open-agents-ai 0.187.370 → 0.187.372

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/index.js +298 -13
  2. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -81,6 +81,7 @@ function loadConfigFile() {
81
81
  if (typeof parsed.dryRun === "boolean") result.dryRun = parsed.dryRun;
82
82
  if (typeof parsed.verbose === "boolean") result.verbose = parsed.verbose;
83
83
  if (typeof parsed.dbPath === "string") result.dbPath = parsed.dbPath;
84
+ if (typeof parsed.thinking === "boolean") result.thinking = parsed.thinking;
84
85
  return result;
85
86
  } catch {
86
87
  return {};
@@ -515664,6 +515665,46 @@ ${todoItems}
515664
515665
  </system-reminder>`;
515665
515666
  return { shouldInject: true, content, reason: "injected" };
515666
515667
  }
515668
/**
 * Remove every <think>…</think> block from a model response and trim the
 * remainder. Matching is case-insensitive so it agrees with the tag
 * detection in classifyThinkOutcome (which uses /<think>/i) — otherwise an
 * uppercase-tagged block would be detected but never stripped.
 * @param {string|null|undefined} s2 - raw assistant text
 * @returns {string|null|undefined} the input unchanged when falsy,
 *   otherwise the text with all think blocks removed and trimmed
 */
function stripThinkBlocks(s2) {
  if (!s2)
    return s2;
  return s2.replace(/<think>[\s\S]*?<\/think>/gi, "").trim();
}
515673
/**
 * Decide whether a request should run with think mode enabled.
 * Precedence, first match wins:
 *   1. OA_FORCE_NO_THINK=1           -> off (hard env kill-switch)
 *   2. loop-guard suppression        -> off
 *   3. request carries tools         -> off (tool calls run direct)
 *   4. explicit per-request boolean  -> as requested
 *   5. auto-heuristic over user/system text (unless OA_THINK_AUTO=0) -> on
 *   6. backend default
 * @param {{requestThink?: boolean, defaultThink?: boolean, hasTools?: boolean,
 *          messages?: Array<{role: string, content: unknown}>,
 *          suppressed?: boolean}} params
 * @returns {boolean|undefined} effective think flag (falls through to defaultThink)
 */
function computeEffectiveThink(params) {
  const forcedOff = process.env["OA_FORCE_NO_THINK"] === "1" || params.suppressed || params.hasTools;
  if (forcedOff) {
    return false;
  }
  if (typeof params.requestThink === "boolean") {
    return params.requestThink;
  }
  const autoHeuristicOn = process.env["OA_THINK_AUTO"] !== "0";
  if (autoHeuristicOn && Array.isArray(params.messages)) {
    // Fold user/system text into one lowercase blob; non-string content is ignored.
    const blob = params.messages
      .filter((m2) => m2.role === "user" || m2.role === "system")
      .map((m2) => (typeof m2.content === "string" ? m2.content : ""))
      .join("\n")
      .toLowerCase();
    const wantsReasoning = /\b(plan|decompose|analyze(?:\s+complex)?|step\s*by\s*step|reason through|think through|reason step)\b/.test(blob);
    if (wantsReasoning) {
      return true;
    }
  }
  return params.defaultThink;
}
515690
/**
 * Classify a raw think-mode response into a failure class.
 * @param {string|null|undefined} raw - full assistant text, tags included
 * @returns {"empty_after_strip"|"unclosed_think"|"runaway_think"|null}
 *   null means the response looks healthy.
 */
function classifyThinkOutcome(raw) {
  if (!raw)
    return "empty_after_strip";
  const opened = /<think>/i.test(raw);
  const closed = /<\/think>/i.test(raw);
  // An opening tag with no closing tag usually means generation was cut
  // off mid-reasoning.
  if (opened && !closed)
    return "unclosed_think";
  const visible = stripThinkBlocks(raw);
  if (visible.trim().length < 2)
    return "empty_after_strip";
  if (opened && closed) {
    // Think content dominating the response (>90% of the bytes) while the
    // visible answer stays under 40 chars is treated as a reasoning runaway.
    const hiddenLen = raw.length - visible.length;
    if (hiddenLen > raw.length * 0.9 && visible.trim().length < 40) {
      return "runaway_think";
    }
  }
  return null;
}
515667
515708
  var SYSTEM_PROMPT, SYSTEM_PROMPT_MEDIUM, SYSTEM_PROMPT_SMALL, VISUAL_TOOLS, AUDIO_TOOLS, SOCIAL_TOOLS, SPATIAL_TOOLS, CODE_TOOLS, AgenticRunner, OllamaAgenticBackend;
515668
515709
  var init_agenticRunner = __esm({
515669
515710
  "packages/orchestrator/dist/agenticRunner.js"() {
@@ -516486,6 +516527,40 @@ ${body}`;
516486
516527
  }
516487
516528
  }
516488
516529
  }
516530
+ /**
516531
+ * Think-loop-guard runner hook. Called once per turn at the top of the
516532
+ * agentic loop. Responsibilities:
516533
+ * 1. Consume OA_THINK_GUARD_RESET env var (written by /think reset) to
516534
+ * clear a prior suppression — the CLI can't talk to the backend
516535
+ * directly, so it drops a timestamp in the env and we pick it up.
516536
+ * 2. Emit a one-shot user-visible warning the first turn after the
516537
+ * guard trips, so the user knows why answers suddenly look different.
516538
+ */
516539
+ _lastThinkGuardResetAt = 0;
516540
+ _maybeApplyThinkGuard() {
516541
+ const resetRaw = process.env["OA_THINK_GUARD_RESET"];
516542
+ if (resetRaw) {
516543
+ const ts = Number(resetRaw);
516544
+ if (Number.isFinite(ts) && ts > this._lastThinkGuardResetAt) {
516545
+ this._lastThinkGuardResetAt = ts;
516546
+ if (typeof this.backend.resetThinkGuard === "function") {
516547
+ this.backend.resetThinkGuard();
516548
+ this.emit({
516549
+ type: "status",
516550
+ content: "🧠 Think-guard cleared — reasoning mode will re-enable on the next eligible request.",
516551
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
516552
+ });
516553
+ }
516554
+ }
516555
+ }
516556
+ if (typeof this.backend.consumeSuppressionNotice === "function" && this.backend.consumeSuppressionNotice()) {
516557
+ this.emit({
516558
+ type: "status",
516559
+ content: "⚠ Think-mode auto-suppressed — two consecutive empty/unclosed-<think> responses detected. Continuing with direct answers. Use `/think reset` to retry.",
516560
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
516561
+ });
516562
+ }
516563
+ }
516489
516564
  /**
516490
516565
  * Detect repetition in recent tool calls.
516491
516566
  * Returns a score 0-1 where 1 = fully repetitive (stuck in a loop).
@@ -516764,6 +516839,7 @@ TASK: ${task}` : task;
516764
516839
  }
516765
516840
  for (let turn = 0; turn < this.options.maxTurns; turn++) {
516766
516841
  clearTurnState(this._appState);
516842
+ this._maybeApplyThinkGuard();
516767
516843
  if (this._paused) {
516768
516844
  const shouldContinue = await this.waitIfPaused();
516769
516845
  if (!shouldContinue) {
@@ -518244,6 +518320,7 @@ You have ${this.options.maxTurns} more turns. Continue making progress. Call tas
518244
518320
  messages2.push(...compacted);
518245
518321
  }
518246
518322
  for (let turn = 0; turn < this.options.maxTurns; turn++) {
518323
+ this._maybeApplyThinkGuard();
518247
518324
  if (this._paused) {
518248
518325
  const shouldContinue = await this.waitIfPaused();
518249
518326
  if (!shouldContinue) {
@@ -521100,13 +521177,23 @@ ${description}`
521100
521177
  return resp;
521101
521178
  }
521102
521179
  };
521103
- OllamaAgenticBackend = class {
521180
+ OllamaAgenticBackend = class _OllamaAgenticBackend {
521104
521181
  baseUrl;
521105
521182
  model;
521106
521183
  apiKey;
521107
521184
  thinking;
521108
521185
  /** Abort signal — set by the runner so /stop can cancel in-flight requests */
521109
521186
  _abortSignal = null;
521187
+ // ── Think-loop guard (0.187.372) ──────────────────────────────────────
521188
+ // If the model keeps producing empty / unclosed-think responses, we
521189
+ // assume Qwen3 dual-mode is looping and start suppressing think for
521190
+ // this backend instance. User can clear via /think reset.
521191
+ _thinkFailStreak = 0;
521192
+ _thinkSuccessStreak = 0;
521193
+ _thinkSuppressed = false;
521194
+ _thinkSuppressedNotified = false;
521195
+ static _thinkFailThreshold = 2;
521196
+ static _thinkRecoveryThreshold = 6;
521110
521197
  /** Multi-key pool — round-robin rotation per request for load distribution */
521111
521198
  _keyPool = [];
521112
521199
  _keyIndex = 0;
@@ -521116,7 +521203,7 @@ ${description}`
521116
521203
  this.baseUrl = normalizeBaseUrl(baseUrl);
521117
521204
  this.model = model;
521118
521205
  this.apiKey = apiKey ?? "";
521119
- this.thinking = thinking ?? true;
521206
+ this.thinking = thinking ?? false;
521120
521207
  this._isAnthropic = /api\.anthropic\.com/i.test(baseUrl);
521121
521208
  }
521122
521209
  /** Set multiple API keys for round-robin rotation per request */
@@ -521128,6 +521215,61 @@ ${description}`
521128
521215
  setAbortSignal(signal) {
521129
521216
  this._abortSignal = signal;
521130
521217
  }
521218
/** Is think currently auto-suppressed by the loop-guard?
 * @returns {boolean} true while suppression is active; cleared either by
 *   resetThinkGuard() or by recordThinkOutcome() once enough consecutive
 *   healthy think responses arrive. */
isThinkSuppressed() {
  return this._thinkSuppressed;
}
521222
+ /** Clear the loop-guard — lets think re-enable on the next eligible request. */
521223
+ resetThinkGuard() {
521224
+ this._thinkFailStreak = 0;
521225
+ this._thinkSuccessStreak = 0;
521226
+ this._thinkSuppressed = false;
521227
+ this._thinkSuppressedNotified = false;
521228
+ }
521229
+ /**
521230
+ * Feed a completed assistant response into the loop-guard. We only
521231
+ * update counters on responses that WERE think=true — otherwise
521232
+ * think-off responses (the vast majority) would drive the counters
521233
+ * and mask the failure signal we're trying to detect.
521234
+ *
521235
+ * Failure classes (per classifyThinkOutcome) bump the fail streak.
521236
+ * Healthy think-mode responses bump the success streak and, past a
521237
+ * threshold, clear a prior suppression so think can come back on if
521238
+ * the model is behaving again.
521239
+ *
521240
+ * Returns the classification so callers can decide whether to
521241
+ * emit a warning / retry.
521242
+ */
521243
+ recordThinkOutcome(raw, wasThinkRequested) {
521244
+ if (!wasThinkRequested)
521245
+ return null;
521246
+ const cls = classifyThinkOutcome(raw);
521247
+ if (cls !== null) {
521248
+ this._thinkFailStreak++;
521249
+ this._thinkSuccessStreak = 0;
521250
+ if (this._thinkFailStreak >= _OllamaAgenticBackend._thinkFailThreshold && !this._thinkSuppressed) {
521251
+ this._thinkSuppressed = true;
521252
+ }
521253
+ } else {
521254
+ this._thinkSuccessStreak++;
521255
+ this._thinkFailStreak = 0;
521256
+ if (this._thinkSuppressed && this._thinkSuccessStreak >= _OllamaAgenticBackend._thinkRecoveryThreshold) {
521257
+ this._thinkSuppressed = false;
521258
+ this._thinkSuppressedNotified = false;
521259
+ }
521260
+ }
521261
+ return cls;
521262
+ }
521263
+ /** Pick up the one-shot "notify about suppression" flag. Returns true
521264
+ * the first time it's called after a trip; false thereafter until
521265
+ * the guard resets. Used by the runner to emit a single warning. */
521266
+ consumeSuppressionNotice() {
521267
+ if (this._thinkSuppressed && !this._thinkSuppressedNotified) {
521268
+ this._thinkSuppressedNotified = true;
521269
+ return true;
521270
+ }
521271
+ return false;
521272
+ }
521131
521273
  /** Build auth headers — adapts to provider (Bearer for most, x-api-key for Anthropic).
521132
521274
  * When a key pool is set, round-robins through keys per request. */
521133
521275
  authHeaders() {
@@ -521151,13 +521293,25 @@ ${description}`
521151
521293
  if (this._isAnthropic) {
521152
521294
  return this._anthropicChatCompletion(request);
521153
521295
  }
521296
+ const cleanedMessages = request.messages.map((m2) => m2.role === "assistant" && typeof m2.content === "string" ? { ...m2, content: stripThinkBlocks(m2.content) } : m2);
521297
+ const effectiveThink = computeEffectiveThink({
521298
+ requestThink: request.think,
521299
+ defaultThink: this.thinking,
521300
+ hasTools: Array.isArray(request.tools) && request.tools.length > 0,
521301
+ messages: cleanedMessages,
521302
+ suppressed: this._thinkSuppressed
521303
+ });
521304
+ let effectiveMaxTokens = request.maxTokens;
521305
+ if (effectiveThink === true && (effectiveMaxTokens ?? 0) < 4096) {
521306
+ effectiveMaxTokens = 4096;
521307
+ }
521154
521308
  const body = {
521155
521309
  model: this.model,
521156
- messages: request.messages,
521310
+ messages: cleanedMessages,
521157
521311
  tools: request.tools,
521158
521312
  temperature: request.temperature,
521159
- max_tokens: request.maxTokens,
521160
- think: this.thinking
521313
+ max_tokens: effectiveMaxTokens,
521314
+ think: effectiveThink
521161
521315
  };
521162
521316
  const fetchOpts = {
521163
521317
  method: "POST",
@@ -521176,6 +521330,71 @@ ${description}`
521176
521330
  const data = await resp.json();
521177
521331
  const choices = data.choices ?? [];
521178
521332
  const usage = data.usage;
521333
+ const firstChoice = choices[0];
521334
+ const responseText = firstChoice ? String(firstChoice.message?.content ?? "") : "";
521335
+ const outcome = this.recordThinkOutcome(responseText, effectiveThink === true);
521336
+ if (outcome !== null && effectiveThink === true) {
521337
+ const justSuppressed = this._thinkSuppressed && this._thinkFailStreak === _OllamaAgenticBackend._thinkFailThreshold;
521338
+ if (justSuppressed || outcome === "empty_after_strip" || outcome === "unclosed_think") {
521339
+ const retryBody = {
521340
+ model: this.model,
521341
+ messages: cleanedMessages,
521342
+ tools: request.tools,
521343
+ temperature: request.temperature,
521344
+ max_tokens: request.maxTokens,
521345
+ think: false
521346
+ };
521347
+ try {
521348
+ const retryOpts = {
521349
+ method: "POST",
521350
+ headers: this.authHeaders(),
521351
+ body: JSON.stringify(retryBody)
521352
+ };
521353
+ if (this._abortSignal)
521354
+ retryOpts.signal = this._abortSignal;
521355
+ const retryResp = await fetch(`${this.baseUrl}/v1/chat/completions`, retryOpts);
521356
+ if (retryResp.ok) {
521357
+ const retryData = await retryResp.json();
521358
+ const retryChoices = retryData.choices ?? [];
521359
+ const retryUsage = retryData.usage;
521360
+ if (retryChoices.length > 0) {
521361
+ return {
521362
+ choices: retryChoices.map((c8) => {
521363
+ const msg = c8.message;
521364
+ const toolCalls = msg.tool_calls ?? [];
521365
+ return {
521366
+ message: {
521367
+ content: msg.content || null,
521368
+ toolCalls: toolCalls.length > 0 ? toolCalls.map((tc) => {
521369
+ const fn = tc.function;
521370
+ let args;
521371
+ try {
521372
+ args = typeof fn.arguments === "string" ? JSON.parse(fn.arguments) : fn.arguments ?? {};
521373
+ } catch {
521374
+ const repaired = repairJson(fn.arguments);
521375
+ args = repaired ?? { _raw: fn.arguments };
521376
+ }
521377
+ return {
521378
+ id: tc.id || crypto.randomUUID(),
521379
+ name: fn.name,
521380
+ arguments: args
521381
+ };
521382
+ }) : void 0
521383
+ }
521384
+ };
521385
+ }),
521386
+ usage: retryUsage ? {
521387
+ totalTokens: retryUsage.total_tokens ?? 0,
521388
+ promptTokens: retryUsage.prompt_tokens,
521389
+ completionTokens: retryUsage.completion_tokens
521390
+ } : void 0
521391
+ };
521392
+ }
521393
+ }
521394
+ } catch {
521395
+ }
521396
+ }
521397
+ }
521179
521398
  return {
521180
521399
  choices: choices.map((c8) => {
521181
521400
  const msg = c8.message;
@@ -521314,15 +521533,27 @@ ${description}`
521314
521533
  * The existing chatCompletion() method is completely unmodified.
521315
521534
  */
521316
521535
  async *chatCompletionStream(request) {
521536
+ const cleanedMessages = request.messages.map((m2) => m2.role === "assistant" && typeof m2.content === "string" ? { ...m2, content: stripThinkBlocks(m2.content) } : m2);
521537
+ const effectiveThink = computeEffectiveThink({
521538
+ requestThink: request.think,
521539
+ defaultThink: this.thinking,
521540
+ hasTools: Array.isArray(request.tools) && request.tools.length > 0,
521541
+ messages: cleanedMessages,
521542
+ suppressed: this._thinkSuppressed
521543
+ });
521544
+ let effectiveMaxTokens = request.maxTokens;
521545
+ if (effectiveThink === true && (effectiveMaxTokens ?? 0) < 4096) {
521546
+ effectiveMaxTokens = 4096;
521547
+ }
521317
521548
  const body = {
521318
521549
  model: this.model,
521319
- messages: request.messages,
521550
+ messages: cleanedMessages,
521320
521551
  tools: request.tools,
521321
521552
  temperature: request.temperature,
521322
- max_tokens: request.maxTokens,
521553
+ max_tokens: effectiveMaxTokens,
521323
521554
  stream: true,
521324
521555
  stream_options: { include_usage: true },
521325
- think: this.thinking
521556
+ think: effectiveThink
521326
521557
  };
521327
521558
  const streamFetchOpts = {
521328
521559
  method: "POST",
@@ -521340,6 +521571,9 @@ ${description}`
521340
521571
  }
521341
521572
  let sseBuffer = "";
521342
521573
  const decoder = new TextDecoder();
521574
+ let accumulatedContent = "";
521575
+ let accumulatedThinking = "";
521576
+ let sawReasoningTokens = false;
521343
521577
  for await (const rawChunk of resp.body) {
521344
521578
  sseBuffer += decoder.decode(rawChunk, { stream: true });
521345
521579
  const parts = sseBuffer.split("\n\n");
@@ -521348,8 +521582,10 @@ ${description}`
521348
521582
  const line = part.trim();
521349
521583
  if (!line)
521350
521584
  continue;
521351
- if (line === "data: [DONE]")
521585
+ if (line === "data: [DONE]") {
521586
+ this._finalizeStreamGuard(effectiveThink, accumulatedContent, accumulatedThinking, sawReasoningTokens);
521352
521587
  return;
521588
+ }
521353
521589
  if (!line.startsWith("data: "))
521354
521590
  continue;
521355
521591
  try {
@@ -521373,9 +521609,12 @@ ${description}`
521373
521609
  const finishReason = choice.finish_reason;
521374
521610
  const reasoningToken = delta?.reasoning ?? delta?.reasoning_content;
521375
521611
  if (reasoningToken) {
521612
+ sawReasoningTokens = true;
521613
+ accumulatedThinking += reasoningToken;
521376
521614
  yield { type: "content", content: reasoningToken, thinking: true };
521377
521615
  }
521378
521616
  if (delta?.content) {
521617
+ accumulatedContent += delta.content;
521379
521618
  yield { type: "content", content: delta.content };
521380
521619
  }
521381
521620
  const tcDeltas = delta?.tool_calls;
@@ -521409,6 +521648,23 @@ ${description}`
521409
521648
  }
521410
521649
  }
521411
521650
  }
521651
+ this._finalizeStreamGuard(effectiveThink, accumulatedContent, accumulatedThinking, sawReasoningTokens);
521652
+ }
521653
+ /** Reconstruct a raw-looking assistant response from the streamed
521654
+ * parts, then feed it into the loop-guard. Used at stream end (both
521655
+ * the [DONE] case and the fell-off-the-end case). */
521656
+ _finalizeStreamGuard(thinkRequested, content, thinking, hadReasoningTokens) {
521657
+ if (!thinkRequested) {
521658
+ this.recordThinkOutcome(content, false);
521659
+ return;
521660
+ }
521661
+ let rawLike;
521662
+ if (hadReasoningTokens && thinking) {
521663
+ rawLike = `<think>${thinking}</think>${content}`;
521664
+ } else {
521665
+ rawLike = content;
521666
+ }
521667
+ this.recordThinkOutcome(rawLike, true);
521412
521668
  }
521413
521669
  };
521414
521670
  }
@@ -546123,12 +546379,41 @@ Clone a new voice: /voice clone <wav-file> [name]`);
546123
546379
  return "handled";
546124
546380
  }
546125
546381
  case "think": {
546126
- const isOn = ctx3.thinkToggle();
546382
+ const token = (arg || "").trim().toLowerCase();
546383
+ const desc = (s2) => s2 ? "🧠 models that support reasoning (Qwen3, DeepSeek-R1, etc.) will show their thinking chain; tool calls still run direct" : "⚡ direct-answer mode (reasoning suppressed); recommended for tool-heavy workflows";
546384
+ if (token === "status" || token === "?") {
546385
+ const cur = ctx3.config.thinking ?? false;
546386
+ renderInfo2(`Thinking mode: ${cur ? "on" : "off"} — ${desc(cur)}`);
546387
+ if (process.env["OA_THINK_AUTO"] !== "0") renderInfo2("Auto-heuristic active (set OA_THINK_AUTO=0 to disable) — user messages with plan/decompose/analyze/step-by-step/reason-through auto-flip to think=on, tool calls stay off.");
546388
+ if (process.env["OA_FORCE_NO_THINK"] === "1") renderWarning2("OA_FORCE_NO_THINK=1 forces off regardless of /think setting");
546389
+ return "handled";
546390
+ }
546391
+ if (token === "auto") {
546392
+ process.env["OA_THINK_AUTO"] = "1";
546393
+ renderInfo2("Thinking auto-heuristic enabled (default since 0.187.372). User message containing plan/decompose/analyze/step-by-step/reason-through auto-flips think=on; tool calls still force off. Disable with OA_THINK_AUTO=0.");
546394
+ return "handled";
546395
+ }
546396
+ if (token === "reset" || token === "clear") {
546397
+ process.env["OA_THINK_GUARD_RESET"] = String(Date.now());
546398
+ renderInfo2("Loop-guard reset requested. If think was auto-suppressed after empty/unclosed-think responses, it will re-enable on the next eligible request.");
546399
+ return "handled";
546400
+ }
546401
+ let isOn;
546402
+ if (token === "on" || token === "true" || token === "yes" || token === "1") {
546403
+ isOn = true;
546404
+ ctx3.config.thinking = true;
546405
+ } else if (token === "off" || token === "false" || token === "no" || token === "0") {
546406
+ isOn = false;
546407
+ ctx3.config.thinking = false;
546408
+ } else {
546409
+ isOn = ctx3.thinkToggle();
546410
+ }
546127
546411
  const save2 = hasLocal ? ctx3.saveLocalSettings.bind(ctx3) : ctx3.saveSettings.bind(ctx3);
546128
546412
  save2({ thinking: isOn });
546129
- renderInfo2(
546130
- `Thinking mode: ${isOn ? "on" : "off"}${hasLocal ? " (project-local)" : ""}` + (isOn ? " — models that support reasoning (Qwen3, DeepSeek-R1, etc.) will show their thinking chain" : " — reasoning chain suppressed, model responds directly")
546131
- );
546413
+ renderInfo2(`Thinking mode: ${isOn ? "on" : "off"}${hasLocal ? " (project-local)" : ""} — ${desc(isOn)}`);
546414
+ if (isOn) {
546415
+ renderInfo2("Note: max_tokens will auto-raise to ≥4096 per request to prevent <think> truncation.");
546416
+ }
546132
546417
  return "handled";
546133
546418
  }
546134
546419
  case "tools": {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "open-agents-ai",
3
- "version": "0.187.370",
3
+ "version": "0.187.372",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",