npm - omnius - Versions diffs - 1.0.182 → 1.0.184 - Mend

omnius 1.0.182 → 1.0.184

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/index.js CHANGED Viewed

@@ -550132,11 +550132,17 @@ function injectNoThinkDirective(messages2) {
   const target = messages2[lastUserIdx];
   if (!target || typeof target.content !== "string")
     return messages2;
-  if (/\/no_think\b/i.test(target.content))
+  const hasOllamaNoThink = /\/nothink\b/i.test(target.content);
+  const hasQwenNoThink = /\/no[_-]think\b/i.test(target.content);
+  if (hasOllamaNoThink && hasQwenNoThink)
     return messages2;
+  const suffix = [
+    hasOllamaNoThink ? null : "/nothink",
+    hasQwenNoThink ? null : "/no_think"
+  ].filter(Boolean).join("\n");
   const annotated = `${target.content}
-/no_think`;
+${suffix}`;
   return messages2.map((m2, i2) => i2 === lastUserIdx ? { ...m2, content: annotated } : m2);
 }
 function backendHttpErrorDetail(text) {
@@ -550154,6 +550160,8 @@ function isOllamaModelNotFoundResponse(status, text, model) {
 function computeEffectiveThink(params) {
   if (process.env["OMNIUS_FORCE_NO_THINK"] === "1")
     return false;
+  if (process.env["OMNIUS_ENABLE_THINKING"] !== "1")
+    return false;
   if (params.suppressed)
     return false;
   if (params.hasTools)
@@ -550172,18 +550180,9 @@ function computeEffectiveThink(params) {
   return params.defaultThink;
 }
 function sanitizeHistoryThink(messages2) {
-  let lastAsstIdx = -1;
-  for (let i2 = messages2.length - 1; i2 >= 0; i2--) {
-    if (messages2[i2]?.role === "assistant") {
-      lastAsstIdx = i2;
-      break;
-    }
-  }
-  return messages2.map((m2, i2) => {
+  return messages2.map((m2) => {
     if (m2.role !== "assistant" || typeof m2.content !== "string")
       return m2;
-    if (i2 === lastAsstIdx)
-      return m2;
     return { ...m2, content: stripThinkBlocks(m2.content) };
   });
 }
@@ -563608,10 +563607,11 @@ ${description}`
         if (effectiveThink === true && (effectiveMaxTokens ?? 0) < 4096) {
           effectiveMaxTokens = 4096;
         }
+        const requestMessages = effectiveThink ? cleanedMessages : injectNoThinkDirective(cleanedMessages);
         const responseFormat = request.responseFormat ?? request.response_format;
         const body = {
           model: this.model,
-          messages: cleanedMessages,
+          messages: requestMessages,
           tools: request.tools,
           temperature: request.temperature,
           max_tokens: effectiveMaxTokens,
@@ -563620,7 +563620,7 @@ ${description}`
         if (responseFormat !== void 0) {
           body["response_format"] = responseFormat;
         }
-        const reqNumCtx = request.numCtx;
+        const reqNumCtx = request.numCtx ?? request.num_ctx;
         if (Number.isFinite(reqNumCtx) && (reqNumCtx ?? 0) > 0) {
           const opts = body["options"] ?? {};
           opts["num_ctx"] = reqNumCtx;
@@ -563705,7 +563705,7 @@ ${description}`
           const justSuppressed = this._thinkSuppressed && this._thinkFailStreak === _OllamaAgenticBackend._thinkFailThreshold;
           const shouldRetryThinkGuard = outcome !== null && effectiveThink === true && (justSuppressed || outcome === "empty_after_strip" || outcome === "unclosed_think");
           if (shouldRetryThinkGuard || shouldRecoverFromEmpty) {
-            const retryMessages = injectNoThinkDirective(cleanedMessages);
+            const retryMessages = injectNoThinkDirective(requestMessages);
             const retryBody = {
               model: this.model,
               messages: retryMessages,
@@ -563892,7 +563892,7 @@ ${description}`
        * Ollama pool routing as non-stream completions.
        */
       async *chatCompletionStream(request) {
-        const cleanedMessages = normalizeMessagesForStrictOpenAI(request.messages.map((m2) => m2.role === "assistant" && typeof m2.content === "string" ? { ...m2, content: stripThinkBlocks(m2.content) } : m2));
+        const cleanedMessages = normalizeMessagesForStrictOpenAI(sanitizeHistoryThink(request.messages));
         let effectiveThink = computeEffectiveThink({
           requestThink: request.think,
           defaultThink: this.thinking,
@@ -563907,10 +563907,11 @@ ${description}`
         if (effectiveThink === true && (effectiveMaxTokens ?? 0) < 4096) {
           effectiveMaxTokens = 4096;
         }
+        const requestMessages = effectiveThink ? cleanedMessages : injectNoThinkDirective(cleanedMessages);
         const responseFormat = request.responseFormat ?? request.response_format;
         const body = {
           model: this.model,
-          messages: cleanedMessages,
+          messages: requestMessages,
           tools: request.tools,
           temperature: request.temperature,
           max_tokens: effectiveMaxTokens,
@@ -563921,7 +563922,7 @@ ${description}`
         if (responseFormat !== void 0) {
           body["response_format"] = responseFormat;
         }
-        const reqNumCtx = request.numCtx;
+        const reqNumCtx = request.numCtx ?? request.num_ctx;
         if (Number.isFinite(reqNumCtx) && (reqNumCtx ?? 0) > 0) {
           const opts = body["options"] ?? {};
           opts["num_ctx"] = reqNumCtx;
@@ -564176,6 +564177,57 @@ var init_nexusBackend = __esm({
         this.authKey = authKey || "";
         this.thinking = thinking ?? false;
       }
+      effectiveThink(request) {
+        if (process.env["OMNIUS_FORCE_NO_THINK"] === "1")
+          return false;
+        if (process.env["OMNIUS_ENABLE_THINKING"] !== "1")
+          return false;
+        if (Array.isArray(request.tools) && request.tools.length > 0)
+          return false;
+        if (request.think === true)
+          return true;
+        if (request.think === false)
+          return false;
+        return this.thinking === true;
+      }
+      noThinkMessages(messages2) {
+        let lastUserIdx = -1;
+        for (let i2 = messages2.length - 1; i2 >= 0; i2--) {
+          if (messages2[i2]?.role === "user") {
+            lastUserIdx = i2;
+            break;
+          }
+        }
+        if (lastUserIdx < 0)
+          return messages2;
+        const target = messages2[lastUserIdx];
+        if (!target || typeof target.content !== "string")
+          return messages2;
+        const hasOllamaNoThink = /\/nothink\b/i.test(target.content);
+        const hasQwenNoThink = /\/no[_-]think\b/i.test(target.content);
+        if (hasOllamaNoThink && hasQwenNoThink)
+          return messages2;
+        const suffix = [
+          hasOllamaNoThink ? null : "/nothink",
+          hasQwenNoThink ? null : "/no_think"
+        ].filter(Boolean).join("\n");
+        return messages2.map((m2, i2) => i2 === lastUserIdx ? { ...m2, content: `${target.content}
+${suffix}` } : m2);
+      }
+      requestMessages(request, effectiveThink) {
+        return effectiveThink ? request.messages : this.noThinkMessages(request.messages);
+      }
+      applyOptionalRequestFields(daemonArgs, request) {
+        const responseFormat = request.responseFormat ?? request.response_format;
+        if (responseFormat !== void 0) {
+          daemonArgs.response_format = JSON.stringify(responseFormat);
+        }
+        const numCtx = request.numCtx ?? request.num_ctx;
+        if (Number.isFinite(numCtx) && (numCtx ?? 0) > 0) {
+          daemonArgs.num_ctx = String(numCtx);
+        }
+      }
       /** Reset the consecutive failure counter (called on endpoint switch / reconnect) */
       resetFailures() {
         this.consecutiveFailures = 0;
@@ -564191,9 +564243,10 @@ var init_nexusBackend = __esm({
           err.fatal = true;
           throw err;
         }
+        const effectiveThink = this.effectiveThink(request);
         const daemonArgs = {
           model: this.model,
-          messages: JSON.stringify(request.messages),
+          messages: JSON.stringify(this.requestMessages(request, effectiveThink)),
           tools: JSON.stringify(request.tools),
           temperature: String(request.temperature),
           max_tokens: String(request.maxTokens)
@@ -564204,7 +564257,8 @@ var init_nexusBackend = __esm({
         if (this.authKey) {
           daemonArgs.auth_key = this.authKey;
         }
-        daemonArgs.think = String(this.thinking);
+        daemonArgs.think = String(effectiveThink);
+        this.applyOptionalRequestFields(daemonArgs, request);
         let rawResult;
         try {
           rawResult = await this.sendFn("remote_infer", daemonArgs, request.timeoutMs || 12e4);
@@ -564303,9 +564357,10 @@ var init_nexusBackend = __esm({
       async *chatCompletionStream(request) {
         const streamFile = join97(tmpdir18(), `nexus-stream-${randomBytes19(6).toString("hex")}.jsonl`);
         writeFileSync38(streamFile, "", "utf8");
+        const effectiveThink = this.effectiveThink(request);
         const daemonArgs = {
           model: this.model,
-          messages: JSON.stringify(request.messages),
+          messages: JSON.stringify(this.requestMessages(request, effectiveThink)),
           tools: JSON.stringify(request.tools),
           temperature: String(request.temperature),
           max_tokens: String(request.maxTokens),
@@ -564315,7 +564370,8 @@ var init_nexusBackend = __esm({
           daemonArgs.target_peer = this.targetPeer;
         if (this.authKey)
           daemonArgs.auth_key = this.authKey;
-        daemonArgs.think = String(this.thinking);
+        daemonArgs.think = String(effectiveThink);
+        this.applyOptionalRequestFields(daemonArgs, request);
         let rawResult;
         try {
           rawResult = await this.sendFn("remote_infer", daemonArgs, request.timeoutMs || 12e4);
@@ -591098,6 +591154,7 @@ __export(setup_exports, {
   checkOllamaUpdate: () => checkOllamaUpdate,
   checkPythonVenv: () => checkPythonVenv,
   checkToolSupport: () => checkToolSupport,
+  classifyOllamaThinkingTreatment: () => classifyOllamaThinkingTreatment,
   computeInferenceScore: () => computeInferenceScore,
   createExpandedVariant: () => createExpandedVariant,
   createExpandedVariantAsync: () => createExpandedVariantAsync,
@@ -591125,6 +591182,7 @@ __export(setup_exports, {
   repairAllExpandedVariants: () => repairAllExpandedVariants,
   runElevatedCommand: () => runElevatedCommand,
   runSetupWizard: () => runSetupWizard,
+  shouldBakeNoThinkIntoOllamaModelfile: () => shouldBakeNoThinkIntoOllamaModelfile,
   updateOllama: () => updateOllama
 });
 import * as readline from "node:readline";
@@ -592840,23 +592898,26 @@ ${c3.cyan(OMNIUS_FIRST_RUN_BANNER)}
     const createModelfile = await ask(rl, `  Create optimized model "${c3.bold(customName)}" with ${ctx3.label} context? (Y/n) `);
     if (createModelfile.toLowerCase() !== "n") {
       try {
-        const numPredict = Math.min(16384, Math.max(2048, Math.floor(ctx3.numCtx * 0.25)));
-        const modelfileContent = [
-          `FROM ${selectedVariant.tag}`,
-          `PARAMETER num_ctx ${ctx3.numCtx}`,
-          `PARAMETER temperature 0`,
-          `PARAMETER num_predict ${numPredict}`,
-          `PARAMETER stop "<|endoftext|>"`
-        ].join("\n");
+        const modelfileCandidates = expandedVariantContentCandidates(selectedVariant.tag, ctx3.numCtx);
         const modelDir2 = join115(homedir37(), ".omnius", "models");
         mkdirSync56(modelDir2, { recursive: true });
         const modelfilePath = join115(modelDir2, `Modelfile.${customName}`);
-        writeFileSync51(modelfilePath, modelfileContent + "\n", "utf8");
         process.stdout.write(`  ${c3.dim("Creating model...")} `);
-        execSync51(`ollama create ${customName} -f ${modelfilePath}`, {
-          stdio: "pipe",
-          timeout: 12e4
-        });
+        for (let i2 = 0; i2 < modelfileCandidates.length; i2++) {
+          writeFileSync51(modelfilePath, modelfileCandidates[i2] + "\n", "utf8");
+          try {
+            execSync51(`ollama create ${customName} -f ${modelfilePath}`, {
+              stdio: "pipe",
+              timeout: 12e4
+            });
+            break;
+          } catch (err) {
+            if (i2 === 0 && modelfileCandidates.length > 1 && ollamaCreateNothinkRejected(err)) {
+              continue;
+            }
+            throw err;
+          }
+        }
         process.stdout.write(`${c3.green("✔")}
 `);
         setConfigValue("model", customName);
@@ -593439,6 +593500,29 @@ function parseShowNumCtx2(show) {
   }
   return 0;
 }
+function classifyOllamaThinkingTreatment(modelName) {
+  const normalized = modelName.replace(/^omnius-/i, "").replace(/:latest$/i, "").toLowerCase();
+  if (/\bgpt[-_]?oss\b/.test(normalized)) return "gpt-oss-levels";
+  if (/(?:^|[-_/:])(?:qwq|qvq)(?:[-_/:]|$)/.test(normalized) || /(?:^|[-_/:])thinking(?:[-_/:]|$)/.test(normalized) || /[-_]thinking(?:[-_/:]|$)/.test(normalized)) {
+    return "thinking-only";
+  }
+  if (/(?:^|[-_/:])qwen3(?:[._-]?\d+)?(?:[-_/:]|$)/.test(normalized) || /(?:^|[-_/:])qwen3(?:vl|omni)(?:[-_/:]|$)/.test(normalized) || /deepseek[-_]?r1/.test(normalized) || /deepseek[-_]?v?3[._-]1/.test(normalized)) {
+    return "toggleable";
+  }
+  return "none";
+}
+function shouldBakeNoThinkIntoOllamaModelfile(modelName) {
+  return classifyOllamaThinkingTreatment(modelName) === "toggleable";
+}
+function parseShowNoThink(show) {
+  const sources = [show.parameters, show.modelfile];
+  for (const source of sources) {
+    if (!source) continue;
+    if (/\b(?:PARAMETER\s+)?nothink\s+(?:true|1|on|yes)\b/i.test(source)) return true;
+    if (/\b(?:PARAMETER\s+)?think\s+(?:false|0|off|no)\b/i.test(source)) return true;
+  }
+  return false;
+}
 async function checkExpandedVariant(modelName, backendUrl2) {
   if (modelName.startsWith("omnius-")) return null;
   try {
@@ -593518,7 +593602,7 @@ async function readExpandedVariantState(backendUrl2, modelName) {
     if (baseModel && (baseModel.startsWith("/") || /blobs\/sha256[-:]/.test(baseModel))) {
       baseModel = null;
     }
-    return { currentNumCtx, baseModel };
+    return { currentNumCtx, baseModel, hasNoThink: parseShowNoThink(showData) };
   } catch {
     return null;
   }
@@ -593526,50 +593610,93 @@ async function readExpandedVariantState(backendUrl2, modelName) {
 /**
  * Drop a trailing ":latest" tag (any letter case) from a model name; every
  * other tag is left untouched.
  *
  * @param {string} modelName - Model name, possibly tagged.
  * @returns {string} The name without a trailing ":latest".
  */
 function stripVariantTag(modelName) {
   const implicitTag = /:latest$/i;
   return modelName.replace(implicitTag, "");
 }
-function createExpandedVariantContent(baseModel, numCtx) {
+function createExpandedVariantContent(baseModel, numCtx, options2 = {}) {
   if (baseModel.startsWith("/") || /blobs\/sha256[-:]/.test(baseModel)) {
     throw new Error(
       `createExpandedVariantContent: refusing to use blob-path base "${baseModel}". Pass the user-facing model name (e.g. "qwen3.6:latest") instead.`
     );
   }
   const numPredict = Math.min(16384, Math.max(2048, Math.floor(numCtx * 0.25)));
-  return [
+  const lines = [
     `FROM ${baseModel}`,
     `PARAMETER num_ctx ${numCtx}`,
+    ...options2.includeNoThink ? [
+      `# Keep toggleable reasoning models in direct-answer mode by default.`,
+      `PARAMETER nothink true`
+    ] : [],
     `PARAMETER temperature 0`,
     `PARAMETER num_predict ${numPredict}`,
     `PARAMETER stop "<|endoftext|>"`
+  ];
+  return lines.join("\n");
+}
+function expandedVariantContentCandidates(baseModel, numCtx) {
+  if (!shouldBakeNoThinkIntoOllamaModelfile(baseModel)) {
+    return [createExpandedVariantContent(baseModel, numCtx)];
+  }
+  return [
+    createExpandedVariantContent(baseModel, numCtx, { includeNoThink: true }),
+    createExpandedVariantContent(baseModel, numCtx, { includeNoThink: false })
+  ];
+}
+function ollamaCreateNothinkRejected(err) {
+  const anyErr = err;
+  const text = [
+    anyErr?.stderr?.toString?.() ?? "",
+    anyErr?.stdout?.toString?.() ?? "",
+    anyErr?.message ?? ""
   ].join("\n");
+  return /nothink|unknown parameter|invalid parameter|unsupported parameter/i.test(text);
 }
 function createExpandedVariantNamed(targetModel, baseModel, specs, sizeGB, kvBytesPerToken, archMax) {
   const ctx3 = calculateExpandedVariantContextWindow(specs, sizeGB, kvBytesPerToken, archMax);
-  const modelfileContent = createExpandedVariantContent(baseModel, ctx3.numCtx);
+  const modelfileCandidates = expandedVariantContentCandidates(baseModel, ctx3.numCtx);
   try {
     const modelDir2 = join115(homedir37(), ".omnius", "models");
     mkdirSync56(modelDir2, { recursive: true });
     const modelfilePath = join115(modelDir2, `Modelfile.${targetModel}`);
-    writeFileSync51(modelfilePath, modelfileContent + "\n", "utf8");
-    execSync51(`ollama create ${targetModel} -f ${modelfilePath}`, {
-      stdio: "pipe",
-      timeout: 12e4
-    });
-    return targetModel;
+    for (let i2 = 0; i2 < modelfileCandidates.length; i2++) {
+      writeFileSync51(modelfilePath, modelfileCandidates[i2] + "\n", "utf8");
+      try {
+        execSync51(`ollama create ${targetModel} -f ${modelfilePath}`, {
+          stdio: "pipe",
+          timeout: 12e4
+        });
+        return targetModel;
+      } catch (err) {
+        if (i2 === 0 && modelfileCandidates.length > 1 && ollamaCreateNothinkRejected(err)) {
+          continue;
+        }
+        throw err;
+      }
+    }
+    return null;
   } catch {
     return null;
   }
 }
 async function createExpandedVariantNamedAsync(targetModel, baseModel, specs, sizeGB, kvBytesPerToken, archMax) {
   const ctx3 = calculateExpandedVariantContextWindow(specs, sizeGB, kvBytesPerToken, archMax);
-  const modelfileContent = createExpandedVariantContent(baseModel, ctx3.numCtx);
+  const modelfileCandidates = expandedVariantContentCandidates(baseModel, ctx3.numCtx);
   try {
     const modelDir2 = join115(homedir37(), ".omnius", "models");
     mkdirSync56(modelDir2, { recursive: true });
     const modelfilePath = join115(modelDir2, `Modelfile.${targetModel}`);
-    writeFileSync51(modelfilePath, modelfileContent + "\n", "utf8");
-    await execAsync2(`ollama create ${targetModel} -f ${modelfilePath}`, {
-      timeout: 12e4
-    });
-    return targetModel;
+    for (let i2 = 0; i2 < modelfileCandidates.length; i2++) {
+      writeFileSync51(modelfilePath, modelfileCandidates[i2] + "\n", "utf8");
+      try {
+        await execAsync2(`ollama create ${targetModel} -f ${modelfilePath}`, {
+          timeout: 12e4
+        });
+        return targetModel;
+      } catch (err) {
+        if (i2 === 0 && modelfileCandidates.length > 1 && ollamaCreateNothinkRejected(err)) {
+          continue;
+        }
+        throw err;
+      }
+    }
+    return null;
   } catch {
     return null;
   }
@@ -606896,6 +607023,10 @@ Clone a new voice: /voice clone <wav-file> [name]`);
           renderWarning(
             "OMNIUS_FORCE_NO_THINK=1 forces off regardless of /think setting"
           );
+        else if (cur && process.env["OMNIUS_ENABLE_THINKING"] !== "1")
+          renderWarning(
+            "OMNIUS_ENABLE_THINKING is not set; /think is saved but backend requests remain direct-answer mode."
+          );
         return "handled";
       }
       if (token === "auto") {
@@ -606934,6 +607065,11 @@ Clone a new voice: /voice clone <wav-file> [name]`);
         renderInfo(
           "Note: max_tokens will auto-raise to ≥4096 per request to prevent <think> truncation."
         );
+        if (process.env["OMNIUS_ENABLE_THINKING"] !== "1") {
+          renderWarning(
+            "Thinking is hard-disabled by default. Set OMNIUS_ENABLE_THINKING=1 before launch for /think on or /think auto to affect backend requests."
+          );
+        }
       }
       return "handled";
     }
@@ -629145,7 +629281,7 @@ function telegramRouterTimeoutMs(configTimeoutMs, minMs = 1e4, maxMs) {
     10
   );
   const floor = Number.isFinite(minMs) && minMs > 0 ? minMs : 1e4;
-  const configuredCap = Number.isFinite(envRaw) && envRaw >= floor ? envRaw : 9e4;
+  const configuredCap = Number.isFinite(envRaw) && envRaw >= floor ? envRaw : 3e4;
   const callerCap = Number.isFinite(maxMs) && (maxMs ?? 0) >= floor ? maxMs : configuredCap;
   const cap = Math.max(floor, Math.min(configuredCap, callerCap));
   const requested = Number.isFinite(configTimeoutMs) && (configTimeoutMs ?? 0) > 0 ? configTimeoutMs : cap;
@@ -629172,6 +629308,9 @@ function telegramRouterDiagnosticAttemptLooksLikeTimeout(attempt) {
 /**
  * Report whether a router diagnostic attempt threw with an error that
  * `telegramRouterErrorLooksLikeBackendLiveness` recognizes. Attempts with any
  * other status are never a match.
  *
  * @param {{status: string, error?: string}} attempt - Diagnostic attempt record.
  * @returns {boolean} False for non-"threw" attempts, otherwise the helper's verdict.
  */
 function telegramRouterDiagnosticAttemptLooksLikeBackendLiveness(attempt) {
   if (attempt.status !== "threw") {
     return false;
   }
   return telegramRouterErrorLooksLikeBackendLiveness(attempt.error ?? "");
 }
+function telegramRouterDiagnosticIsDualEmptyVisible(diag) {
+  return diag.jsonModeStatus === "empty-after-strip" && diag.plainStatus === "empty-after-strip";
+}
 function telegramThinkSuppressedRequest(request) {
   const messages2 = Array.isArray(request.messages) ? request.messages.slice() : [];
   let appended = false;
@@ -629179,18 +629318,24 @@ function telegramThinkSuppressedRequest(request) {
     const m2 = messages2[i2];
     if (!m2 || m2.role !== "user") continue;
     const content = typeof m2.content === "string" ? m2.content : "";
-    if (/\/no_think\b/i.test(content)) {
+    const hasOllamaNoThink = /\/nothink\b/i.test(content);
+    const hasQwenNoThink = /\/no[_-]think\b/i.test(content);
+    if (hasOllamaNoThink && hasQwenNoThink) {
       appended = true;
       break;
     }
-    messages2[i2] = { ...m2, content: content.endsWith("\n") ? `${content}/no_think` : `${content}
+    const suffix = [
+      hasOllamaNoThink ? null : "/nothink",
+      hasQwenNoThink ? null : "/no_think"
+    ].filter(Boolean).join("\n");
+    messages2[i2] = { ...m2, content: content.endsWith("\n") ? `${content}${suffix}` : `${content}
-/no_think` };
+${suffix}` };
     appended = true;
     break;
   }
   if (!appended) {
-    messages2.push({ role: "user", content: "/no_think" });
+    messages2.push({ role: "user", content: "/nothink\n/no_think" });
   }
   return { ...request, messages: messages2, think: false };
 }
@@ -630779,7 +630924,7 @@ function renderTelegramSubAgentError(username, error) {
   process.stdout.write(`    ${c3.dim("│")} ${c3.magenta("✘")} @${username}: ${c3.dim(preview)}
 `);
 }
-var TELEGRAM_TOOL_ACTION_GROUPS, TELEGRAM_TOOL_ACTION_GROUP, TELEGRAM_TOOL_MUTATING_GROUPS, DEFAULT_TELEGRAM_TOOL_GROUP_POLICY, TELEGRAM_TOOL_BUTTON_LABELS, TELEGRAM_SAFETY_PROMPT, ADMIN_DM_PROMPT, ADMIN_GROUP_PROMPT, TELEGRAM_PUBLIC_SOUL_PROFILE, TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT, TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT, TELEGRAM_PUBLIC_VISION_STACK_CONTRACT, GROUP_REPLY_DISCRETION_PROMPT, TELEGRAM_CHAT_MODE_PROMPT, ADMIN_CHAT_PROFILE_PROMPT, TELEGRAM_ACTION_RESPONSE_CONTRACT, TELEGRAM_EXTERNAL_ACQUISITION_CONTRACT, TELEGRAM_LINK_INTEGRITY_CONTRACT, TELEGRAM_INTERACTION_DECISION_RESPONSE_FORMAT, TELEGRAM_INTERACTION_DECISION_MINIMAL_SCHEMA, TELEGRAM_INTERACTION_DECISION_REPAIR_SCHEMA, TELEGRAM_CHAT_REPLY_RESPONSE_FORMAT, TELEGRAM_SPACED_URL_RE, TELEGRAM_HTTP_URL_RE, TELEGRAM_STUCK_SELF_TALK_PREFIXES, TELEGRAM_CHAT_HISTORY_LIMIT, TELEGRAM_CONTEXT_RECENT_DEFAULT, TELEGRAM_CONTEXT_LINE_LIMIT, TELEGRAM_CONTEXT_SAMPLE_LIMIT, TELEGRAM_MEMORY_CARD_LIMIT, TELEGRAM_MEMORY_NOTE_LIMIT, TELEGRAM_ASSOCIATIVE_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_USER_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_ACTION_LIMIT, TELEGRAM_ASSOCIATIVE_RELATION_LIMIT, TELEGRAM_MEMORY_STOPWORDS, TELEGRAM_MEMORY_GENERIC_QUERY_TOKENS, TELEGRAM_SUB_AGENT_BOUNDED_OPTIONS, TELEGRAM_SUB_AGENT_DEFAULT_LIMIT, TELEGRAM_SUB_AGENT_MAX_LIMIT, TELEGRAM_SUB_AGENT_BURST_CONTEXT_LIMIT, TELEGRAM_ADMIN_LIVE_PANEL_PAGES, TELEGRAM_ADMIN_LIVE_MUTATION_TOOLS, TELEGRAM_PUBLIC_HELP_COMMANDS2, TELEGRAM_REMINDER_SLASH_COMMANDS, TELEGRAM_REFLECTION_SLASH_COMMANDS, TELEGRAM_PUBLIC_BOT_COMMAND_NAMES, TELEGRAM_IMAGE_EXTENSIONS, MEDIA_CACHE_TTL_MS, TELEGRAM_CHANNEL_DMN_SWEEP_MS, TELEGRAM_CHANNEL_DMN_IDLE_AFTER_MS, TELEGRAM_CHANNEL_DMN_MIN_INTERVAL_MS, TELEGRAM_CHANNEL_DMN_MIN_MESSAGES, TELEGRAM_ALLOWED_UPDATES, TELEGRAM_PUBLIC_TOOL_QUOTAS, TelegramBridge;
+var TELEGRAM_TOOL_ACTION_GROUPS, TELEGRAM_TOOL_ACTION_GROUP, TELEGRAM_TOOL_MUTATING_GROUPS, DEFAULT_TELEGRAM_TOOL_GROUP_POLICY, TELEGRAM_TOOL_BUTTON_LABELS, TELEGRAM_SAFETY_PROMPT, ADMIN_DM_PROMPT, ADMIN_GROUP_PROMPT, TELEGRAM_PUBLIC_SOUL_PROFILE, TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT, TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT, TELEGRAM_PUBLIC_VISION_STACK_CONTRACT, GROUP_REPLY_DISCRETION_PROMPT, TELEGRAM_CHAT_MODE_PROMPT, ADMIN_CHAT_PROFILE_PROMPT, TELEGRAM_ACTION_RESPONSE_CONTRACT, TELEGRAM_EXTERNAL_ACQUISITION_CONTRACT, TELEGRAM_LINK_INTEGRITY_CONTRACT, TELEGRAM_INTERACTION_DECISION_RESPONSE_FORMAT, TELEGRAM_INTERACTION_DECISION_MINIMAL_SCHEMA, TELEGRAM_INTERACTION_DECISION_REPAIR_SCHEMA, TELEGRAM_CHAT_REPLY_RESPONSE_FORMAT, TELEGRAM_SPACED_URL_RE, TELEGRAM_HTTP_URL_RE, TELEGRAM_STUCK_SELF_TALK_PREFIXES, TELEGRAM_CHAT_HISTORY_LIMIT, TELEGRAM_CONTEXT_RECENT_DEFAULT, TELEGRAM_CONTEXT_LINE_LIMIT, TELEGRAM_CONTEXT_SAMPLE_LIMIT, TELEGRAM_MEMORY_CARD_LIMIT, TELEGRAM_MEMORY_NOTE_LIMIT, TELEGRAM_ASSOCIATIVE_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_USER_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_ACTION_LIMIT, TELEGRAM_ASSOCIATIVE_RELATION_LIMIT, TELEGRAM_MEMORY_STOPWORDS, TELEGRAM_MEMORY_GENERIC_QUERY_TOKENS, TELEGRAM_SUB_AGENT_BOUNDED_OPTIONS, TELEGRAM_SUB_AGENT_DEFAULT_LIMIT, TELEGRAM_SUB_AGENT_MAX_LIMIT, TELEGRAM_SUB_AGENT_BURST_CONTEXT_LIMIT, TELEGRAM_ADMIN_LIVE_PANEL_PAGES, TELEGRAM_ADMIN_LIVE_MUTATION_TOOLS, TELEGRAM_PUBLIC_HELP_COMMANDS2, TELEGRAM_REMINDER_SLASH_COMMANDS, TELEGRAM_REFLECTION_SLASH_COMMANDS, TELEGRAM_PUBLIC_BOT_COMMAND_NAMES, TELEGRAM_IMAGE_EXTENSIONS, MEDIA_CACHE_TTL_MS, TELEGRAM_CHANNEL_DMN_SWEEP_MS, TELEGRAM_CHANNEL_DMN_IDLE_AFTER_MS, TELEGRAM_CHANNEL_DMN_MIN_INTERVAL_MS, TELEGRAM_CHANNEL_DMN_MIN_MESSAGES, TELEGRAM_ALLOWED_UPDATES, TELEGRAM_DEFAULT_LONG_POLL_TIMEOUT_SECONDS, TELEGRAM_DEFAULT_ROUTER_MODEL_CANDIDATES, TELEGRAM_PUBLIC_TOOL_QUOTAS, TelegramBridge;
 var init_telegram_bridge = __esm({
   "packages/cli/src/tui/telegram-bridge.ts"() {
     "use strict";
@@ -631223,6 +631368,21 @@ Telegram link integrity contract:
     TELEGRAM_CHANNEL_DMN_MIN_INTERVAL_MS = 20 * 60 * 1e3;
     TELEGRAM_CHANNEL_DMN_MIN_MESSAGES = 4;
     TELEGRAM_ALLOWED_UPDATES = ["message", "guest_message", "callback_query", "poll", "message_reaction", "message_reaction_count"];
+    TELEGRAM_DEFAULT_LONG_POLL_TIMEOUT_SECONDS = 50;
+    TELEGRAM_DEFAULT_ROUTER_MODEL_CANDIDATES = [
+      "qwen3:0.6b",
+      "qwen3:1.7b",
+      "qwen3:4b",
+      "qwen3:8b",
+      "qwen2.5:3b",
+      "qwen2.5:7b",
+      "llama3.2:1b",
+      "llama3.2:3b",
+      "gemma3:1b",
+      "gemma3:4b",
+      "phi3:mini",
+      "phi4-mini:latest"
+    ];
     TELEGRAM_PUBLIC_TOOL_QUOTAS = {
       web: { limit: 20, windowMs: 60 * 6e4 },
       media: { limit: 30, windowMs: 60 * 6e4 },
@@ -631256,6 +631416,7 @@ Telegram link integrity contract:
       pollLoopPromise = null;
       pollFatalNotified = false;
       lastUpdateId = 0;
+      telegramRouterModelCache = null;
       state = {
         active: false,
         botUserId: void 0,
@@ -631288,10 +631449,14 @@ Telegram link integrity contract:
        * capacity and flood the TUI.
        */
       telegramActiveWorkSessions = /* @__PURE__ */ new Set();
+      telegramActiveWorkGenerations = /* @__PURE__ */ new Map();
+      telegramActiveWorkStartedAtMs = /* @__PURE__ */ new Map();
       /** Queued Telegram sessions waiting for a global work slot. */
       telegramQueuedSessionWork = /* @__PURE__ */ new Map();
       telegramDispatchQueuedTimer = null;
       telegramDispatchQueuedAtMs = 0;
+      telegramQueueDiagnosticLastAtMs = 0;
+      telegramPollWarningLastAtMs = 0;
       /** Lightweight chat history by chat/guest session key */
       chatHistory = /* @__PURE__ */ new Map();
       /** Participant and tone state by chat/guest session key */
@@ -631927,6 +632092,63 @@ No scoped reflection artifact exists yet for this chat. Use <code>/reflect</code
         if (!Number.isFinite(parsed)) return 350;
         return Math.max(0, Math.min(2e3, Math.floor(parsed)));
       }
+      telegramQueueDiagnosticIntervalMs() {
+        const raw = Number.parseInt(process.env["OMNIUS_TG_QUEUE_DIAGNOSTIC_MS"] ?? "", 10);
+        if (Number.isFinite(raw) && raw >= 5e3 && raw <= 3e5) return raw;
+        return 3e4;
+      }
+      maybeLogTelegramQueueDiagnostic(reason) {
+        if (this.telegramQueuedSessionWork.size === 0) return;
+        const now = Date.now();
+        const interval = this.telegramQueueDiagnosticIntervalMs();
+        if (now - this.telegramQueueDiagnosticLastAtMs < interval) return;
+        this.telegramQueueDiagnosticLastAtMs = now;
+        const queued = [...this.telegramQueuedSessionWork.values()].sort((a2, b) => a2.enqueuedAtMs - b.enqueuedAtMs).slice(0, 4).map((work) => {
+          const age = formatTelegramPipelineDuration(now - work.enqueuedAtMs);
+          const live = this.telegramSessionIsLive(work.sessionKey) ? "blocked:same-session-live" : "ready";
+          return `${work.sessionKey} age=${age} bundled=${work.messageCount} ${live}`;
+        });
+        const active = [...this.activeTelegramInteractionSessionKeys()].slice(0, 6);
+        const inferences = this.getTelegramActiveInferences().slice(0, 4).map((inf) => `${inf.id}/${inf.kind}/${inf.model} elapsed=${inf.elapsedSec.toFixed(1)}s ttfb=${inf.ttfbSec === void 0 ? "waiting" : `${inf.ttfbSec.toFixed(1)}s`}`);
+        this.tuiWrite(() => renderTelegramSubAgentEvent(
+          "queue",
+          `queue diagnostic (${reason}): active ${this.activeTelegramInteractionCount()}/${this.getSubAgentLimit()} [${active.join(", ") || "none"}]; queued ${this.telegramQueuedSessionWork.size} [${queued.join(" | ")}]; inferences [${inferences.join(" | ") || "none"}]`
+        ));
+      }
+      nextTelegramWorkGeneration(sessionKey) {
+        const generation = (this.telegramActiveWorkGenerations.get(sessionKey) ?? 0) + 1;
+        this.telegramActiveWorkGenerations.set(sessionKey, generation);
+        return generation;
+      }
+      telegramWorkGenerationIsCurrent(sessionKey, generation) {
+        return this.telegramActiveWorkGenerations.get(sessionKey) === generation;
+      }
+      telegramPreAgentWorkMaxIdleMs() {
+        const routerMs = telegramRouterTimeoutMs(this.agentConfig?.timeoutMs);
+        const raw = Number.parseInt(process.env["OMNIUS_TG_PRE_AGENT_MAX_IDLE_MS"] ?? "", 10);
+        if (Number.isFinite(raw) && raw >= 3e4 && raw <= 9e5) return raw;
+        return Math.max(12e4, routerMs + 3e4);
+      }
+      reapStaleTelegramPreAgentWork() {
+        const now = Date.now();
+        const maxIdleMs = this.telegramPreAgentWorkMaxIdleMs();
+        for (const sessionKey of [...this.telegramActiveWorkSessions]) {
+          if (this.subAgents.has(sessionKey) || this.activeChatSessions.has(sessionKey)) continue;
+          const startedAt2 = this.telegramActiveWorkStartedAtMs.get(sessionKey);
+          if (!startedAt2) continue;
+          const idleMs = now - startedAt2;
+          if (idleMs <= maxIdleMs) continue;
+          const generation = this.telegramActiveWorkGenerations.get(sessionKey) ?? 0;
+          this.telegramActiveWorkGenerations.set(sessionKey, generation + 1);
+          this.telegramActiveWorkSessions.delete(sessionKey);
+          this.telegramActiveWorkStartedAtMs.delete(sessionKey);
+          this.refreshActiveTelegramInteractionCount();
+          this.tuiWrite(() => renderTelegramSubAgentEvent(
+            "queue",
+            `watchdog: released stale pre-agent Telegram work pin for ${sessionKey} after ${Math.round(idleMs / 1e3)}s; queued messages may dispatch now`
+          ));
+        }
+      }
       dispatchQueuedTelegramSessionWorkSoon(delayMs = 0) {
         const dueAt = Date.now() + Math.max(0, delayMs);
         if (this.telegramDispatchQueuedTimer && this.telegramDispatchQueuedAtMs <= dueAt) return;
@@ -631956,6 +632178,9 @@ No scoped reflection artifact exists yet for this chat. Use <code>/reflect</code
             this.dispatchQueuedTelegramSessionWorkSoon(Math.max(0, nextDue - Date.now()));
           }
         }
+        if (this.telegramQueuedSessionWork.size > 0) {
+          this.maybeLogTelegramQueueDiagnostic("dispatch");
+        }
         this.refreshActiveTelegramInteractionCount();
       }
       buildTelegramQueuedSessionWork(sessionKey, msg, toolContext, now) {
@@ -631999,11 +632224,16 @@ No scoped reflection artifact exists yet for this chat. Use <code>/reflect</code
           return;
         }
         this.telegramActiveWorkSessions.add(work.sessionKey);
+        this.telegramActiveWorkStartedAtMs.set(work.sessionKey, Date.now());
+        const generation = this.nextTelegramWorkGeneration(work.sessionKey);
         this.refreshActiveTelegramInteractionCount();
-        void this.processTelegramMessageWork(work).catch((err) => {
+        void this.processTelegramMessageWork(work, generation).catch((err) => {
           this.tuiWrite(() => renderWarning(`Telegram sub-agent error: ${err instanceof Error ? err.message : String(err)}`));
         }).finally(() => {
-          this.telegramActiveWorkSessions.delete(work.sessionKey);
+          if (this.telegramWorkGenerationIsCurrent(work.sessionKey, generation)) {
+            this.telegramActiveWorkSessions.delete(work.sessionKey);
+            this.telegramActiveWorkStartedAtMs.delete(work.sessionKey);
+          }
           this.refreshActiveTelegramInteractionCount();
           this.dispatchQueuedTelegramSessionWorkSoon();
         });
@@ -635238,7 +635468,7 @@ ${lines.join("\n")}`);
           `Current Telegram message text (untrusted user data):
 ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
           "",
-          "/no_think"
+          "/nothink\n/no_think"
         ].filter(Boolean).join("\n");
         try {
           const result = await this.telegramRouterJsonCompletion(
@@ -635293,7 +635523,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
             },
             inferenceKind,
             sessionKey,
-            { stream: false, reason: "router-json" }
+            { stream: false, reason: "router-json", modelName: diagnostics?.backendModel }
           );
           const visible = jsonModeResult.choices.some(
             (choice) => stripTelegramHiddenThinking(choice.message.content ?? "").trim().length > 0
@@ -635349,7 +635579,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
             suppressed,
             inferenceKind,
             sessionKey,
-            { stream: false, reason: "router-plain-retry" }
+            { stream: false, reason: "router-plain-retry", modelName: diagnostics?.backendModel }
           );
           if (diagnostics) {
             const plainVisible = plainResult.choices.some(
@@ -635402,7 +635632,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
        *      hard-deadline retire path becomes diagnosable instead of opaque
        */
       async telegramObservableInference(backend, request, kind, sessionKey, options2 = {}) {
-        const model = this.agentConfig?.model ?? "?";
+        const model = options2.modelName ?? this.agentConfig?.model ?? "?";
         const promptTokens = estimatePromptTokensFromRequest(request);
         const broker = getModelBroker();
         const trainCtx = await broker.getNctxTrain(model).catch(() => null);
@@ -635436,7 +635666,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
             if (!streamAllowed && process.env["OMNIUS_BROKER_TRACE"] === "1") {
               this.tuiWrite(() => renderTelegramSubAgentEvent(
                 sessionKey,
-                `inference ${id}: non-stream direct (${options2.reason ?? "requested"}) ${this.telegramInferenceRequestDiagnostic(requestWithCtx)}`
+                `inference ${id}: non-stream direct (${options2.reason ?? "requested"}) ${this.telegramInferenceRequestDiagnostic(requestWithCtx, model)}`
               ));
             }
           } else if (typeof streamFn !== "function") {
@@ -635469,18 +635699,22 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
           this.deregisterTelegramInference(id);
         }
       }
-      telegramBackendDiagnostic() {
+      telegramBackendDiagnostic(modelOverride, routerModelSource, routerModelDetail) {
         const config = this.agentConfig;
         if (!config) return "backend=unconfigured model=?";
-        return `backend=${config.backendType} url=${config.backendUrl} model=${config.model}`;
+        const model = modelOverride || config.model;
+        const source = routerModelSource ? ` router_model_source=${routerModelSource}` : "";
+        const detail = routerModelDetail ? ` router_model_detail=${compactTelegramRouterDiagnosticText(routerModelDetail, 180)}` : "";
+        const main2 = model !== config.model ? ` main_model=${config.model}` : "";
+        return `backend=${config.backendType} url=${config.backendUrl} model=${model}${main2}${source}${detail}`;
       }
-      telegramInferenceRequestDiagnostic(request) {
+      telegramInferenceRequestDiagnostic(request, modelOverride) {
         const responseFormat = request.responseFormat ?? request.response_format;
         const responseFormatType = responseFormat && typeof responseFormat["type"] === "string" ? responseFormat["type"] : responseFormat ? "present" : "none";
         const numCtx = request.numCtx;
         const think = request.think;
         const tools = Array.isArray(request.tools) ? request.tools.length : 0;
-        return `${this.telegramBackendDiagnostic()} response_format=${responseFormatType} num_ctx=${Number.isFinite(numCtx) ? numCtx : "unset"} think=${think === void 0 ? "default" : String(think)} tools=${tools} timeoutMs=${Number.isFinite(request.timeoutMs) ? request.timeoutMs : "unset"}`;
+        return `${this.telegramBackendDiagnostic(modelOverride)} response_format=${responseFormatType} num_ctx=${Number.isFinite(numCtx) ? numCtx : "unset"} think=${think === void 0 ? "default" : String(think)} tools=${tools} timeoutMs=${Number.isFinite(request.timeoutMs) ? request.timeoutMs : "unset"}`;
       }
       telegramStreamInactivityDiagnostic(request, inferenceId, inactivityMs, contentChars, thinkingChars) {
         const entry = this.telegramActiveInferences.get(inferenceId);
@@ -635489,7 +635723,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
         const idle = entry ? `${((now - entry.lastTokenAt) / 1e3).toFixed(1)}s` : "unknown";
         const ttfb = entry?.firstChunkAt !== void 0 ? `${((entry.firstChunkAt - entry.startTs) / 1e3).toFixed(1)}s` : "never";
         const phase = entry?.firstChunkAt === void 0 ? "before-first-chunk" : "mid-stream";
-        return `stream-inactivity: no chunks for ${(inactivityMs / 1e3).toFixed(0)}s (phase=${phase}; elapsed=${elapsed}; idle=${idle}; ttfb=${ttfb}; content=${contentChars}c thinking=${thinkingChars}c; ${this.telegramInferenceRequestDiagnostic(request)}; stream_endpoint=no-sse-chunk)`;
+        return `stream-inactivity: no chunks for ${(inactivityMs / 1e3).toFixed(0)}s (phase=${phase}; elapsed=${elapsed}; idle=${idle}; ttfb=${ttfb}; content=${contentChars}c thinking=${thinkingChars}c; ${this.telegramInferenceRequestDiagnostic(request, entry?.model)}; stream_endpoint=no-sse-chunk)`;
       }
       /**
        * Drive a chatCompletionStream to exhaustion, accumulating tokens into a
@@ -635705,7 +635939,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
           `Original router output:`,
           rawPreview,
           ``,
-          `/no_think`
+          `/nothink
+/no_think`
         ].join("\n");
         try {
           const result = await this.telegramRouterJsonCompletion(backend, {
@@ -635718,8 +635953,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
             ],
             tools: [],
             temperature: 0,
-            maxTokens: 800,
-            timeoutMs: telegramRouterTimeoutMs(timeoutMs, 8e3, 2e4),
+            maxTokens: 500,
+            timeoutMs: telegramRouterTimeoutMs(timeoutMs, 8e3, 15e3),
             think: false
           }, diagnostics, "router-repair", sessionKey);
           const repairedText = result.choices[0]?.message?.content ?? "";
@@ -635772,7 +636007,8 @@ ${userPrompt.slice(-4e3)}` : userPrompt;
           `Router context (trailing-window):`,
           trimmedUserPrompt,
           ``,
-          `/no_think`
+          `/nothink
+/no_think`
         ].join("\n");
         try {
           const result = await this.telegramRouterJsonCompletion(backend, {
@@ -635785,8 +636021,8 @@ ${userPrompt.slice(-4e3)}` : userPrompt;
             ],
             tools: [],
             temperature: 0,
-            maxTokens: 1e3,
-            timeoutMs: telegramRouterTimeoutMs(timeoutMs, 1e4, 3e4),
+            maxTokens: 500,
+            timeoutMs: telegramRouterTimeoutMs(timeoutMs, 8e3, 15e3),
             think: false
           }, diagnostics, "router-strict-retry", sessionKey);
           const retryText = result.choices[0]?.message?.content ?? "";
@@ -635980,6 +636216,7 @@ ${retryText}`,
        * never fires.
        */
       reapStaleTelegramSubAgents() {
+        this.reapStaleTelegramPreAgentWork();
         const maxIdleMs = this.telegramSubAgentMaxIdleMs();
         const now = Date.now();
         const stale = [];
@@ -636000,6 +636237,7 @@ ${retryText}`,
             clearInterval(agent.typingInterval);
             agent.typingInterval = null;
           }
+          this.stopTelegramPublicProgressMessage(agent);
           try {
             agent.runner?.abort?.();
           } catch {
@@ -636019,6 +636257,118 @@ ${retryText}`,
           this.subAgentViewCallbacks?.onStatus(agent.viewId, "failed");
           this.subAgentViewCallbacks?.onComplete(agent.viewId);
         }
+        if (this.telegramQueuedSessionWork.size > 0) {
+          this.maybeLogTelegramQueueDiagnostic("watchdog");
+          this.dispatchQueuedTelegramSessionWorkSoon();
+        }
+      }
+      /**
+       * Whether automatic router-model selection is switched on.
+       *
+       * Reads `OMNIUS_TG_ROUTER_AUTO_MODEL` (trimmed, lower-cased). Only the
+       * values "0", "false" and "off" disable it; anything else, including an
+       * unset variable, leaves it enabled.
+       *
+       * @returns `false` for the three disabling values, otherwise `true`.
+       */
+      telegramRouterAutoModelEnabled() {
+        const setting = (process.env["OMNIUS_TG_ROUTER_AUTO_MODEL"] ?? "").trim().toLowerCase();
+        const disablingValues = ["0", "false", "off"];
+        return !disablingValues.includes(setting);
+      }
+      /**
+       * List the candidate router model names, de-duplicated in first-seen order.
+       *
+       * When `OMNIUS_TG_ROUTER_MODEL_CANDIDATES` is non-blank it is split on
+       * commas and/or whitespace; otherwise
+       * `TELEGRAM_DEFAULT_ROUTER_MODEL_CANDIDATES` is used.
+       *
+       * @returns A new array of unique candidate names.
+       */
+      telegramRouterCandidateModels() {
+        const configured = (process.env["OMNIUS_TG_ROUTER_MODEL_CANDIDATES"] ?? "").trim();
+        let names = TELEGRAM_DEFAULT_ROUTER_MODEL_CANDIDATES;
+        if (configured) {
+          names = configured.split(/[,\s]+/).map((entry) => entry.trim()).filter(Boolean);
+        }
+        return [...new Set(names)];
+      }
+      /**
+       * Reduce a model name to the form used for matching: trimmed,
+       * lower-cased, and with a trailing ":latest" removed.
+       *
+       * @param name10 Model name to normalize.
+       * @returns The normalized name.
+       */
+      normalizeOllamaModelNameForMatch(name10) {
+        const folded = name10.trim().toLowerCase();
+        const latestTag = ":latest";
+        return folded.endsWith(latestTag) ? folded.slice(0, -latestTag.length) : folded;
+      }
+      /**
+       * Fetch `<baseUrl>/api/tags` and return the model names it reports.
+       *
+       * Trailing slashes on `baseUrl` are dropped before the path is appended.
+       * The request carries a 2s timeout signal when `AbortSignal.timeout` is
+       * available.
+       *
+       * @param baseUrl Base URL of the Ollama server.
+       * @returns Non-empty string `name` values from the response's `models`
+       *   array, or `[]` when `models` is not an array.
+       * @throws Error when the response status is not OK.
+       */
+      async fetchOllamaInstalledModelNames(baseUrl) {
+        const endpoint = `${baseUrl.replace(/\/+$/, "")}/api/tags`;
+        const makeTimeoutSignal = AbortSignal.timeout;
+        const requestInit = { signal: void 0 };
+        if (typeof makeTimeoutSignal === "function") {
+          requestInit.signal = makeTimeoutSignal(2e3);
+        }
+        const response = await fetch(endpoint, requestInit);
+        if (!response.ok) {
+          throw new Error(`ollama /api/tags returned HTTP ${response.status}`);
+        }
+        const payload = await response.json();
+        if (!Array.isArray(payload.models)) {
+          return [];
+        }
+        // Keep only entries whose `name` is a non-empty string.
+        return payload.models.flatMap((entry) => typeof entry.name === "string" && entry.name ? [entry.name] : []);
+      }
+      /**
+       * Choose the model used for Telegram router calls and build a backend for it.
+       *
+       * Resolution order:
+       *  1. `OMNIUS_TG_ROUTER_MODEL`, unless it is blank or one of
+       *     0/false/off/same/main (case-insensitive) -> source "env".
+       *  2. `config.model` when the backend type is not "ollama" or automatic
+       *     selection is disabled -> source "main" (no `detail` field).
+       *  3. A cached decision less than 60s old for the same backend URL, main
+       *     model and candidate list.
+       *  4. The first candidate whose normalized name matches a name returned by
+       *     `fetchOllamaInstalledModelNames` -> source "auto-small".
+       *  5. `config.model` when that lookup throws or nothing matches -> source
+       *     "main".
+       * Outcomes 4 and 5 are written to `telegramRouterModelCache`.
+       *
+       * @param config Agent config; `backendUrl`, `backendType`, `model` and
+       *   `apiKey` are read.
+       * @returns An object with `backend`, `model`, `source` and, outside case 2,
+       *   `detail`.
+       */
+      async resolveTelegramRouterBackend(config) {
+        // Every outcome pairs its fields with a fresh backend for the chosen model.
+        const withBackend = (fields) => ({
+          backend: new OllamaAgenticBackend(config.backendUrl, fields.model, config.apiKey),
+          ...fields
+        });
+        const envModel = (process.env["OMNIUS_TG_ROUTER_MODEL"] ?? "").trim();
+        const envModelUsable = envModel !== "" && !/^(?:0|false|off|same|main)$/i.test(envModel);
+        if (envModelUsable) {
+          return withBackend({ model: envModel, source: "env", detail: "OMNIUS_TG_ROUTER_MODEL" });
+        }
+        const autoSelect = config.backendType === "ollama" && this.telegramRouterAutoModelEnabled();
+        if (!autoSelect) {
+          return withBackend({ model: config.model, source: "main" });
+        }
+        const candidates = this.telegramRouterCandidateModels();
+        const cacheKey = `${config.backendUrl}\n${config.model}\n${candidates.join(",")}`;
+        const now = Date.now();
+        // Record a decision in the cache, then return it together with a backend.
+        const remember = (model, source, detail) => {
+          this.telegramRouterModelCache = { cacheKey, atMs: now, model, source, detail };
+          return withBackend({ model, source, detail });
+        };
+        const cached = this.telegramRouterModelCache;
+        if (cached && cached.cacheKey === cacheKey && now - cached.atMs < 6e4) {
+          return withBackend({ model: cached.model, source: cached.source, detail: cached.detail });
+        }
+        try {
+          const installed = await this.fetchOllamaInstalledModelNames(config.backendUrl);
+          const installedByNormalized = new Map(
+            Array.from(installed, (name10) => [this.normalizeOllamaModelNameForMatch(name10), name10])
+          );
+          for (const candidate of candidates) {
+            const selected = installedByNormalized.get(this.normalizeOllamaModelNameForMatch(candidate));
+            if (selected) {
+              return remember(
+                selected,
+                "auto-small",
+                "selected first installed OMNIUS_TG_ROUTER_MODEL_CANDIDATES entry from Ollama /api/tags"
+              );
+            }
+          }
+        } catch (err) {
+          const reason = err instanceof Error ? err.message : String(err);
+          return remember(config.model, "main", `router model auto-detect failed: ${reason}`);
+        }
+        return remember(config.model, "main", "no configured small router model was installed; using main model");
       }
       async inferTelegramInteractionDecision(msg, toolContext) {
         const config = this.agentConfig;
@@ -636048,11 +636398,8 @@ ${retryText}`,
           };
           return fallback;
         }
-        const backend = new OllamaAgenticBackend(
-          config.backendUrl,
-          config.model,
-          config.apiKey
-        );
+        const routerBackend = await this.resolveTelegramRouterBackend(config);
+        const backend = routerBackend.backend;
         const forcedLine = forcedRoute ? `The operator selected Telegram mode "${forcedRoute}". The route field must be "${forcedRoute}", but should_reply must still be inferred live from context.` : `The operator selected Telegram mode "auto". Infer route live from context.`;
         const context2 = this.buildTelegramConversationContextStream(sessionKey, msg, isGroup ? 36 : 20, identitySalienceSignals);
         const currentReplyContext = this.buildTelegramCurrentReplyContext(sessionKey, msg);
@@ -636192,7 +636539,13 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
           } catch {
           }
         }
-        const diagnostics = {};
+        const diagnostics = {
+          backendType: config.backendType,
+          backendUrl: config.backendUrl,
+          backendModel: routerBackend.model,
+          routerModelSource: routerBackend.source,
+          routerModelDetail: routerBackend.detail
+        };
         const routerStartMs = Date.now();
         try {
           const result = await this.telegramRouterJsonCompletion(backend, {
@@ -636205,17 +636558,17 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
             ],
             tools: [],
             temperature: 0,
-            // Minimal route JSON should fit comfortably; keeping this small avoids
-            // reintroducing truncated-note repair cascades.
-            maxTokens: 900,
-            timeoutMs: telegramRouterTimeoutMs(config.timeoutMs),
+            // Router JSON is tiny. Keep the answer budget tight so Qwen-class
+            // models cannot spend a minute producing hidden <think>-only output.
+            maxTokens: 360,
+            timeoutMs: telegramRouterTimeoutMs(config.timeoutMs, 8e3, 3e4),
             think: false
           }, diagnostics, "router", sessionKey);
           const text = result.choices[0]?.message?.content ?? "";
           const routerLatencyMs = Date.now() - routerStartMs;
           try {
             const pidReg = getPidRegistry();
-            const modelKey = this.agentConfig?.model ?? "?";
+            const modelKey = routerBackend.model ?? this.agentConfig?.model ?? "?";
             pidReg.sample(`tier1.${modelKey}`, routerLatencyMs);
             pidReg.sample(`tier2.${modelKey}`, routerLatencyMs);
           } catch {
@@ -636242,7 +636595,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
                 diagnosticNote: this.composeTelegramRouterDiagnosticNote(
                   void 0,
                   failureNarrative2,
-                  "router produced no visible attention decision content; repair/strict retry skipped for direct private/admin fail-open"
+                  "router produced no visible attention decision content; repair/strict retry skipped for direct private/admin fail-open",
+                  diagnostics
                 ),
                 raw: text
               }),
@@ -636263,8 +636617,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
                 ],
                 tools: [],
                 temperature: 0,
-                maxTokens: 1400,
-                timeoutMs: telegramRouterTimeoutMs(config.timeoutMs),
+                maxTokens: 700,
+                timeoutMs: telegramRouterTimeoutMs(config.timeoutMs, 8e3, 3e4),
                 think: false
               }, diagnostics, "router", sessionKey);
               const reissuedText = reissued.choices[0]?.message?.content ?? "";
@@ -636277,7 +636631,14 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
             } catch {
             }
           }
-          const repaired = await this.repairTelegramInteractionDecision(
+          const dualEmptyVisible = telegramRouterDiagnosticIsDualEmptyVisible(diagnostics) && !telegramRouterRawPreview(text);
+          if (dualEmptyVisible) {
+            if (diagnostics.repairStatus === void 0) {
+              diagnostics.repairStatus = "skipped";
+              diagnostics.repairError = "router returned no visible text in json-mode or plain retry; repair/strict retry would only burn more inference";
+            }
+          }
+          const repaired = dualEmptyVisible ? null : await this.repairTelegramInteractionDecision(
             backend,
             text,
             forcedRoute,
@@ -636288,7 +636649,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
           if (repaired) {
             return withRouterTelemetry(this.applyTelegramSilentReflectionNotes(repaired, reflectionNotes));
           }
-          const strictRetry = await this.retryTelegramInteractionDecisionStrict(
+          const strictRetry = dualEmptyVisible ? null : await this.retryTelegramInteractionDecisionStrict(
             backend,
             userPrompt,
             text,
@@ -636304,12 +636665,13 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
           const failureNarrative = this.summarizeTelegramRouterFailure(diagnostics);
           const backendLivenessFailure = (diagnostics.attempts ?? []).some(telegramRouterDiagnosticAttemptLooksLikeBackendLiveness) || telegramRouterErrorLooksLikeBackendLiveness(diagnostics.repairError ?? "") || telegramRouterErrorLooksLikeBackendLiveness(diagnostics.strictRetryError ?? "");
           const fallback = this.applyTelegramSilentReflectionNotes(this.buildTelegramRouterUnavailableDecision(msg, toolContext, {
-            reason: backendLivenessFailure ? "router recovery hit a backend liveness failure; no model-derived reply decision" : "router output was not valid decision JSON after repair/retry; no model-derived reply decision",
+            reason: backendLivenessFailure ? "router recovery hit a backend liveness failure; no model-derived reply decision" : dualEmptyVisible ? "router returned no visible decision content in JSON or plain mode; no model-derived reply decision" : "router output was not valid decision JSON after repair/retry; no model-derived reply decision",
             silentDisposition: reflectionNotes.silentDisposition,
             diagnosticNote: this.composeTelegramRouterDiagnosticNote(
               invalidRouterPreview,
               failureNarrative,
-              backendLivenessFailure ? "router backend failed during attention-decision recovery; no usable router decision was available" : invalidRouterPreview ? "router produced an invalid attention decision payload; repair and strict retry did not recover it" : "router produced an empty attention decision payload; strict retry did not recover it"
+              backendLivenessFailure ? "router backend failed during attention-decision recovery; no usable router decision was available" : dualEmptyVisible ? "router returned no visible decision content in JSON or plain mode; repair/strict retry skipped" : invalidRouterPreview ? "router produced an invalid attention decision payload; repair and strict retry did not recover it" : "router produced an empty attention decision payload; strict retry did not recover it",
+              diagnostics
             ),
             raw: text
           }), reflectionNotes);
@@ -636323,7 +636685,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
             diagnosticNote: this.composeTelegramRouterDiagnosticNote(
               void 0,
               failureNarrative,
-              `router failed before live notes were generated: ${errMsg.slice(0, 160)}`
+              `router failed before live notes were generated: ${errMsg.slice(0, 160)}`,
+              diagnostics
             )
           }), reflectionNotes);
           return withRouterTelemetry(fallback);
@@ -636443,10 +636806,14 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
           operatorHint
         };
       }
-      composeTelegramRouterDiagnosticNote(invalidRouterPreview, failureNarrative, headline) {
+      composeTelegramRouterDiagnosticNote(invalidRouterPreview, failureNarrative, headline, diagnostics) {
         const segments = [];
         segments.push(headline);
-        segments.push(this.telegramBackendDiagnostic());
+        segments.push(this.telegramBackendDiagnostic(
+          diagnostics?.backendModel,
+          diagnostics?.routerModelSource,
+          diagnostics?.routerModelDetail
+        ));
         if (failureNarrative.summary) segments.push(failureNarrative.summary);
         if (invalidRouterPreview) segments.push(`invalid router output preview: ${invalidRouterPreview}`);
         if (failureNarrative.detail) segments.push(`router-failure trace: ${failureNarrative.detail}`);
@@ -636884,6 +637251,7 @@ ${TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT}`);
         for (const [, agent] of this.subAgents) {
           agent.aborted = true;
           if (agent.typingInterval) clearInterval(agent.typingInterval);
+          this.stopTelegramPublicProgressMessage(agent);
           try {
             agent.runner?.abort?.();
           } catch {
@@ -636899,6 +637267,8 @@ ${TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT}`);
         }
         this.telegramQueuedSessionWork.clear();
         this.telegramActiveWorkSessions.clear();
+        this.telegramActiveWorkGenerations.clear();
+        this.telegramActiveWorkStartedAtMs.clear();
         this.telegramAdminLivePanels.clear();
         this.flushTelegramViewWrites();
         this.flushTelegramTuiWrites();
@@ -637085,6 +637455,62 @@ ${TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT}`);
           }
         }
       }
+      /**
+       * Decide whether a public progress message applies to this message.
+       *
+       * @param msg Incoming message; `chatType` and `guestQueryId` are read.
+       * @param toolContext Tool context string for the message.
+       * @returns `true` only when `toolContext` is "telegram-public", the chat
+       *   type is not "private" and `msg.guestQueryId` is falsy.
+       */
+      shouldUseTelegramPublicProgressMessage(msg, toolContext) {
+        if (toolContext !== "telegram-public") return false;
+        if (msg.chatType === "private") return false;
+        return !msg.guestQueryId;
+      }
+      /**
+       * Build the three-line HTML body of the public progress message: a
+       * "Working" heading, a 12-cell bar with the elapsed seconds, and a status
+       * line.
+       *
+       * The bar fill is derived from the elapsed seconds modulo 60, so it starts
+       * over every minute. The status line describes the first active inference
+       * found for the message's session key, or falls back to `phase` when there
+       * is none; it is passed through `escapeTelegramHTML`.
+       *
+       * @param subAgent Sub-agent state; `startedAtMs` is read.
+       * @param msg Message used to derive the session key.
+       * @param phase Status text used when no inference is active.
+       * @returns The HTML string, lines joined with "\n".
+       */
+      renderTelegramPublicProgressHTML(subAgent, msg, phase) {
+        const elapsedSec = Math.max(0, Math.floor((Date.now() - subAgent.startedAtMs) / 1e3));
+        const sessionKey = this.sessionKeyForMessage(msg);
+        const inference = this.getTelegramActiveInferences().find((inf) => inf.sessionKey === sessionKey);
+        let status = phase;
+        if (inference) {
+          status = inference.ttfbSec === void 0
+            ? `model request active; waiting for first token (${inference.kind}, ${inference.elapsedSec.toFixed(1)}s)`
+            : `streaming ${inference.kind}; content=${inference.contentTokens}t thinking=${inference.thinkingTokens}t`;
+        }
+        const cells = 12;
+        const filledCells = Math.min(cells, Math.floor(elapsedSec % 60 / 60 * cells));
+        const bar = `[${"#".repeat(filledCells)}${"-".repeat(cells - filledCells)}]`;
+        return `<b>Working</b>\n<code>${bar}</code> ${elapsedSec}s\n<i>${escapeTelegramHTML(status)}</i>`;
+      }
+      /**
+       * Start the periodic public progress message for a sub-agent.
+       *
+       * Does nothing when `shouldUseTelegramPublicProgressMessage` rejects the
+       * message or a progress timer is already set. Otherwise one update runs
+       * immediately and then every 5s. Each update is skipped while the
+       * sub-agent is aborted or no longer registered under the message's session
+       * key. An update edits the existing live message (at most once per 3s) or,
+       * when none exists and no send is pending, sends a new one. Send and edit
+       * failures are ignored.
+       *
+       * @param subAgent Sub-agent state; the timer and live-message fields are
+       *   read and written.
+       * @param msg Message the progress belongs to.
+       * @param phase Fallback status text passed to the renderer.
+       */
+      startTelegramPublicProgressMessage(subAgent, msg, phase) {
+        if (!this.shouldUseTelegramPublicProgressMessage(msg, subAgent.toolContext)) return;
+        if (subAgent.publicProgressTimer) return;
+        // Edit the already-sent live message, throttled to one edit per 3s.
+        const editExisting = (html) => {
+          const editAtMs = Date.now();
+          if (editAtMs - subAgent.lastEditMs < 3e3) return;
+          subAgent.lastEditMs = editAtMs;
+          void this.editLiveMessage(msg.chatId, subAgent.liveMessageId, html).catch(() => {
+          });
+        };
+        // Send the first live message unless a send is already in flight.
+        const sendFirst = (html) => {
+          if (subAgent.liveMessagePromise) return;
+          const replyToId = msg.chatType !== "private" ? msg.messageId : void 0;
+          subAgent.liveMessagePromise = this.sendLiveMessage(msg.chatId, html, replyToId).then((id) => {
+            subAgent.liveMessageId = id;
+            subAgent.lastEditMs = Date.now();
+          }).catch(() => {
+          }).finally(() => {
+            subAgent.liveMessagePromise = null;
+          });
+        };
+        const tick = () => {
+          if (subAgent.aborted) return;
+          if (!this.subAgents.has(this.sessionKeyForMessage(msg))) return;
+          const html = this.renderTelegramPublicProgressHTML(subAgent, msg, phase);
+          if (subAgent.liveMessageId) {
+            editExisting(html);
+          } else {
+            sendFirst(html);
+          }
+        };
+        tick();
+        const timer = setInterval(tick, 5e3);
+        subAgent.publicProgressTimer = timer;
+        if (typeof timer.unref === "function") {
+          timer.unref();
+        }
+      }
+      /**
+       * Cancel the sub-agent's public progress timer, if one is set, and reset
+       * `publicProgressTimer` to `null`.
+       *
+       * @param subAgent Sub-agent state holding `publicProgressTimer`.
+       */
+      stopTelegramPublicProgressMessage(subAgent) {
+        const timer = subAgent.publicProgressTimer;
+        if (timer) {
+          clearInterval(timer);
+          subAgent.publicProgressTimer = null;
+        }
+      }
       ensureTelegramAdminLivePanel(subAgent, msg) {
         const existing = subAgent.adminLivePanelNonce ? this.telegramAdminLivePanels.get(subAgent.adminLivePanelNonce) : void 0;
         if (existing) return existing;
@@ -637343,11 +637769,12 @@ Join: ${newUrl}`);
         }
         this.scheduleTelegramSessionWork(msg, toolContext);
       }
-      async processTelegramMessageWork(work) {
+      async processTelegramMessageWork(work, workGeneration) {
         const msg = work.msg;
         const toolContext = work.toolContext;
         const sessionKey = this.sessionKeyForMessage(msg);
         const isAdminDM = toolContext === "telegram-admin-dm";
+        if (!this.telegramWorkGenerationIsCurrent(sessionKey, workGeneration)) return;
         const existing = this.subAgents.get(sessionKey);
         if (existing && !existing.aborted) {
           await this.enqueueTelegramQueuedSessionWorkForExistingSubAgent(work, existing);
@@ -637365,6 +637792,13 @@ Join: ${newUrl}`);
         } catch (err) {
           decision2 = this.fallbackTelegramRouterDecision(msg, toolContext, err);
         }
+        if (!this.telegramWorkGenerationIsCurrent(sessionKey, workGeneration)) {
+          this.tuiWrite(() => renderTelegramSubAgentEvent(
+            msg.username,
+            `discarded stale Telegram work result after queue pin release for ${sessionKey}`
+          ));
+          return;
+        }
         const storedPreference = this.applyTelegramReplyPreferenceUpdate(
           sessionKey,
           msg,
@@ -637482,6 +637916,7 @@ Join: ${newUrl}`);
         if (replyEdge) {
           this.tuiWrite(() => renderTelegramSubAgentEvent(msg.username, replyEdge));
         }
+        this.startTelegramPublicProgressMessage(subAgent, msg, "taking notes and preparing tools");
         try {
           let mediaContext = "";
           if (msg.media || msg.replyToMedia) {
@@ -637498,6 +637933,7 @@ Join: ${newUrl}`);
             clearInterval(subAgent.typingInterval);
             subAgent.typingInterval = null;
           }
+          this.stopTelegramPublicProgressMessage(subAgent);
           const finalText = cleanTelegramVisibleReply(result || "");
           if (isAdminDM && !this.telegramAdminRunCompleted(subAgent)) {
             const incompleteText = this.telegramAdminIncompleteRunText(subAgent, finalText);
@@ -637566,6 +638002,7 @@ Join: ${newUrl}`);
             clearInterval(subAgent.typingInterval);
             subAgent.typingInterval = null;
           }
+          this.stopTelegramPublicProgressMessage(subAgent);
           const errMsg = err instanceof Error ? err.message : String(err);
           this.tuiWrite(() => renderTelegramSubAgentError(msg.username, errMsg));
           this.subAgentViewCallbacks?.onWrite(subAgent.viewId, `error: ${errMsg}`);
@@ -637582,6 +638019,7 @@ Join: ${newUrl}`);
             });
           }
         } finally {
+          this.stopTelegramPublicProgressMessage(subAgent);
           this.clearTelegramSubAgentContextBuffer(sessionKey);
           this.subAgents.delete(sessionKey);
           this.refreshActiveTelegramInteractionCount();
@@ -637755,6 +638193,24 @@ Join: ${newUrl}`);
           typingInterval = this.startTypingIndicator(msg.chatId);
         }
         this.tuiWrite(() => renderTelegramSubAgentEvent(msg.username, `live inference: chat reply (${this.interactionMode})`));
+        if (this.shouldUseTelegramPublicProgressMessage(msg, toolContext)) {
+          const initialHtml = [
+            `<b>Working</b>`,
+            `<code>[------------]</code> 0s`,
+            `<i>preparing a concise reply</i>`
+          ].join("\n");
+          liveMessagePromise = this.sendLiveMessage(
+            msg.chatId,
+            initialHtml,
+            msg.chatType !== "private" ? msg.messageId : void 0
+          ).then((id) => {
+            liveMessageId = id;
+            lastEditMs = Date.now();
+          }).catch(() => {
+          }).finally(() => {
+            liveMessagePromise = null;
+          });
+        }
         try {
           const mediaContext = msg.media || msg.replyToMedia || msg.livePhoto ? await this.processMediaContextForMessage(msg) : "";
           const contextualPayload = [mediaContext, additionalContext].filter(Boolean).join("\n\n");
@@ -641592,10 +642048,12 @@ ${caption}\r
       /** Long polling loop */
       async pollLoop() {
         while (this.polling) {
+          const longPollTimeoutSeconds = this.telegramLongPollTimeoutSeconds();
           try {
             const result = await this.apiCall("getUpdates", {
               offset: this.lastUpdateId + 1,
-              timeout: 30,
+              timeout: longPollTimeoutSeconds,
+              limit: 100,
               allowed_updates: TELEGRAM_ALLOWED_UPDATES
             });
             if (result.ok && Array.isArray(result.result)) {
@@ -641633,11 +642091,30 @@ ${caption}\r
             }
           } catch (err) {
             if (this.polling) {
+              const now = Date.now();
+              if (now - this.telegramPollWarningLastAtMs > 3e4) {
+                this.telegramPollWarningLastAtMs = now;
+                this.tuiWrite(() => renderWarning(
+                  `Telegram polling warning: getUpdates failed (${err instanceof Error ? err.message : String(err)}); long_poll_timeout=${longPollTimeoutSeconds}s client_deadline_ms=${this.telegramLongPollClientTimeoutMs(longPollTimeoutSeconds) ?? "none"}; retrying`
+                ));
+              }
               await new Promise((r2) => setTimeout(r2, 5e3));
             }
           }
         }
       }
+      telegramLongPollTimeoutSeconds() { // Returns the getUpdates long-poll `timeout` (seconds), overridable via OMNIUS_TG_LONG_POLL_TIMEOUT_SECONDS.
+        const raw = Number.parseInt(process.env["OMNIUS_TG_LONG_POLL_TIMEOUT_SECONDS"] ?? "", 10); // unset -> "" -> NaN; parseInt keeps a leading integer prefix (e.g. "30s" -> 30)
+        if (Number.isFinite(raw) && raw >= 0 && raw <= 120) return raw; // accept only integers in [0, 120]
+        return TELEGRAM_DEFAULT_LONG_POLL_TIMEOUT_SECONDS; // missing, unparsable, or out-of-range override -> built-in default
+      }
+      telegramLongPollClientTimeoutMs(serverTimeoutSeconds) { // Returns the optional client-side deadline (ms) from OMNIUS_TG_LONG_POLL_CLIENT_TIMEOUT_MS, or null when unset/rejected.
+        const raw = Number.parseInt(process.env["OMNIUS_TG_LONG_POLL_CLIENT_TIMEOUT_MS"] ?? "", 10); // unset -> "" -> NaN
+        if (!Number.isFinite(raw)) return null; // no usable override -> no client deadline
+        const floor = Math.max(5e3, Math.floor((serverTimeoutSeconds ?? 0) * 1e3) + 5e3); // server timeout converted to ms plus 5 s of headroom; never below 5 s
+        if (raw >= floor && raw <= 3e5) return raw; // accept only values in [floor, 300000] ms
+        return null; // out-of-range overrides are ignored rather than clamped
+      }
       /** Make a Telegram Bot API call with rate-limit retry */
       async apiCall(method, body, _retryDepth = 0) {
         const url = `https://api.telegram.org/bot${this.botToken}/${method}`;
@@ -641650,7 +642127,15 @@ ${caption}\r
         }
         const isLongPoll = method === "getUpdates";
         if (isLongPoll && this.abortController) {
-          options2.signal = this.abortController.signal;
+          const timeoutFn = AbortSignal.timeout;
+          const anyFn = AbortSignal.any;
+          const bodyTimeout = typeof body?.["timeout"] === "number" ? body["timeout"] : void 0;
+          const clientTimeoutMs = this.telegramLongPollClientTimeoutMs(bodyTimeout);
+          const signals = [
+            this.abortController.signal,
+            clientTimeoutMs && typeof timeoutFn === "function" ? timeoutFn(clientTimeoutMs) : void 0
+          ].filter((signal) => signal instanceof AbortSignal);
+          options2.signal = typeof anyFn === "function" && signals.length > 1 ? anyFn(signals) : signals[0];
         } else if (!isLongPoll) {
           options2.signal = AbortSignal.timeout(3e4);
         }
@@ -659607,6 +660092,30 @@ function sanitizeChatContent(raw) {
   }
   return cleaned.join("\n").trim();
 }
+function appendNoThinkDirectivesToMessages(messages2) { // Returns messages with "/nothink" and "/no_think" appended to the last user message; the input array is not mutated.
+  let lastUserIdx = -1; // -1 means no user message found
+  for (let i2 = messages2.length - 1; i2 >= 0; i2--) { // scan backwards so the most recent user turn wins
+    if (messages2[i2]?.role === "user") {
+      lastUserIdx = i2;
+      break;
+    }
+  }
+  if (lastUserIdx < 0) return messages2; // no user message: return the same array unchanged
+  const target = messages2[lastUserIdx];
+  if (!target || typeof target.content !== "string") return messages2; // non-string content (e.g. an array) is left untouched
+  const hasOllamaNoThink = /\/nothink\b/i.test(target.content); // case-insensitive check for an existing "/nothink"
+  const hasQwenNoThink = /\/no[_-]think\b/i.test(target.content); // matches "/no_think" or "/no-think"
+  if (hasOllamaNoThink && hasQwenNoThink) return messages2; // both spellings already present: nothing to add
+  const suffix = [ // only the spellings that are missing, one per line
+    hasOllamaNoThink ? null : "/nothink",
+    hasQwenNoThink ? null : "/no_think"
+  ].filter(Boolean).join("\n");
+  return messages2.map( // new array; only the last user message is replaced by a shallow copy with the suffix on a new line
+    (m2, i2) => i2 === lastUserIdx ? { ...m2, content: `${target.content}
+${suffix}` } : m2
+  );
+}
 async function directChatBackend(opts) {
   const { model, messages: messages2, stream, res, sessionId, ollamaUrl, extraFields } = opts;
   const cfg = loadConfig();
@@ -659695,13 +660204,12 @@ async function directChatBackend(opts) {
     if (Array.isArray(ef["stop"]) || typeof ef["stop"] === "string") ollamaOpts["stop"] = ef["stop"];
     const hasTools = Array.isArray(ef["tools"]) && ef["tools"].length > 0;
     const ollamaFormat = ollamaFormatFromOpenAIResponseFormat(ef["response_format"]);
+    const ollamaMessages = appendNoThinkDirectivesToMessages(messages2);
     const reqBody = JSON.stringify({
       model: cleanModel,
-      messages: messages2,
+      messages: ollamaMessages,
       stream,
-      // Don't force think:false when the caller is using tool calling —
-      // thinking models often need their reasoning chain to choose a tool.
-      ...hasTools ? {} : { think: false },
+      think: false,
       ...hasTools ? { tools: ef["tools"] } : {},
       ...ef["tool_choice"] !== void 0 ? { tool_choice: ef["tool_choice"] } : {},
       ...ollamaFormat !== void 0 ? { format: ollamaFormat } : {},
@@ -659931,13 +660439,18 @@ async function completeRealtimeTextOnly(opts) {
   if (!requestedModel) {
     originalModel = realtimeOllamaFallbackCache.get(realtimeFallbackCacheKey(targetUrl, originalModel)) ?? originalModel;
   }
-  const makeOllamaChatBody = (modelName) => JSON.stringify({
-    model: modelName,
-    messages: requestBody["messages"],
-    stream: false,
-    think: false,
-    options: { temperature, num_predict: maxTokens }
-  });
+  const makeOllamaChatBody = (modelName) => {
+    const rtMessages = Array.isArray(requestBody["messages"]) ? appendNoThinkDirectivesToMessages(
+      requestBody["messages"]
+    ) : requestBody["messages"];
+    return JSON.stringify({
+      model: modelName,
+      messages: rtMessages,
+      stream: false,
+      think: false,
+      options: { temperature, num_predict: maxTokens }
+    });
+  };
   let result = await ollamaRequest(targetUrl, "/api/chat", "POST", makeOllamaChatBody(originalModel), timeoutMs, route?.endpoint);
   if (result.status >= 400 && !requestedModel && isOllamaMissingModelError(result.body)) {
     const fallbackModel = await resolveRealtimeOllamaFallbackModel(targetUrl, timeoutMs, originalModel);
@@ -661278,9 +661791,14 @@ async function handleV1ChatCompletions(req2, res, ollamaUrl) {
     return;
   }
   const callerProvidedThink = "think" in routedBody;
-  const callerProvidedTools = Array.isArray(routedBody["tools"]) && routedBody["tools"].length > 0;
-  const finalThink = callerProvidedThink ? routedBody["think"] : callerProvidedTools ? void 0 : false;
+  const thinkingAllowed = process.env["OMNIUS_ENABLE_THINKING"] === "1" && process.env["OMNIUS_FORCE_NO_THINK"] !== "1";
+  const finalThink = thinkingAllowed && callerProvidedThink ? routedBody["think"] : false;
   const ollamaBody = { ...routedBody };
+  if (finalThink === false && Array.isArray(ollamaBody["messages"])) {
+    ollamaBody["messages"] = appendNoThinkDirectivesToMessages(
+      ollamaBody["messages"]
+    );
+  }
   const ollamaOptions = ollamaBody["options"] && typeof ollamaBody["options"] === "object" ? { ...ollamaBody["options"] } : {};
   if (typeof ollamaBody["max_tokens"] === "number") {
     ollamaOptions["num_predict"] = ollamaBody["max_tokens"];