npm - omnius - Versions diffs - 1.0.183 → 1.0.184 - Mend

omnius 1.0.183 → 1.0.184

This diff shows the changes between publicly available versions of this package as released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions exactly as they appear in their respective public registries.

Files changed (3) hide show

package/dist/index.js CHANGED Viewed

@@ -591154,6 +591154,7 @@ __export(setup_exports, {
   checkOllamaUpdate: () => checkOllamaUpdate,
   checkPythonVenv: () => checkPythonVenv,
   checkToolSupport: () => checkToolSupport,
+  classifyOllamaThinkingTreatment: () => classifyOllamaThinkingTreatment,
   computeInferenceScore: () => computeInferenceScore,
   createExpandedVariant: () => createExpandedVariant,
   createExpandedVariantAsync: () => createExpandedVariantAsync,
@@ -591181,6 +591182,7 @@ __export(setup_exports, {
   repairAllExpandedVariants: () => repairAllExpandedVariants,
   runElevatedCommand: () => runElevatedCommand,
   runSetupWizard: () => runSetupWizard,
+  shouldBakeNoThinkIntoOllamaModelfile: () => shouldBakeNoThinkIntoOllamaModelfile,
   updateOllama: () => updateOllama
 });
 import * as readline from "node:readline";
@@ -592896,23 +592898,26 @@ ${c3.cyan(OMNIUS_FIRST_RUN_BANNER)}
     const createModelfile = await ask(rl, `  Create optimized model "${c3.bold(customName)}" with ${ctx3.label} context? (Y/n) `);
     if (createModelfile.toLowerCase() !== "n") {
       try {
-        const numPredict = Math.min(16384, Math.max(2048, Math.floor(ctx3.numCtx * 0.25)));
-        const modelfileContent = [
-          `FROM ${selectedVariant.tag}`,
-          `PARAMETER num_ctx ${ctx3.numCtx}`,
-          `PARAMETER temperature 0`,
-          `PARAMETER num_predict ${numPredict}`,
-          `PARAMETER stop "<|endoftext|>"`
-        ].join("\n");
+        const modelfileCandidates = expandedVariantContentCandidates(selectedVariant.tag, ctx3.numCtx);
         const modelDir2 = join115(homedir37(), ".omnius", "models");
         mkdirSync56(modelDir2, { recursive: true });
         const modelfilePath = join115(modelDir2, `Modelfile.${customName}`);
-        writeFileSync51(modelfilePath, modelfileContent + "\n", "utf8");
         process.stdout.write(`  ${c3.dim("Creating model...")} `);
-        execSync51(`ollama create ${customName} -f ${modelfilePath}`, {
-          stdio: "pipe",
-          timeout: 12e4
-        });
+        for (let i2 = 0; i2 < modelfileCandidates.length; i2++) {
+          writeFileSync51(modelfilePath, modelfileCandidates[i2] + "\n", "utf8");
+          try {
+            execSync51(`ollama create ${customName} -f ${modelfilePath}`, {
+              stdio: "pipe",
+              timeout: 12e4
+            });
+            break;
+          } catch (err) {
+            if (i2 === 0 && modelfileCandidates.length > 1 && ollamaCreateNothinkRejected(err)) {
+              continue;
+            }
+            throw err;
+          }
+        }
         process.stdout.write(`${c3.green("✔")}
 `);
         setConfigValue("model", customName);
@@ -593495,6 +593500,29 @@ function parseShowNumCtx2(show) {
   }
   return 0;
 }
+function classifyOllamaThinkingTreatment(modelName) {
+  const normalized = modelName.replace(/^omnius-/i, "").replace(/:latest$/i, "").toLowerCase();
+  if (/\bgpt[-_]?oss\b/.test(normalized)) return "gpt-oss-levels";
+  if (/(?:^|[-_/:])(?:qwq|qvq)(?:[-_/:]|$)/.test(normalized) || /(?:^|[-_/:])thinking(?:[-_/:]|$)/.test(normalized) || /[-_]thinking(?:[-_/:]|$)/.test(normalized)) {
+    return "thinking-only";
+  }
+  if (/(?:^|[-_/:])qwen3(?:[._-]?\d+)?(?:[-_/:]|$)/.test(normalized) || /(?:^|[-_/:])qwen3(?:vl|omni)(?:[-_/:]|$)/.test(normalized) || /deepseek[-_]?r1/.test(normalized) || /deepseek[-_]?v?3[._-]1/.test(normalized)) {
+    return "toggleable";
+  }
+  return "none";
+}
+function shouldBakeNoThinkIntoOllamaModelfile(modelName) {
+  return classifyOllamaThinkingTreatment(modelName) === "toggleable";
+}
+function parseShowNoThink(show) {
+  const sources = [show.parameters, show.modelfile];
+  for (const source of sources) {
+    if (!source) continue;
+    if (/\b(?:PARAMETER\s+)?nothink\s+(?:true|1|on|yes)\b/i.test(source)) return true;
+    if (/\b(?:PARAMETER\s+)?think\s+(?:false|0|off|no)\b/i.test(source)) return true;
+  }
+  return false;
+}
 async function checkExpandedVariant(modelName, backendUrl2) {
   if (modelName.startsWith("omnius-")) return null;
   try {
@@ -593574,7 +593602,7 @@ async function readExpandedVariantState(backendUrl2, modelName) {
     if (baseModel && (baseModel.startsWith("/") || /blobs\/sha256[-:]/.test(baseModel))) {
       baseModel = null;
     }
-    return { currentNumCtx, baseModel };
+    return { currentNumCtx, baseModel, hasNoThink: parseShowNoThink(showData) };
   } catch {
     return null;
   }
@@ -593582,50 +593610,93 @@ async function readExpandedVariantState(backendUrl2, modelName) {
 function stripVariantTag(modelName) {
   return modelName.replace(/:latest$/i, "");
 }
-function createExpandedVariantContent(baseModel, numCtx) {
+function createExpandedVariantContent(baseModel, numCtx, options2 = {}) {
   if (baseModel.startsWith("/") || /blobs\/sha256[-:]/.test(baseModel)) {
     throw new Error(
       `createExpandedVariantContent: refusing to use blob-path base "${baseModel}". Pass the user-facing model name (e.g. "qwen3.6:latest") instead.`
     );
   }
   const numPredict = Math.min(16384, Math.max(2048, Math.floor(numCtx * 0.25)));
-  return [
+  const lines = [
     `FROM ${baseModel}`,
     `PARAMETER num_ctx ${numCtx}`,
+    ...options2.includeNoThink ? [
+      `# Keep toggleable reasoning models in direct-answer mode by default.`,
+      `PARAMETER nothink true`
+    ] : [],
     `PARAMETER temperature 0`,
     `PARAMETER num_predict ${numPredict}`,
     `PARAMETER stop "<|endoftext|>"`
+  ];
+  return lines.join("\n");
+}
+function expandedVariantContentCandidates(baseModel, numCtx) {
+  if (!shouldBakeNoThinkIntoOllamaModelfile(baseModel)) {
+    return [createExpandedVariantContent(baseModel, numCtx)];
+  }
+  return [
+    createExpandedVariantContent(baseModel, numCtx, { includeNoThink: true }),
+    createExpandedVariantContent(baseModel, numCtx, { includeNoThink: false })
+  ];
+}
+function ollamaCreateNothinkRejected(err) {
+  const anyErr = err;
+  const text = [
+    anyErr?.stderr?.toString?.() ?? "",
+    anyErr?.stdout?.toString?.() ?? "",
+    anyErr?.message ?? ""
   ].join("\n");
+  return /nothink|unknown parameter|invalid parameter|unsupported parameter/i.test(text);
 }
 function createExpandedVariantNamed(targetModel, baseModel, specs, sizeGB, kvBytesPerToken, archMax) {
   const ctx3 = calculateExpandedVariantContextWindow(specs, sizeGB, kvBytesPerToken, archMax);
-  const modelfileContent = createExpandedVariantContent(baseModel, ctx3.numCtx);
+  const modelfileCandidates = expandedVariantContentCandidates(baseModel, ctx3.numCtx);
   try {
     const modelDir2 = join115(homedir37(), ".omnius", "models");
     mkdirSync56(modelDir2, { recursive: true });
     const modelfilePath = join115(modelDir2, `Modelfile.${targetModel}`);
-    writeFileSync51(modelfilePath, modelfileContent + "\n", "utf8");
-    execSync51(`ollama create ${targetModel} -f ${modelfilePath}`, {
-      stdio: "pipe",
-      timeout: 12e4
-    });
-    return targetModel;
+    for (let i2 = 0; i2 < modelfileCandidates.length; i2++) {
+      writeFileSync51(modelfilePath, modelfileCandidates[i2] + "\n", "utf8");
+      try {
+        execSync51(`ollama create ${targetModel} -f ${modelfilePath}`, {
+          stdio: "pipe",
+          timeout: 12e4
+        });
+        return targetModel;
+      } catch (err) {
+        if (i2 === 0 && modelfileCandidates.length > 1 && ollamaCreateNothinkRejected(err)) {
+          continue;
+        }
+        throw err;
+      }
+    }
+    return null;
   } catch {
     return null;
   }
 }
 async function createExpandedVariantNamedAsync(targetModel, baseModel, specs, sizeGB, kvBytesPerToken, archMax) {
   const ctx3 = calculateExpandedVariantContextWindow(specs, sizeGB, kvBytesPerToken, archMax);
-  const modelfileContent = createExpandedVariantContent(baseModel, ctx3.numCtx);
+  const modelfileCandidates = expandedVariantContentCandidates(baseModel, ctx3.numCtx);
   try {
     const modelDir2 = join115(homedir37(), ".omnius", "models");
     mkdirSync56(modelDir2, { recursive: true });
     const modelfilePath = join115(modelDir2, `Modelfile.${targetModel}`);
-    writeFileSync51(modelfilePath, modelfileContent + "\n", "utf8");
-    await execAsync2(`ollama create ${targetModel} -f ${modelfilePath}`, {
-      timeout: 12e4
-    });
-    return targetModel;
+    for (let i2 = 0; i2 < modelfileCandidates.length; i2++) {
+      writeFileSync51(modelfilePath, modelfileCandidates[i2] + "\n", "utf8");
+      try {
+        await execAsync2(`ollama create ${targetModel} -f ${modelfilePath}`, {
+          timeout: 12e4
+        });
+        return targetModel;
+      } catch (err) {
+        if (i2 === 0 && modelfileCandidates.length > 1 && ollamaCreateNothinkRejected(err)) {
+          continue;
+        }
+        throw err;
+      }
+    }
+    return null;
   } catch {
     return null;
   }
@@ -606952,6 +607023,10 @@ Clone a new voice: /voice clone <wav-file> [name]`);
           renderWarning(
             "OMNIUS_FORCE_NO_THINK=1 forces off regardless of /think setting"
           );
+        else if (cur && process.env["OMNIUS_ENABLE_THINKING"] !== "1")
+          renderWarning(
+            "OMNIUS_ENABLE_THINKING is not set; /think is saved but backend requests remain direct-answer mode."
+          );
         return "handled";
       }
       if (token === "auto") {
@@ -606990,6 +607065,11 @@ Clone a new voice: /voice clone <wav-file> [name]`);
         renderInfo(
           "Note: max_tokens will auto-raise to ≥4096 per request to prevent <think> truncation."
         );
+        if (process.env["OMNIUS_ENABLE_THINKING"] !== "1") {
+          renderWarning(
+            "Thinking is hard-disabled by default. Set OMNIUS_ENABLE_THINKING=1 before launch for /think on or /think auto to affect backend requests."
+          );
+        }
       }
       return "handled";
     }
@@ -630844,7 +630924,7 @@ function renderTelegramSubAgentError(username, error) {
   process.stdout.write(`    ${c3.dim("│")} ${c3.magenta("✘")} @${username}: ${c3.dim(preview)}
 `);
 }
-var TELEGRAM_TOOL_ACTION_GROUPS, TELEGRAM_TOOL_ACTION_GROUP, TELEGRAM_TOOL_MUTATING_GROUPS, DEFAULT_TELEGRAM_TOOL_GROUP_POLICY, TELEGRAM_TOOL_BUTTON_LABELS, TELEGRAM_SAFETY_PROMPT, ADMIN_DM_PROMPT, ADMIN_GROUP_PROMPT, TELEGRAM_PUBLIC_SOUL_PROFILE, TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT, TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT, TELEGRAM_PUBLIC_VISION_STACK_CONTRACT, GROUP_REPLY_DISCRETION_PROMPT, TELEGRAM_CHAT_MODE_PROMPT, ADMIN_CHAT_PROFILE_PROMPT, TELEGRAM_ACTION_RESPONSE_CONTRACT, TELEGRAM_EXTERNAL_ACQUISITION_CONTRACT, TELEGRAM_LINK_INTEGRITY_CONTRACT, TELEGRAM_INTERACTION_DECISION_RESPONSE_FORMAT, TELEGRAM_INTERACTION_DECISION_MINIMAL_SCHEMA, TELEGRAM_INTERACTION_DECISION_REPAIR_SCHEMA, TELEGRAM_CHAT_REPLY_RESPONSE_FORMAT, TELEGRAM_SPACED_URL_RE, TELEGRAM_HTTP_URL_RE, TELEGRAM_STUCK_SELF_TALK_PREFIXES, TELEGRAM_CHAT_HISTORY_LIMIT, TELEGRAM_CONTEXT_RECENT_DEFAULT, TELEGRAM_CONTEXT_LINE_LIMIT, TELEGRAM_CONTEXT_SAMPLE_LIMIT, TELEGRAM_MEMORY_CARD_LIMIT, TELEGRAM_MEMORY_NOTE_LIMIT, TELEGRAM_ASSOCIATIVE_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_USER_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_ACTION_LIMIT, TELEGRAM_ASSOCIATIVE_RELATION_LIMIT, TELEGRAM_MEMORY_STOPWORDS, TELEGRAM_MEMORY_GENERIC_QUERY_TOKENS, TELEGRAM_SUB_AGENT_BOUNDED_OPTIONS, TELEGRAM_SUB_AGENT_DEFAULT_LIMIT, TELEGRAM_SUB_AGENT_MAX_LIMIT, TELEGRAM_SUB_AGENT_BURST_CONTEXT_LIMIT, TELEGRAM_ADMIN_LIVE_PANEL_PAGES, TELEGRAM_ADMIN_LIVE_MUTATION_TOOLS, TELEGRAM_PUBLIC_HELP_COMMANDS2, TELEGRAM_REMINDER_SLASH_COMMANDS, TELEGRAM_REFLECTION_SLASH_COMMANDS, TELEGRAM_PUBLIC_BOT_COMMAND_NAMES, TELEGRAM_IMAGE_EXTENSIONS, MEDIA_CACHE_TTL_MS, TELEGRAM_CHANNEL_DMN_SWEEP_MS, TELEGRAM_CHANNEL_DMN_IDLE_AFTER_MS, TELEGRAM_CHANNEL_DMN_MIN_INTERVAL_MS, TELEGRAM_CHANNEL_DMN_MIN_MESSAGES, TELEGRAM_ALLOWED_UPDATES, TELEGRAM_PUBLIC_TOOL_QUOTAS, TelegramBridge;
+var TELEGRAM_TOOL_ACTION_GROUPS, TELEGRAM_TOOL_ACTION_GROUP, TELEGRAM_TOOL_MUTATING_GROUPS, DEFAULT_TELEGRAM_TOOL_GROUP_POLICY, TELEGRAM_TOOL_BUTTON_LABELS, TELEGRAM_SAFETY_PROMPT, ADMIN_DM_PROMPT, ADMIN_GROUP_PROMPT, TELEGRAM_PUBLIC_SOUL_PROFILE, TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT, TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT, TELEGRAM_PUBLIC_VISION_STACK_CONTRACT, GROUP_REPLY_DISCRETION_PROMPT, TELEGRAM_CHAT_MODE_PROMPT, ADMIN_CHAT_PROFILE_PROMPT, TELEGRAM_ACTION_RESPONSE_CONTRACT, TELEGRAM_EXTERNAL_ACQUISITION_CONTRACT, TELEGRAM_LINK_INTEGRITY_CONTRACT, TELEGRAM_INTERACTION_DECISION_RESPONSE_FORMAT, TELEGRAM_INTERACTION_DECISION_MINIMAL_SCHEMA, TELEGRAM_INTERACTION_DECISION_REPAIR_SCHEMA, TELEGRAM_CHAT_REPLY_RESPONSE_FORMAT, TELEGRAM_SPACED_URL_RE, TELEGRAM_HTTP_URL_RE, TELEGRAM_STUCK_SELF_TALK_PREFIXES, TELEGRAM_CHAT_HISTORY_LIMIT, TELEGRAM_CONTEXT_RECENT_DEFAULT, TELEGRAM_CONTEXT_LINE_LIMIT, TELEGRAM_CONTEXT_SAMPLE_LIMIT, TELEGRAM_MEMORY_CARD_LIMIT, TELEGRAM_MEMORY_NOTE_LIMIT, TELEGRAM_ASSOCIATIVE_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_USER_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_ACTION_LIMIT, TELEGRAM_ASSOCIATIVE_RELATION_LIMIT, TELEGRAM_MEMORY_STOPWORDS, TELEGRAM_MEMORY_GENERIC_QUERY_TOKENS, TELEGRAM_SUB_AGENT_BOUNDED_OPTIONS, TELEGRAM_SUB_AGENT_DEFAULT_LIMIT, TELEGRAM_SUB_AGENT_MAX_LIMIT, TELEGRAM_SUB_AGENT_BURST_CONTEXT_LIMIT, TELEGRAM_ADMIN_LIVE_PANEL_PAGES, TELEGRAM_ADMIN_LIVE_MUTATION_TOOLS, TELEGRAM_PUBLIC_HELP_COMMANDS2, TELEGRAM_REMINDER_SLASH_COMMANDS, TELEGRAM_REFLECTION_SLASH_COMMANDS, TELEGRAM_PUBLIC_BOT_COMMAND_NAMES, TELEGRAM_IMAGE_EXTENSIONS, MEDIA_CACHE_TTL_MS, TELEGRAM_CHANNEL_DMN_SWEEP_MS, TELEGRAM_CHANNEL_DMN_IDLE_AFTER_MS, TELEGRAM_CHANNEL_DMN_MIN_INTERVAL_MS, TELEGRAM_CHANNEL_DMN_MIN_MESSAGES, TELEGRAM_ALLOWED_UPDATES, TELEGRAM_DEFAULT_LONG_POLL_TIMEOUT_SECONDS, TELEGRAM_DEFAULT_ROUTER_MODEL_CANDIDATES, TELEGRAM_PUBLIC_TOOL_QUOTAS, TelegramBridge;
 var init_telegram_bridge = __esm({
   "packages/cli/src/tui/telegram-bridge.ts"() {
     "use strict";
@@ -631288,6 +631368,21 @@ Telegram link integrity contract:
     TELEGRAM_CHANNEL_DMN_MIN_INTERVAL_MS = 20 * 60 * 1e3;
     TELEGRAM_CHANNEL_DMN_MIN_MESSAGES = 4;
     TELEGRAM_ALLOWED_UPDATES = ["message", "guest_message", "callback_query", "poll", "message_reaction", "message_reaction_count"];
+    TELEGRAM_DEFAULT_LONG_POLL_TIMEOUT_SECONDS = 50;
+    TELEGRAM_DEFAULT_ROUTER_MODEL_CANDIDATES = [
+      "qwen3:0.6b",
+      "qwen3:1.7b",
+      "qwen3:4b",
+      "qwen3:8b",
+      "qwen2.5:3b",
+      "qwen2.5:7b",
+      "llama3.2:1b",
+      "llama3.2:3b",
+      "gemma3:1b",
+      "gemma3:4b",
+      "phi3:mini",
+      "phi4-mini:latest"
+    ];
     TELEGRAM_PUBLIC_TOOL_QUOTAS = {
       web: { limit: 20, windowMs: 60 * 6e4 },
       media: { limit: 30, windowMs: 60 * 6e4 },
@@ -631321,6 +631416,7 @@ Telegram link integrity contract:
       pollLoopPromise = null;
       pollFatalNotified = false;
       lastUpdateId = 0;
+      telegramRouterModelCache = null;
       state = {
         active: false,
         botUserId: void 0,
@@ -635427,7 +635523,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
             },
             inferenceKind,
             sessionKey,
-            { stream: false, reason: "router-json" }
+            { stream: false, reason: "router-json", modelName: diagnostics?.backendModel }
           );
           const visible = jsonModeResult.choices.some(
             (choice) => stripTelegramHiddenThinking(choice.message.content ?? "").trim().length > 0
@@ -635483,7 +635579,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
             suppressed,
             inferenceKind,
             sessionKey,
-            { stream: false, reason: "router-plain-retry" }
+            { stream: false, reason: "router-plain-retry", modelName: diagnostics?.backendModel }
           );
           if (diagnostics) {
             const plainVisible = plainResult.choices.some(
@@ -635536,7 +635632,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
        *      hard-deadline retire path becomes diagnosable instead of opaque
        */
       async telegramObservableInference(backend, request, kind, sessionKey, options2 = {}) {
-        const model = this.agentConfig?.model ?? "?";
+        const model = options2.modelName ?? this.agentConfig?.model ?? "?";
         const promptTokens = estimatePromptTokensFromRequest(request);
         const broker = getModelBroker();
         const trainCtx = await broker.getNctxTrain(model).catch(() => null);
@@ -635570,7 +635666,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
             if (!streamAllowed && process.env["OMNIUS_BROKER_TRACE"] === "1") {
               this.tuiWrite(() => renderTelegramSubAgentEvent(
                 sessionKey,
-                `inference ${id}: non-stream direct (${options2.reason ?? "requested"}) ${this.telegramInferenceRequestDiagnostic(requestWithCtx)}`
+                `inference ${id}: non-stream direct (${options2.reason ?? "requested"}) ${this.telegramInferenceRequestDiagnostic(requestWithCtx, model)}`
               ));
             }
           } else if (typeof streamFn !== "function") {
@@ -635603,18 +635699,22 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
           this.deregisterTelegramInference(id);
         }
       }
-      telegramBackendDiagnostic() {
+      telegramBackendDiagnostic(modelOverride, routerModelSource, routerModelDetail) {
         const config = this.agentConfig;
         if (!config) return "backend=unconfigured model=?";
-        return `backend=${config.backendType} url=${config.backendUrl} model=${config.model}`;
+        const model = modelOverride || config.model;
+        const source = routerModelSource ? ` router_model_source=${routerModelSource}` : "";
+        const detail = routerModelDetail ? ` router_model_detail=${compactTelegramRouterDiagnosticText(routerModelDetail, 180)}` : "";
+        const main2 = model !== config.model ? ` main_model=${config.model}` : "";
+        return `backend=${config.backendType} url=${config.backendUrl} model=${model}${main2}${source}${detail}`;
       }
-      telegramInferenceRequestDiagnostic(request) {
+      telegramInferenceRequestDiagnostic(request, modelOverride) {
         const responseFormat = request.responseFormat ?? request.response_format;
         const responseFormatType = responseFormat && typeof responseFormat["type"] === "string" ? responseFormat["type"] : responseFormat ? "present" : "none";
         const numCtx = request.numCtx;
         const think = request.think;
         const tools = Array.isArray(request.tools) ? request.tools.length : 0;
-        return `${this.telegramBackendDiagnostic()} response_format=${responseFormatType} num_ctx=${Number.isFinite(numCtx) ? numCtx : "unset"} think=${think === void 0 ? "default" : String(think)} tools=${tools} timeoutMs=${Number.isFinite(request.timeoutMs) ? request.timeoutMs : "unset"}`;
+        return `${this.telegramBackendDiagnostic(modelOverride)} response_format=${responseFormatType} num_ctx=${Number.isFinite(numCtx) ? numCtx : "unset"} think=${think === void 0 ? "default" : String(think)} tools=${tools} timeoutMs=${Number.isFinite(request.timeoutMs) ? request.timeoutMs : "unset"}`;
       }
       telegramStreamInactivityDiagnostic(request, inferenceId, inactivityMs, contentChars, thinkingChars) {
         const entry = this.telegramActiveInferences.get(inferenceId);
@@ -635623,7 +635723,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
         const idle = entry ? `${((now - entry.lastTokenAt) / 1e3).toFixed(1)}s` : "unknown";
         const ttfb = entry?.firstChunkAt !== void 0 ? `${((entry.firstChunkAt - entry.startTs) / 1e3).toFixed(1)}s` : "never";
         const phase = entry?.firstChunkAt === void 0 ? "before-first-chunk" : "mid-stream";
-        return `stream-inactivity: no chunks for ${(inactivityMs / 1e3).toFixed(0)}s (phase=${phase}; elapsed=${elapsed}; idle=${idle}; ttfb=${ttfb}; content=${contentChars}c thinking=${thinkingChars}c; ${this.telegramInferenceRequestDiagnostic(request)}; stream_endpoint=no-sse-chunk)`;
+        return `stream-inactivity: no chunks for ${(inactivityMs / 1e3).toFixed(0)}s (phase=${phase}; elapsed=${elapsed}; idle=${idle}; ttfb=${ttfb}; content=${contentChars}c thinking=${thinkingChars}c; ${this.telegramInferenceRequestDiagnostic(request, entry?.model)}; stream_endpoint=no-sse-chunk)`;
       }
       /**
        * Drive a chatCompletionStream to exhaustion, accumulating tokens into a
@@ -636162,6 +636262,114 @@ ${retryText}`,
           this.dispatchQueuedTelegramSessionWorkSoon();
         }
       }
+      telegramRouterAutoModelEnabled() {
+        const raw = (process.env["OMNIUS_TG_ROUTER_AUTO_MODEL"] ?? "").trim().toLowerCase();
+        return raw !== "0" && raw !== "false" && raw !== "off";
+      }
+      telegramRouterCandidateModels() {
+        const raw = (process.env["OMNIUS_TG_ROUTER_MODEL_CANDIDATES"] ?? "").trim();
+        const candidates = raw ? raw.split(/[,\s]+/).map((part) => part.trim()).filter(Boolean) : TELEGRAM_DEFAULT_ROUTER_MODEL_CANDIDATES;
+        return Array.from(new Set(candidates));
+      }
+      normalizeOllamaModelNameForMatch(name10) {
+        return name10.trim().toLowerCase().replace(/:latest$/, "");
+      }
+      async fetchOllamaInstalledModelNames(baseUrl) {
+        const url = `${baseUrl.replace(/\/+$/, "")}/api/tags`;
+        const timeoutFn = AbortSignal.timeout;
+        const res = await fetch(url, {
+          signal: typeof timeoutFn === "function" ? timeoutFn(2e3) : void 0
+        });
+        if (!res.ok) throw new Error(`ollama /api/tags returned HTTP ${res.status}`);
+        const data = await res.json();
+        return Array.isArray(data.models) ? data.models.map((model) => typeof model.name === "string" ? model.name : "").filter(Boolean) : [];
+      }
+      async resolveTelegramRouterBackend(config) {
+        const explicit = (process.env["OMNIUS_TG_ROUTER_MODEL"] ?? "").trim();
+        if (explicit && !/^(?:0|false|off|same|main)$/i.test(explicit)) {
+          return {
+            backend: new OllamaAgenticBackend(config.backendUrl, explicit, config.apiKey),
+            model: explicit,
+            source: "env",
+            detail: "OMNIUS_TG_ROUTER_MODEL"
+          };
+        }
+        if (config.backendType !== "ollama" || !this.telegramRouterAutoModelEnabled()) {
+          return {
+            backend: new OllamaAgenticBackend(config.backendUrl, config.model, config.apiKey),
+            model: config.model,
+            source: "main"
+          };
+        }
+        const candidates = this.telegramRouterCandidateModels();
+        const cacheKey = `${config.backendUrl}
+${config.model}
+${candidates.join(",")}`;
+        const now = Date.now();
+        if (this.telegramRouterModelCache && this.telegramRouterModelCache.cacheKey === cacheKey && now - this.telegramRouterModelCache.atMs < 6e4) {
+          const cached = this.telegramRouterModelCache;
+          return {
+            backend: new OllamaAgenticBackend(config.backendUrl, cached.model, config.apiKey),
+            model: cached.model,
+            source: cached.source,
+            detail: cached.detail
+          };
+        }
+        try {
+          const installed = await this.fetchOllamaInstalledModelNames(config.backendUrl);
+          const installedByNormalized = /* @__PURE__ */ new Map();
+          for (const name10 of installed) {
+            installedByNormalized.set(this.normalizeOllamaModelNameForMatch(name10), name10);
+          }
+          for (const candidate of candidates) {
+            const selected = installedByNormalized.get(this.normalizeOllamaModelNameForMatch(candidate));
+            if (!selected) continue;
+            const resolved = {
+              cacheKey,
+              atMs: now,
+              model: selected,
+              source: "auto-small",
+              detail: "selected first installed OMNIUS_TG_ROUTER_MODEL_CANDIDATES entry from Ollama /api/tags"
+            };
+            this.telegramRouterModelCache = resolved;
+            return {
+              backend: new OllamaAgenticBackend(config.backendUrl, selected, config.apiKey),
+              model: selected,
+              source: "auto-small",
+              detail: resolved.detail
+            };
+          }
+        } catch (err) {
+          const detail2 = `router model auto-detect failed: ${err instanceof Error ? err.message : String(err)}`;
+          this.telegramRouterModelCache = {
+            cacheKey,
+            atMs: now,
+            model: config.model,
+            source: "main",
+            detail: detail2
+          };
+          return {
+            backend: new OllamaAgenticBackend(config.backendUrl, config.model, config.apiKey),
+            model: config.model,
+            source: "main",
+            detail: detail2
+          };
+        }
+        const detail = "no configured small router model was installed; using main model";
+        this.telegramRouterModelCache = {
+          cacheKey,
+          atMs: now,
+          model: config.model,
+          source: "main",
+          detail
+        };
+        return {
+          backend: new OllamaAgenticBackend(config.backendUrl, config.model, config.apiKey),
+          model: config.model,
+          source: "main",
+          detail
+        };
+      }
       async inferTelegramInteractionDecision(msg, toolContext) {
         const config = this.agentConfig;
         const forcedRoute = this.interactionMode === "chat" || this.interactionMode === "action" ? this.interactionMode : null;
@@ -636190,11 +636398,8 @@ ${retryText}`,
           };
           return fallback;
         }
-        const backend = new OllamaAgenticBackend(
-          config.backendUrl,
-          config.model,
-          config.apiKey
-        );
+        const routerBackend = await this.resolveTelegramRouterBackend(config);
+        const backend = routerBackend.backend;
         const forcedLine = forcedRoute ? `The operator selected Telegram mode "${forcedRoute}". The route field must be "${forcedRoute}", but should_reply must still be inferred live from context.` : `The operator selected Telegram mode "auto". Infer route live from context.`;
         const context2 = this.buildTelegramConversationContextStream(sessionKey, msg, isGroup ? 36 : 20, identitySalienceSignals);
         const currentReplyContext = this.buildTelegramCurrentReplyContext(sessionKey, msg);
@@ -636334,7 +636539,13 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
           } catch {
           }
         }
-        const diagnostics = {};
+        const diagnostics = {
+          backendType: config.backendType,
+          backendUrl: config.backendUrl,
+          backendModel: routerBackend.model,
+          routerModelSource: routerBackend.source,
+          routerModelDetail: routerBackend.detail
+        };
         const routerStartMs = Date.now();
         try {
           const result = await this.telegramRouterJsonCompletion(backend, {
@@ -636357,7 +636568,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
           const routerLatencyMs = Date.now() - routerStartMs;
           try {
             const pidReg = getPidRegistry();
-            const modelKey = this.agentConfig?.model ?? "?";
+            const modelKey = routerBackend.model ?? this.agentConfig?.model ?? "?";
             pidReg.sample(`tier1.${modelKey}`, routerLatencyMs);
             pidReg.sample(`tier2.${modelKey}`, routerLatencyMs);
           } catch {
@@ -636384,7 +636595,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
                 diagnosticNote: this.composeTelegramRouterDiagnosticNote(
                   void 0,
                   failureNarrative2,
-                  "router produced no visible attention decision content; repair/strict retry skipped for direct private/admin fail-open"
+                  "router produced no visible attention decision content; repair/strict retry skipped for direct private/admin fail-open",
+                  diagnostics
                 ),
                 raw: text
               }),
@@ -636458,7 +636670,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
             diagnosticNote: this.composeTelegramRouterDiagnosticNote(
               invalidRouterPreview,
               failureNarrative,
-              backendLivenessFailure ? "router backend failed during attention-decision recovery; no usable router decision was available" : dualEmptyVisible ? "router returned no visible decision content in JSON or plain mode; repair/strict retry skipped" : invalidRouterPreview ? "router produced an invalid attention decision payload; repair and strict retry did not recover it" : "router produced an empty attention decision payload; strict retry did not recover it"
+              backendLivenessFailure ? "router backend failed during attention-decision recovery; no usable router decision was available" : dualEmptyVisible ? "router returned no visible decision content in JSON or plain mode; repair/strict retry skipped" : invalidRouterPreview ? "router produced an invalid attention decision payload; repair and strict retry did not recover it" : "router produced an empty attention decision payload; strict retry did not recover it",
+              diagnostics
             ),
             raw: text
           }), reflectionNotes);
@@ -636472,7 +636685,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
             diagnosticNote: this.composeTelegramRouterDiagnosticNote(
               void 0,
               failureNarrative,
-              `router failed before live notes were generated: ${errMsg.slice(0, 160)}`
+              `router failed before live notes were generated: ${errMsg.slice(0, 160)}`,
+              diagnostics
             )
           }), reflectionNotes);
           return withRouterTelemetry(fallback);
@@ -636592,10 +636806,14 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
           operatorHint
         };
       }
-      composeTelegramRouterDiagnosticNote(invalidRouterPreview, failureNarrative, headline) {
+      composeTelegramRouterDiagnosticNote(invalidRouterPreview, failureNarrative, headline, diagnostics) {
         const segments = [];
         segments.push(headline);
-        segments.push(this.telegramBackendDiagnostic());
+        segments.push(this.telegramBackendDiagnostic(
+          diagnostics?.backendModel,
+          diagnostics?.routerModelSource,
+          diagnostics?.routerModelDetail
+        ));
         if (failureNarrative.summary) segments.push(failureNarrative.summary);
         if (invalidRouterPreview) segments.push(`invalid router output preview: ${invalidRouterPreview}`);
         if (failureNarrative.detail) segments.push(`router-failure trace: ${failureNarrative.detail}`);
@@ -641830,10 +642048,12 @@ ${caption}\r
       /** Long polling loop */
       async pollLoop() {
         while (this.polling) {
+          const longPollTimeoutSeconds = this.telegramLongPollTimeoutSeconds();
           try {
             const result = await this.apiCall("getUpdates", {
               offset: this.lastUpdateId + 1,
-              timeout: 30,
+              timeout: longPollTimeoutSeconds,
+              limit: 100,
               allowed_updates: TELEGRAM_ALLOWED_UPDATES
             });
             if (result.ok && Array.isArray(result.result)) {
@@ -641875,7 +642095,7 @@ ${caption}\r
               if (now - this.telegramPollWarningLastAtMs > 3e4) {
                 this.telegramPollWarningLastAtMs = now;
                 this.tuiWrite(() => renderWarning(
-                  `Telegram polling warning: getUpdates failed (${err instanceof Error ? err.message : String(err)}); retrying`
+                  `Telegram polling warning: getUpdates failed (${err instanceof Error ? err.message : String(err)}); long_poll_timeout=${longPollTimeoutSeconds}s client_deadline_ms=${this.telegramLongPollClientTimeoutMs(longPollTimeoutSeconds) ?? "none"}; retrying`
                 ));
               }
               await new Promise((r2) => setTimeout(r2, 5e3));
@@ -641883,10 +642103,17 @@ ${caption}\r
           }
         }
       }
-      telegramLongPollClientTimeoutMs() {
+      telegramLongPollTimeoutSeconds() {
+        const raw = Number.parseInt(process.env["OMNIUS_TG_LONG_POLL_TIMEOUT_SECONDS"] ?? "", 10);
+        if (Number.isFinite(raw) && raw >= 0 && raw <= 120) return raw;
+        return TELEGRAM_DEFAULT_LONG_POLL_TIMEOUT_SECONDS;
+      }
+      telegramLongPollClientTimeoutMs(serverTimeoutSeconds) {
         const raw = Number.parseInt(process.env["OMNIUS_TG_LONG_POLL_CLIENT_TIMEOUT_MS"] ?? "", 10);
-        if (Number.isFinite(raw) && raw >= 35e3 && raw <= 3e5) return raw;
-        return 45e3;
+        if (!Number.isFinite(raw)) return null;
+        const floor = Math.max(5e3, Math.floor((serverTimeoutSeconds ?? 0) * 1e3) + 5e3);
+        if (raw >= floor && raw <= 3e5) return raw;
+        return null;
       }
       /** Make a Telegram Bot API call with rate-limit retry */
       async apiCall(method, body, _retryDepth = 0) {
@@ -641902,9 +642129,11 @@ ${caption}\r
         if (isLongPoll && this.abortController) {
           const timeoutFn = AbortSignal.timeout;
           const anyFn = AbortSignal.any;
+          const bodyTimeout = typeof body?.["timeout"] === "number" ? body["timeout"] : void 0;
+          const clientTimeoutMs = this.telegramLongPollClientTimeoutMs(bodyTimeout);
           const signals = [
             this.abortController.signal,
-            typeof timeoutFn === "function" ? timeoutFn(this.telegramLongPollClientTimeoutMs()) : void 0
+            clientTimeoutMs && typeof timeoutFn === "function" ? timeoutFn(clientTimeoutMs) : void 0
           ].filter((signal) => signal instanceof AbortSignal);
           options2.signal = typeof anyFn === "function" && signals.length > 1 ? anyFn(signals) : signals[0];
         } else if (!isLongPoll) {
@@ -661562,9 +661791,14 @@ async function handleV1ChatCompletions(req2, res, ollamaUrl) {
     return;
   }
   const callerProvidedThink = "think" in routedBody;
-  const callerProvidedTools = Array.isArray(routedBody["tools"]) && routedBody["tools"].length > 0;
-  const finalThink = callerProvidedThink ? routedBody["think"] : callerProvidedTools ? void 0 : false;
+  const thinkingAllowed = process.env["OMNIUS_ENABLE_THINKING"] === "1" && process.env["OMNIUS_FORCE_NO_THINK"] !== "1";
+  const finalThink = thinkingAllowed && callerProvidedThink ? routedBody["think"] : false;
   const ollamaBody = { ...routedBody };
+  if (finalThink === false && Array.isArray(ollamaBody["messages"])) {
+    ollamaBody["messages"] = appendNoThinkDirectivesToMessages(
+      ollamaBody["messages"]
+    );
+  }
   const ollamaOptions = ollamaBody["options"] && typeof ollamaBody["options"] === "object" ? { ...ollamaBody["options"] } : {};
   if (typeof ollamaBody["max_tokens"] === "number") {
     ollamaOptions["num_predict"] = ollamaBody["max_tokens"];

package/npm-shrinkwrap.json CHANGED Viewed

@@ -1,12 +1,12 @@
 {
   "name": "omnius",
-  "version": "1.0.183",
+  "version": "1.0.184",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "omnius",
-      "version": "1.0.183",
+      "version": "1.0.184",
       "bundleDependencies": [
         "image-to-ascii"
       ],

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "omnius",
-  "version": "1.0.183",
+  "version": "1.0.184",
   "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
   "type": "module",
   "main": "./dist/index.js",