npm - omnius - Versions diffs - 1.0.182 → 1.0.183 - Mend

omnius 1.0.182 → 1.0.183

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/index.js CHANGED Viewed

@@ -550132,11 +550132,17 @@ function injectNoThinkDirective(messages2) {
   const target = messages2[lastUserIdx];
   if (!target || typeof target.content !== "string")
     return messages2;
-  if (/\/no_think\b/i.test(target.content))
+  const hasOllamaNoThink = /\/nothink\b/i.test(target.content);
+  const hasQwenNoThink = /\/no[_-]think\b/i.test(target.content);
+  if (hasOllamaNoThink && hasQwenNoThink)
     return messages2;
+  const suffix = [
+    hasOllamaNoThink ? null : "/nothink",
+    hasQwenNoThink ? null : "/no_think"
+  ].filter(Boolean).join("\n");
   const annotated = `${target.content}
-/no_think`;
+${suffix}`;
   return messages2.map((m2, i2) => i2 === lastUserIdx ? { ...m2, content: annotated } : m2);
 }
 function backendHttpErrorDetail(text) {
@@ -550154,6 +550160,8 @@ function isOllamaModelNotFoundResponse(status, text, model) {
 function computeEffectiveThink(params) {
   if (process.env["OMNIUS_FORCE_NO_THINK"] === "1")
     return false;
+  if (process.env["OMNIUS_ENABLE_THINKING"] !== "1")
+    return false;
   if (params.suppressed)
     return false;
   if (params.hasTools)
@@ -550172,18 +550180,9 @@ function computeEffectiveThink(params) {
   return params.defaultThink;
 }
 function sanitizeHistoryThink(messages2) {
-  let lastAsstIdx = -1;
-  for (let i2 = messages2.length - 1; i2 >= 0; i2--) {
-    if (messages2[i2]?.role === "assistant") {
-      lastAsstIdx = i2;
-      break;
-    }
-  }
-  return messages2.map((m2, i2) => {
+  return messages2.map((m2) => {
     if (m2.role !== "assistant" || typeof m2.content !== "string")
       return m2;
-    if (i2 === lastAsstIdx)
-      return m2;
     return { ...m2, content: stripThinkBlocks(m2.content) };
   });
 }
@@ -563608,10 +563607,11 @@ ${description}`
         if (effectiveThink === true && (effectiveMaxTokens ?? 0) < 4096) {
           effectiveMaxTokens = 4096;
         }
+        const requestMessages = effectiveThink ? cleanedMessages : injectNoThinkDirective(cleanedMessages);
         const responseFormat = request.responseFormat ?? request.response_format;
         const body = {
           model: this.model,
-          messages: cleanedMessages,
+          messages: requestMessages,
           tools: request.tools,
           temperature: request.temperature,
           max_tokens: effectiveMaxTokens,
@@ -563620,7 +563620,7 @@ ${description}`
         if (responseFormat !== void 0) {
           body["response_format"] = responseFormat;
         }
-        const reqNumCtx = request.numCtx;
+        const reqNumCtx = request.numCtx ?? request.num_ctx;
         if (Number.isFinite(reqNumCtx) && (reqNumCtx ?? 0) > 0) {
           const opts = body["options"] ?? {};
           opts["num_ctx"] = reqNumCtx;
@@ -563705,7 +563705,7 @@ ${description}`
           const justSuppressed = this._thinkSuppressed && this._thinkFailStreak === _OllamaAgenticBackend._thinkFailThreshold;
           const shouldRetryThinkGuard = outcome !== null && effectiveThink === true && (justSuppressed || outcome === "empty_after_strip" || outcome === "unclosed_think");
           if (shouldRetryThinkGuard || shouldRecoverFromEmpty) {
-            const retryMessages = injectNoThinkDirective(cleanedMessages);
+            const retryMessages = injectNoThinkDirective(requestMessages);
             const retryBody = {
               model: this.model,
               messages: retryMessages,
@@ -563892,7 +563892,7 @@ ${description}`
        * Ollama pool routing as non-stream completions.
        */
       async *chatCompletionStream(request) {
-        const cleanedMessages = normalizeMessagesForStrictOpenAI(request.messages.map((m2) => m2.role === "assistant" && typeof m2.content === "string" ? { ...m2, content: stripThinkBlocks(m2.content) } : m2));
+        const cleanedMessages = normalizeMessagesForStrictOpenAI(sanitizeHistoryThink(request.messages));
         let effectiveThink = computeEffectiveThink({
           requestThink: request.think,
           defaultThink: this.thinking,
@@ -563907,10 +563907,11 @@ ${description}`
         if (effectiveThink === true && (effectiveMaxTokens ?? 0) < 4096) {
           effectiveMaxTokens = 4096;
         }
+        const requestMessages = effectiveThink ? cleanedMessages : injectNoThinkDirective(cleanedMessages);
         const responseFormat = request.responseFormat ?? request.response_format;
         const body = {
           model: this.model,
-          messages: cleanedMessages,
+          messages: requestMessages,
           tools: request.tools,
           temperature: request.temperature,
           max_tokens: effectiveMaxTokens,
@@ -563921,7 +563922,7 @@ ${description}`
         if (responseFormat !== void 0) {
           body["response_format"] = responseFormat;
         }
-        const reqNumCtx = request.numCtx;
+        const reqNumCtx = request.numCtx ?? request.num_ctx;
         if (Number.isFinite(reqNumCtx) && (reqNumCtx ?? 0) > 0) {
           const opts = body["options"] ?? {};
           opts["num_ctx"] = reqNumCtx;
@@ -564176,6 +564177,57 @@ var init_nexusBackend = __esm({
         this.authKey = authKey || "";
         this.thinking = thinking ?? false;
       }
+      effectiveThink(request) {
+        if (process.env["OMNIUS_FORCE_NO_THINK"] === "1")
+          return false;
+        if (process.env["OMNIUS_ENABLE_THINKING"] !== "1")
+          return false;
+        if (Array.isArray(request.tools) && request.tools.length > 0)
+          return false;
+        if (request.think === true)
+          return true;
+        if (request.think === false)
+          return false;
+        return this.thinking === true;
+      }
+      noThinkMessages(messages2) {
+        let lastUserIdx = -1;
+        for (let i2 = messages2.length - 1; i2 >= 0; i2--) {
+          if (messages2[i2]?.role === "user") {
+            lastUserIdx = i2;
+            break;
+          }
+        }
+        if (lastUserIdx < 0)
+          return messages2;
+        const target = messages2[lastUserIdx];
+        if (!target || typeof target.content !== "string")
+          return messages2;
+        const hasOllamaNoThink = /\/nothink\b/i.test(target.content);
+        const hasQwenNoThink = /\/no[_-]think\b/i.test(target.content);
+        if (hasOllamaNoThink && hasQwenNoThink)
+          return messages2;
+        const suffix = [
+          hasOllamaNoThink ? null : "/nothink",
+          hasQwenNoThink ? null : "/no_think"
+        ].filter(Boolean).join("\n");
+        return messages2.map((m2, i2) => i2 === lastUserIdx ? { ...m2, content: `${target.content}
+${suffix}` } : m2);
+      }
+      requestMessages(request, effectiveThink) {
+        return effectiveThink ? request.messages : this.noThinkMessages(request.messages);
+      }
+      applyOptionalRequestFields(daemonArgs, request) {
+        const responseFormat = request.responseFormat ?? request.response_format;
+        if (responseFormat !== void 0) {
+          daemonArgs.response_format = JSON.stringify(responseFormat);
+        }
+        const numCtx = request.numCtx ?? request.num_ctx;
+        if (Number.isFinite(numCtx) && (numCtx ?? 0) > 0) {
+          daemonArgs.num_ctx = String(numCtx);
+        }
+      }
       /** Reset the consecutive failure counter (called on endpoint switch / reconnect) */
       resetFailures() {
         this.consecutiveFailures = 0;
@@ -564191,9 +564243,10 @@ var init_nexusBackend = __esm({
           err.fatal = true;
           throw err;
         }
+        const effectiveThink = this.effectiveThink(request);
         const daemonArgs = {
           model: this.model,
-          messages: JSON.stringify(request.messages),
+          messages: JSON.stringify(this.requestMessages(request, effectiveThink)),
           tools: JSON.stringify(request.tools),
           temperature: String(request.temperature),
           max_tokens: String(request.maxTokens)
@@ -564204,7 +564257,8 @@ var init_nexusBackend = __esm({
         if (this.authKey) {
           daemonArgs.auth_key = this.authKey;
         }
-        daemonArgs.think = String(this.thinking);
+        daemonArgs.think = String(effectiveThink);
+        this.applyOptionalRequestFields(daemonArgs, request);
         let rawResult;
         try {
           rawResult = await this.sendFn("remote_infer", daemonArgs, request.timeoutMs || 12e4);
@@ -564303,9 +564357,10 @@ var init_nexusBackend = __esm({
       async *chatCompletionStream(request) {
         const streamFile = join97(tmpdir18(), `nexus-stream-${randomBytes19(6).toString("hex")}.jsonl`);
         writeFileSync38(streamFile, "", "utf8");
+        const effectiveThink = this.effectiveThink(request);
         const daemonArgs = {
           model: this.model,
-          messages: JSON.stringify(request.messages),
+          messages: JSON.stringify(this.requestMessages(request, effectiveThink)),
           tools: JSON.stringify(request.tools),
           temperature: String(request.temperature),
           max_tokens: String(request.maxTokens),
@@ -564315,7 +564370,8 @@ var init_nexusBackend = __esm({
           daemonArgs.target_peer = this.targetPeer;
         if (this.authKey)
           daemonArgs.auth_key = this.authKey;
-        daemonArgs.think = String(this.thinking);
+        daemonArgs.think = String(effectiveThink);
+        this.applyOptionalRequestFields(daemonArgs, request);
         let rawResult;
         try {
           rawResult = await this.sendFn("remote_infer", daemonArgs, request.timeoutMs || 12e4);
@@ -629145,7 +629201,7 @@ function telegramRouterTimeoutMs(configTimeoutMs, minMs = 1e4, maxMs) {
     10
   );
   const floor = Number.isFinite(minMs) && minMs > 0 ? minMs : 1e4;
-  const configuredCap = Number.isFinite(envRaw) && envRaw >= floor ? envRaw : 9e4;
+  const configuredCap = Number.isFinite(envRaw) && envRaw >= floor ? envRaw : 3e4;
   const callerCap = Number.isFinite(maxMs) && (maxMs ?? 0) >= floor ? maxMs : configuredCap;
   const cap = Math.max(floor, Math.min(configuredCap, callerCap));
   const requested = Number.isFinite(configTimeoutMs) && (configTimeoutMs ?? 0) > 0 ? configTimeoutMs : cap;
@@ -629172,6 +629228,9 @@ function telegramRouterDiagnosticAttemptLooksLikeTimeout(attempt) {
 function telegramRouterDiagnosticAttemptLooksLikeBackendLiveness(attempt) {
   return attempt.status === "threw" && telegramRouterErrorLooksLikeBackendLiveness(attempt.error ?? "");
 }
+function telegramRouterDiagnosticIsDualEmptyVisible(diag) {
+  return diag.jsonModeStatus === "empty-after-strip" && diag.plainStatus === "empty-after-strip";
+}
 function telegramThinkSuppressedRequest(request) {
   const messages2 = Array.isArray(request.messages) ? request.messages.slice() : [];
   let appended = false;
@@ -629179,18 +629238,24 @@ function telegramThinkSuppressedRequest(request) {
     const m2 = messages2[i2];
     if (!m2 || m2.role !== "user") continue;
     const content = typeof m2.content === "string" ? m2.content : "";
-    if (/\/no_think\b/i.test(content)) {
+    const hasOllamaNoThink = /\/nothink\b/i.test(content);
+    const hasQwenNoThink = /\/no[_-]think\b/i.test(content);
+    if (hasOllamaNoThink && hasQwenNoThink) {
       appended = true;
       break;
     }
-    messages2[i2] = { ...m2, content: content.endsWith("\n") ? `${content}/no_think` : `${content}
+    const suffix = [
+      hasOllamaNoThink ? null : "/nothink",
+      hasQwenNoThink ? null : "/no_think"
+    ].filter(Boolean).join("\n");
+    messages2[i2] = { ...m2, content: content.endsWith("\n") ? `${content}${suffix}` : `${content}
-/no_think` };
+${suffix}` };
     appended = true;
     break;
   }
   if (!appended) {
-    messages2.push({ role: "user", content: "/no_think" });
+    messages2.push({ role: "user", content: "/nothink\n/no_think" });
   }
   return { ...request, messages: messages2, think: false };
 }
@@ -631288,10 +631353,14 @@ Telegram link integrity contract:
        * capacity and flood the TUI.
        */
       telegramActiveWorkSessions = /* @__PURE__ */ new Set();
+      telegramActiveWorkGenerations = /* @__PURE__ */ new Map();
+      telegramActiveWorkStartedAtMs = /* @__PURE__ */ new Map();
       /** Queued Telegram sessions waiting for a global work slot. */
       telegramQueuedSessionWork = /* @__PURE__ */ new Map();
       telegramDispatchQueuedTimer = null;
       telegramDispatchQueuedAtMs = 0;
+      telegramQueueDiagnosticLastAtMs = 0;
+      telegramPollWarningLastAtMs = 0;
       /** Lightweight chat history by chat/guest session key */
       chatHistory = /* @__PURE__ */ new Map();
       /** Participant and tone state by chat/guest session key */
@@ -631927,6 +631996,63 @@ No scoped reflection artifact exists yet for this chat. Use <code>/reflect</code
         if (!Number.isFinite(parsed)) return 350;
         return Math.max(0, Math.min(2e3, Math.floor(parsed)));
       }
+      telegramQueueDiagnosticIntervalMs() {
+        const raw = Number.parseInt(process.env["OMNIUS_TG_QUEUE_DIAGNOSTIC_MS"] ?? "", 10);
+        if (Number.isFinite(raw) && raw >= 5e3 && raw <= 3e5) return raw;
+        return 3e4;
+      }
+      maybeLogTelegramQueueDiagnostic(reason) {
+        if (this.telegramQueuedSessionWork.size === 0) return;
+        const now = Date.now();
+        const interval = this.telegramQueueDiagnosticIntervalMs();
+        if (now - this.telegramQueueDiagnosticLastAtMs < interval) return;
+        this.telegramQueueDiagnosticLastAtMs = now;
+        const queued = [...this.telegramQueuedSessionWork.values()].sort((a2, b) => a2.enqueuedAtMs - b.enqueuedAtMs).slice(0, 4).map((work) => {
+          const age = formatTelegramPipelineDuration(now - work.enqueuedAtMs);
+          const live = this.telegramSessionIsLive(work.sessionKey) ? "blocked:same-session-live" : "ready";
+          return `${work.sessionKey} age=${age} bundled=${work.messageCount} ${live}`;
+        });
+        const active = [...this.activeTelegramInteractionSessionKeys()].slice(0, 6);
+        const inferences = this.getTelegramActiveInferences().slice(0, 4).map((inf) => `${inf.id}/${inf.kind}/${inf.model} elapsed=${inf.elapsedSec.toFixed(1)}s ttfb=${inf.ttfbSec === void 0 ? "waiting" : `${inf.ttfbSec.toFixed(1)}s`}`);
+        this.tuiWrite(() => renderTelegramSubAgentEvent(
+          "queue",
+          `queue diagnostic (${reason}): active ${this.activeTelegramInteractionCount()}/${this.getSubAgentLimit()} [${active.join(", ") || "none"}]; queued ${this.telegramQueuedSessionWork.size} [${queued.join(" | ")}]; inferences [${inferences.join(" | ") || "none"}]`
+        ));
+      }
+      nextTelegramWorkGeneration(sessionKey) {
+        const generation = (this.telegramActiveWorkGenerations.get(sessionKey) ?? 0) + 1;
+        this.telegramActiveWorkGenerations.set(sessionKey, generation);
+        return generation;
+      }
+      telegramWorkGenerationIsCurrent(sessionKey, generation) {
+        return this.telegramActiveWorkGenerations.get(sessionKey) === generation;
+      }
+      telegramPreAgentWorkMaxIdleMs() {
+        const routerMs = telegramRouterTimeoutMs(this.agentConfig?.timeoutMs);
+        const raw = Number.parseInt(process.env["OMNIUS_TG_PRE_AGENT_MAX_IDLE_MS"] ?? "", 10);
+        if (Number.isFinite(raw) && raw >= 3e4 && raw <= 9e5) return raw;
+        return Math.max(12e4, routerMs + 3e4);
+      }
+      reapStaleTelegramPreAgentWork() {
+        const now = Date.now();
+        const maxIdleMs = this.telegramPreAgentWorkMaxIdleMs();
+        for (const sessionKey of [...this.telegramActiveWorkSessions]) {
+          if (this.subAgents.has(sessionKey) || this.activeChatSessions.has(sessionKey)) continue;
+          const startedAt2 = this.telegramActiveWorkStartedAtMs.get(sessionKey);
+          if (!startedAt2) continue;
+          const idleMs = now - startedAt2;
+          if (idleMs <= maxIdleMs) continue;
+          const generation = this.telegramActiveWorkGenerations.get(sessionKey) ?? 0;
+          this.telegramActiveWorkGenerations.set(sessionKey, generation + 1);
+          this.telegramActiveWorkSessions.delete(sessionKey);
+          this.telegramActiveWorkStartedAtMs.delete(sessionKey);
+          this.refreshActiveTelegramInteractionCount();
+          this.tuiWrite(() => renderTelegramSubAgentEvent(
+            "queue",
+            `watchdog: released stale pre-agent Telegram work pin for ${sessionKey} after ${Math.round(idleMs / 1e3)}s; queued messages may dispatch now`
+          ));
+        }
+      }
       dispatchQueuedTelegramSessionWorkSoon(delayMs = 0) {
         const dueAt = Date.now() + Math.max(0, delayMs);
         if (this.telegramDispatchQueuedTimer && this.telegramDispatchQueuedAtMs <= dueAt) return;
@@ -631956,6 +632082,9 @@ No scoped reflection artifact exists yet for this chat. Use <code>/reflect</code
             this.dispatchQueuedTelegramSessionWorkSoon(Math.max(0, nextDue - Date.now()));
           }
         }
+        if (this.telegramQueuedSessionWork.size > 0) {
+          this.maybeLogTelegramQueueDiagnostic("dispatch");
+        }
         this.refreshActiveTelegramInteractionCount();
       }
       buildTelegramQueuedSessionWork(sessionKey, msg, toolContext, now) {
@@ -631999,11 +632128,16 @@ No scoped reflection artifact exists yet for this chat. Use <code>/reflect</code
           return;
         }
         this.telegramActiveWorkSessions.add(work.sessionKey);
+        this.telegramActiveWorkStartedAtMs.set(work.sessionKey, Date.now());
+        const generation = this.nextTelegramWorkGeneration(work.sessionKey);
         this.refreshActiveTelegramInteractionCount();
-        void this.processTelegramMessageWork(work).catch((err) => {
+        void this.processTelegramMessageWork(work, generation).catch((err) => {
           this.tuiWrite(() => renderWarning(`Telegram sub-agent error: ${err instanceof Error ? err.message : String(err)}`));
         }).finally(() => {
-          this.telegramActiveWorkSessions.delete(work.sessionKey);
+          if (this.telegramWorkGenerationIsCurrent(work.sessionKey, generation)) {
+            this.telegramActiveWorkSessions.delete(work.sessionKey);
+            this.telegramActiveWorkStartedAtMs.delete(work.sessionKey);
+          }
           this.refreshActiveTelegramInteractionCount();
           this.dispatchQueuedTelegramSessionWorkSoon();
         });
@@ -635238,7 +635372,7 @@ ${lines.join("\n")}`);
           `Current Telegram message text (untrusted user data):
 ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
           "",
-          "/no_think"
+          "/nothink\n/no_think"
         ].filter(Boolean).join("\n");
         try {
           const result = await this.telegramRouterJsonCompletion(
@@ -635705,7 +635839,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
           `Original router output:`,
           rawPreview,
           ``,
-          `/no_think`
+          `/nothink
+/no_think`
         ].join("\n");
         try {
           const result = await this.telegramRouterJsonCompletion(backend, {
@@ -635718,8 +635853,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
             ],
             tools: [],
             temperature: 0,
-            maxTokens: 800,
-            timeoutMs: telegramRouterTimeoutMs(timeoutMs, 8e3, 2e4),
+            maxTokens: 500,
+            timeoutMs: telegramRouterTimeoutMs(timeoutMs, 8e3, 15e3),
             think: false
           }, diagnostics, "router-repair", sessionKey);
           const repairedText = result.choices[0]?.message?.content ?? "";
@@ -635772,7 +635907,8 @@ ${userPrompt.slice(-4e3)}` : userPrompt;
           `Router context (trailing-window):`,
           trimmedUserPrompt,
           ``,
-          `/no_think`
+          `/nothink
+/no_think`
         ].join("\n");
         try {
           const result = await this.telegramRouterJsonCompletion(backend, {
@@ -635785,8 +635921,8 @@ ${userPrompt.slice(-4e3)}` : userPrompt;
             ],
             tools: [],
             temperature: 0,
-            maxTokens: 1e3,
-            timeoutMs: telegramRouterTimeoutMs(timeoutMs, 1e4, 3e4),
+            maxTokens: 500,
+            timeoutMs: telegramRouterTimeoutMs(timeoutMs, 8e3, 15e3),
             think: false
           }, diagnostics, "router-strict-retry", sessionKey);
           const retryText = result.choices[0]?.message?.content ?? "";
@@ -635980,6 +636116,7 @@ ${retryText}`,
        * never fires.
        */
       reapStaleTelegramSubAgents() {
+        this.reapStaleTelegramPreAgentWork();
         const maxIdleMs = this.telegramSubAgentMaxIdleMs();
         const now = Date.now();
         const stale = [];
@@ -636000,6 +636137,7 @@ ${retryText}`,
             clearInterval(agent.typingInterval);
             agent.typingInterval = null;
           }
+          this.stopTelegramPublicProgressMessage(agent);
           try {
             agent.runner?.abort?.();
           } catch {
@@ -636019,6 +636157,10 @@ ${retryText}`,
           this.subAgentViewCallbacks?.onStatus(agent.viewId, "failed");
           this.subAgentViewCallbacks?.onComplete(agent.viewId);
         }
+        if (this.telegramQueuedSessionWork.size > 0) {
+          this.maybeLogTelegramQueueDiagnostic("watchdog");
+          this.dispatchQueuedTelegramSessionWorkSoon();
+        }
       }
       async inferTelegramInteractionDecision(msg, toolContext) {
         const config = this.agentConfig;
@@ -636205,10 +636347,10 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
             ],
             tools: [],
             temperature: 0,
-            // Minimal route JSON should fit comfortably; keeping this small avoids
-            // reintroducing truncated-note repair cascades.
-            maxTokens: 900,
-            timeoutMs: telegramRouterTimeoutMs(config.timeoutMs),
+            // Router JSON is tiny. Keep the answer budget tight so Qwen-class
+            // models cannot spend a minute producing hidden <think>-only output.
+            maxTokens: 360,
+            timeoutMs: telegramRouterTimeoutMs(config.timeoutMs, 8e3, 3e4),
             think: false
           }, diagnostics, "router", sessionKey);
           const text = result.choices[0]?.message?.content ?? "";
@@ -636263,8 +636405,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
                 ],
                 tools: [],
                 temperature: 0,
-                maxTokens: 1400,
-                timeoutMs: telegramRouterTimeoutMs(config.timeoutMs),
+                maxTokens: 700,
+                timeoutMs: telegramRouterTimeoutMs(config.timeoutMs, 8e3, 3e4),
                 think: false
               }, diagnostics, "router", sessionKey);
               const reissuedText = reissued.choices[0]?.message?.content ?? "";
@@ -636277,7 +636419,14 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
             } catch {
             }
           }
-          const repaired = await this.repairTelegramInteractionDecision(
+          const dualEmptyVisible = telegramRouterDiagnosticIsDualEmptyVisible(diagnostics) && !telegramRouterRawPreview(text);
+          if (dualEmptyVisible) {
+            if (diagnostics.repairStatus === void 0) {
+              diagnostics.repairStatus = "skipped";
+              diagnostics.repairError = "router returned no visible text in json-mode or plain retry; repair/strict retry would only burn more inference";
+            }
+          }
+          const repaired = dualEmptyVisible ? null : await this.repairTelegramInteractionDecision(
             backend,
             text,
             forcedRoute,
@@ -636288,7 +636437,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
           if (repaired) {
             return withRouterTelemetry(this.applyTelegramSilentReflectionNotes(repaired, reflectionNotes));
           }
-          const strictRetry = await this.retryTelegramInteractionDecisionStrict(
+          const strictRetry = dualEmptyVisible ? null : await this.retryTelegramInteractionDecisionStrict(
             backend,
             userPrompt,
             text,
@@ -636304,12 +636453,12 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
           const failureNarrative = this.summarizeTelegramRouterFailure(diagnostics);
           const backendLivenessFailure = (diagnostics.attempts ?? []).some(telegramRouterDiagnosticAttemptLooksLikeBackendLiveness) || telegramRouterErrorLooksLikeBackendLiveness(diagnostics.repairError ?? "") || telegramRouterErrorLooksLikeBackendLiveness(diagnostics.strictRetryError ?? "");
           const fallback = this.applyTelegramSilentReflectionNotes(this.buildTelegramRouterUnavailableDecision(msg, toolContext, {
-            reason: backendLivenessFailure ? "router recovery hit a backend liveness failure; no model-derived reply decision" : "router output was not valid decision JSON after repair/retry; no model-derived reply decision",
+            reason: backendLivenessFailure ? "router recovery hit a backend liveness failure; no model-derived reply decision" : dualEmptyVisible ? "router returned no visible decision content in JSON or plain mode; no model-derived reply decision" : "router output was not valid decision JSON after repair/retry; no model-derived reply decision",
             silentDisposition: reflectionNotes.silentDisposition,
             diagnosticNote: this.composeTelegramRouterDiagnosticNote(
               invalidRouterPreview,
               failureNarrative,
-              backendLivenessFailure ? "router backend failed during attention-decision recovery; no usable router decision was available" : invalidRouterPreview ? "router produced an invalid attention decision payload; repair and strict retry did not recover it" : "router produced an empty attention decision payload; strict retry did not recover it"
+              backendLivenessFailure ? "router backend failed during attention-decision recovery; no usable router decision was available" : dualEmptyVisible ? "router returned no visible decision content in JSON or plain mode; repair/strict retry skipped" : invalidRouterPreview ? "router produced an invalid attention decision payload; repair and strict retry did not recover it" : "router produced an empty attention decision payload; strict retry did not recover it"
             ),
             raw: text
           }), reflectionNotes);
@@ -636884,6 +637033,7 @@ ${TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT}`);
         for (const [, agent] of this.subAgents) {
           agent.aborted = true;
           if (agent.typingInterval) clearInterval(agent.typingInterval);
+          this.stopTelegramPublicProgressMessage(agent);
           try {
             agent.runner?.abort?.();
           } catch {
@@ -636899,6 +637049,8 @@ ${TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT}`);
         }
         this.telegramQueuedSessionWork.clear();
         this.telegramActiveWorkSessions.clear();
+        this.telegramActiveWorkGenerations.clear();
+        this.telegramActiveWorkStartedAtMs.clear();
         this.telegramAdminLivePanels.clear();
         this.flushTelegramViewWrites();
         this.flushTelegramTuiWrites();
@@ -637085,6 +637237,62 @@ ${TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT}`);
           }
         }
       }
+      shouldUseTelegramPublicProgressMessage(msg, toolContext) {
+        return toolContext === "telegram-public" && msg.chatType !== "private" && !msg.guestQueryId;
+      }
+      renderTelegramPublicProgressHTML(subAgent, msg, phase) {
+        const elapsedSec = Math.max(0, Math.floor((Date.now() - subAgent.startedAtMs) / 1e3));
+        const sessionKey = this.sessionKeyForMessage(msg);
+        const activeInference = this.getTelegramActiveInferences().find((inf) => inf.sessionKey === sessionKey);
+        const status = activeInference ? activeInference.ttfbSec === void 0 ? `model request active; waiting for first token (${activeInference.kind}, ${activeInference.elapsedSec.toFixed(1)}s)` : `streaming ${activeInference.kind}; content=${activeInference.contentTokens}t thinking=${activeInference.thinkingTokens}t` : phase;
+        const width = 12;
+        const filled = Math.min(width, Math.floor(elapsedSec % 60 / 60 * width));
+        const bar = `[${"#".repeat(filled)}${"-".repeat(width - filled)}]`;
+        return [
+          `<b>Working</b>`,
+          `<code>${bar}</code> ${elapsedSec}s`,
+          `<i>${escapeTelegramHTML(status)}</i>`
+        ].join("\n");
+      }
+      startTelegramPublicProgressMessage(subAgent, msg, phase) {
+        if (!this.shouldUseTelegramPublicProgressMessage(msg, subAgent.toolContext)) return;
+        if (subAgent.publicProgressTimer) return;
+        const update2 = () => {
+          if (subAgent.aborted) return;
+          if (!this.subAgents.has(this.sessionKeyForMessage(msg))) return;
+          const html = this.renderTelegramPublicProgressHTML(subAgent, msg, phase);
+          if (subAgent.liveMessageId) {
+            const now = Date.now();
+            if (now - subAgent.lastEditMs < 3e3) return;
+            subAgent.lastEditMs = now;
+            void this.editLiveMessage(msg.chatId, subAgent.liveMessageId, html).catch(() => {
+            });
+            return;
+          }
+          if (subAgent.liveMessagePromise) return;
+          subAgent.liveMessagePromise = this.sendLiveMessage(
+            msg.chatId,
+            html,
+            msg.chatType !== "private" ? msg.messageId : void 0
+          ).then((id) => {
+            subAgent.liveMessageId = id;
+            subAgent.lastEditMs = Date.now();
+          }).catch(() => {
+          }).finally(() => {
+            subAgent.liveMessagePromise = null;
+          });
+        };
+        update2();
+        subAgent.publicProgressTimer = setInterval(update2, 5e3);
+        if (typeof subAgent.publicProgressTimer.unref === "function") {
+          subAgent.publicProgressTimer.unref();
+        }
+      }
+      stopTelegramPublicProgressMessage(subAgent) {
+        if (!subAgent.publicProgressTimer) return;
+        clearInterval(subAgent.publicProgressTimer);
+        subAgent.publicProgressTimer = null;
+      }
       ensureTelegramAdminLivePanel(subAgent, msg) {
         const existing = subAgent.adminLivePanelNonce ? this.telegramAdminLivePanels.get(subAgent.adminLivePanelNonce) : void 0;
         if (existing) return existing;
@@ -637343,11 +637551,12 @@ Join: ${newUrl}`);
         }
         this.scheduleTelegramSessionWork(msg, toolContext);
       }
-      async processTelegramMessageWork(work) {
+      async processTelegramMessageWork(work, workGeneration) {
         const msg = work.msg;
         const toolContext = work.toolContext;
         const sessionKey = this.sessionKeyForMessage(msg);
         const isAdminDM = toolContext === "telegram-admin-dm";
+        if (!this.telegramWorkGenerationIsCurrent(sessionKey, workGeneration)) return;
         const existing = this.subAgents.get(sessionKey);
         if (existing && !existing.aborted) {
           await this.enqueueTelegramQueuedSessionWorkForExistingSubAgent(work, existing);
@@ -637365,6 +637574,13 @@ Join: ${newUrl}`);
         } catch (err) {
           decision2 = this.fallbackTelegramRouterDecision(msg, toolContext, err);
         }
+        if (!this.telegramWorkGenerationIsCurrent(sessionKey, workGeneration)) {
+          this.tuiWrite(() => renderTelegramSubAgentEvent(
+            msg.username,
+            `discarded stale Telegram work result after queue pin release for ${sessionKey}`
+          ));
+          return;
+        }
         const storedPreference = this.applyTelegramReplyPreferenceUpdate(
           sessionKey,
           msg,
@@ -637482,6 +637698,7 @@ Join: ${newUrl}`);
         if (replyEdge) {
           this.tuiWrite(() => renderTelegramSubAgentEvent(msg.username, replyEdge));
         }
+        this.startTelegramPublicProgressMessage(subAgent, msg, "taking notes and preparing tools");
         try {
           let mediaContext = "";
           if (msg.media || msg.replyToMedia) {
@@ -637498,6 +637715,7 @@ Join: ${newUrl}`);
             clearInterval(subAgent.typingInterval);
             subAgent.typingInterval = null;
           }
+          this.stopTelegramPublicProgressMessage(subAgent);
           const finalText = cleanTelegramVisibleReply(result || "");
           if (isAdminDM && !this.telegramAdminRunCompleted(subAgent)) {
             const incompleteText = this.telegramAdminIncompleteRunText(subAgent, finalText);
@@ -637566,6 +637784,7 @@ Join: ${newUrl}`);
             clearInterval(subAgent.typingInterval);
             subAgent.typingInterval = null;
           }
+          this.stopTelegramPublicProgressMessage(subAgent);
           const errMsg = err instanceof Error ? err.message : String(err);
           this.tuiWrite(() => renderTelegramSubAgentError(msg.username, errMsg));
           this.subAgentViewCallbacks?.onWrite(subAgent.viewId, `error: ${errMsg}`);
@@ -637582,6 +637801,7 @@ Join: ${newUrl}`);
             });
           }
         } finally {
+          this.stopTelegramPublicProgressMessage(subAgent);
           this.clearTelegramSubAgentContextBuffer(sessionKey);
           this.subAgents.delete(sessionKey);
           this.refreshActiveTelegramInteractionCount();
@@ -637755,6 +637975,24 @@ Join: ${newUrl}`);
           typingInterval = this.startTypingIndicator(msg.chatId);
         }
         this.tuiWrite(() => renderTelegramSubAgentEvent(msg.username, `live inference: chat reply (${this.interactionMode})`));
+        if (this.shouldUseTelegramPublicProgressMessage(msg, toolContext)) {
+          const initialHtml = [
+            `<b>Working</b>`,
+            `<code>[------------]</code> 0s`,
+            `<i>preparing a concise reply</i>`
+          ].join("\n");
+          liveMessagePromise = this.sendLiveMessage(
+            msg.chatId,
+            initialHtml,
+            msg.chatType !== "private" ? msg.messageId : void 0
+          ).then((id) => {
+            liveMessageId = id;
+            lastEditMs = Date.now();
+          }).catch(() => {
+          }).finally(() => {
+            liveMessagePromise = null;
+          });
+        }
         try {
           const mediaContext = msg.media || msg.replyToMedia || msg.livePhoto ? await this.processMediaContextForMessage(msg) : "";
           const contextualPayload = [mediaContext, additionalContext].filter(Boolean).join("\n\n");
@@ -641633,11 +641871,23 @@ ${caption}\r
             }
           } catch (err) {
             if (this.polling) {
+              const now = Date.now();
+              if (now - this.telegramPollWarningLastAtMs > 3e4) {
+                this.telegramPollWarningLastAtMs = now;
+                this.tuiWrite(() => renderWarning(
+                  `Telegram polling warning: getUpdates failed (${err instanceof Error ? err.message : String(err)}); retrying`
+                ));
+              }
               await new Promise((r2) => setTimeout(r2, 5e3));
             }
           }
         }
       }
+      telegramLongPollClientTimeoutMs() { // Return the client-side timeout in milliseconds used for the getUpdates long-poll request.
+        const raw = Number.parseInt(process.env["OMNIUS_TG_LONG_POLL_CLIENT_TIMEOUT_MS"] ?? "", 10); // optional override from the environment; an unset variable parses to NaN
+        if (Number.isFinite(raw) && raw >= 35e3 && raw <= 3e5) return raw; // accept the override only within 35000..300000 ms (inclusive)
+        return 45e3; // default of 45000 ms when the override is missing, non-numeric, or out of range
+      }
       /** Make a Telegram Bot API call with rate-limit retry */
       async apiCall(method, body, _retryDepth = 0) {
         const url = `https://api.telegram.org/bot${this.botToken}/${method}`;
@@ -641650,7 +641900,13 @@ ${caption}\r
         }
         const isLongPoll = method === "getUpdates";
         if (isLongPoll && this.abortController) {
-          options2.signal = this.abortController.signal;
+          const timeoutFn = AbortSignal.timeout;
+          const anyFn = AbortSignal.any;
+          const signals = [
+            this.abortController.signal,
+            typeof timeoutFn === "function" ? timeoutFn(this.telegramLongPollClientTimeoutMs()) : void 0
+          ].filter((signal) => signal instanceof AbortSignal);
+          options2.signal = typeof anyFn === "function" && signals.length > 1 ? anyFn(signals) : signals[0];
         } else if (!isLongPoll) {
           options2.signal = AbortSignal.timeout(3e4);
         }
@@ -659607,6 +659863,30 @@ function sanitizeChatContent(raw) {
   }
   return cleaned.join("\n").trim();
 }
+function appendNoThinkDirectivesToMessages(messages2) { // Return messages2 with "/nothink" and/or "/no_think" appended to the last user message when missing; the input array and its objects are not mutated.
+  let lastUserIdx = -1;
+  for (let i2 = messages2.length - 1; i2 >= 0; i2--) { // scan from the end for the most recent message whose role is "user"
+    if (messages2[i2]?.role === "user") {
+      lastUserIdx = i2;
+      break;
+    }
+  }
+  if (lastUserIdx < 0) return messages2; // no user message: return the input unchanged
+  const target = messages2[lastUserIdx];
+  if (!target || typeof target.content !== "string") return messages2; // only plain-string content is annotated; anything else is returned unchanged
+  const hasOllamaNoThink = /\/nothink\b/i.test(target.content); // case-insensitive check for an existing "/nothink" token
+  const hasQwenNoThink = /\/no[_-]think\b/i.test(target.content); // case-insensitive check for an existing "/no_think" or "/no-think" token
+  if (hasOllamaNoThink && hasQwenNoThink) return messages2; // both forms already present: nothing to append
+  const suffix = [ // the directive forms still missing, joined with a newline
+    hasOllamaNoThink ? null : "/nothink",
+    hasQwenNoThink ? null : "/no_think"
+  ].filter(Boolean).join("\n");
+  return messages2.map( // new array; only the last user message is replaced, by a shallow copy with the suffix on its own line(s)
+    (m2, i2) => i2 === lastUserIdx ? { ...m2, content: `${target.content}
+${suffix}` } : m2
+  );
+}
 async function directChatBackend(opts) {
   const { model, messages: messages2, stream, res, sessionId, ollamaUrl, extraFields } = opts;
   const cfg = loadConfig();
@@ -659695,13 +659975,12 @@ async function directChatBackend(opts) {
     if (Array.isArray(ef["stop"]) || typeof ef["stop"] === "string") ollamaOpts["stop"] = ef["stop"];
     const hasTools = Array.isArray(ef["tools"]) && ef["tools"].length > 0;
     const ollamaFormat = ollamaFormatFromOpenAIResponseFormat(ef["response_format"]);
+    const ollamaMessages = appendNoThinkDirectivesToMessages(messages2);
     const reqBody = JSON.stringify({
       model: cleanModel,
-      messages: messages2,
+      messages: ollamaMessages,
       stream,
-      // Don't force think:false when the caller is using tool calling —
-      // thinking models often need their reasoning chain to choose a tool.
-      ...hasTools ? {} : { think: false },
+      think: false,
       ...hasTools ? { tools: ef["tools"] } : {},
       ...ef["tool_choice"] !== void 0 ? { tool_choice: ef["tool_choice"] } : {},
       ...ollamaFormat !== void 0 ? { format: ollamaFormat } : {},
@@ -659931,13 +660210,18 @@ async function completeRealtimeTextOnly(opts) {
   if (!requestedModel) {
     originalModel = realtimeOllamaFallbackCache.get(realtimeFallbackCacheKey(targetUrl, originalModel)) ?? originalModel;
   }
-  const makeOllamaChatBody = (modelName) => JSON.stringify({
-    model: modelName,
-    messages: requestBody["messages"],
-    stream: false,
-    think: false,
-    options: { temperature, num_predict: maxTokens }
-  });
+  const makeOllamaChatBody = (modelName) => { // Build the JSON string sent as the body of the "/api/chat" request for the given model name.
+    const rtMessages = Array.isArray(requestBody["messages"]) ? appendNoThinkDirectivesToMessages( // annotate only when messages is an array; any other value is passed through as-is
+      requestBody["messages"]
+    ) : requestBody["messages"];
+    return JSON.stringify({
+      model: modelName,
+      messages: rtMessages,
+      stream: false, // always a non-streaming request
+      think: false, // always sent as false, alongside the directives appended to the messages above
+      options: { temperature, num_predict: maxTokens } // temperature and maxTokens come from the enclosing scope
+    });
+  };
   let result = await ollamaRequest(targetUrl, "/api/chat", "POST", makeOllamaChatBody(originalModel), timeoutMs, route?.endpoint);
   if (result.status >= 400 && !requestedModel && isOllamaMissingModelError(result.body)) {
     const fallbackModel = await resolveRealtimeOllamaFallbackModel(targetUrl, timeoutMs, originalModel);

package/npm-shrinkwrap.json CHANGED Viewed

@@ -1,12 +1,12 @@
 {
   "name": "omnius",
-  "version": "1.0.182",
+  "version": "1.0.183",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "omnius",
-      "version": "1.0.182",
+      "version": "1.0.183",
       "bundleDependencies": [
         "image-to-ascii"
       ],

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "omnius",
-  "version": "1.0.182",
+  "version": "1.0.183",
   "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
   "type": "module",
   "main": "./dist/index.js",