npm - omnius - Versions diffs - 1.0.113 → 1.0.114 - Mend

omnius 1.0.113 → 1.0.114

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/index.js CHANGED Viewed

@@ -614455,6 +614455,28 @@ function telegramRouterTimeoutMs(configTimeoutMs, minMs = 12e4, _legacyMaxMs) {
   const configured = Number.isFinite(configTimeoutMs) && (configTimeoutMs ?? 0) > 0 ? configTimeoutMs : 3e5;
   return Math.max(configured, minMs, 12e4);
 }
+/**
+ * Builds a copy of a chat-completion request with model "thinking" turned off.
+ *
+ * Two things are applied to the copy:
+ *   - `think: false` is set on the returned request object.
+ *   - a `/no_think` directive is attached to the most recent user message,
+ *     unless that message already contains one (matched case-insensitively).
+ *
+ * The directive is appended on its own line to string content. When the most
+ * recent user message carries non-string content (for example an array of
+ * content parts), that content is left untouched and the directive is sent
+ * as a separate trailing user message instead, so the original payload is
+ * never overwritten. The same trailing message is used when the request has
+ * no user message at all.
+ *
+ * Neither `request` nor its `messages` array is mutated: the array is copied
+ * and the affected message is replaced by a shallow clone.
+ *
+ * @param {object} request - Chat-completion request; `messages` may be
+ *   missing or not an array, in which case it is treated as empty.
+ * @returns {object} Shallow copy of `request` with a fresh `messages` array
+ *   and `think: false`.
+ */
+function telegramThinkSuppressedRequest(request) {
+  const messages2 = Array.isArray(request.messages) ? request.messages.slice() : [];
+  let handled = false;
+  // Walk backwards so only the most recent user message is considered.
+  for (let i2 = messages2.length - 1; i2 >= 0; i2--) {
+    const m2 = messages2[i2];
+    if (!m2 || m2.role !== "user") continue;
+    const raw = m2.content;
+    // Structured (non-string, non-nullish) content must not be clobbered;
+    // fall through to the separate directive message below.
+    if (raw != null && typeof raw !== "string") break;
+    const content = raw ?? "";
+    if (!/\/no_think\b/i.test(content)) {
+      // Keep the directive on its own line without doubling a trailing newline.
+      const separator = content.endsWith("\n") ? "" : "\n";
+      messages2[i2] = { ...m2, content: `${content}${separator}/no_think` };
+    }
+    handled = true;
+    break;
+  }
+  if (!handled) {
+    messages2.push({ role: "user", content: "/no_think" });
+  }
+  return { ...request, messages: messages2, think: false };
+}
 function parseTelegramInteractionDecision(text, forcedRoute, options2 = {}) {
   for (const jsonText of telegramDecisionJsonCandidates(text)) {
     try {
@@ -616308,6 +616330,23 @@ External acquisition contract:
       stimulation = new StimulationController();
       /** Throttles noisy "skipped group chatter" waterfall logs */
       groupSkipLogAt = /* @__PURE__ */ new Map();
+      /**
+       * Per-chat router-call coalescing state. Limits router (attention
+       * decision) inferences to at most one running plus one queued per
+       * sessionKey, regardless of incoming message rate. Without this, a burst
+       * of N messages in a single chat fires N concurrent Ollama calls; over
+       * hours of bursts this saturates the GPU and produces the 10+ minute
+       * reply latencies observed after 24h uptime.
+       *
+       * Shape per sessionKey:
+       *   - inFlight: the currently running router call for this chat
+       *   - trailing: a queued "next" call that starts only after inFlight
+       *     settles. If multiple messages arrive while inFlight is running,
+       *     they collapse into a single trailing call that uses the
+       *     most-recent message. All callers that arrived during the
+       *     in-flight window receive the trailing decision.
+       *
+       * Disabled with OMNIUS_TG_ROUTER_DEBOUNCE=off (emergency bypass).
+       */
+      telegramRouterSessionState = /* @__PURE__ */ new Map();
       /** Telegram interaction routing profile */
       interactionMode = "auto";
       /** Actual model context window discovered by the main TUI. */
@@ -617943,7 +617982,7 @@ ${mediaContext}` : ""
             this.agentConfig.model,
             this.agentConfig.apiKey
           );
-          const result = await backend.chatCompletion({
+          const result = await backend.chatCompletion(telegramThinkSuppressedRequest({
             messages: [
               { role: "system", content: "You are a Telegram public-follow-up discretion model. Output strict JSON only." },
               { role: "user", content: prompt }
@@ -617951,9 +617990,8 @@ ${mediaContext}` : ""
             tools: [],
             temperature: 0.2,
             maxTokens: 300,
-            timeoutMs: Math.min(Math.max(this.agentConfig.timeoutMs ?? 3e4, 5e3), 2e4),
-            think: false
-          });
+            timeoutMs: Math.min(Math.max(this.agentConfig.timeoutMs ?? 3e4, 5e3), 2e4)
+          }));
           const decision2 = parseTelegramReflectionFollowupDecision(result.choices[0]?.message?.content ?? "");
           state.lastFollowupArtifactAt = artifact.generatedAt;
           if (!decision2) {
@@ -619577,9 +619615,10 @@ ${lines.join("\n")}`);
       async telegramRouterJsonCompletion(backend, request, diagnostics) {
         let jsonModeResult;
         let jsonModeError;
+        const suppressed = telegramThinkSuppressedRequest(request);
         try {
           jsonModeResult = await backend.chatCompletion({
-            ...request,
+            ...suppressed,
             responseFormat: TELEGRAM_INTERACTION_DECISION_RESPONSE_FORMAT
           });
           const visible = jsonModeResult.choices.some(
@@ -619598,7 +619637,7 @@ ${lines.join("\n")}`);
           }
         }
         try {
-          const plainResult = await backend.chatCompletion(request);
+          const plainResult = await backend.chatCompletion(suppressed);
           if (diagnostics) {
             const plainVisible = plainResult.choices.some(
               (choice) => stripTelegramHiddenThinking(choice.message.content ?? "").trim().length > 0
@@ -619751,6 +619790,53 @@ ${retryText}`,
           return null;
         }
       }
+      /**
+       * Coalesced wrapper around inferTelegramInteractionDecision. Keeps at
+       * most two router calls outstanding per chat: one in flight plus one
+       * trailing call that absorbs every message arriving during the in-flight
+       * window and starts only once the in-flight call has settled. The
+       * trailing call uses the most-recent message and serves all queued
+       * callers. Bypass with OMNIUS_TG_ROUTER_DEBOUNCE=off.
+       */
+      inferTelegramInteractionDecisionCoalesced(msg, toolContext) {
+        // Emergency bypass: skip coalescing and call the router directly.
+        const coalescingDisabled = process.env["OMNIUS_TG_ROUTER_DEBOUNCE"] === "off";
+        if (coalescingDisabled) {
+          return this.inferTelegramInteractionDecision(msg, toolContext);
+        }
+        const sessionKey = this.sessionKeyForMessage(msg);
+        const slot = this.telegramRouterSessionState.get(sessionKey);
+        // Nothing is running for this chat: this message becomes the in-flight call.
+        if (!slot) {
+          return this.startCoalescedTelegramRouterCall(sessionKey, msg, toolContext);
+        }
+        const queued = slot.trailing;
+        if (queued) {
+          // A trailing call is already queued: retarget it at the newest
+          // message and share its promise with this caller.
+          queued.msg = msg;
+          queued.toolContext = toolContext;
+          return queued.promise;
+        }
+        // First message to arrive while a call is in flight: queue a trailing
+        // entry whose promise is settled later by whoever runs the trailing call.
+        let settleOk;
+        let settleErr;
+        const pending = new Promise((onOk, onErr) => {
+          settleOk = onOk;
+          settleErr = onErr;
+        });
+        slot.trailing = { msg, toolContext, promise: pending, resolve: settleOk, reject: settleErr };
+        return pending;
+      }
+      /**
+       * Internal: start an actual router inference for a sessionKey, store its
+       * in-flight promise, and on completion fire any queued trailing call.
+       */
+      startCoalescedTelegramRouterCall(sessionKey, msg, toolContext) {
+        const inFlight = this.inferTelegramInteractionDecision(msg, toolContext);
+        this.telegramRouterSessionState.set(sessionKey, { inFlight });
+        // Runs once the in-flight call settles, whether it fulfilled or rejected.
+        const drainTrailing = () => {
+          const finished = this.telegramRouterSessionState.get(sessionKey);
+          // Clear the slot first so the trailing call (if any) registers itself afresh.
+          this.telegramRouterSessionState.delete(sessionKey);
+          const queued = finished?.trailing;
+          if (!queued) return;
+          // Promote the queued entry to a real call and forward its outcome to
+          // every caller holding the trailing promise.
+          this.startCoalescedTelegramRouterCall(sessionKey, queued.msg, queued.toolContext).then(queued.resolve, queued.reject);
+        };
+        inFlight.then(drainTrailing, drainTrailing);
+        return inFlight;
+      }
       async inferTelegramInteractionDecision(msg, toolContext) {
         const config = this.agentConfig;
         const forcedRoute = this.interactionMode === "chat" || this.interactionMode === "action" ? this.interactionMode : null;
@@ -620754,7 +620840,7 @@ Join: ${newUrl}`);
           const isGroup = msg.chatType !== "private";
           if (isGroup) {
             const attentionViewId2 = this.registerTelegramAttentionView(msg, existing.toolContext || toolContext, "active Telegram thread");
-            const decision3 = await this.inferTelegramInteractionDecision(msg, existing.toolContext || toolContext);
+            const decision3 = await this.inferTelegramInteractionDecisionCoalesced(msg, existing.toolContext || toolContext);
             this.deliverTelegramAttentionDecision(
               sessionKey,
               msg,
@@ -620798,7 +620884,7 @@ Join: ${newUrl}`);
           return;
         }
         const attentionViewId = this.registerTelegramAttentionView(msg, toolContext);
-        const decision2 = await this.inferTelegramInteractionDecision(msg, toolContext);
+        const decision2 = await this.inferTelegramInteractionDecisionCoalesced(msg, toolContext);
         this.deliverTelegramAttentionDecision(
           sessionKey,
           msg,
@@ -621231,14 +621317,13 @@ ${conversationStream}`
           config.model,
           config.apiKey
         );
-        const request = {
+        const request = telegramThinkSuppressedRequest({
           messages: this.buildTelegramChatMessages(msg, toolContext, mediaContext),
           tools: [],
           temperature: 0.4,
           maxTokens: 700,
-          timeoutMs: Math.max(config.timeoutMs ?? 3e5, 12e4),
-          think: false
-        };
+          timeoutMs: Math.max(config.timeoutMs ?? 3e5, 12e4)
+        });
         let accumulated = "";
         let streamError;
         const streamable = backend;
@@ -621331,6 +621416,15 @@ ${conversationStream}`
           disablePersistentMemory: false,
           disableCodebaseMap: !isAdminDM,
           subAgent: !isAdminDM,
+          // Telegram sub-agents run tool-heavy workflows where qwen3 <think>
+          // reasoning is notorious for stalling: the model burns its token
+          // budget inside <think>...</think> and never closes the tag, producing
+          // empty content or 10+ minute replies after 24h. The runner's
+          // computeEffectiveThink() already kills thinking when hasTools=true,
+          // but we set it explicitly here as well so a future no-tools turn
+          // (compaction window, recovery prompt, watchdog probe) inherits the
+          // off default rather than the global config's value.
+          thinking: false,
           // Telegram sub-agent runs must be bounded. Brute-force re-engagement and
           // the Littleman near-cap turn extension are appropriate for the full TUI
           // session but cause Telegram to silently outgrow its nominal maxTurns,

package/npm-shrinkwrap.json CHANGED Viewed

@@ -1,12 +1,12 @@
 {
   "name": "omnius",
-  "version": "1.0.113",
+  "version": "1.0.114",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "omnius",
-      "version": "1.0.113",
+      "version": "1.0.114",
       "bundleDependencies": [
         "image-to-ascii"
       ],

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "omnius",
-  "version": "1.0.113",
+  "version": "1.0.114",
   "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
   "type": "module",
   "main": "./dist/index.js",