npm - omnius - Versions diffs - 1.0.115 → 1.0.117 - Mend

omnius 1.0.115 → 1.0.117

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/index.js CHANGED Viewed

@@ -564766,6 +564766,9 @@ __export(render_exports, {
   setEmojisEnabled: () => setEmojisEnabled,
   ui: () => ui
 });
+function stdoutIsTTY() { // reads process.stdout.isTTY at call time rather than caching it in a module-level variable
+  return process.stdout.isTTY ?? false; // `??` turns a missing (null/undefined) isTTY into false
+}
 function accentFg() {
   const a2 = tuiAccent();
   return a2 < 0 ? "\x1B[39m" : `\x1B[38;5;${a2}m`;
@@ -564774,17 +564777,17 @@ function dimFg() {
   return `\x1B[38;5;${tuiTextDim()}m`;
 }
 function ansi2(code8, text) {
-  return isTTY2 ? `\x1B[${code8}m${text}\x1B[0m` : text;
+  return stdoutIsTTY() ? `\x1B[${code8}m${text}\x1B[0m` : text;
 }
 function fg256(code8, text) {
-  return isTTY2 ? `\x1B[38;5;${code8}m${text}\x1B[0m` : text;
+  return stdoutIsTTY() ? `\x1B[38;5;${code8}m${text}\x1B[0m` : text;
 }
 function hyperlink(url, text) {
-  if (!isTTY2) return text;
+  if (!stdoutIsTTY()) return text;
   return `\x1B]8;;${url}\x07${text}\x1B]8;;\x07`;
 }
 function fileLink(filePath) {
-  if (!isTTY2) return filePath;
+  if (!stdoutIsTTY()) return filePath;
   if (filePath.startsWith("/") || filePath.startsWith("~")) {
     const absPath = filePath.startsWith("~") ? filePath.replace("~", process.env["HOME"] ?? "") : filePath;
     return hyperlink(`file://${absPath}`, filePath);
@@ -564923,11 +564926,11 @@ function toolColorCode(toolName) {
   return TOOL_COLOR_CODES[toolName] ?? tuiTextDim();
 }
 function toolColorSeq(code8, bold = false) {
-  if (!_colorsEnabled || !isTTY2) return "";
+  if (!_colorsEnabled || !stdoutIsTTY()) return "";
   return `\x1B[${bold ? "1;" : ""}38;5;${code8}m`;
 }
 function toolResetSeq() {
-  return _colorsEnabled && isTTY2 ? RESET2 : "";
+  return _colorsEnabled && stdoutIsTTY() ? RESET2 : "";
 }
 function visibleLen(text) {
   return stripAnsi(text).length;
@@ -564969,10 +564972,10 @@ function wrapToolTextLine(text, width) {
   out.push(remaining);
   return out;
 }
-function buildToolTopBorder(title, metrics2, width, colorCode) {
+function buildToolTopBorder(title, metrics2, width, colorCode, metricsColorCode = 222) {
   const border = toolColorSeq(colorCode);
   const titleColor = toolColorSeq(colorCode, true);
-  const metricColor = toolColorSeq(222);
+  const metricColor = toolColorSeq(metricsColorCode);
   const reset = toolResetSeq();
   const inner = Math.max(4, width - 2);
   const titleVisible = stripAnsi(title);
@@ -565077,7 +565080,7 @@ function buildToolBoxLines(data, width) {
   const w = Math.max(40, width);
   const innerWidth = Math.max(1, w - 4);
   const lines = [
-    buildToolTopBorder(data.title, data.metrics, w, data.colorCode),
+    buildToolTopBorder(data.title, data.metrics, w, data.colorCode, data.metricsColorCode),
     buildToolDivider(w, data.colorCode)
   ];
   for (const bodyLine of data.body.length > 0 ? data.body : [{ text: "Done", mode: "wrap", kind: "dim" }]) {
@@ -565187,7 +565190,8 @@ function buildToolResultBoxLines(toolName, success, output, opts, width) {
     metrics: metrics2,
     body,
     footers,
-    colorCode: toolColorCode(toolName)
+    colorCode: success ? toolColorCode(toolName) : TOOL_ERROR_COLOR_CODE,
+    metricsColorCode: success ? void 0 : TOOL_ERROR_COLOR_CODE
   }, width);
 }
 function buildToolResultBody(toolName, success, output, verbose) {
@@ -565644,7 +565648,7 @@ function formatDuration3(ms) {
   const secs = Math.floor(totalSecs % 60);
   return `${mins}m ${secs}s`;
 }
-var isTTY2, c3, ui, pastel, _emojisEnabled, _colorsEnabled, MD, TOOL_ICONS, TOOL_LABELS, TOOL_COLOR_CODES, BOX_TL2, BOX_TR2, BOX_BL2, BOX_BR2, BOX_H2, BOX_V2, BOX_TJ_L2, BOX_TJ_R2, RESET2, _contentWriteHook, HINTS, TOOL_NAMES, COMMAND_NAMES, SLASH_COMMANDS2;
+var c3, ui, pastel, _emojisEnabled, _colorsEnabled, MD, TOOL_ICONS, TOOL_LABELS, TOOL_COLOR_CODES, TOOL_ERROR_COLOR_CODE, BOX_TL2, BOX_TR2, BOX_BL2, BOX_BR2, BOX_H2, BOX_V2, BOX_TJ_L2, BOX_TJ_R2, RESET2, _contentWriteHook, HINTS, TOOL_NAMES, COMMAND_NAMES, SLASH_COMMANDS2;
 var init_render = __esm({
   "packages/cli/src/tui/render.ts"() {
     "use strict";
@@ -565655,10 +565659,9 @@ var init_render = __esm({
     init_text_selection();
     init_task_complete_box();
     init_model_picker();
-    isTTY2 = process.stdout.isTTY ?? false;
     c3 = {
       bold: (t2) => ansi2("1", t2),
-      dim: (t2) => isTTY2 ? `${dimFg()}${t2}\x1B[0m` : t2,
+      dim: (t2) => stdoutIsTTY() ? `${dimFg()}${t2}\x1B[0m` : t2,
       italic: (t2) => ansi2("3", t2),
       red: (t2) => ansi2("31", t2),
       green: (t2) => ansi2("32", t2),
@@ -565827,6 +565830,7 @@ var init_render = __esm({
       transcribe_url: 43,
       ask_user: 44
     };
+    TOOL_ERROR_COLOR_CODE = 198;
     BOX_TL2 = "╭";
     BOX_TR2 = "╮";
     BOX_BL2 = "╰";
@@ -577003,10 +577007,10 @@ ${CONTENT_BG_SEQ}`);
 // packages/cli/src/tui/tui-select.ts
 function ansi3(code8, text) {
-  return isTTY3 ? `\x1B[${code8}m${text}\x1B[0m` : text;
+  return isTTY2 ? `\x1B[${code8}m${text}\x1B[0m` : text;
 }
 function fg2563(code8, text) {
-  return isTTY3 ? `\x1B[38;5;${code8}m${text}\x1B[0m` : text;
+  return isTTY2 ? `\x1B[38;5;${code8}m${text}\x1B[0m` : text;
 }
 function stripAnsi3(s2) {
   return s2.replace(/\x1B\[[0-9;]*m/g, "");
@@ -577636,14 +577640,14 @@ ${tuiBgSeq()}`);
     }
   });
 }
-var isTTY3, MENU_ACTIVE_GREEN_256, selectColors;
+var isTTY2, MENU_ACTIVE_GREEN_256, selectColors;
 var init_tui_select = __esm({
   "packages/cli/src/tui/tui-select.ts"() {
     "use strict";
     init_overlay_lock();
     init_theme();
     init_layout2();
-    isTTY3 = process.stdout.isTTY ?? false;
+    isTTY2 = process.stdout.isTTY ?? false;
     MENU_ACTIVE_GREEN_256 = 154;
     selectColors = {
       blue: (t2) => fg2563(39, t2),
@@ -582209,7 +582213,7 @@ var init_workspace_explorer = __esm({
 import { existsSync as existsSync95 } from "node:fs";
 import { extname as extname13, resolve as resolve39 } from "node:path";
 function ansi4(code8, text) {
-  return isTTY4 ? `\x1B[${code8}m${text}\x1B[0m` : text;
+  return isTTY3 ? `\x1B[${code8}m${text}\x1B[0m` : text;
 }
 function stripAnsi4(s2) {
   return s2.replace(/\x1B\[[0-9;]*m/g, "");
@@ -582380,13 +582384,13 @@ function showDropPanel(opts) {
     render2();
   });
 }
-var isTTY4, dc;
+var isTTY3, dc;
 var init_drop_panel = __esm({
   "packages/cli/src/tui/drop-panel.ts"() {
     "use strict";
     init_overlay_lock();
     init_layout2();
-    isTTY4 = process.stdout.isTTY ?? false;
+    isTTY3 = process.stdout.isTTY ?? false;
     dc = {
       bold: (t2) => ansi4("1", t2),
       dim: (t2) => ansi4("38;5;250", t2),
@@ -584736,7 +584740,7 @@ async function startNeovimMode(opts) {
   const ptyCols = opts.cols;
   const topOffset = opts.topOffset ?? 0;
   const ptyRows = Math.max(5, opts.contentRows);
-  if (isTTY5) {
+  if (isTTY4) {
     const L = layout();
     const bottomBound = L.contentBottom;
     process.stdout.write(
@@ -584805,7 +584809,7 @@ async function startNeovimMode(opts) {
     }
   }
   function renderToolbar() {
-    if (!isTTY5) return;
+    if (!isTTY4) return;
     const L = layout();
     const hdrRow = L.headerContent;
     const fg2 = 252;
@@ -584866,7 +584870,7 @@ async function startNeovimMode(opts) {
     stdin.setRawMode(true);
   }
   stdin.resume();
-  if (isTTY5) {
+  if (isTTY4) {
     process.stdout.write("\x1B[?1002h\x1B[?1006h");
   }
   state.stdinHandler = (data) => {
@@ -585165,13 +585169,13 @@ function doCleanup(state) {
   }
   state.opts.onExit?.();
 }
-var isTTY5, PTY_MODE_ENABLE_RE, STDIN_MOUSE_FOCUS_RE, _state;
+var isTTY4, PTY_MODE_ENABLE_RE, STDIN_MOUSE_FOCUS_RE, _state;
 var init_neovim_mode = __esm({
   "packages/cli/src/tui/neovim-mode.ts"() {
     "use strict";
     init_setup();
     init_layout2();
-    isTTY5 = process.stdout.isTTY ?? false;
+    isTTY4 = process.stdout.isTTY ?? false;
     PTY_MODE_ENABLE_RE = /\x1B\[\?(?:1004|2004)h/g;
     STDIN_MOUSE_FOCUS_RE = /\x1B\[<[\d;]+[Mm]|\x1B\[M[\s\S]{3}|\x1B\[[IO]|\x1BO[ABCD]/g;
     _state = null;
@@ -604876,7 +604880,7 @@ function setCarouselWriter(writer) {
   chromeWrite2 = writer;
 }
 function fg(code8, text) {
-  return isTTY6 ? `\x1B[38;5;${code8}m${text}\x1B[0m` : text;
+  return isTTY5 ? `\x1B[38;5;${code8}m${text}\x1B[0m` : text;
 }
 function displayWidth(str) {
   let w = 0;
@@ -604914,12 +604918,12 @@ function createRow(phraseIndices, speed, direction, bank2) {
   const phrases = phraseIndices.map((i2) => bank2[i2 % bank2.length]);
   return { phrases, offset: 0, speed, direction, renderedPlain: "" };
 }
-var isTTY6, chromeWrite2, PHRASES, Carousel;
+var isTTY5, chromeWrite2, PHRASES, Carousel;
 var init_carousel = __esm({
   "packages/cli/src/tui/carousel.ts"() {
     "use strict";
     init_layout2();
-    isTTY6 = process.stdout.isTTY ?? false;
+    isTTY5 = process.stdout.isTTY ?? false;
     chromeWrite2 = ((data) => {
       process.stdout.write(data);
     });
@@ -605031,7 +605035,7 @@ var init_carousel = __esm({
        * Sets scroll region to row 5+ for all content/readline.
        */
       start() {
-        if (!isTTY6) return 0;
+        if (!isTTY5) return 0;
         this.started = true;
         setHeaderHeight(this.reservedRows);
         const L = layout();
@@ -605063,7 +605067,7 @@ var init_carousel = __esm({
        * Row 4 is left blank as a separator.
        */
       renderFrame() {
-        if (!isTTY6) return;
+        if (!isTTY5) return;
         const L = layout();
         let buf = "\x1B7";
         buf += "\x1B[?7l";
@@ -605140,7 +605144,7 @@ var init_carousel = __esm({
           process.stdout.removeListener("resize", this.resizeHandler);
           this.resizeHandler = null;
         }
-        if (!isTTY6 || !this.started) return;
+        if (!isTTY5 || !this.started) return;
         const L = layout();
         let buf = "\x1B7";
         for (let i2 = 0; i2 < this.reservedRows; i2++) {
@@ -605376,13 +605380,13 @@ function createAnimatedBanner(id, name10, frameBuilders, frameDurationMs, author
     createdAt: (/* @__PURE__ */ new Date()).toISOString()
   };
 }
-var isTTY7, chromeWrite3, MNEMONIC_ADJECTIVES, MNEMONIC_NOUNS, BannerRenderer;
+var isTTY6, chromeWrite3, MNEMONIC_ADJECTIVES, MNEMONIC_NOUNS, BannerRenderer;
 var init_banner = __esm({
   "packages/cli/src/tui/banner.ts"() {
     "use strict";
     init_theme();
     init_layout2();
-    isTTY7 = process.stdout.isTTY ?? false;
+    isTTY6 = process.stdout.isTTY ?? false;
     chromeWrite3 = ((data) => {
       process.stdout.write(data);
     });
@@ -605544,7 +605548,7 @@ var init_banner = __esm({
        * Returns the number of rows reserved (3 banner + 1 separator = 4).
        */
       start() {
-        if (!isTTY7 || !this.currentDesign) return 0;
+        if (!isTTY6 || !this.currentDesign) return 0;
         this.renderCurrentFrame();
         this._resizeHandler = () => {
           setTermSize(process.stdout.rows ?? 24, process.stdout.columns ?? 80);
@@ -605585,7 +605589,7 @@ var init_banner = __esm({
       }
       /** Render the current frame into the top 3 rows (public for refresh callbacks) */
       renderCurrentFrame() {
-        if (!isTTY7 || !this.currentDesign) return;
+        if (!isTTY6 || !this.currentDesign) return;
         const frame = this.currentDesign.frames[this.currentFrame];
         if (!frame) return;
         this.width = termCols();
@@ -606000,13 +606004,13 @@ __export(syntax_highlight_exports, {
   prewarm: () => prewarm
 });
 function highlightingDisabled() {
-  return !isTTY8 || noColorEnv || disableEnv;
+  return !isTTY7 || noColorEnv || disableEnv;
 }
 async function loadHighlighter() {
   if (_state2.attempted) return _state2.fn;
   _state2.attempted = true;
   if (highlightingDisabled()) {
-    _state2.reason = !isTTY8 ? "non-tty" : noColorEnv ? "NO_COLOR set" : "OMNIUS_TUI_HIGHLIGHT=0";
+    _state2.reason = !isTTY7 ? "non-tty" : noColorEnv ? "NO_COLOR set" : "OMNIUS_TUI_HIGHLIGHT=0";
     return null;
   }
   try {
@@ -606053,7 +606057,7 @@ function getHighlightStatus() {
     available: isAvailable(),
     attempted: _state2.attempted,
     reason: _state2.reason,
-    isTTY: isTTY8,
+    isTTY: isTTY7,
     noColor: noColorEnv,
     disabledByEnv: disableEnv
   };
@@ -606143,11 +606147,11 @@ function highlightBlock(code8, language) {
     return code8.split("\n");
   }
 }
-var isTTY8, noColorEnv, disableEnv, _state2;
+var isTTY7, noColorEnv, disableEnv, _state2;
 var init_syntax_highlight = __esm({
   "packages/cli/src/tui/syntax-highlight.ts"() {
     "use strict";
-    isTTY8 = process.stdout?.isTTY ?? false;
+    isTTY7 = process.stdout?.isTTY ?? false;
     noColorEnv = process.env["NO_COLOR"] !== void 0 && process.env["NO_COLOR"] !== "";
     disableEnv = process.env["OMNIUS_TUI_HIGHLIGHT"] === "0";
     _state2 = {
@@ -606160,21 +606164,21 @@ var init_syntax_highlight = __esm({
 // packages/cli/src/tui/stream-renderer.ts
 function fg2564(code8, text) {
-  return isTTY9 ? `\x1B[38;5;${code8}m${text}\x1B[0m` : text;
+  return isTTY8 ? `\x1B[38;5;${code8}m${text}\x1B[0m` : text;
 }
 function dimText(text) {
-  return isTTY9 ? `\x1B[38;5;${tuiTextDim()}m${text}\x1B[0m` : text;
+  return isTTY8 ? `\x1B[38;5;${tuiTextDim()}m${text}\x1B[0m` : text;
 }
 function italicText(text) {
-  return isTTY9 ? `\x1B[3m${text}\x1B[0m` : text;
+  return isTTY8 ? `\x1B[3m${text}\x1B[0m` : text;
 }
 function dimItalic(text) {
-  return isTTY9 ? `\x1B[3m\x1B[38;5;${tuiTextDim()}m${text}\x1B[0m` : text;
+  return isTTY8 ? `\x1B[3m\x1B[38;5;${tuiTextDim()}m${text}\x1B[0m` : text;
 }
 function boldText(text) {
-  return isTTY9 ? `\x1B[1m${text}\x1B[0m` : text;
+  return isTTY8 ? `\x1B[1m${text}\x1B[0m` : text;
 }
-var isTTY9, PASTEL, StreamRenderer;
+var isTTY8, PASTEL, StreamRenderer;
 var init_stream_renderer = __esm({
   "packages/cli/src/tui/stream-renderer.ts"() {
     "use strict";
@@ -606182,7 +606186,7 @@ var init_stream_renderer = __esm({
     init_text_selection();
     init_theme();
     init_syntax_highlight();
-    isTTY9 = process.stdout.isTTY ?? false;
+    isTTY8 = process.stdout.isTTY ?? false;
     PASTEL = {
       key: 222,
       // light gold — JSON keys
@@ -606551,7 +606555,7 @@ var init_stream_renderer = __esm({
        *  Also maintains _cursorCol so emitWrapped can decide when to force a
        *  wrap on the NEXT partial flush (avoiding bottom-row token pile-up). */
       writeRaw(text) {
-        if (isTTY9) {
+        if (isTTY8) {
           process.stdout.write(`\x1B[?25l\x1B[?7l${text}\x1B[?7h`);
         } else {
           process.stdout.write(text);
@@ -616349,6 +616353,34 @@ External acquisition contract:
       telegramRouterSessionState = /* @__PURE__ */ new Map();
       /** Telegram interaction routing profile */
       interactionMode = "auto";
+      /**
+       * Toggle for surfacing qwen3 `<think>` content streamed by Telegram-side
+       * inferences (router, chat fast-path, follow-up). Mirrors the main TUI's
+       * Ctrl+O thinking-visibility toggle but applies to the bridge's stream
+       * surface (which has its own write path through tuiWrite + view
+       * callbacks). Default off; flip via env `OMNIUS_TG_SHOW_THINKING=1` or
+       * setTelegramThinkingVisible(). Independent of the model-side
+       * `think:false` directive — that controls whether the model emits
+       * thinking content at all; this controls whether the operator sees it
+       * when it IS emitted.
+       */
+      telegramThinkingVisible = process.env["OMNIUS_TG_SHOW_THINKING"] === "1";
+      /**
+       * Live telemetry of every in-flight Ollama call originating from the
+       * bridge. Lets the operator see WHY multiple GPUs are spun up at once
+       * and HOW each call is progressing — which is the only way to debug a
+       * 180s hard-deadline firing event without grepping logs.
+       *
+       * The map is keyed by inference id ("inf-N"). Each entry tracks:
+       *   - id: the same "inf-N" string used as the map key
+       *   - kind: router | chat-fast-path | followup | sub-agent
+       *   - sessionKey: which chat
+       *   - startTs: performance.now() reading taken at registration (a
+       *     relative millisecond timestamp, not a wall-clock/epoch time)
+       *   - contentTokens / thinkingTokens: on the streaming path these are
+       *     incremented by one per received chunk; on the non-streaming path
+       *     they are overwritten with a length/4 estimate of the final text
+       *   - lastTokenAt: performance.now() of the most-recent chunk or final
+       *     update (staleness signal); starts equal to startTs
+       *   - model: the model being called (helps differentiate concurrent calls)
+       *   - streaming: set to true at registration, set to false once a
+       *     non-streaming completion has been recorded for the entry
+       */
+      telegramActiveInferences = /* @__PURE__ */ new Map();
+      telegramInferenceCounter = 0; // pre-incremented on each registration to build the next "inf-N" id
       /** Actual model context window discovered by the main TUI. */
       contextWindowSize = 0;
       _metricsProvider = null;
@@ -617982,16 +618014,21 @@ ${mediaContext}` : ""
             this.agentConfig.model,
             this.agentConfig.apiKey
           );
-          const result = await backend.chatCompletion(telegramThinkSuppressedRequest({
-            messages: [
-              { role: "system", content: "You are a Telegram public-follow-up discretion model. Output strict JSON only." },
-              { role: "user", content: prompt }
-            ],
-            tools: [],
-            temperature: 0.2,
-            maxTokens: 300,
-            timeoutMs: Math.min(Math.max(this.agentConfig.timeoutMs ?? 3e4, 5e3), 2e4)
-          }));
+          const result = await this.telegramObservableInference(
+            backend,
+            telegramThinkSuppressedRequest({
+              messages: [
+                { role: "system", content: "You are a Telegram public-follow-up discretion model. Output strict JSON only." },
+                { role: "user", content: prompt }
+              ],
+              tools: [],
+              temperature: 0.2,
+              maxTokens: 300,
+              timeoutMs: Math.min(Math.max(this.agentConfig.timeoutMs ?? 3e4, 5e3), 2e4)
+            }),
+            "followup",
+            sessionKey
+          );
           const decision2 = parseTelegramReflectionFollowupDecision(result.choices[0]?.message?.content ?? "");
           state.lastFollowupArtifactAt = artifact.generatedAt;
           if (!decision2) {
@@ -619612,15 +619649,17 @@ ${lines.join("\n")}`);
           nextAnalysisAfterMessages: decision2.nextCheckAfterMessages
         });
       }
-      async telegramRouterJsonCompletion(backend, request, diagnostics) {
+      async telegramRouterJsonCompletion(backend, request, diagnostics, inferenceKind = "router", sessionKey = "__router__") {
         let jsonModeResult;
         let jsonModeError;
         const suppressed = telegramThinkSuppressedRequest(request);
         try {
-          jsonModeResult = await backend.chatCompletion({
-            ...suppressed,
-            responseFormat: TELEGRAM_INTERACTION_DECISION_RESPONSE_FORMAT
-          });
+          jsonModeResult = await this.telegramObservableInference(
+            backend,
+            { ...suppressed, responseFormat: TELEGRAM_INTERACTION_DECISION_RESPONSE_FORMAT },
+            inferenceKind,
+            sessionKey
+          );
           const visible = jsonModeResult.choices.some(
             (choice) => stripTelegramHiddenThinking(choice.message.content ?? "").trim().length > 0
           );
@@ -619637,7 +619676,12 @@ ${lines.join("\n")}`);
           }
         }
         try {
-          const plainResult = await backend.chatCompletion(suppressed);
+          const plainResult = await this.telegramObservableInference(
+            backend,
+            suppressed,
+            inferenceKind,
+            sessionKey
+          );
           if (diagnostics) {
             const plainVisible = plainResult.choices.some(
               (choice) => stripTelegramHiddenThinking(choice.message.content ?? "").trim().length > 0
@@ -619654,6 +619698,205 @@ ${lines.join("\n")}`);
           throw err;
         }
       }
+      // ─────────────────────────────────────────────────────────────────
+      // Observable inference — streams chatCompletion-shaped calls so the
+      // operator can SEE what's happening during a long-running router or
+      // chat-fast-path call instead of waiting 180s for a hard-deadline.
+      // ─────────────────────────────────────────────────────────────────
+      /**
+       * Wrap a chatCompletion-shaped call so the bridge can observe its token
+       * stream and surface telemetry. Falls back to non-streaming if the
+       * backend doesn't expose chatCompletionStream (older test stubs) or if
+       * streaming throws. Callers read `choices[0].message.content` the same
+       * way on either path, but note the streaming path returns a synthesized
+       * object carrying only `choices[0].message.content` and an optional
+       * `usage`, not the backend's own completion object.
+       *
+       * What this gives us:
+       *   1. Per-call entry in the active-inferences registry (visible to the
+       *      operator — answers "why are 2 GPUs spun up at once?")
+       *   2. Live emission of thinking + content tokens to the TUI when
+       *      telegramThinkingVisible is true (mirror of Ctrl+O for the bridge)
+       *   3. Wall-clock observability — if the call hangs at 60s with zero
+       *      content tokens emitted, the registry shows it, and the
+       *      hard-deadline retire path becomes diagnosable instead of opaque
+       *
+       * Behavior worth knowing when reading the body:
+       *   - The registry entry's model is taken from this.agentConfig?.model
+       *     (or "?" when absent), not from the backend or the request.
+       *   - When streaming throws, the SAME request is re-issued through
+       *     backend.chatCompletion. The "stream errored" TUI event is written
+       *     only after that retry resolves, so if the retry also throws, its
+       *     error propagates and the stream error is not reported here.
+       *   - The registry entry is removed in `finally`, on success and on
+       *     failure alike.
+       *
+       * @param backend object exposing chatCompletion(request) and, optionally,
+       *   chatCompletionStream(request)
+       * @param request passed unchanged to whichever backend method is used
+       * @param kind label stored on the registry entry (call sites in this
+       *   file pass values such as "router" and "followup")
+       * @param sessionKey stored on the registry entry and used as the first
+       *   argument of renderTelegramSubAgentEvent for the fallback notice
+       * @returns the backend's chatCompletion result, or the result built by
+       *   streamTelegramInferenceToCompletion on the streaming path
+       */
+      async telegramObservableInference(backend, request, kind, sessionKey) {
+        const streamFn = backend.chatCompletionStream;
+        const id = this.registerTelegramInference(kind, sessionKey, this.agentConfig?.model ?? "?");
+        try {
+          if (typeof streamFn !== "function") { // backend has no streaming API: plain call, then record size estimate
+            const r2 = await backend.chatCompletion(request);
+            this.updateTelegramInferenceFinal(id, r2);
+            return r2;
+          }
+          try {
+            const result = await this.streamTelegramInferenceToCompletion(
+              streamFn.bind(backend), // bind so the stream method keeps `backend` as its `this`
+              request,
+              id
+            );
+            return result;
+          } catch (streamErr) { // any streaming failure, including one after partial output, triggers a full non-stream retry
+            const r2 = await backend.chatCompletion(request);
+            this.updateTelegramInferenceFinal(id, r2); // overwrites any per-chunk counts collected before the failure
+            this.tuiWrite(() => renderTelegramSubAgentEvent(
+              sessionKey,
+              `inference ${id}: stream errored (${streamErr instanceof Error ? streamErr.message : String(streamErr)}); fell back to non-stream`
+            ));
+            return r2;
+          }
+        } finally {
+          this.deregisterTelegramInference(id);
+        }
+      }
+      /**
+       * Drive a chatCompletionStream to exhaustion, accumulating tokens into a
+       * chatCompletion-shaped result. While telegramThinkingVisible is true, a
+       * progress line (elapsed time, per-kind chunk counts, and the last 180
+       * characters of the buffered text) is written through the TUI, throttled
+       * to one line per EMIT_THROTTLE_MS to avoid spamming the waterfall on
+       * fast streams, plus one forced line after the stream ends.
+       *
+       * Chunk handling, as implemented below:
+       *   - type "content" with non-empty content: appended to the thinking
+       *     buffer when chunk.thinking is truthy, otherwise to the content
+       *     buffer; the matching registry counter is bumped by one per chunk
+       *   - type "finish": finishReason is captured but not returned
+       *   - type "usage": prompt/completion/total token figures are captured
+       *   - anything else is ignored
+       *
+       * No error handling happens here: an exception from streamFn or from
+       * iterating the stream propagates to the caller.
+       *
+       * @param streamFn function that takes the request and returns an async
+       *   iterable of chunks
+       * @param request passed unchanged to streamFn
+       * @param inferenceId registry id whose counters are updated; if the
+       *   entry is no longer in the registry the preview is skipped
+       * @returns `{ choices: [{ message: { content } }], usage }` where content
+       *   is `<think>…</think>` followed by the content buffer when any
+       *   thinking text was received, and usage is undefined unless a "usage"
+       *   chunk was seen
+       */
+      async streamTelegramInferenceToCompletion(streamFn, request, inferenceId) {
+        let contentBuf = "";
+        let thinkingBuf = "";
+        let finishReason;
+        let usage;
+        let lastEmitMs = 0; // starts at 0, so the first eligible chunk always passes the throttle
+        const EMIT_THROTTLE_MS = 500;
+        const flushPreview = (force) => {
+          if (!this.telegramThinkingVisible) return;
+          const now = Date.now();
+          if (!force && now - lastEmitMs < EMIT_THROTTLE_MS) return;
+          lastEmitMs = now;
+          const entry = this.telegramActiveInferences.get(inferenceId);
+          if (!entry) return;
+          const elapsed = ((performance.now() - entry.startTs) / 1e3).toFixed(1);
+          const thinkRatio = entry.contentTokens + entry.thinkingTokens > 0 ? Math.round(entry.thinkingTokens * 100 / (entry.contentTokens + entry.thinkingTokens)) : 0;
+          const preview = (thinkingBuf || contentBuf).slice(-180).replace(/\s+/g, " "); // shows the thinking tail whenever any thinking text exists, even after content has started
+          this.tuiWrite(() => renderTelegramSubAgentEvent(
+            entry.sessionKey,
+            `inference ${inferenceId} [${entry.kind}] ${elapsed}s content=${entry.contentTokens}t thinking=${entry.thinkingTokens}t (${thinkRatio}% think) live=${JSON.stringify(preview)}`
+          ));
+        };
+        for await (const chunk of streamFn(request)) {
+          if (chunk.type === "content" && chunk.content) {
+            if (chunk.thinking) {
+              thinkingBuf += chunk.content;
+              this.bumpTelegramInferenceTokens(inferenceId, 0, 1); // counts chunks, used as a stand-in for tokens
+            } else {
+              contentBuf += chunk.content;
+              this.bumpTelegramInferenceTokens(inferenceId, 1, 0);
+            }
+            flushPreview(false);
+          } else if (chunk.type === "finish") {
+            finishReason = chunk.finishReason;
+          } else if (chunk.type === "usage") {
+            usage = {
+              prompt_tokens: chunk.promptTokens,
+              completion_tokens: chunk.completionTokens,
+              total_tokens: chunk.totalTokens
+            };
+          }
+        }
+        flushPreview(true); // final progress line regardless of the throttle
+        void finishReason; // evaluated and discarded: the finish reason is not part of the returned object
+        return {
+          choices: [
+            {
+              message: {
+                content: thinkingBuf ? `<think>${thinkingBuf}</think>${contentBuf}` : contentBuf
+              }
+            }
+          ],
+          usage: usage ? {
+            totalTokens: usage.total_tokens ?? 0,
+            promptTokens: usage.prompt_tokens,
+            completionTokens: usage.completion_tokens
+          } : void 0
+        };
+      }
+      // ─────────────────────────────────────────────────────────────────
+      // Inference telemetry registry
+      // ─────────────────────────────────────────────────────────────────
+      registerTelegramInference(kind, sessionKey, model) { // adds a fresh registry entry and returns its id
+        const id = `inf-${++this.telegramInferenceCounter}`; // ids are "inf-1", "inf-2", … in registration order
+        const now = performance.now();
+        this.telegramActiveInferences.set(id, {
+          id,
+          kind,
+          sessionKey,
+          model,
+          startTs: now,
+          lastTokenAt: now, // no token seen yet, so idle time is measured from the start
+          contentTokens: 0,
+          thinkingTokens: 0,
+          streaming: true // cleared by updateTelegramInferenceFinal when a non-stream result is recorded
+        });
+        return id;
+      }
+      bumpTelegramInferenceTokens(id, contentDelta, thinkingDelta) { // adds the deltas to the entry's counters and refreshes lastTokenAt
+        const entry = this.telegramActiveInferences.get(id);
+        if (!entry) return; // unknown or already-removed id: nothing to update
+        entry.contentTokens += contentDelta;
+        entry.thinkingTokens += thinkingDelta;
+        entry.lastTokenAt = performance.now();
+      }
+      /**
+       * Called when a non-streaming chatCompletion returns. Reads the first
+       * choice's message content and records a rough size estimate so the
+       * registry has SOME size signal even for non-streamed calls.
+       *
+       * The estimate is ceil(characters / 4), computed separately for the
+       * text inside the first `<think>…</think>` pair (thinkingTokens) and for
+       * the remaining text with that pair removed (contentTokens). Any further
+       * `<think>` blocks are counted as content. Both counters are assigned,
+       * not added to, so earlier per-chunk counts on the entry are replaced.
+       * Also marks the entry as non-streaming and refreshes lastTokenAt.
+       *
+       * @param id registry id; the call is a no-op if the id is not registered
+       * @param result completion object; `result.choices` must be defined
+       *   (only the element and its message are optional-chained)
+       */
+      updateTelegramInferenceFinal(id, result) {
+        const entry = this.telegramActiveInferences.get(id);
+        if (!entry) return;
+        entry.streaming = false;
+        const text = result.choices[0]?.message?.content ?? "";
+        const thinkMatch = text.match(/<think>([\s\S]*?)<\/think>/);
+        const thinkingText = thinkMatch ? thinkMatch[1] : "";
+        const contentText = thinkMatch ? text.replace(thinkMatch[0], "") : text;
+        entry.thinkingTokens = Math.ceil(thinkingText.length / 4);
+        entry.contentTokens = Math.ceil(contentText.length / 4);
+        entry.lastTokenAt = performance.now();
+      }
+      deregisterTelegramInference(id) { // removes the entry and, when thinking is visible, writes a one-line summary
+        const entry = this.telegramActiveInferences.get(id);
+        if (!entry) return; // already removed (for example after the registry was cleared): no summary is written
+        this.telegramActiveInferences.delete(id);
+        if (this.telegramThinkingVisible) {
+          const dur = ((performance.now() - entry.startTs) / 1e3).toFixed(1);
+          const totalTokens = entry.contentTokens + entry.thinkingTokens;
+          const ratio = totalTokens > 0 ? Math.round(entry.thinkingTokens * 100 / totalTokens) : 0; // guard against dividing by zero when nothing was counted
+          this.tuiWrite(() => renderTelegramSubAgentEvent(
+            entry.sessionKey,
+            `inference ${id} [${entry.kind}] done in ${dur}s — ${entry.contentTokens}t content / ${entry.thinkingTokens}t thinking (${ratio}% think)`
+          ));
+        }
+      }
+      /**
+       * Snapshot of every in-flight Telegram-originated inference. The TUI
+       * dashboard / status line can call this to display "why are 2 GPUs spun
+       * up?" — each entry includes the kind, session, model, elapsed seconds,
+       * and token counts so the operator can correlate Ollama load to bridge
+       * activity.
+       *
+       * @returns a new array with one shallow copy per registry entry, each
+       *   extended with `elapsedSec` (time since startTs) and `idleSec` (time
+       *   since lastTokenAt), both derived from a single performance.now()
+       *   reading divided by 1e3. The copies are detached from the registry,
+       *   so mutating them does not change the live entries.
+       */
+      getTelegramActiveInferences() {
+        const now = performance.now();
+        return Array.from(this.telegramActiveInferences.values()).map((e2) => ({
+          ...e2,
+          elapsedSec: (now - e2.startTs) / 1e3,
+          idleSec: (now - e2.lastTokenAt) / 1e3
+        }));
+      }
+      /**
+       * Toggle thinking visibility for the Telegram bridge. Mirrors the main
+       * TUI's Ctrl+O semantics but applies to bridge-side streams. Returns the
+       * new state so a binding can echo it back to the operator.
+       *
+       * The value is stored as given (no boolean coercion), so the return
+       * value is whatever was passed in.
+       *
+       * @param visible new value for telegramThinkingVisible
+       * @returns the value now stored in telegramThinkingVisible
+       */
+      setTelegramThinkingVisible(visible) {
+        this.telegramThinkingVisible = visible;
+        return this.telegramThinkingVisible;
+      }
+      getTelegramThinkingVisible() { // read-only accessor for the bridge's thinking-visibility flag
+        return this.telegramThinkingVisible;
+      }
       async repairTelegramInteractionDecision(backend, rawOutput, forcedRoute, timeoutMs, diagnostics) {
         const rawPreview = telegramRouterRawPreview(rawOutput, 4e3);
         if (!rawPreview || telegramDecisionOutputHasDanglingJson(rawOutput)) {
@@ -620666,6 +620909,7 @@ ${TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT}`);
         }
         this.stopTelegramSubAgentWatchdog();
         this.cancelTelegramRouterSessionState("bridge stop");
+        this.telegramActiveInferences.clear();
         if (this.telegramSqliteDb && this.telegramSqliteDb !== false) {
           try {
             this.telegramSqliteDb.close();
@@ -621482,35 +621726,55 @@ ${conversationStream}`
         });
         let accumulated = "";
         let streamError;
+        const sessionKey = this.sessionKeyForMessage(msg);
+        const inferenceId = this.registerTelegramInference("chat-fast-path", sessionKey, config.model);
         const streamable = backend;
         const stream = typeof streamable.chatCompletionStream === "function" ? streamable.chatCompletionStream(request) : null;
-        if (stream && typeof stream[Symbol.asyncIterator] === "function") {
-          try {
-            for await (const chunk of stream) {
-              if (chunk.type === "content" && !chunk.thinking && chunk.content) {
-                accumulated += chunk.content;
-                await onToken(accumulated);
+        try {
+          if (stream && typeof stream[Symbol.asyncIterator] === "function") {
+            try {
+              for await (const chunk of stream) {
+                if (chunk.type !== "content") continue;
+                const piece = chunk.content;
+                if (!piece) continue;
+                if (chunk.thinking) {
+                  this.bumpTelegramInferenceTokens(inferenceId, 0, 1);
+                  if (this.telegramThinkingVisible) {
+                    const preview = piece.slice(0, 120);
+                    this.tuiWrite(() => renderTelegramSubAgentEvent(
+                      msg.username,
+                      `chat-fast-path thinking: ${JSON.stringify(preview)}`
+                    ));
+                  }
+                } else {
+                  this.bumpTelegramInferenceTokens(inferenceId, 1, 0);
+                  accumulated += piece;
+                  await onToken(accumulated);
+                }
               }
+            } catch (err) {
+              streamError = err;
+              accumulated = "";
             }
-          } catch (err) {
-            streamError = err;
-            accumulated = "";
           }
-        }
-        if (!accumulated.trim()) {
-          let result;
-          try {
-            result = await backend.chatCompletion(request);
-          } catch (err) {
-            if (streamError) {
-              const streamMsg = streamError instanceof Error ? streamError.message : String(streamError);
-              const retryMsg = err instanceof Error ? err.message : String(err);
-              throw new Error(`streaming failed (${streamMsg}); non-stream retry failed (${retryMsg})`);
+          if (!accumulated.trim()) {
+            let result;
+            try {
+              result = await backend.chatCompletion(request);
+            } catch (err) {
+              if (streamError) {
+                const streamMsg = streamError instanceof Error ? streamError.message : String(streamError);
+                const retryMsg = err instanceof Error ? err.message : String(err);
+                throw new Error(`streaming failed (${streamMsg}); non-stream retry failed (${retryMsg})`);
+              }
+              throw err;
             }
-            throw err;
+            this.updateTelegramInferenceFinal(inferenceId, result);
+            accumulated = result.choices[0]?.message?.content ?? "";
+            if (accumulated) await onToken(accumulated);
           }
-          accumulated = result.choices[0]?.message?.content ?? "";
-          if (accumulated) await onToken(accumulated);
+        } finally {
+          this.deregisterTelegramInference(inferenceId);
         }
         return stripTelegramHiddenThinking(accumulated).trim();
       }
@@ -621665,6 +621929,13 @@ ${conversationStream}`
           if (event.type === "stream_token" && event.streamKind === "content" && event.content) {
             subAgent.accumulated += event.content;
           }
+          if (event.type === "stream_token" && event.streamKind === "thinking" && event.content && this.telegramThinkingVisible) {
+            const trimmed = event.content.replace(/\s+/g, " ").slice(0, 200);
+            this.subAgentViewCallbacks?.onWrite(
+              subAgent.viewId,
+              `thinking: ${trimmed}`
+            );
+          }
           const intermediateLine = formatTelegramProgressEvent(event);
           if (intermediateLine && (isAdminDM || event.type !== "status")) {
             subAgent.intermediateLines.push(intermediateLine);

package/npm-shrinkwrap.json CHANGED Viewed

@@ -1,12 +1,12 @@
 {
   "name": "omnius",
-  "version": "1.0.115",
+  "version": "1.0.117",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "omnius",
-      "version": "1.0.115",
+      "version": "1.0.117",
       "bundleDependencies": [
         "image-to-ascii"
       ],

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "omnius",
-  "version": "1.0.115",
+  "version": "1.0.117",
   "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
   "type": "module",
   "main": "./dist/index.js",