npm - omnius - Versions diffs - 1.0.81 → 1.0.82 - Mend

omnius 1.0.81 → 1.0.82

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/dist/index.js CHANGED

@@ -539007,6 +539007,18 @@ function injectNoThinkDirective(messages2) {
 /no_think`;
   return messages2.map((m2, i2) => i2 === lastUserIdx ? { ...m2, content: annotated } : m2);
 }
+function backendHttpErrorDetail(text) {
+  const trimmed = text.trimStart();
+  const isHtml = trimmed.startsWith("<!") || trimmed.startsWith("<html");
+  return isHtml ? `(received HTML error page — backend may be behind a proxy/CDN that is timing out)` : text.slice(0, 200);
+}
+function isOllamaModelNotFoundResponse(status, text, model) {
+  if (status !== 404)
+    return false;
+  const lower = text.toLowerCase();
+  const modelLower = model.toLowerCase();
+  return lower.includes("model") && lower.includes("not found") || lower.includes("not_found_error") || modelLower.length > 0 && lower.includes(modelLower) && lower.includes("not found");
+}
 function computeEffectiveThink(params) {
   if (process.env["OMNIUS_FORCE_NO_THINK"] === "1")
     return false;
@@ -551452,11 +551464,17 @@ ${description}`
         if (responseFormat !== void 0) {
           body["response_format"] = responseFormat;
         }
-        const poolSlot = shouldUseOllamaPoolForBaseUrl(this.baseUrl) ? await getOllamaPool({ baseInstanceUrl: this.baseUrl }).acquire({
+        let poolSlot = shouldUseOllamaPoolForBaseUrl(this.baseUrl) ? await getOllamaPool({ baseInstanceUrl: this.baseUrl }).acquire({
           model: this.model
         }) : null;
-        const requestBaseUrl = poolSlot?.baseUrl ?? this.baseUrl;
+        let requestBaseUrl = poolSlot?.baseUrl ?? this.baseUrl;
         let poolSuccess = false;
+        const releasePoolSlot = (success) => {
+          if (!poolSlot)
+            return;
+          poolSlot.release(success);
+          poolSlot = null;
+        };
         const combineAbortSignals = (signals) => {
           const filtered = signals.filter((s2) => s2 instanceof AbortSignal);
           if (filtered.length === 0)
@@ -551491,11 +551509,26 @@ ${description}`
           };
           if (combinedAbortSignal)
             fetchOpts.signal = combinedAbortSignal;
-          const resp = await fetch(`${requestBaseUrl}/v1/chat/completions`, fetchOpts);
+          let resp = await fetch(`${requestBaseUrl}/v1/chat/completions`, fetchOpts);
           if (!resp.ok) {
             const text = await resp.text().catch(() => "");
-            const isHtml = text.trimStart().startsWith("<!") || text.trimStart().startsWith("<html");
-            const detail = isHtml ? `(received HTML error page — backend may be behind a proxy/CDN that is timing out)` : text.slice(0, 200);
+            if (poolSlot?.poolOwned && isOllamaModelNotFoundResponse(resp.status, text, this.model)) {
+              releasePoolSlot(false);
+              requestBaseUrl = this.baseUrl;
+              resp = await fetch(`${requestBaseUrl}/v1/chat/completions`, fetchOpts);
+              if (resp.ok) {
+              } else {
+                const retryText = await resp.text().catch(() => "");
+                throw new Error(`Backend HTTP ${resp.status}: ${backendHttpErrorDetail(retryText)}`);
+              }
+            } else {
+              const detail = backendHttpErrorDetail(text);
+              throw new Error(`Backend HTTP ${resp.status}: ${detail}`);
+            }
+          }
+          if (!resp.ok) {
+            const text = await resp.text().catch(() => "");
+            const detail = backendHttpErrorDetail(text);
             throw new Error(`Backend HTTP ${resp.status}: ${detail}`);
           }
           const data = await resp.json();
@@ -551577,7 +551610,7 @@ ${description}`
             } : void 0
           };
         } finally {
-          poolSlot?.release(poolSuccess);
+          releasePoolSlot(poolSuccess);
         }
       }
       /** Anthropic Messages API translation — converts our standard format to/from Anthropic's. */
@@ -551686,8 +551719,8 @@ ${description}`
       }
       /**
        * SSE streaming variant — yields StreamChunks as tokens arrive.
-       * Uses `stream: true` and the current thinking setting.
-       * The existing chatCompletion() method is completely unmodified.
+       * Uses `stream: true`, the current thinking setting, and the same
+       * Ollama pool routing as non-stream completions.
        */
       async *chatCompletionStream(request) {
         const cleanedMessages = normalizeMessagesForStrictOpenAI(request.messages.map((m2) => m2.role === "assistant" && typeof m2.content === "string" ? { ...m2, content: stripThinkBlocks(m2.content) } : m2));
@@ -551715,100 +551748,125 @@ ${description}`
           stream_options: { include_usage: true },
           think: effectiveThink
         };
-        const streamFetchOpts = {
-          method: "POST",
-          headers: this.authHeaders(),
-          body: JSON.stringify(body)
+        let poolSlot = shouldUseOllamaPoolForBaseUrl(this.baseUrl) ? await getOllamaPool({ baseInstanceUrl: this.baseUrl }).acquire({
+          model: this.model
+        }) : null;
+        let requestBaseUrl = poolSlot?.baseUrl ?? this.baseUrl;
+        let poolSuccess = false;
+        const releasePoolSlot = (success) => {
+          if (!poolSlot)
+            return;
+          poolSlot.release(success);
+          poolSlot = null;
         };
-        if (this._abortSignal)
-          streamFetchOpts.signal = this._abortSignal;
-        const resp = await fetch(`${this.baseUrl}/v1/chat/completions`, streamFetchOpts);
-        if (!resp.ok) {
-          const text = await resp.text().catch(() => "");
-          const isHtml = text.trimStart().startsWith("<!") || text.trimStart().startsWith("<html");
-          const detail = isHtml ? `(received HTML error page — backend may be behind a proxy/CDN that is timing out)` : text.slice(0, 200);
-          throw new Error(`Backend HTTP ${resp.status}: ${detail}`);
-        }
-        let sseBuffer = "";
-        const decoder = new TextDecoder();
-        let accumulatedContent = "";
-        let accumulatedThinking = "";
-        let sawReasoningTokens = false;
-        for await (const rawChunk of resp.body) {
-          sseBuffer += decoder.decode(rawChunk, { stream: true });
-          const parts = sseBuffer.split("\n\n");
-          sseBuffer = parts.pop();
-          for (const part of parts) {
-            const line = part.trim();
-            if (!line)
-              continue;
-            if (line === "data: [DONE]") {
-              this._finalizeStreamGuard(effectiveThink, accumulatedContent, accumulatedThinking, sawReasoningTokens);
-              return;
+        try {
+          const streamFetchOpts = {
+            method: "POST",
+            headers: this.authHeaders(),
+            body: JSON.stringify(body)
+          };
+          if (this._abortSignal)
+            streamFetchOpts.signal = this._abortSignal;
+          let resp = await fetch(`${requestBaseUrl}/v1/chat/completions`, streamFetchOpts);
+          if (!resp.ok) {
+            const text = await resp.text().catch(() => "");
+            if (poolSlot?.poolOwned && isOllamaModelNotFoundResponse(resp.status, text, this.model)) {
+              releasePoolSlot(false);
+              requestBaseUrl = this.baseUrl;
+              resp = await fetch(`${requestBaseUrl}/v1/chat/completions`, streamFetchOpts);
+              if (!resp.ok) {
+                const retryText = await resp.text().catch(() => "");
+                throw new Error(`Backend HTTP ${resp.status}: ${backendHttpErrorDetail(retryText)}`);
+              }
+            } else {
+              throw new Error(`Backend HTTP ${resp.status}: ${backendHttpErrorDetail(text)}`);
             }
-            if (!line.startsWith("data: "))
-              continue;
-            try {
-              const data = JSON.parse(line.slice(6));
-              const choices = data.choices ?? [];
-              const chunkUsageEarly = data.usage;
-              if (chunkUsageEarly) {
-                yield {
-                  type: "usage",
-                  usage: {
-                    promptTokens: chunkUsageEarly.prompt_tokens ?? 0,
-                    completionTokens: chunkUsageEarly.completion_tokens ?? 0,
-                    totalTokens: chunkUsageEarly.total_tokens ?? 0
-                  }
-                };
+          }
+          let sseBuffer = "";
+          const decoder = new TextDecoder();
+          let accumulatedContent = "";
+          let accumulatedThinking = "";
+          let sawReasoningTokens = false;
+          for await (const rawChunk of resp.body) {
+            sseBuffer += decoder.decode(rawChunk, { stream: true });
+            const parts = sseBuffer.split("\n\n");
+            sseBuffer = parts.pop();
+            for (const part of parts) {
+              const line = part.trim();
+              if (!line)
+                continue;
+              if (line === "data: [DONE]") {
+                this._finalizeStreamGuard(effectiveThink, accumulatedContent, accumulatedThinking, sawReasoningTokens);
+                poolSuccess = true;
+                return;
               }
-              const choice = choices[0];
-              if (!choice)
+              if (!line.startsWith("data: "))
                 continue;
-              const delta = choice.delta;
-              const finishReason = choice.finish_reason;
-              const reasoningToken = delta?.reasoning ?? delta?.reasoning_content;
-              if (reasoningToken && effectiveThink) {
-                sawReasoningTokens = true;
-                accumulatedThinking += reasoningToken;
-                yield { type: "content", content: reasoningToken, thinking: true };
-              }
-              if (delta?.content) {
-                accumulatedContent += delta.content;
-                yield { type: "content", content: delta.content };
-              }
-              const tcDeltas = delta?.tool_calls;
-              if (tcDeltas) {
-                for (const tcd of tcDeltas) {
-                  const fn = tcd.function;
+              try {
+                const data = JSON.parse(line.slice(6));
+                const choices = data.choices ?? [];
+                const chunkUsageEarly = data.usage;
+                if (chunkUsageEarly) {
                   yield {
-                    type: "tool_call_delta",
-                    toolCallIndex: tcd.index ?? 0,
-                    toolCallId: tcd.id || void 0,
-                    toolCallName: fn?.name || void 0,
-                    toolCallArgs: fn?.arguments || void 0
+                    type: "usage",
+                    usage: {
+                      promptTokens: chunkUsageEarly.prompt_tokens ?? 0,
+                      completionTokens: chunkUsageEarly.completion_tokens ?? 0,
+                      totalTokens: chunkUsageEarly.total_tokens ?? 0
+                    }
                   };
                 }
-              }
-              const chunkUsage = data.usage;
-              if (chunkUsage) {
-                yield {
-                  type: "usage",
-                  usage: {
-                    promptTokens: chunkUsage.prompt_tokens ?? 0,
-                    completionTokens: chunkUsage.completion_tokens ?? 0,
-                    totalTokens: chunkUsage.total_tokens ?? 0
+                const choice = choices[0];
+                if (!choice)
+                  continue;
+                const delta = choice.delta;
+                const finishReason = choice.finish_reason;
+                const reasoningToken = delta?.reasoning ?? delta?.reasoning_content;
+                if (reasoningToken && effectiveThink) {
+                  sawReasoningTokens = true;
+                  accumulatedThinking += reasoningToken;
+                  yield { type: "content", content: reasoningToken, thinking: true };
+                }
+                if (delta?.content) {
+                  accumulatedContent += delta.content;
+                  yield { type: "content", content: delta.content };
+                }
+                const tcDeltas = delta?.tool_calls;
+                if (tcDeltas) {
+                  for (const tcd of tcDeltas) {
+                    const fn = tcd.function;
+                    yield {
+                      type: "tool_call_delta",
+                      toolCallIndex: tcd.index ?? 0,
+                      toolCallId: tcd.id || void 0,
+                      toolCallName: fn?.name || void 0,
+                      toolCallArgs: fn?.arguments || void 0
+                    };
                   }
-                };
-              }
-              if (finishReason) {
-                yield { type: "finish", finishReason };
+                }
+                const chunkUsage = data.usage;
+                if (chunkUsage) {
+                  yield {
+                    type: "usage",
+                    usage: {
+                      promptTokens: chunkUsage.prompt_tokens ?? 0,
+                      completionTokens: chunkUsage.completion_tokens ?? 0,
+                      totalTokens: chunkUsage.total_tokens ?? 0
+                    }
+                  };
+                }
+                if (finishReason) {
+                  yield { type: "finish", finishReason };
+                }
+              } catch {
               }
-            } catch {
             }
           }
+          this._finalizeStreamGuard(effectiveThink, accumulatedContent, accumulatedThinking, sawReasoningTokens);
+          poolSuccess = true;
+        } finally {
+          releasePoolSlot(poolSuccess);
         }
-        this._finalizeStreamGuard(effectiveThink, accumulatedContent, accumulatedThinking, sawReasoningTokens);
       }
       /** Reconstruct a raw-looking assistant response from the streamed
        *  parts, then feed it into the loop-guard. Used at stream end (both
@@ -610401,8 +610459,9 @@ function telegramDecisionRecoverableFlag(text) {
   }
   return void 0;
 }
-function telegramRouterTimeoutMs(configTimeoutMs, minMs = 15e3, maxMs = 6e4) {
-  return Math.min(Math.max(configTimeoutMs ?? 3e4, minMs), maxMs);
+function telegramRouterTimeoutMs(configTimeoutMs, minMs = 15e3, _legacyMaxMs) {
+  const configured = Number.isFinite(configTimeoutMs) && (configTimeoutMs ?? 0) > 0 ? configTimeoutMs : 3e5;
+  return Math.max(configured, minMs);
 }
 function parseTelegramInteractionDecision(text, forcedRoute, options2 = {}) {
   for (const jsonText of telegramDecisionJsonCandidates(text)) {
@@ -610740,6 +610799,21 @@ function cleanTelegramVisibleReply(text, options2 = {}) {
   if (!filtered) return "";
   return dedupeTelegramVisibleReply(filtered);
 }
+function summarizeTelegramInferenceError(message2) {
+  if (/aborted due to timeout|aborterror|timed? out/i.test(message2)) {
+    return "backend inference timed out before a reply was delivered";
+  }
+  if (/model ['"]?[^'"]+['"]? not found|not_found_error/i.test(message2)) {
+    return "the configured model was not available on the selected Ollama runner";
+  }
+  if (/Backend HTTP 5\d\d/i.test(message2)) {
+    return "the backend returned a transient server error";
+  }
+  if (/Backend HTTP 4\d\d/i.test(message2)) {
+    return message2.slice(0, 180);
+  }
+  return message2.slice(0, 180) || "unknown backend failure";
+}
 function dedupeTelegramVisibleReply(text) {
   const paragraphs = text.split(/\n{2,}/);
   const seenParagraphs = /* @__PURE__ */ new Set();
@@ -616982,7 +617056,8 @@ Join: ${newUrl}`);
             await this.editLiveMessage(msg.chatId, liveMessageId, `Error: ${escapeTelegramHTML(errMsg)}`).catch(() => {
             });
           } else {
-            await this.replyToTelegramMessage(msg, "Sorry, I couldn't process that quick chat message.").catch(() => {
+            const summary = summarizeTelegramInferenceError(errMsg);
+            await this.replyToTelegramMessage(msg, `Sorry, quick chat inference failed: ${summary}.`).catch(() => {
             });
           }
         } finally {
@@ -617060,10 +617135,11 @@ ${conversationStream}`
           tools: [],
           temperature: 0.4,
           maxTokens: 700,
-          timeoutMs: Math.min(config.timeoutMs ?? 3e4, 3e4),
+          timeoutMs: Math.max(config.timeoutMs ?? 3e5, 6e4),
           think: false
         };
         let accumulated = "";
+        let streamError;
         const streamable = backend;
         const stream = typeof streamable.chatCompletionStream === "function" ? streamable.chatCompletionStream(request) : null;
         if (stream && typeof stream[Symbol.asyncIterator] === "function") {
@@ -617074,12 +617150,23 @@ ${conversationStream}`
                 await onToken(accumulated);
               }
             }
-          } catch {
+          } catch (err) {
+            streamError = err;
             accumulated = "";
           }
         }
         if (!accumulated.trim()) {
-          const result = await backend.chatCompletion(request);
+          let result;
+          try {
+            result = await backend.chatCompletion(request);
+          } catch (err) {
+            if (streamError) {
+              const streamMsg = streamError instanceof Error ? streamError.message : String(streamError);
+              const retryMsg = err instanceof Error ? err.message : String(err);
+              throw new Error(`streaming failed (${streamMsg}); non-stream retry failed (${retryMsg})`);
+            }
+            throw err;
+          }
           accumulated = result.choices[0]?.message?.content ?? "";
           if (accumulated) await onToken(accumulated);
         }

package/npm-shrinkwrap.json CHANGED

@@ -1,12 +1,12 @@
 {
   "name": "omnius",
-  "version": "1.0.81",
+  "version": "1.0.82",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "omnius",
-      "version": "1.0.81",
+      "version": "1.0.82",
       "bundleDependencies": [
         "image-to-ascii"
       ],

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "omnius",
-  "version": "1.0.81",
+  "version": "1.0.82",
   "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
   "type": "module",
   "main": "./dist/index.js",