omnius 1.0.81 → 1.0.82

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -539007,6 +539007,18 @@ function injectNoThinkDirective(messages2) {
539007
539007
  /no_think`;
539008
539008
  return messages2.map((m2, i2) => i2 === lastUserIdx ? { ...m2, content: annotated } : m2);
539009
539009
  }
539010
/**
 * Build a short description of a failed backend HTTP response body for use
 * in an error message.
 *
 * A body that starts (after leading whitespace) with `<!` or `<html` is
 * treated as an HTML error page and replaced by a fixed hint instead of being
 * echoed. Any other body is returned truncated to its first 200 characters.
 *
 * @param {string} text - Raw response body; may be empty.
 * @returns {string} The fixed HTML hint, or at most 200 characters of `text`.
 */
function backendHttpErrorDetail(text) {
  // Tolerate a missing body so building the error message can never throw itself.
  const body = typeof text === "string" ? text : String(text ?? "");
  // HTML tag names are case-insensitive, so "<HTML>" must be detected as well as "<html>".
  const head = body.trimStart().slice(0, 5).toLowerCase();
  const isHtml = head.startsWith("<!") || head.startsWith("<html");
  return isHtml ? `(received HTML error page — backend may be behind a proxy/CDN that is timing out)` : body.slice(0, 200);
}
539015
/**
 * Decide whether a failed response looks like a "model not found" error.
 *
 * Only HTTP 404 responses qualify. The body, compared case-insensitively,
 * must then either
 *   - contain "not_found_error", or
 *   - contain "not found" together with the word "model" or the requested
 *     model name.
 *
 * @param {number} status - HTTP status code of the response.
 * @param {string} text - Response body.
 * @param {string} model - Name of the model that was requested.
 * @returns {boolean} True when the response matches one of the patterns above.
 */
function isOllamaModelNotFoundResponse(status, text, model) {
  if (status !== 404) {
    return false;
  }
  // This runs while handling an HTTP failure: tolerate missing values rather
  // than throwing a TypeError that would hide the original backend error.
  const lower = String(text ?? "").toLowerCase();
  const modelLower = String(model ?? "").toLowerCase();
  if (lower.includes("not_found_error")) {
    return true;
  }
  if (!lower.includes("not found")) {
    return false;
  }
  return lower.includes("model") || (modelLower.length > 0 && lower.includes(modelLower));
}
539010
539022
  function computeEffectiveThink(params) {
539011
539023
  if (process.env["OMNIUS_FORCE_NO_THINK"] === "1")
539012
539024
  return false;
@@ -551452,11 +551464,17 @@ ${description}`
551452
551464
  if (responseFormat !== void 0) {
551453
551465
  body["response_format"] = responseFormat;
551454
551466
  }
551455
- const poolSlot = shouldUseOllamaPoolForBaseUrl(this.baseUrl) ? await getOllamaPool({ baseInstanceUrl: this.baseUrl }).acquire({
551467
+ let poolSlot = shouldUseOllamaPoolForBaseUrl(this.baseUrl) ? await getOllamaPool({ baseInstanceUrl: this.baseUrl }).acquire({
551456
551468
  model: this.model
551457
551469
  }) : null;
551458
- const requestBaseUrl = poolSlot?.baseUrl ?? this.baseUrl;
551470
+ let requestBaseUrl = poolSlot?.baseUrl ?? this.baseUrl;
551459
551471
  let poolSuccess = false;
551472
+ const releasePoolSlot = (success) => {
551473
+ if (!poolSlot)
551474
+ return;
551475
+ poolSlot.release(success);
551476
+ poolSlot = null;
551477
+ };
551460
551478
  const combineAbortSignals = (signals) => {
551461
551479
  const filtered = signals.filter((s2) => s2 instanceof AbortSignal);
551462
551480
  if (filtered.length === 0)
@@ -551491,11 +551509,26 @@ ${description}`
551491
551509
  };
551492
551510
  if (combinedAbortSignal)
551493
551511
  fetchOpts.signal = combinedAbortSignal;
551494
- const resp = await fetch(`${requestBaseUrl}/v1/chat/completions`, fetchOpts);
551512
+ let resp = await fetch(`${requestBaseUrl}/v1/chat/completions`, fetchOpts);
551495
551513
  if (!resp.ok) {
551496
551514
  const text = await resp.text().catch(() => "");
551497
- const isHtml = text.trimStart().startsWith("<!") || text.trimStart().startsWith("<html");
551498
- const detail = isHtml ? `(received HTML error page — backend may be behind a proxy/CDN that is timing out)` : text.slice(0, 200);
551515
+ if (poolSlot?.poolOwned && isOllamaModelNotFoundResponse(resp.status, text, this.model)) {
551516
+ releasePoolSlot(false);
551517
+ requestBaseUrl = this.baseUrl;
551518
+ resp = await fetch(`${requestBaseUrl}/v1/chat/completions`, fetchOpts);
551519
+ if (resp.ok) {
551520
+ } else {
551521
+ const retryText = await resp.text().catch(() => "");
551522
+ throw new Error(`Backend HTTP ${resp.status}: ${backendHttpErrorDetail(retryText)}`);
551523
+ }
551524
+ } else {
551525
+ const detail = backendHttpErrorDetail(text);
551526
+ throw new Error(`Backend HTTP ${resp.status}: ${detail}`);
551527
+ }
551528
+ }
551529
+ if (!resp.ok) {
551530
+ const text = await resp.text().catch(() => "");
551531
+ const detail = backendHttpErrorDetail(text);
551499
551532
  throw new Error(`Backend HTTP ${resp.status}: ${detail}`);
551500
551533
  }
551501
551534
  const data = await resp.json();
@@ -551577,7 +551610,7 @@ ${description}`
551577
551610
  } : void 0
551578
551611
  };
551579
551612
  } finally {
551580
- poolSlot?.release(poolSuccess);
551613
+ releasePoolSlot(poolSuccess);
551581
551614
  }
551582
551615
  }
551583
551616
  /** Anthropic Messages API translation — converts our standard format to/from Anthropic's. */
@@ -551686,8 +551719,8 @@ ${description}`
551686
551719
  }
551687
551720
  /**
551688
551721
  * SSE streaming variant — yields StreamChunks as tokens arrive.
551689
- * Uses `stream: true` and the current thinking setting.
551690
- * The existing chatCompletion() method is completely unmodified.
551722
+ * Uses `stream: true`, the current thinking setting, and the same
551723
+ * Ollama pool routing as non-stream completions.
551691
551724
  */
551692
551725
  async *chatCompletionStream(request) {
551693
551726
  const cleanedMessages = normalizeMessagesForStrictOpenAI(request.messages.map((m2) => m2.role === "assistant" && typeof m2.content === "string" ? { ...m2, content: stripThinkBlocks(m2.content) } : m2));
@@ -551715,100 +551748,125 @@ ${description}`
551715
551748
  stream_options: { include_usage: true },
551716
551749
  think: effectiveThink
551717
551750
  };
551718
- const streamFetchOpts = {
551719
- method: "POST",
551720
- headers: this.authHeaders(),
551721
- body: JSON.stringify(body)
551751
+ let poolSlot = shouldUseOllamaPoolForBaseUrl(this.baseUrl) ? await getOllamaPool({ baseInstanceUrl: this.baseUrl }).acquire({
551752
+ model: this.model
551753
+ }) : null;
551754
+ let requestBaseUrl = poolSlot?.baseUrl ?? this.baseUrl;
551755
+ let poolSuccess = false;
551756
+ const releasePoolSlot = (success) => {
551757
+ if (!poolSlot)
551758
+ return;
551759
+ poolSlot.release(success);
551760
+ poolSlot = null;
551722
551761
  };
551723
- if (this._abortSignal)
551724
- streamFetchOpts.signal = this._abortSignal;
551725
- const resp = await fetch(`${this.baseUrl}/v1/chat/completions`, streamFetchOpts);
551726
- if (!resp.ok) {
551727
- const text = await resp.text().catch(() => "");
551728
- const isHtml = text.trimStart().startsWith("<!") || text.trimStart().startsWith("<html");
551729
- const detail = isHtml ? `(received HTML error page — backend may be behind a proxy/CDN that is timing out)` : text.slice(0, 200);
551730
- throw new Error(`Backend HTTP ${resp.status}: ${detail}`);
551731
- }
551732
- let sseBuffer = "";
551733
- const decoder = new TextDecoder();
551734
- let accumulatedContent = "";
551735
- let accumulatedThinking = "";
551736
- let sawReasoningTokens = false;
551737
- for await (const rawChunk of resp.body) {
551738
- sseBuffer += decoder.decode(rawChunk, { stream: true });
551739
- const parts = sseBuffer.split("\n\n");
551740
- sseBuffer = parts.pop();
551741
- for (const part of parts) {
551742
- const line = part.trim();
551743
- if (!line)
551744
- continue;
551745
- if (line === "data: [DONE]") {
551746
- this._finalizeStreamGuard(effectiveThink, accumulatedContent, accumulatedThinking, sawReasoningTokens);
551747
- return;
551762
+ try {
551763
+ const streamFetchOpts = {
551764
+ method: "POST",
551765
+ headers: this.authHeaders(),
551766
+ body: JSON.stringify(body)
551767
+ };
551768
+ if (this._abortSignal)
551769
+ streamFetchOpts.signal = this._abortSignal;
551770
+ let resp = await fetch(`${requestBaseUrl}/v1/chat/completions`, streamFetchOpts);
551771
+ if (!resp.ok) {
551772
+ const text = await resp.text().catch(() => "");
551773
+ if (poolSlot?.poolOwned && isOllamaModelNotFoundResponse(resp.status, text, this.model)) {
551774
+ releasePoolSlot(false);
551775
+ requestBaseUrl = this.baseUrl;
551776
+ resp = await fetch(`${requestBaseUrl}/v1/chat/completions`, streamFetchOpts);
551777
+ if (!resp.ok) {
551778
+ const retryText = await resp.text().catch(() => "");
551779
+ throw new Error(`Backend HTTP ${resp.status}: ${backendHttpErrorDetail(retryText)}`);
551780
+ }
551781
+ } else {
551782
+ throw new Error(`Backend HTTP ${resp.status}: ${backendHttpErrorDetail(text)}`);
551748
551783
  }
551749
- if (!line.startsWith("data: "))
551750
- continue;
551751
- try {
551752
- const data = JSON.parse(line.slice(6));
551753
- const choices = data.choices ?? [];
551754
- const chunkUsageEarly = data.usage;
551755
- if (chunkUsageEarly) {
551756
- yield {
551757
- type: "usage",
551758
- usage: {
551759
- promptTokens: chunkUsageEarly.prompt_tokens ?? 0,
551760
- completionTokens: chunkUsageEarly.completion_tokens ?? 0,
551761
- totalTokens: chunkUsageEarly.total_tokens ?? 0
551762
- }
551763
- };
551784
+ }
551785
+ let sseBuffer = "";
551786
+ const decoder = new TextDecoder();
551787
+ let accumulatedContent = "";
551788
+ let accumulatedThinking = "";
551789
+ let sawReasoningTokens = false;
551790
+ for await (const rawChunk of resp.body) {
551791
+ sseBuffer += decoder.decode(rawChunk, { stream: true });
551792
+ const parts = sseBuffer.split("\n\n");
551793
+ sseBuffer = parts.pop();
551794
+ for (const part of parts) {
551795
+ const line = part.trim();
551796
+ if (!line)
551797
+ continue;
551798
+ if (line === "data: [DONE]") {
551799
+ this._finalizeStreamGuard(effectiveThink, accumulatedContent, accumulatedThinking, sawReasoningTokens);
551800
+ poolSuccess = true;
551801
+ return;
551764
551802
  }
551765
- const choice = choices[0];
551766
- if (!choice)
551803
+ if (!line.startsWith("data: "))
551767
551804
  continue;
551768
- const delta = choice.delta;
551769
- const finishReason = choice.finish_reason;
551770
- const reasoningToken = delta?.reasoning ?? delta?.reasoning_content;
551771
- if (reasoningToken && effectiveThink) {
551772
- sawReasoningTokens = true;
551773
- accumulatedThinking += reasoningToken;
551774
- yield { type: "content", content: reasoningToken, thinking: true };
551775
- }
551776
- if (delta?.content) {
551777
- accumulatedContent += delta.content;
551778
- yield { type: "content", content: delta.content };
551779
- }
551780
- const tcDeltas = delta?.tool_calls;
551781
- if (tcDeltas) {
551782
- for (const tcd of tcDeltas) {
551783
- const fn = tcd.function;
551805
+ try {
551806
+ const data = JSON.parse(line.slice(6));
551807
+ const choices = data.choices ?? [];
551808
+ const chunkUsageEarly = data.usage;
551809
+ if (chunkUsageEarly) {
551784
551810
  yield {
551785
- type: "tool_call_delta",
551786
- toolCallIndex: tcd.index ?? 0,
551787
- toolCallId: tcd.id || void 0,
551788
- toolCallName: fn?.name || void 0,
551789
- toolCallArgs: fn?.arguments || void 0
551811
+ type: "usage",
551812
+ usage: {
551813
+ promptTokens: chunkUsageEarly.prompt_tokens ?? 0,
551814
+ completionTokens: chunkUsageEarly.completion_tokens ?? 0,
551815
+ totalTokens: chunkUsageEarly.total_tokens ?? 0
551816
+ }
551790
551817
  };
551791
551818
  }
551792
- }
551793
- const chunkUsage = data.usage;
551794
- if (chunkUsage) {
551795
- yield {
551796
- type: "usage",
551797
- usage: {
551798
- promptTokens: chunkUsage.prompt_tokens ?? 0,
551799
- completionTokens: chunkUsage.completion_tokens ?? 0,
551800
- totalTokens: chunkUsage.total_tokens ?? 0
551819
+ const choice = choices[0];
551820
+ if (!choice)
551821
+ continue;
551822
+ const delta = choice.delta;
551823
+ const finishReason = choice.finish_reason;
551824
+ const reasoningToken = delta?.reasoning ?? delta?.reasoning_content;
551825
+ if (reasoningToken && effectiveThink) {
551826
+ sawReasoningTokens = true;
551827
+ accumulatedThinking += reasoningToken;
551828
+ yield { type: "content", content: reasoningToken, thinking: true };
551829
+ }
551830
+ if (delta?.content) {
551831
+ accumulatedContent += delta.content;
551832
+ yield { type: "content", content: delta.content };
551833
+ }
551834
+ const tcDeltas = delta?.tool_calls;
551835
+ if (tcDeltas) {
551836
+ for (const tcd of tcDeltas) {
551837
+ const fn = tcd.function;
551838
+ yield {
551839
+ type: "tool_call_delta",
551840
+ toolCallIndex: tcd.index ?? 0,
551841
+ toolCallId: tcd.id || void 0,
551842
+ toolCallName: fn?.name || void 0,
551843
+ toolCallArgs: fn?.arguments || void 0
551844
+ };
551801
551845
  }
551802
- };
551803
- }
551804
- if (finishReason) {
551805
- yield { type: "finish", finishReason };
551846
+ }
551847
+ const chunkUsage = data.usage;
551848
+ if (chunkUsage) {
551849
+ yield {
551850
+ type: "usage",
551851
+ usage: {
551852
+ promptTokens: chunkUsage.prompt_tokens ?? 0,
551853
+ completionTokens: chunkUsage.completion_tokens ?? 0,
551854
+ totalTokens: chunkUsage.total_tokens ?? 0
551855
+ }
551856
+ };
551857
+ }
551858
+ if (finishReason) {
551859
+ yield { type: "finish", finishReason };
551860
+ }
551861
+ } catch {
551806
551862
  }
551807
- } catch {
551808
551863
  }
551809
551864
  }
551865
+ this._finalizeStreamGuard(effectiveThink, accumulatedContent, accumulatedThinking, sawReasoningTokens);
551866
+ poolSuccess = true;
551867
+ } finally {
551868
+ releasePoolSlot(poolSuccess);
551810
551869
  }
551811
- this._finalizeStreamGuard(effectiveThink, accumulatedContent, accumulatedThinking, sawReasoningTokens);
551812
551870
  }
551813
551871
  /** Reconstruct a raw-looking assistant response from the streamed
551814
551872
  * parts, then feed it into the loop-guard. Used at stream end (both
@@ -610401,8 +610459,9 @@ function telegramDecisionRecoverableFlag(text) {
610401
610459
  }
610402
610460
  return void 0;
610403
610461
  }
610404
/**
 * Resolve the timeout, in milliseconds, for a Telegram router inference call.
 *
 * A positive, finite numeric `configTimeoutMs` is used as configured; any
 * other value falls back to 300000 ms. The result is never below `minMs`.
 *
 * @param {number} [configTimeoutMs] - Configured timeout in milliseconds.
 * @param {number} [minMs=15000] - Lower bound for the returned timeout.
 * @param {number} [_legacyMaxMs] - Unused; no upper bound is applied.
 * @returns {number} Timeout in milliseconds.
 */
function telegramRouterTimeoutMs(configTimeoutMs, minMs = 15e3, _legacyMaxMs) {
  const configured = Number.isFinite(configTimeoutMs) && configTimeoutMs > 0 ? configTimeoutMs : 3e5;
  // Math.max propagates NaN, so an unparsable floor falls back to the default
  // lower bound instead of turning the whole timeout into NaN.
  const requestedFloor = Number(minMs);
  const floor = Number.isNaN(requestedFloor) ? 15e3 : requestedFloor;
  return Math.max(configured, floor);
}
610407
610466
  function parseTelegramInteractionDecision(text, forcedRoute, options2 = {}) {
610408
610467
  for (const jsonText of telegramDecisionJsonCandidates(text)) {
@@ -610740,6 +610799,21 @@ function cleanTelegramVisibleReply(text, options2 = {}) {
610740
610799
  if (!filtered) return "";
610741
610800
  return dedupeTelegramVisibleReply(filtered);
610742
610801
  }
610802
/**
 * Turn a raw inference error message into a short summary.
 *
 * Patterns are checked in order and the first match wins:
 *   1. timeout / abort wording -> fixed "timed out" summary
 *   2. model-not-found wording -> fixed "model was not available" summary
 *   3. "Backend HTTP 5xx"      -> fixed "transient server error" summary
 * Anything else, including "Backend HTTP 4xx" messages, is returned truncated
 * to 180 characters; an empty message yields "unknown backend failure".
 *
 * @param {string} message2 - Raw error message.
 * @returns {string} Summary text, never empty.
 */
function summarizeTelegramInferenceError(message2) {
  // Tolerate a missing message so summarizing a failure cannot throw itself.
  const text = typeof message2 === "string" ? message2 : String(message2 ?? "");
  if (/aborted due to timeout|aborterror|timed? out/i.test(text)) {
    return "backend inference timed out before a reply was delivered";
  }
  if (/model ['"]?[^'"]+['"]? not found|not_found_error/i.test(text)) {
    return "the configured model was not available on the selected Ollama runner";
  }
  if (/Backend HTTP 5\d\d/i.test(text)) {
    return "the backend returned a transient server error";
  }
  // A "Backend HTTP 4xx" message is non-empty, so this fallback returns the
  // same truncated text a dedicated 4xx branch would.
  return text.slice(0, 180) || "unknown backend failure";
}
610743
610817
  function dedupeTelegramVisibleReply(text) {
610744
610818
  const paragraphs = text.split(/\n{2,}/);
610745
610819
  const seenParagraphs = /* @__PURE__ */ new Set();
@@ -616982,7 +617056,8 @@ Join: ${newUrl}`);
616982
617056
  await this.editLiveMessage(msg.chatId, liveMessageId, `Error: ${escapeTelegramHTML(errMsg)}`).catch(() => {
616983
617057
  });
616984
617058
  } else {
616985
- await this.replyToTelegramMessage(msg, "Sorry, I couldn't process that quick chat message.").catch(() => {
617059
+ const summary = summarizeTelegramInferenceError(errMsg);
617060
+ await this.replyToTelegramMessage(msg, `Sorry, quick chat inference failed: ${summary}.`).catch(() => {
616986
617061
  });
616987
617062
  }
616988
617063
  } finally {
@@ -617060,10 +617135,11 @@ ${conversationStream}`
617060
617135
  tools: [],
617061
617136
  temperature: 0.4,
617062
617137
  maxTokens: 700,
617063
- timeoutMs: Math.min(config.timeoutMs ?? 3e4, 3e4),
617138
+ timeoutMs: Math.max(config.timeoutMs ?? 3e5, 6e4),
617064
617139
  think: false
617065
617140
  };
617066
617141
  let accumulated = "";
617142
+ let streamError;
617067
617143
  const streamable = backend;
617068
617144
  const stream = typeof streamable.chatCompletionStream === "function" ? streamable.chatCompletionStream(request) : null;
617069
617145
  if (stream && typeof stream[Symbol.asyncIterator] === "function") {
@@ -617074,12 +617150,23 @@ ${conversationStream}`
617074
617150
  await onToken(accumulated);
617075
617151
  }
617076
617152
  }
617077
- } catch {
617153
+ } catch (err) {
617154
+ streamError = err;
617078
617155
  accumulated = "";
617079
617156
  }
617080
617157
  }
617081
617158
  if (!accumulated.trim()) {
617082
- const result = await backend.chatCompletion(request);
617159
+ let result;
617160
+ try {
617161
+ result = await backend.chatCompletion(request);
617162
+ } catch (err) {
617163
+ if (streamError) {
617164
+ const streamMsg = streamError instanceof Error ? streamError.message : String(streamError);
617165
+ const retryMsg = err instanceof Error ? err.message : String(err);
617166
+ throw new Error(`streaming failed (${streamMsg}); non-stream retry failed (${retryMsg})`);
617167
+ }
617168
+ throw err;
617169
+ }
617083
617170
  accumulated = result.choices[0]?.message?.content ?? "";
617084
617171
  if (accumulated) await onToken(accumulated);
617085
617172
  }
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.81",
3
+ "version": "1.0.82",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "omnius",
9
- "version": "1.0.81",
9
+ "version": "1.0.82",
10
10
  "bundleDependencies": [
11
11
  "image-to-ascii"
12
12
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.81",
3
+ "version": "1.0.82",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",