omnius 1.0.81 → 1.0.82
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +183 -96
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -539007,6 +539007,18 @@ function injectNoThinkDirective(messages2) {
|
|
|
539007
539007
|
/no_think`;
|
|
539008
539008
|
return messages2.map((m2, i2) => i2 === lastUserIdx ? { ...m2, content: annotated } : m2);
|
|
539009
539009
|
}
|
|
539010
|
+
/**
 * Build a short description of a failed backend HTTP response body for use
 * in an error message.
 *
 * A body that starts (after leading whitespace) with `<!` or `<html` is
 * treated as an HTML error page and replaced with a fixed hint instead of
 * being echoed. Any other body is truncated to its first 200 characters.
 *
 * @param {string} text - Raw response body; may be empty.
 * @returns {string} The fixed HTML hint, or at most 200 characters of `text`.
 */
function backendHttpErrorDetail(text) {
  // Coerce defensively so a missing body cannot throw while reporting an error.
  const body = typeof text === "string" ? text : String(text ?? "");
  // HTML tag names are case-insensitive, so compare a lower-cased prefix;
  // "<html" is five characters, which is all that needs inspecting.
  const head = body.trimStart().slice(0, 5).toLowerCase();
  if (head.startsWith("<!") || head.startsWith("<html")) {
    return `(received HTML error page — backend may be behind a proxy/CDN that is timing out)`;
  }
  // Non-HTML bodies are returned untrimmed, capped at 200 characters.
  return body.slice(0, 200);
}
|
|
539015
|
+
/**
 * Decide whether a failed HTTP response indicates that the requested model
 * was not found.
 *
 * Only status 404 qualifies. The body is matched case-insensitively and
 * counts as "model not found" when it either
 *   - contains "not_found_error", or
 *   - contains "not found" together with the word "model" or the (non-empty)
 *     requested model name.
 *
 * @param {number} status - HTTP status code of the response.
 * @param {string} text - Response body text.
 * @param {string} model - Model name that was requested.
 * @returns {boolean} True when the response looks like a model-not-found error.
 */
function isOllamaModelNotFoundResponse(status, text, model) {
  if (status !== 404) {
    return false;
  }
  // Coerce nullish inputs to "" so this check, which runs on an error path,
  // never throws and hides the original HTTP failure.
  const lower = String(text ?? "").toLowerCase();
  const modelLower = String(model ?? "").toLowerCase();
  if (lower.includes("not_found_error")) {
    return true;
  }
  if (!lower.includes("not found")) {
    return false;
  }
  return lower.includes("model") || (modelLower.length > 0 && lower.includes(modelLower));
}
|
|
539010
539022
|
function computeEffectiveThink(params) {
|
|
539011
539023
|
if (process.env["OMNIUS_FORCE_NO_THINK"] === "1")
|
|
539012
539024
|
return false;
|
|
@@ -551452,11 +551464,17 @@ ${description}`
|
|
|
551452
551464
|
if (responseFormat !== void 0) {
|
|
551453
551465
|
body["response_format"] = responseFormat;
|
|
551454
551466
|
}
|
|
551455
|
-
|
|
551467
|
+
let poolSlot = shouldUseOllamaPoolForBaseUrl(this.baseUrl) ? await getOllamaPool({ baseInstanceUrl: this.baseUrl }).acquire({
|
|
551456
551468
|
model: this.model
|
|
551457
551469
|
}) : null;
|
|
551458
|
-
|
|
551470
|
+
let requestBaseUrl = poolSlot?.baseUrl ?? this.baseUrl;
|
|
551459
551471
|
let poolSuccess = false;
|
|
551472
|
+
const releasePoolSlot = (success) => {
|
|
551473
|
+
if (!poolSlot)
|
|
551474
|
+
return;
|
|
551475
|
+
poolSlot.release(success);
|
|
551476
|
+
poolSlot = null;
|
|
551477
|
+
};
|
|
551460
551478
|
const combineAbortSignals = (signals) => {
|
|
551461
551479
|
const filtered = signals.filter((s2) => s2 instanceof AbortSignal);
|
|
551462
551480
|
if (filtered.length === 0)
|
|
@@ -551491,11 +551509,26 @@ ${description}`
|
|
|
551491
551509
|
};
|
|
551492
551510
|
if (combinedAbortSignal)
|
|
551493
551511
|
fetchOpts.signal = combinedAbortSignal;
|
|
551494
|
-
|
|
551512
|
+
let resp = await fetch(`${requestBaseUrl}/v1/chat/completions`, fetchOpts);
|
|
551495
551513
|
if (!resp.ok) {
|
|
551496
551514
|
const text = await resp.text().catch(() => "");
|
|
551497
|
-
|
|
551498
|
-
|
|
551515
|
+
if (poolSlot?.poolOwned && isOllamaModelNotFoundResponse(resp.status, text, this.model)) {
|
|
551516
|
+
releasePoolSlot(false);
|
|
551517
|
+
requestBaseUrl = this.baseUrl;
|
|
551518
|
+
resp = await fetch(`${requestBaseUrl}/v1/chat/completions`, fetchOpts);
|
|
551519
|
+
if (resp.ok) {
|
|
551520
|
+
} else {
|
|
551521
|
+
const retryText = await resp.text().catch(() => "");
|
|
551522
|
+
throw new Error(`Backend HTTP ${resp.status}: ${backendHttpErrorDetail(retryText)}`);
|
|
551523
|
+
}
|
|
551524
|
+
} else {
|
|
551525
|
+
const detail = backendHttpErrorDetail(text);
|
|
551526
|
+
throw new Error(`Backend HTTP ${resp.status}: ${detail}`);
|
|
551527
|
+
}
|
|
551528
|
+
}
|
|
551529
|
+
if (!resp.ok) {
|
|
551530
|
+
const text = await resp.text().catch(() => "");
|
|
551531
|
+
const detail = backendHttpErrorDetail(text);
|
|
551499
551532
|
throw new Error(`Backend HTTP ${resp.status}: ${detail}`);
|
|
551500
551533
|
}
|
|
551501
551534
|
const data = await resp.json();
|
|
@@ -551577,7 +551610,7 @@ ${description}`
|
|
|
551577
551610
|
} : void 0
|
|
551578
551611
|
};
|
|
551579
551612
|
} finally {
|
|
551580
|
-
|
|
551613
|
+
releasePoolSlot(poolSuccess);
|
|
551581
551614
|
}
|
|
551582
551615
|
}
|
|
551583
551616
|
/** Anthropic Messages API translation — converts our standard format to/from Anthropic's. */
|
|
@@ -551686,8 +551719,8 @@ ${description}`
|
|
|
551686
551719
|
}
|
|
551687
551720
|
/**
|
|
551688
551721
|
* SSE streaming variant — yields StreamChunks as tokens arrive.
|
|
551689
|
-
* Uses `stream: true
|
|
551690
|
-
*
|
|
551722
|
+
* Uses `stream: true`, the current thinking setting, and the same
|
|
551723
|
+
* Ollama pool routing as non-stream completions.
|
|
551691
551724
|
*/
|
|
551692
551725
|
async *chatCompletionStream(request) {
|
|
551693
551726
|
const cleanedMessages = normalizeMessagesForStrictOpenAI(request.messages.map((m2) => m2.role === "assistant" && typeof m2.content === "string" ? { ...m2, content: stripThinkBlocks(m2.content) } : m2));
|
|
@@ -551715,100 +551748,125 @@ ${description}`
|
|
|
551715
551748
|
stream_options: { include_usage: true },
|
|
551716
551749
|
think: effectiveThink
|
|
551717
551750
|
};
|
|
551718
|
-
|
|
551719
|
-
|
|
551720
|
-
|
|
551721
|
-
|
|
551751
|
+
let poolSlot = shouldUseOllamaPoolForBaseUrl(this.baseUrl) ? await getOllamaPool({ baseInstanceUrl: this.baseUrl }).acquire({
|
|
551752
|
+
model: this.model
|
|
551753
|
+
}) : null;
|
|
551754
|
+
let requestBaseUrl = poolSlot?.baseUrl ?? this.baseUrl;
|
|
551755
|
+
let poolSuccess = false;
|
|
551756
|
+
const releasePoolSlot = (success) => {
|
|
551757
|
+
if (!poolSlot)
|
|
551758
|
+
return;
|
|
551759
|
+
poolSlot.release(success);
|
|
551760
|
+
poolSlot = null;
|
|
551722
551761
|
};
|
|
551723
|
-
|
|
551724
|
-
streamFetchOpts
|
|
551725
|
-
|
|
551726
|
-
|
|
551727
|
-
|
|
551728
|
-
|
|
551729
|
-
|
|
551730
|
-
|
|
551731
|
-
|
|
551732
|
-
|
|
551733
|
-
|
|
551734
|
-
|
|
551735
|
-
|
|
551736
|
-
|
|
551737
|
-
|
|
551738
|
-
|
|
551739
|
-
|
|
551740
|
-
|
|
551741
|
-
|
|
551742
|
-
|
|
551743
|
-
|
|
551744
|
-
continue;
|
|
551745
|
-
if (line === "data: [DONE]") {
|
|
551746
|
-
this._finalizeStreamGuard(effectiveThink, accumulatedContent, accumulatedThinking, sawReasoningTokens);
|
|
551747
|
-
return;
|
|
551762
|
+
try {
|
|
551763
|
+
const streamFetchOpts = {
|
|
551764
|
+
method: "POST",
|
|
551765
|
+
headers: this.authHeaders(),
|
|
551766
|
+
body: JSON.stringify(body)
|
|
551767
|
+
};
|
|
551768
|
+
if (this._abortSignal)
|
|
551769
|
+
streamFetchOpts.signal = this._abortSignal;
|
|
551770
|
+
let resp = await fetch(`${requestBaseUrl}/v1/chat/completions`, streamFetchOpts);
|
|
551771
|
+
if (!resp.ok) {
|
|
551772
|
+
const text = await resp.text().catch(() => "");
|
|
551773
|
+
if (poolSlot?.poolOwned && isOllamaModelNotFoundResponse(resp.status, text, this.model)) {
|
|
551774
|
+
releasePoolSlot(false);
|
|
551775
|
+
requestBaseUrl = this.baseUrl;
|
|
551776
|
+
resp = await fetch(`${requestBaseUrl}/v1/chat/completions`, streamFetchOpts);
|
|
551777
|
+
if (!resp.ok) {
|
|
551778
|
+
const retryText = await resp.text().catch(() => "");
|
|
551779
|
+
throw new Error(`Backend HTTP ${resp.status}: ${backendHttpErrorDetail(retryText)}`);
|
|
551780
|
+
}
|
|
551781
|
+
} else {
|
|
551782
|
+
throw new Error(`Backend HTTP ${resp.status}: ${backendHttpErrorDetail(text)}`);
|
|
551748
551783
|
}
|
|
551749
|
-
|
|
551750
|
-
|
|
551751
|
-
|
|
551752
|
-
|
|
551753
|
-
|
|
551754
|
-
|
|
551755
|
-
|
|
551756
|
-
|
|
551757
|
-
|
|
551758
|
-
|
|
551759
|
-
|
|
551760
|
-
|
|
551761
|
-
|
|
551762
|
-
|
|
551763
|
-
|
|
551784
|
+
}
|
|
551785
|
+
let sseBuffer = "";
|
|
551786
|
+
const decoder = new TextDecoder();
|
|
551787
|
+
let accumulatedContent = "";
|
|
551788
|
+
let accumulatedThinking = "";
|
|
551789
|
+
let sawReasoningTokens = false;
|
|
551790
|
+
for await (const rawChunk of resp.body) {
|
|
551791
|
+
sseBuffer += decoder.decode(rawChunk, { stream: true });
|
|
551792
|
+
const parts = sseBuffer.split("\n\n");
|
|
551793
|
+
sseBuffer = parts.pop();
|
|
551794
|
+
for (const part of parts) {
|
|
551795
|
+
const line = part.trim();
|
|
551796
|
+
if (!line)
|
|
551797
|
+
continue;
|
|
551798
|
+
if (line === "data: [DONE]") {
|
|
551799
|
+
this._finalizeStreamGuard(effectiveThink, accumulatedContent, accumulatedThinking, sawReasoningTokens);
|
|
551800
|
+
poolSuccess = true;
|
|
551801
|
+
return;
|
|
551764
551802
|
}
|
|
551765
|
-
|
|
551766
|
-
if (!choice)
|
|
551803
|
+
if (!line.startsWith("data: "))
|
|
551767
551804
|
continue;
|
|
551768
|
-
|
|
551769
|
-
|
|
551770
|
-
|
|
551771
|
-
|
|
551772
|
-
|
|
551773
|
-
accumulatedThinking += reasoningToken;
|
|
551774
|
-
yield { type: "content", content: reasoningToken, thinking: true };
|
|
551775
|
-
}
|
|
551776
|
-
if (delta?.content) {
|
|
551777
|
-
accumulatedContent += delta.content;
|
|
551778
|
-
yield { type: "content", content: delta.content };
|
|
551779
|
-
}
|
|
551780
|
-
const tcDeltas = delta?.tool_calls;
|
|
551781
|
-
if (tcDeltas) {
|
|
551782
|
-
for (const tcd of tcDeltas) {
|
|
551783
|
-
const fn = tcd.function;
|
|
551805
|
+
try {
|
|
551806
|
+
const data = JSON.parse(line.slice(6));
|
|
551807
|
+
const choices = data.choices ?? [];
|
|
551808
|
+
const chunkUsageEarly = data.usage;
|
|
551809
|
+
if (chunkUsageEarly) {
|
|
551784
551810
|
yield {
|
|
551785
|
-
type: "
|
|
551786
|
-
|
|
551787
|
-
|
|
551788
|
-
|
|
551789
|
-
|
|
551811
|
+
type: "usage",
|
|
551812
|
+
usage: {
|
|
551813
|
+
promptTokens: chunkUsageEarly.prompt_tokens ?? 0,
|
|
551814
|
+
completionTokens: chunkUsageEarly.completion_tokens ?? 0,
|
|
551815
|
+
totalTokens: chunkUsageEarly.total_tokens ?? 0
|
|
551816
|
+
}
|
|
551790
551817
|
};
|
|
551791
551818
|
}
|
|
551792
|
-
|
|
551793
|
-
|
|
551794
|
-
|
|
551795
|
-
|
|
551796
|
-
|
|
551797
|
-
|
|
551798
|
-
|
|
551799
|
-
|
|
551800
|
-
|
|
551819
|
+
const choice = choices[0];
|
|
551820
|
+
if (!choice)
|
|
551821
|
+
continue;
|
|
551822
|
+
const delta = choice.delta;
|
|
551823
|
+
const finishReason = choice.finish_reason;
|
|
551824
|
+
const reasoningToken = delta?.reasoning ?? delta?.reasoning_content;
|
|
551825
|
+
if (reasoningToken && effectiveThink) {
|
|
551826
|
+
sawReasoningTokens = true;
|
|
551827
|
+
accumulatedThinking += reasoningToken;
|
|
551828
|
+
yield { type: "content", content: reasoningToken, thinking: true };
|
|
551829
|
+
}
|
|
551830
|
+
if (delta?.content) {
|
|
551831
|
+
accumulatedContent += delta.content;
|
|
551832
|
+
yield { type: "content", content: delta.content };
|
|
551833
|
+
}
|
|
551834
|
+
const tcDeltas = delta?.tool_calls;
|
|
551835
|
+
if (tcDeltas) {
|
|
551836
|
+
for (const tcd of tcDeltas) {
|
|
551837
|
+
const fn = tcd.function;
|
|
551838
|
+
yield {
|
|
551839
|
+
type: "tool_call_delta",
|
|
551840
|
+
toolCallIndex: tcd.index ?? 0,
|
|
551841
|
+
toolCallId: tcd.id || void 0,
|
|
551842
|
+
toolCallName: fn?.name || void 0,
|
|
551843
|
+
toolCallArgs: fn?.arguments || void 0
|
|
551844
|
+
};
|
|
551801
551845
|
}
|
|
551802
|
-
}
|
|
551803
|
-
|
|
551804
|
-
|
|
551805
|
-
|
|
551846
|
+
}
|
|
551847
|
+
const chunkUsage = data.usage;
|
|
551848
|
+
if (chunkUsage) {
|
|
551849
|
+
yield {
|
|
551850
|
+
type: "usage",
|
|
551851
|
+
usage: {
|
|
551852
|
+
promptTokens: chunkUsage.prompt_tokens ?? 0,
|
|
551853
|
+
completionTokens: chunkUsage.completion_tokens ?? 0,
|
|
551854
|
+
totalTokens: chunkUsage.total_tokens ?? 0
|
|
551855
|
+
}
|
|
551856
|
+
};
|
|
551857
|
+
}
|
|
551858
|
+
if (finishReason) {
|
|
551859
|
+
yield { type: "finish", finishReason };
|
|
551860
|
+
}
|
|
551861
|
+
} catch {
|
|
551806
551862
|
}
|
|
551807
|
-
} catch {
|
|
551808
551863
|
}
|
|
551809
551864
|
}
|
|
551865
|
+
this._finalizeStreamGuard(effectiveThink, accumulatedContent, accumulatedThinking, sawReasoningTokens);
|
|
551866
|
+
poolSuccess = true;
|
|
551867
|
+
} finally {
|
|
551868
|
+
releasePoolSlot(poolSuccess);
|
|
551810
551869
|
}
|
|
551811
|
-
this._finalizeStreamGuard(effectiveThink, accumulatedContent, accumulatedThinking, sawReasoningTokens);
|
|
551812
551870
|
}
|
|
551813
551871
|
/** Reconstruct a raw-looking assistant response from the streamed
|
|
551814
551872
|
* parts, then feed it into the loop-guard. Used at stream end (both
|
|
@@ -610401,8 +610459,9 @@ function telegramDecisionRecoverableFlag(text) {
|
|
|
610401
610459
|
}
|
|
610402
610460
|
return void 0;
|
|
610403
610461
|
}
|
|
610404
|
-
function telegramRouterTimeoutMs(configTimeoutMs, minMs = 15e3,
|
|
610405
|
-
|
|
610462
|
+
/**
 * Resolve the timeout, in milliseconds, for a Telegram router request.
 *
 * A finite, positive `configTimeoutMs` is used as-is; anything else falls
 * back to 300000. The result is never lower than `minMs`.
 *
 * @param {number} [configTimeoutMs] - Configured timeout; ignored unless finite and > 0.
 * @param {number} [minMs=15000] - Lower bound applied to the result.
 * @param {number} [_legacyMaxMs] - Unused; kept so existing three-argument callers still work.
 * @returns {number} The effective timeout in milliseconds.
 */
function telegramRouterTimeoutMs(configTimeoutMs, minMs = 15e3, _legacyMaxMs) {
  // Number.isFinite is false for undefined/null/NaN, so no extra nullish guard is needed.
  const configured = Number.isFinite(configTimeoutMs) && configTimeoutMs > 0 ? configTimeoutMs : 3e5;
  // Math.max returns NaN if any argument is NaN; use the default floor in that
  // case so a bad minimum cannot turn the timeout into NaN.
  const requestedFloor = Number(minMs);
  const floor = Number.isNaN(requestedFloor) ? 15e3 : requestedFloor;
  return Math.max(configured, floor);
}
|
|
610407
610466
|
function parseTelegramInteractionDecision(text, forcedRoute, options2 = {}) {
|
|
610408
610467
|
for (const jsonText of telegramDecisionJsonCandidates(text)) {
|
|
@@ -610740,6 +610799,21 @@ function cleanTelegramVisibleReply(text, options2 = {}) {
|
|
|
610740
610799
|
if (!filtered) return "";
|
|
610741
610800
|
return dedupeTelegramVisibleReply(filtered);
|
|
610742
610801
|
}
|
|
610802
|
+
/**
 * Turn a raw inference error message into a short summary for a chat reply.
 *
 * Checks are applied in order:
 *   1. timeout / abort wording  -> fixed "timed out" summary
 *   2. model-not-found wording  -> fixed "model was not available" summary
 *   3. "Backend HTTP 5xx"       -> fixed "transient server error" summary
 *   4. anything else (including "Backend HTTP 4xx") -> the first 180
 *      characters of the trimmed message, or "unknown backend failure" when
 *      nothing is left.
 *
 * @param {string} message2 - Raw error message.
 * @returns {string} A non-empty summary.
 */
function summarizeTelegramInferenceError(message2) {
  // Coerce and trim so nullish or whitespace-only input reaches the
  // "unknown" fallback instead of throwing or producing a blank summary.
  const text = String(message2 ?? "").trim();
  if (/aborted due to timeout|aborterror|timed? out/i.test(text)) {
    return "backend inference timed out before a reply was delivered";
  }
  if (/model ['"]?[^'"]+['"]? not found|not_found_error/i.test(text)) {
    return "the configured model was not available on the selected Ollama runner";
  }
  if (/Backend HTTP 5\d\d/i.test(text)) {
    return "the backend returned a transient server error";
  }
  // 4xx messages are passed through truncated, exactly like any other
  // unrecognised message, so they need no branch of their own.
  return text.slice(0, 180) || "unknown backend failure";
}
|
|
610743
610817
|
function dedupeTelegramVisibleReply(text) {
|
|
610744
610818
|
const paragraphs = text.split(/\n{2,}/);
|
|
610745
610819
|
const seenParagraphs = /* @__PURE__ */ new Set();
|
|
@@ -616982,7 +617056,8 @@ Join: ${newUrl}`);
|
|
|
616982
617056
|
await this.editLiveMessage(msg.chatId, liveMessageId, `Error: ${escapeTelegramHTML(errMsg)}`).catch(() => {
|
|
616983
617057
|
});
|
|
616984
617058
|
} else {
|
|
616985
|
-
|
|
617059
|
+
const summary = summarizeTelegramInferenceError(errMsg);
|
|
617060
|
+
await this.replyToTelegramMessage(msg, `Sorry, quick chat inference failed: ${summary}.`).catch(() => {
|
|
616986
617061
|
});
|
|
616987
617062
|
}
|
|
616988
617063
|
} finally {
|
|
@@ -617060,10 +617135,11 @@ ${conversationStream}`
|
|
|
617060
617135
|
tools: [],
|
|
617061
617136
|
temperature: 0.4,
|
|
617062
617137
|
maxTokens: 700,
|
|
617063
|
-
timeoutMs: Math.
|
|
617138
|
+
timeoutMs: Math.max(config.timeoutMs ?? 3e5, 6e4),
|
|
617064
617139
|
think: false
|
|
617065
617140
|
};
|
|
617066
617141
|
let accumulated = "";
|
|
617142
|
+
let streamError;
|
|
617067
617143
|
const streamable = backend;
|
|
617068
617144
|
const stream = typeof streamable.chatCompletionStream === "function" ? streamable.chatCompletionStream(request) : null;
|
|
617069
617145
|
if (stream && typeof stream[Symbol.asyncIterator] === "function") {
|
|
@@ -617074,12 +617150,23 @@ ${conversationStream}`
|
|
|
617074
617150
|
await onToken(accumulated);
|
|
617075
617151
|
}
|
|
617076
617152
|
}
|
|
617077
|
-
} catch {
|
|
617153
|
+
} catch (err) {
|
|
617154
|
+
streamError = err;
|
|
617078
617155
|
accumulated = "";
|
|
617079
617156
|
}
|
|
617080
617157
|
}
|
|
617081
617158
|
if (!accumulated.trim()) {
|
|
617082
|
-
|
|
617159
|
+
let result;
|
|
617160
|
+
try {
|
|
617161
|
+
result = await backend.chatCompletion(request);
|
|
617162
|
+
} catch (err) {
|
|
617163
|
+
if (streamError) {
|
|
617164
|
+
const streamMsg = streamError instanceof Error ? streamError.message : String(streamError);
|
|
617165
|
+
const retryMsg = err instanceof Error ? err.message : String(err);
|
|
617166
|
+
throw new Error(`streaming failed (${streamMsg}); non-stream retry failed (${retryMsg})`);
|
|
617167
|
+
}
|
|
617168
|
+
throw err;
|
|
617169
|
+
}
|
|
617083
617170
|
accumulated = result.choices[0]?.message?.content ?? "";
|
|
617084
617171
|
if (accumulated) await onToken(accumulated);
|
|
617085
617172
|
}
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "omnius",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.82",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "omnius",
|
|
9
|
-
"version": "1.0.
|
|
9
|
+
"version": "1.0.82",
|
|
10
10
|
"bundleDependencies": [
|
|
11
11
|
"image-to-ascii"
|
|
12
12
|
],
|
package/package.json
CHANGED