omnius 1.0.120 → 1.0.121
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +101 -23
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -553442,14 +553442,29 @@ ${description}`
|
|
|
553442
553442
|
poolSlot.release(success);
|
|
553443
553443
|
poolSlot = null;
|
|
553444
553444
|
};
|
|
553445
|
+
const streamTimeoutMs = Number.isFinite(request.timeoutMs) && request.timeoutMs > 0 ? Math.max(request.timeoutMs, 1e4) : 3e5;
|
|
553446
|
+
const streamAbort = new AbortController();
|
|
553447
|
+
const streamTimeoutHandle = setTimeout(() => {
|
|
553448
|
+
streamAbort.abort(new Error(`stream timeout: no response or chunk within ${(streamTimeoutMs / 1e3).toFixed(0)}s`));
|
|
553449
|
+
}, streamTimeoutMs);
|
|
553450
|
+
if (typeof streamTimeoutHandle.unref === "function") {
|
|
553451
|
+
streamTimeoutHandle.unref();
|
|
553452
|
+
}
|
|
553453
|
+
const externalAbortListener = this._abortSignal ? () => streamAbort.abort(this._abortSignal?.reason ?? new Error("external abort")) : null;
|
|
553454
|
+
if (this._abortSignal && externalAbortListener) {
|
|
553455
|
+
if (this._abortSignal.aborted) {
|
|
553456
|
+
externalAbortListener();
|
|
553457
|
+
} else {
|
|
553458
|
+
this._abortSignal.addEventListener("abort", externalAbortListener, { once: true });
|
|
553459
|
+
}
|
|
553460
|
+
}
|
|
553445
553461
|
try {
|
|
553446
553462
|
const streamFetchOpts = {
|
|
553447
553463
|
method: "POST",
|
|
553448
553464
|
headers: this.authHeaders(),
|
|
553449
|
-
body: JSON.stringify(body)
|
|
553465
|
+
body: JSON.stringify(body),
|
|
553466
|
+
signal: streamAbort.signal
|
|
553450
553467
|
};
|
|
553451
|
-
if (this._abortSignal)
|
|
553452
|
-
streamFetchOpts.signal = this._abortSignal;
|
|
553453
553468
|
let resp = await fetch(`${requestBaseUrl}/v1/chat/completions`, streamFetchOpts);
|
|
553454
553469
|
if (!resp.ok) {
|
|
553455
553470
|
const text = await resp.text().catch(() => "");
|
|
@@ -553548,6 +553563,13 @@ ${description}`
|
|
|
553548
553563
|
this._finalizeStreamGuard(effectiveThink, accumulatedContent, accumulatedThinking, sawReasoningTokens);
|
|
553549
553564
|
poolSuccess = true;
|
|
553550
553565
|
} finally {
|
|
553566
|
+
clearTimeout(streamTimeoutHandle);
|
|
553567
|
+
if (this._abortSignal && externalAbortListener) {
|
|
553568
|
+
try {
|
|
553569
|
+
this._abortSignal.removeEventListener("abort", externalAbortListener);
|
|
553570
|
+
} catch {
|
|
553571
|
+
}
|
|
553572
|
+
}
|
|
553551
553573
|
releasePoolSlot(poolSuccess);
|
|
553552
553574
|
}
|
|
553553
553575
|
}
|
|
@@ -619882,24 +619904,57 @@ ${lines.join("\n")}`);
|
|
|
619882
619904
|
`inference ${inferenceId} [${entry.kind}] ${elapsed}s content=${entry.contentTokens}t thinking=${entry.thinkingTokens}t (${thinkRatio}% think) live=${JSON.stringify(preview)}`
|
|
619883
619905
|
));
|
|
619884
619906
|
};
|
|
619885
|
-
|
|
619886
|
-
|
|
619887
|
-
|
|
619888
|
-
|
|
619889
|
-
|
|
619890
|
-
|
|
619891
|
-
|
|
619892
|
-
|
|
619893
|
-
|
|
619894
|
-
|
|
619895
|
-
|
|
619896
|
-
|
|
619897
|
-
|
|
619898
|
-
|
|
619899
|
-
|
|
619900
|
-
|
|
619901
|
-
|
|
619902
|
-
|
|
619907
|
+
const inactivityMs = this.telegramStreamInactivityMs();
|
|
619908
|
+
const iter = streamFn(request)[Symbol.asyncIterator]();
|
|
619909
|
+
try {
|
|
619910
|
+
while (true) {
|
|
619911
|
+
let timeoutHandle = null;
|
|
619912
|
+
const inactivityPromise = new Promise((_, reject) => {
|
|
619913
|
+
timeoutHandle = setTimeout(
|
|
619914
|
+
() => reject(new Error(
|
|
619915
|
+
`stream-inactivity: no chunks for ${(inactivityMs / 1e3).toFixed(0)}s (content=${contentBuf.length}c thinking=${thinkingBuf.length}c so far) — Ollama likely cold-loading the model or wedged; falling back to non-stream`
|
|
619916
|
+
)),
|
|
619917
|
+
inactivityMs
|
|
619918
|
+
);
|
|
619919
|
+
if (typeof timeoutHandle.unref === "function") {
|
|
619920
|
+
timeoutHandle.unref();
|
|
619921
|
+
}
|
|
619922
|
+
});
|
|
619923
|
+
let next;
|
|
619924
|
+
try {
|
|
619925
|
+
next = await Promise.race([iter.next(), inactivityPromise]);
|
|
619926
|
+
} finally {
|
|
619927
|
+
if (timeoutHandle) clearTimeout(timeoutHandle);
|
|
619928
|
+
}
|
|
619929
|
+
if (next.done) break;
|
|
619930
|
+
const chunk = next.value;
|
|
619931
|
+
if (chunk.type === "content" && chunk.content) {
|
|
619932
|
+
const entry = this.telegramActiveInferences.get(inferenceId);
|
|
619933
|
+
if (entry && entry.firstChunkAt === void 0) {
|
|
619934
|
+
entry.firstChunkAt = performance.now();
|
|
619935
|
+
}
|
|
619936
|
+
if (chunk.thinking) {
|
|
619937
|
+
thinkingBuf += chunk.content;
|
|
619938
|
+
this.bumpTelegramInferenceTokens(inferenceId, 0, 1);
|
|
619939
|
+
} else {
|
|
619940
|
+
contentBuf += chunk.content;
|
|
619941
|
+
this.bumpTelegramInferenceTokens(inferenceId, 1, 0);
|
|
619942
|
+
}
|
|
619943
|
+
flushPreview(false);
|
|
619944
|
+
} else if (chunk.type === "finish") {
|
|
619945
|
+
finishReason = chunk.finishReason;
|
|
619946
|
+
} else if (chunk.type === "usage") {
|
|
619947
|
+
usage = {
|
|
619948
|
+
prompt_tokens: chunk.promptTokens,
|
|
619949
|
+
completion_tokens: chunk.completionTokens,
|
|
619950
|
+
total_tokens: chunk.totalTokens
|
|
619951
|
+
};
|
|
619952
|
+
}
|
|
619953
|
+
}
|
|
619954
|
+
} finally {
|
|
619955
|
+
try {
|
|
619956
|
+
await iter.return?.(void 0);
|
|
619957
|
+
} catch {
|
|
619903
619958
|
}
|
|
619904
619959
|
}
|
|
619905
619960
|
flushPreview(true);
|
|
@@ -619970,9 +620025,10 @@ ${lines.join("\n")}`);
|
|
|
619970
620025
|
const dur = ((performance.now() - entry.startTs) / 1e3).toFixed(1);
|
|
619971
620026
|
const totalTokens = entry.contentTokens + entry.thinkingTokens;
|
|
619972
620027
|
const ratio = totalTokens > 0 ? Math.round(entry.thinkingTokens * 100 / totalTokens) : 0;
|
|
620028
|
+
const ttfb = entry.firstChunkAt !== void 0 ? `${((entry.firstChunkAt - entry.startTs) / 1e3).toFixed(1)}s` : "never";
|
|
619973
620029
|
this.tuiWrite(() => renderTelegramSubAgentEvent(
|
|
619974
620030
|
entry.sessionKey,
|
|
619975
|
-
`inference ${id} [${entry.kind}] done in ${dur}s — ${entry.contentTokens}t content / ${entry.thinkingTokens}t thinking (${ratio}% think)`
|
|
620031
|
+
`inference ${id} [${entry.kind}] done in ${dur}s (ttfb=${ttfb}) — ${entry.contentTokens}t content / ${entry.thinkingTokens}t thinking (${ratio}% think)`
|
|
619976
620032
|
));
|
|
619977
620033
|
}
|
|
619978
620034
|
}
|
|
@@ -619988,7 +620044,10 @@ ${lines.join("\n")}`);
|
|
|
619988
620044
|
return Array.from(this.telegramActiveInferences.values()).map((e2) => ({
|
|
619989
620045
|
...e2,
|
|
619990
620046
|
elapsedSec: (now - e2.startTs) / 1e3,
|
|
619991
|
-
idleSec: (now - e2.lastTokenAt) / 1e3
|
|
620047
|
+
idleSec: (now - e2.lastTokenAt) / 1e3,
|
|
620048
|
+
// Undefined when no chunk has arrived yet (still cold-loading or wedged).
|
|
620049
|
+
// A dashboard renderer should display "—" or "waiting" in that case.
|
|
620050
|
+
ttfbSec: e2.firstChunkAt !== void 0 ? (e2.firstChunkAt - e2.startTs) / 1e3 : void 0
|
|
619992
620051
|
}));
|
|
619993
620052
|
}
|
|
619994
620053
|
/**
|
|
@@ -620260,6 +620319,25 @@ ${retryText}`,
|
|
|
620260
620319
|
telegramSubAgentWatchdogIntervalMs() {
|
|
620261
620320
|
return 3e4;
|
|
620262
620321
|
}
|
|
620322
|
+
/**
|
|
620323
|
+
* Per-chunk inactivity window for the bridge's stream consumer. If no
|
|
620324
|
+
* chunk arrives within this window, the streaming consumer in
|
|
620325
|
+
* streamTelegramInferenceToCompletion aborts via Promise.race + clears
|
|
620326
|
+
* the iterator, and telegramObservableInference falls back to the
|
|
620327
|
+
* non-streaming chatCompletion path. This gives operators a clean
|
|
620328
|
+
* "stream silent for 60s, falling back" signal instead of the opaque
|
|
620329
|
+
* 180s coalescer hard-deadline.
|
|
620330
|
+
*
|
|
620331
|
+
* Default 60s — comfortably longer than a healthy cold-load of a 35B
|
|
620332
|
+
* model on a warm VRAM cache (typically <30s) but short enough to
|
|
620333
|
+
* surface a real wedge before the 180s coalescer fires. Override via
|
|
620334
|
+
* OMNIUS_TG_STREAM_INACTIVITY_MS (clamped to [10s, 5min]).
|
|
620335
|
+
*/
|
|
620336
|
+
telegramStreamInactivityMs() {
|
|
620337
|
+
const raw = Number.parseInt(process.env["OMNIUS_TG_STREAM_INACTIVITY_MS"] ?? "", 10);
|
|
620338
|
+
if (Number.isFinite(raw) && raw >= 1e4 && raw <= 3e5) return raw;
|
|
620339
|
+
return 6e4;
|
|
620340
|
+
}
|
|
620263
620341
|
/**
|
|
620264
620342
|
* Start the periodic stale-sub-agent reaper. Idempotent — safe to call
|
|
620265
620343
|
* multiple times (no-op if already running). Stopped by stop() and on
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "omnius",
|
|
3
|
-
"version": "1.0.120",
|
|
3
|
+
"version": "1.0.121",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "omnius",
|
|
9
|
-
"version": "1.0.120",
|
|
9
|
+
"version": "1.0.121",
|
|
10
10
|
"bundleDependencies": [
|
|
11
11
|
"image-to-ascii"
|
|
12
12
|
],
|
package/package.json
CHANGED