omnius 1.0.120 → 1.0.121

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -553442,14 +553442,29 @@ ${description}`
553442
553442
  poolSlot.release(success);
553443
553443
  poolSlot = null;
553444
553444
  };
553445
+ const streamTimeoutMs = Number.isFinite(request.timeoutMs) && request.timeoutMs > 0 ? Math.max(request.timeoutMs, 1e4) : 3e5;
553446
+ const streamAbort = new AbortController();
553447
+ const streamTimeoutHandle = setTimeout(() => {
553448
+ streamAbort.abort(new Error(`stream timeout: no response or chunk within ${(streamTimeoutMs / 1e3).toFixed(0)}s`));
553449
+ }, streamTimeoutMs);
553450
+ if (typeof streamTimeoutHandle.unref === "function") {
553451
+ streamTimeoutHandle.unref();
553452
+ }
553453
+ const externalAbortListener = this._abortSignal ? () => streamAbort.abort(this._abortSignal?.reason ?? new Error("external abort")) : null;
553454
+ if (this._abortSignal && externalAbortListener) {
553455
+ if (this._abortSignal.aborted) {
553456
+ externalAbortListener();
553457
+ } else {
553458
+ this._abortSignal.addEventListener("abort", externalAbortListener, { once: true });
553459
+ }
553460
+ }
553445
553461
  try {
553446
553462
  const streamFetchOpts = {
553447
553463
  method: "POST",
553448
553464
  headers: this.authHeaders(),
553449
- body: JSON.stringify(body)
553465
+ body: JSON.stringify(body),
553466
+ signal: streamAbort.signal
553450
553467
  };
553451
- if (this._abortSignal)
553452
- streamFetchOpts.signal = this._abortSignal;
553453
553468
  let resp = await fetch(`${requestBaseUrl}/v1/chat/completions`, streamFetchOpts);
553454
553469
  if (!resp.ok) {
553455
553470
  const text = await resp.text().catch(() => "");
@@ -553548,6 +553563,13 @@ ${description}`
553548
553563
  this._finalizeStreamGuard(effectiveThink, accumulatedContent, accumulatedThinking, sawReasoningTokens);
553549
553564
  poolSuccess = true;
553550
553565
  } finally {
553566
+ clearTimeout(streamTimeoutHandle);
553567
+ if (this._abortSignal && externalAbortListener) {
553568
+ try {
553569
+ this._abortSignal.removeEventListener("abort", externalAbortListener);
553570
+ } catch {
553571
+ }
553572
+ }
553551
553573
  releasePoolSlot(poolSuccess);
553552
553574
  }
553553
553575
  }
@@ -619882,24 +619904,57 @@ ${lines.join("\n")}`);
619882
619904
  `inference ${inferenceId} [${entry.kind}] ${elapsed}s content=${entry.contentTokens}t thinking=${entry.thinkingTokens}t (${thinkRatio}% think) live=${JSON.stringify(preview)}`
619883
619905
  ));
619884
619906
  };
619885
- for await (const chunk of streamFn(request)) {
619886
- if (chunk.type === "content" && chunk.content) {
619887
- if (chunk.thinking) {
619888
- thinkingBuf += chunk.content;
619889
- this.bumpTelegramInferenceTokens(inferenceId, 0, 1);
619890
- } else {
619891
- contentBuf += chunk.content;
619892
- this.bumpTelegramInferenceTokens(inferenceId, 1, 0);
619893
- }
619894
- flushPreview(false);
619895
- } else if (chunk.type === "finish") {
619896
- finishReason = chunk.finishReason;
619897
- } else if (chunk.type === "usage") {
619898
- usage = {
619899
- prompt_tokens: chunk.promptTokens,
619900
- completion_tokens: chunk.completionTokens,
619901
- total_tokens: chunk.totalTokens
619902
- };
619907
+ const inactivityMs = this.telegramStreamInactivityMs();
619908
+ const iter = streamFn(request)[Symbol.asyncIterator]();
619909
+ try {
619910
+ while (true) {
619911
+ let timeoutHandle = null;
619912
+ const inactivityPromise = new Promise((_, reject) => {
619913
+ timeoutHandle = setTimeout(
619914
+ () => reject(new Error(
619915
+ `stream-inactivity: no chunks for ${(inactivityMs / 1e3).toFixed(0)}s (content=${contentBuf.length}c thinking=${thinkingBuf.length}c so far) — Ollama likely cold-loading the model or wedged; falling back to non-stream`
619916
+ )),
619917
+ inactivityMs
619918
+ );
619919
+ if (typeof timeoutHandle.unref === "function") {
619920
+ timeoutHandle.unref();
619921
+ }
619922
+ });
619923
+ let next;
619924
+ try {
619925
+ next = await Promise.race([iter.next(), inactivityPromise]);
619926
+ } finally {
619927
+ if (timeoutHandle) clearTimeout(timeoutHandle);
619928
+ }
619929
+ if (next.done) break;
619930
+ const chunk = next.value;
619931
+ if (chunk.type === "content" && chunk.content) {
619932
+ const entry = this.telegramActiveInferences.get(inferenceId);
619933
+ if (entry && entry.firstChunkAt === void 0) {
619934
+ entry.firstChunkAt = performance.now();
619935
+ }
619936
+ if (chunk.thinking) {
619937
+ thinkingBuf += chunk.content;
619938
+ this.bumpTelegramInferenceTokens(inferenceId, 0, 1);
619939
+ } else {
619940
+ contentBuf += chunk.content;
619941
+ this.bumpTelegramInferenceTokens(inferenceId, 1, 0);
619942
+ }
619943
+ flushPreview(false);
619944
+ } else if (chunk.type === "finish") {
619945
+ finishReason = chunk.finishReason;
619946
+ } else if (chunk.type === "usage") {
619947
+ usage = {
619948
+ prompt_tokens: chunk.promptTokens,
619949
+ completion_tokens: chunk.completionTokens,
619950
+ total_tokens: chunk.totalTokens
619951
+ };
619952
+ }
619953
+ }
619954
+ } finally {
619955
+ try {
619956
+ await iter.return?.(void 0);
619957
+ } catch {
619903
619958
  }
619904
619959
  }
619905
619960
  flushPreview(true);
@@ -619970,9 +620025,10 @@ ${lines.join("\n")}`);
619970
620025
  const dur = ((performance.now() - entry.startTs) / 1e3).toFixed(1);
619971
620026
  const totalTokens = entry.contentTokens + entry.thinkingTokens;
619972
620027
  const ratio = totalTokens > 0 ? Math.round(entry.thinkingTokens * 100 / totalTokens) : 0;
620028
+ const ttfb = entry.firstChunkAt !== void 0 ? `${((entry.firstChunkAt - entry.startTs) / 1e3).toFixed(1)}s` : "never";
619973
620029
  this.tuiWrite(() => renderTelegramSubAgentEvent(
619974
620030
  entry.sessionKey,
619975
- `inference ${id} [${entry.kind}] done in ${dur}s — ${entry.contentTokens}t content / ${entry.thinkingTokens}t thinking (${ratio}% think)`
620031
+ `inference ${id} [${entry.kind}] done in ${dur}s (ttfb=${ttfb}) — ${entry.contentTokens}t content / ${entry.thinkingTokens}t thinking (${ratio}% think)`
619976
620032
  ));
619977
620033
  }
619978
620034
  }
@@ -619988,7 +620044,10 @@ ${lines.join("\n")}`);
619988
620044
  return Array.from(this.telegramActiveInferences.values()).map((e2) => ({
619989
620045
  ...e2,
619990
620046
  elapsedSec: (now - e2.startTs) / 1e3,
619991
- idleSec: (now - e2.lastTokenAt) / 1e3
620047
+ idleSec: (now - e2.lastTokenAt) / 1e3,
620048
+ // Undefined when no chunk has arrived yet (still cold-loading or wedged).
620049
+ // A dashboard renderer should display "—" or "waiting" in that case.
620050
+ ttfbSec: e2.firstChunkAt !== void 0 ? (e2.firstChunkAt - e2.startTs) / 1e3 : void 0
619992
620051
  }));
619993
620052
  }
619994
620053
  /**
@@ -620260,6 +620319,25 @@ ${retryText}`,
620260
620319
  telegramSubAgentWatchdogIntervalMs() {
620261
620320
  return 3e4;
620262
620321
  }
620322
+ /**
620323
+ * Per-chunk inactivity window for the bridge's stream consumer. If no
620324
+ * chunk arrives within this window, the streaming consumer in
620325
+ * streamTelegramInferenceToCompletion aborts via Promise.race + clears
620326
+ * the iterator, and telegramObservableInference falls back to the
620327
+ * non-streaming chatCompletion path. This gives operators a clean
620328
+ * "stream silent for 60s, falling back" signal instead of the opaque
620329
+ * 180s coalescer hard-deadline.
620330
+ *
620331
+ * Default 60s — comfortably longer than a healthy cold-load of a 35B
620332
+ * model on a warm VRAM cache (typically <30s) but short enough to
620333
+ * surface a real wedge before the 180s coalescer fires. Override via
620334
+ * OMNIUS_TG_STREAM_INACTIVITY_MS (ms; values outside [10s, 5min] or unparsable are ignored and the 60s default is used).
620335
+ */
620336
+ telegramStreamInactivityMs() {
620337
+ const raw = Number.parseInt(process.env["OMNIUS_TG_STREAM_INACTIVITY_MS"] ?? "", 10);
620338
+ if (Number.isFinite(raw) && raw >= 1e4 && raw <= 3e5) return raw;
620339
+ return 6e4;
620340
+ }
620263
620341
  /**
620264
620342
  * Start the periodic stale-sub-agent reaper. Idempotent — safe to call
620265
620343
  * multiple times (no-op if already running). Stopped by stop() and on
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.120",
3
+ "version": "1.0.121",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "omnius",
9
- "version": "1.0.120",
9
+ "version": "1.0.121",
10
10
  "bundleDependencies": [
11
11
  "image-to-ascii"
12
12
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.120",
3
+ "version": "1.0.121",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",