omnius 1.0.114 → 1.0.116
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +465 -42
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -616349,6 +616349,34 @@ External acquisition contract:
|
|
|
616349
616349
|
telegramRouterSessionState = /* @__PURE__ */ new Map();
|
|
616350
616350
|
/** Telegram interaction routing profile */
|
|
616351
616351
|
interactionMode = "auto";
|
|
616352
|
+
/**
|
|
616353
|
+
* Toggle for surfacing qwen3 `<think>` content streamed by Telegram-side
|
|
616354
|
+
* inferences (router, chat fast-path, follow-up). Mirrors the main TUI's
|
|
616355
|
+
* Ctrl+O thinking-visibility toggle but applies to the bridge's stream
|
|
616356
|
+
* surface (which has its own write path through tuiWrite + view
|
|
616357
|
+
* callbacks). Default off; flip via env `OMNIUS_TG_SHOW_THINKING=1` or
|
|
616358
|
+
* setTelegramThinkingVisible(). Independent of the model-side
|
|
616359
|
+
* `think:false` directive — that controls whether the model emits
|
|
616360
|
+
* thinking content at all; this controls whether the operator sees it
|
|
616361
|
+
* when it IS emitted.
|
|
616362
|
+
*/
|
|
616363
|
+
telegramThinkingVisible = process.env["OMNIUS_TG_SHOW_THINKING"] === "1";
|
|
616364
|
+
/**
|
|
616365
|
+
* Live telemetry of every in-flight Ollama call originating from the
|
|
616366
|
+
* bridge. Lets the operator see WHY multiple GPUs are spun up at once
|
|
616367
|
+
* and HOW each call is progressing — which is the only way to debug a
|
|
616368
|
+
* 180s hard-deadline firing event without grepping logs.
|
|
616369
|
+
*
|
|
616370
|
+
* Each entry tracks:
|
|
616371
|
+
* - kind: router | chat-fast-path | followup | sub-agent
|
|
616372
|
+
* - sessionKey: which chat
|
|
616373
|
+
* - startTs: wall-clock start
|
|
616374
|
+
* - contentTokens / thinkingTokens: cumulative count from the stream
|
|
616375
|
+
* - lastTokenAt: timestamp of the most-recent chunk (staleness signal)
|
|
616376
|
+
* - model: the model being called (helps differentiate concurrent calls)
|
|
616377
|
+
*/
|
|
616378
|
+
telegramActiveInferences = /* @__PURE__ */ new Map();
|
|
616379
|
+
telegramInferenceCounter = 0;
|
|
616352
616380
|
/** Actual model context window discovered by the main TUI. */
|
|
616353
616381
|
contextWindowSize = 0;
|
|
616354
616382
|
_metricsProvider = null;
|
|
@@ -617982,16 +618010,21 @@ ${mediaContext}` : ""
|
|
|
617982
618010
|
this.agentConfig.model,
|
|
617983
618011
|
this.agentConfig.apiKey
|
|
617984
618012
|
);
|
|
617985
|
-
const result = await
|
|
617986
|
-
|
|
617987
|
-
|
|
617988
|
-
|
|
617989
|
-
|
|
617990
|
-
|
|
617991
|
-
|
|
617992
|
-
|
|
617993
|
-
|
|
617994
|
-
|
|
618013
|
+
const result = await this.telegramObservableInference(
|
|
618014
|
+
backend,
|
|
618015
|
+
telegramThinkSuppressedRequest({
|
|
618016
|
+
messages: [
|
|
618017
|
+
{ role: "system", content: "You are a Telegram public-follow-up discretion model. Output strict JSON only." },
|
|
618018
|
+
{ role: "user", content: prompt }
|
|
618019
|
+
],
|
|
618020
|
+
tools: [],
|
|
618021
|
+
temperature: 0.2,
|
|
618022
|
+
maxTokens: 300,
|
|
618023
|
+
timeoutMs: Math.min(Math.max(this.agentConfig.timeoutMs ?? 3e4, 5e3), 2e4)
|
|
618024
|
+
}),
|
|
618025
|
+
"followup",
|
|
618026
|
+
sessionKey
|
|
618027
|
+
);
|
|
617995
618028
|
const decision2 = parseTelegramReflectionFollowupDecision(result.choices[0]?.message?.content ?? "");
|
|
617996
618029
|
state.lastFollowupArtifactAt = artifact.generatedAt;
|
|
617997
618030
|
if (!decision2) {
|
|
@@ -619612,15 +619645,17 @@ ${lines.join("\n")}`);
|
|
|
619612
619645
|
nextAnalysisAfterMessages: decision2.nextCheckAfterMessages
|
|
619613
619646
|
});
|
|
619614
619647
|
}
|
|
619615
|
-
async telegramRouterJsonCompletion(backend, request, diagnostics) {
|
|
619648
|
+
async telegramRouterJsonCompletion(backend, request, diagnostics, inferenceKind = "router", sessionKey = "__router__") {
|
|
619616
619649
|
let jsonModeResult;
|
|
619617
619650
|
let jsonModeError;
|
|
619618
619651
|
const suppressed = telegramThinkSuppressedRequest(request);
|
|
619619
619652
|
try {
|
|
619620
|
-
jsonModeResult = await
|
|
619621
|
-
|
|
619622
|
-
responseFormat: TELEGRAM_INTERACTION_DECISION_RESPONSE_FORMAT
|
|
619623
|
-
|
|
619653
|
+
jsonModeResult = await this.telegramObservableInference(
|
|
619654
|
+
backend,
|
|
619655
|
+
{ ...suppressed, responseFormat: TELEGRAM_INTERACTION_DECISION_RESPONSE_FORMAT },
|
|
619656
|
+
inferenceKind,
|
|
619657
|
+
sessionKey
|
|
619658
|
+
);
|
|
619624
619659
|
const visible = jsonModeResult.choices.some(
|
|
619625
619660
|
(choice) => stripTelegramHiddenThinking(choice.message.content ?? "").trim().length > 0
|
|
619626
619661
|
);
|
|
@@ -619637,7 +619672,12 @@ ${lines.join("\n")}`);
|
|
|
619637
619672
|
}
|
|
619638
619673
|
}
|
|
619639
619674
|
try {
|
|
619640
|
-
const plainResult = await
|
|
619675
|
+
const plainResult = await this.telegramObservableInference(
|
|
619676
|
+
backend,
|
|
619677
|
+
suppressed,
|
|
619678
|
+
inferenceKind,
|
|
619679
|
+
sessionKey
|
|
619680
|
+
);
|
|
619641
619681
|
if (diagnostics) {
|
|
619642
619682
|
const plainVisible = plainResult.choices.some(
|
|
619643
619683
|
(choice) => stripTelegramHiddenThinking(choice.message.content ?? "").trim().length > 0
|
|
@@ -619654,6 +619694,205 @@ ${lines.join("\n")}`);
|
|
|
619654
619694
|
throw err;
|
|
619655
619695
|
}
|
|
619656
619696
|
}
|
|
619697
|
+
// ─────────────────────────────────────────────────────────────────
|
|
619698
|
+
// Observable inference — streams chatCompletion-shaped calls so the
|
|
619699
|
+
// operator can SEE what's happening during a long-running router or
|
|
619700
|
+
// chat-fast-path call instead of waiting 180s for a hard-deadline.
|
|
619701
|
+
// ─────────────────────────────────────────────────────────────────
|
|
619702
|
+
/**
|
|
619703
|
+
* Wrap a chatCompletion-shaped call so the bridge can observe its token
|
|
619704
|
+
* stream and surface telemetry. Falls back to non-streaming if the
|
|
619705
|
+
* backend doesn't expose chatCompletionStream (older test stubs) or if
|
|
619706
|
+
* streaming throws. The returned shape matches chatCompletion exactly,
|
|
619707
|
+
* so callers don't have to know whether streaming was used.
|
|
619708
|
+
*
|
|
619709
|
+
* What this gives us:
|
|
619710
|
+
* 1. Per-call entry in the active-inferences registry (visible to the
|
|
619711
|
+
* operator — answers "why are 2 GPUs spun up at once?")
|
|
619712
|
+
* 2. Live emission of thinking + content tokens to the TUI when
|
|
619713
|
+
* telegramThinkingVisible is true (mirror of Ctrl+O for the bridge)
|
|
619714
|
+
* 3. Wall-clock observability — if the call hangs at 60s with zero
|
|
619715
|
+
* content tokens emitted, the registry shows it, and the
|
|
619716
|
+
* hard-deadline retire path becomes diagnosable instead of opaque
|
|
619717
|
+
*/
|
|
619718
|
+
async telegramObservableInference(backend, request, kind, sessionKey) {
|
|
619719
|
+
const streamFn = backend.chatCompletionStream;
|
|
619720
|
+
const id = this.registerTelegramInference(kind, sessionKey, this.agentConfig?.model ?? "?");
|
|
619721
|
+
try {
|
|
619722
|
+
if (typeof streamFn !== "function") {
|
|
619723
|
+
const r2 = await backend.chatCompletion(request);
|
|
619724
|
+
this.updateTelegramInferenceFinal(id, r2);
|
|
619725
|
+
return r2;
|
|
619726
|
+
}
|
|
619727
|
+
try {
|
|
619728
|
+
const result = await this.streamTelegramInferenceToCompletion(
|
|
619729
|
+
streamFn.bind(backend),
|
|
619730
|
+
request,
|
|
619731
|
+
id
|
|
619732
|
+
);
|
|
619733
|
+
return result;
|
|
619734
|
+
} catch (streamErr) {
|
|
619735
|
+
const r2 = await backend.chatCompletion(request);
|
|
619736
|
+
this.updateTelegramInferenceFinal(id, r2);
|
|
619737
|
+
this.tuiWrite(() => renderTelegramSubAgentEvent(
|
|
619738
|
+
sessionKey,
|
|
619739
|
+
`inference ${id}: stream errored (${streamErr instanceof Error ? streamErr.message : String(streamErr)}); fell back to non-stream`
|
|
619740
|
+
));
|
|
619741
|
+
return r2;
|
|
619742
|
+
}
|
|
619743
|
+
} finally {
|
|
619744
|
+
this.deregisterTelegramInference(id);
|
|
619745
|
+
}
|
|
619746
|
+
}
|
|
619747
|
+
/**
|
|
619748
|
+
* Drive a chatCompletionStream to exhaustion, accumulating tokens into a
|
|
619749
|
+
* chatCompletion-shaped result. Live-emits content + thinking tokens
|
|
619750
|
+
* through the TUI when telegramThinkingVisible is true, throttled to
|
|
619751
|
+
* avoid spamming the waterfall on fast streams.
|
|
619752
|
+
*/
|
|
619753
|
+
async streamTelegramInferenceToCompletion(streamFn, request, inferenceId) {
|
|
619754
|
+
let contentBuf = "";
|
|
619755
|
+
let thinkingBuf = "";
|
|
619756
|
+
let finishReason;
|
|
619757
|
+
let usage;
|
|
619758
|
+
let lastEmitMs = 0;
|
|
619759
|
+
const EMIT_THROTTLE_MS = 500;
|
|
619760
|
+
const flushPreview = (force) => {
|
|
619761
|
+
if (!this.telegramThinkingVisible) return;
|
|
619762
|
+
const now = Date.now();
|
|
619763
|
+
if (!force && now - lastEmitMs < EMIT_THROTTLE_MS) return;
|
|
619764
|
+
lastEmitMs = now;
|
|
619765
|
+
const entry = this.telegramActiveInferences.get(inferenceId);
|
|
619766
|
+
if (!entry) return;
|
|
619767
|
+
const elapsed = ((performance.now() - entry.startTs) / 1e3).toFixed(1);
|
|
619768
|
+
const thinkRatio = entry.contentTokens + entry.thinkingTokens > 0 ? Math.round(entry.thinkingTokens * 100 / (entry.contentTokens + entry.thinkingTokens)) : 0;
|
|
619769
|
+
const preview = (thinkingBuf || contentBuf).slice(-180).replace(/\s+/g, " ");
|
|
619770
|
+
this.tuiWrite(() => renderTelegramSubAgentEvent(
|
|
619771
|
+
entry.sessionKey,
|
|
619772
|
+
`inference ${inferenceId} [${entry.kind}] ${elapsed}s content=${entry.contentTokens}t thinking=${entry.thinkingTokens}t (${thinkRatio}% think) live=${JSON.stringify(preview)}`
|
|
619773
|
+
));
|
|
619774
|
+
};
|
|
619775
|
+
for await (const chunk of streamFn(request)) {
|
|
619776
|
+
if (chunk.type === "content" && chunk.content) {
|
|
619777
|
+
if (chunk.thinking) {
|
|
619778
|
+
thinkingBuf += chunk.content;
|
|
619779
|
+
this.bumpTelegramInferenceTokens(inferenceId, 0, 1);
|
|
619780
|
+
} else {
|
|
619781
|
+
contentBuf += chunk.content;
|
|
619782
|
+
this.bumpTelegramInferenceTokens(inferenceId, 1, 0);
|
|
619783
|
+
}
|
|
619784
|
+
flushPreview(false);
|
|
619785
|
+
} else if (chunk.type === "finish") {
|
|
619786
|
+
finishReason = chunk.finishReason;
|
|
619787
|
+
} else if (chunk.type === "usage") {
|
|
619788
|
+
usage = {
|
|
619789
|
+
prompt_tokens: chunk.promptTokens,
|
|
619790
|
+
completion_tokens: chunk.completionTokens,
|
|
619791
|
+
total_tokens: chunk.totalTokens
|
|
619792
|
+
};
|
|
619793
|
+
}
|
|
619794
|
+
}
|
|
619795
|
+
flushPreview(true);
|
|
619796
|
+
void finishReason;
|
|
619797
|
+
return {
|
|
619798
|
+
choices: [
|
|
619799
|
+
{
|
|
619800
|
+
message: {
|
|
619801
|
+
content: thinkingBuf ? `<think>${thinkingBuf}</think>${contentBuf}` : contentBuf
|
|
619802
|
+
}
|
|
619803
|
+
}
|
|
619804
|
+
],
|
|
619805
|
+
usage: usage ? {
|
|
619806
|
+
totalTokens: usage.total_tokens ?? 0,
|
|
619807
|
+
promptTokens: usage.prompt_tokens,
|
|
619808
|
+
completionTokens: usage.completion_tokens
|
|
619809
|
+
} : void 0
|
|
619810
|
+
};
|
|
619811
|
+
}
|
|
619812
|
+
// ─────────────────────────────────────────────────────────────────
|
|
619813
|
+
// Inference telemetry registry
|
|
619814
|
+
// ─────────────────────────────────────────────────────────────────
|
|
619815
|
+
registerTelegramInference(kind, sessionKey, model) {
|
|
619816
|
+
const id = `inf-${++this.telegramInferenceCounter}`;
|
|
619817
|
+
const now = performance.now();
|
|
619818
|
+
this.telegramActiveInferences.set(id, {
|
|
619819
|
+
id,
|
|
619820
|
+
kind,
|
|
619821
|
+
sessionKey,
|
|
619822
|
+
model,
|
|
619823
|
+
startTs: now,
|
|
619824
|
+
lastTokenAt: now,
|
|
619825
|
+
contentTokens: 0,
|
|
619826
|
+
thinkingTokens: 0,
|
|
619827
|
+
streaming: true
|
|
619828
|
+
});
|
|
619829
|
+
return id;
|
|
619830
|
+
}
|
|
619831
|
+
bumpTelegramInferenceTokens(id, contentDelta, thinkingDelta) {
|
|
619832
|
+
const entry = this.telegramActiveInferences.get(id);
|
|
619833
|
+
if (!entry) return;
|
|
619834
|
+
entry.contentTokens += contentDelta;
|
|
619835
|
+
entry.thinkingTokens += thinkingDelta;
|
|
619836
|
+
entry.lastTokenAt = performance.now();
|
|
619837
|
+
}
|
|
619838
|
+
/**
|
|
619839
|
+
* Called when a non-streaming chatCompletion returns. Walks the completion
|
|
619840
|
+
* to extract a rough token count from the visible content so the registry
|
|
619841
|
+
* has SOME size signal even for non-streamed calls.
|
|
619842
|
+
*/
|
|
619843
|
+
updateTelegramInferenceFinal(id, result) {
|
|
619844
|
+
const entry = this.telegramActiveInferences.get(id);
|
|
619845
|
+
if (!entry) return;
|
|
619846
|
+
entry.streaming = false;
|
|
619847
|
+
const text = result.choices[0]?.message?.content ?? "";
|
|
619848
|
+
const thinkMatch = text.match(/<think>([\s\S]*?)<\/think>/);
|
|
619849
|
+
const thinkingText = thinkMatch ? thinkMatch[1] : "";
|
|
619850
|
+
const contentText = thinkMatch ? text.replace(thinkMatch[0], "") : text;
|
|
619851
|
+
entry.thinkingTokens = Math.ceil(thinkingText.length / 4);
|
|
619852
|
+
entry.contentTokens = Math.ceil(contentText.length / 4);
|
|
619853
|
+
entry.lastTokenAt = performance.now();
|
|
619854
|
+
}
|
|
619855
|
+
deregisterTelegramInference(id) {
|
|
619856
|
+
const entry = this.telegramActiveInferences.get(id);
|
|
619857
|
+
if (!entry) return;
|
|
619858
|
+
this.telegramActiveInferences.delete(id);
|
|
619859
|
+
if (this.telegramThinkingVisible) {
|
|
619860
|
+
const dur = ((performance.now() - entry.startTs) / 1e3).toFixed(1);
|
|
619861
|
+
const totalTokens = entry.contentTokens + entry.thinkingTokens;
|
|
619862
|
+
const ratio = totalTokens > 0 ? Math.round(entry.thinkingTokens * 100 / totalTokens) : 0;
|
|
619863
|
+
this.tuiWrite(() => renderTelegramSubAgentEvent(
|
|
619864
|
+
entry.sessionKey,
|
|
619865
|
+
`inference ${id} [${entry.kind}] done in ${dur}s — ${entry.contentTokens}t content / ${entry.thinkingTokens}t thinking (${ratio}% think)`
|
|
619866
|
+
));
|
|
619867
|
+
}
|
|
619868
|
+
}
|
|
619869
|
+
/**
|
|
619870
|
+
* Snapshot of every in-flight Telegram-originated inference. The TUI
|
|
619871
|
+
* dashboard / status line can call this to display "why are 2 GPUs spun
|
|
619872
|
+
* up?" — each entry includes the kind, session, model, elapsed seconds,
|
|
619873
|
+
* and token counts so the operator can correlate Ollama load to bridge
|
|
619874
|
+
* activity.
|
|
619875
|
+
*/
|
|
619876
|
+
getTelegramActiveInferences() {
|
|
619877
|
+
const now = performance.now();
|
|
619878
|
+
return Array.from(this.telegramActiveInferences.values()).map((e2) => ({
|
|
619879
|
+
...e2,
|
|
619880
|
+
elapsedSec: (now - e2.startTs) / 1e3,
|
|
619881
|
+
idleSec: (now - e2.lastTokenAt) / 1e3
|
|
619882
|
+
}));
|
|
619883
|
+
}
|
|
619884
|
+
/**
|
|
619885
|
+
* Toggle thinking visibility for the Telegram bridge. Mirrors the main
|
|
619886
|
+
* TUI's Ctrl+O semantics but applies to bridge-side streams. Returns the
|
|
619887
|
+
* new state so a binding can echo it back to the operator.
|
|
619888
|
+
*/
|
|
619889
|
+
setTelegramThinkingVisible(visible) {
|
|
619890
|
+
this.telegramThinkingVisible = visible;
|
|
619891
|
+
return this.telegramThinkingVisible;
|
|
619892
|
+
}
|
|
619893
|
+
getTelegramThinkingVisible() {
|
|
619894
|
+
return this.telegramThinkingVisible;
|
|
619895
|
+
}
|
|
619657
619896
|
async repairTelegramInteractionDecision(backend, rawOutput, forcedRoute, timeoutMs, diagnostics) {
|
|
619658
619897
|
const rawPreview = telegramRouterRawPreview(rawOutput, 4e3);
|
|
619659
619898
|
if (!rawPreview || telegramDecisionOutputHasDanglingJson(rawOutput)) {
|
|
@@ -619825,18 +620064,167 @@ ${retryText}`,
|
|
|
619825
620064
|
* in-flight promise, and on completion fire any queued trailing call.
|
|
619826
620065
|
*/
|
|
619827
620066
|
startCoalescedTelegramRouterCall(sessionKey, msg, toolContext) {
|
|
619828
|
-
const
|
|
620067
|
+
const HARD_DEADLINE_MS = 18e4;
|
|
620068
|
+
const inner = this.inferTelegramInteractionDecision(msg, toolContext);
|
|
620069
|
+
const promise = new Promise((resolve52, reject) => {
|
|
620070
|
+
let settled = false;
|
|
620071
|
+
const guard = setTimeout(() => {
|
|
620072
|
+
if (settled) return;
|
|
620073
|
+
settled = true;
|
|
620074
|
+
reject(new Error("router-coalescer: hard deadline exceeded (180s); inner inference did not settle"));
|
|
620075
|
+
}, HARD_DEADLINE_MS);
|
|
620076
|
+
if (typeof guard.unref === "function") guard.unref();
|
|
620077
|
+
inner.then(
|
|
620078
|
+
(v) => {
|
|
620079
|
+
if (settled) return;
|
|
620080
|
+
settled = true;
|
|
620081
|
+
clearTimeout(guard);
|
|
620082
|
+
resolve52(v);
|
|
620083
|
+
},
|
|
620084
|
+
(e2) => {
|
|
620085
|
+
if (settled) return;
|
|
620086
|
+
settled = true;
|
|
620087
|
+
clearTimeout(guard);
|
|
620088
|
+
reject(e2);
|
|
620089
|
+
}
|
|
620090
|
+
);
|
|
620091
|
+
});
|
|
619829
620092
|
this.telegramRouterSessionState.set(sessionKey, { inFlight: promise });
|
|
619830
620093
|
const onSettled = () => {
|
|
619831
|
-
|
|
619832
|
-
|
|
620094
|
+
let state;
|
|
620095
|
+
try {
|
|
620096
|
+
state = this.telegramRouterSessionState.get(sessionKey);
|
|
620097
|
+
this.telegramRouterSessionState.delete(sessionKey);
|
|
620098
|
+
} catch {
|
|
620099
|
+
state = void 0;
|
|
620100
|
+
}
|
|
619833
620101
|
if (!state?.trailing) return;
|
|
619834
620102
|
const { msg: nextMsg, toolContext: nextCtx, resolve: resolve52, reject } = state.trailing;
|
|
619835
|
-
|
|
620103
|
+
try {
|
|
620104
|
+
this.startCoalescedTelegramRouterCall(sessionKey, nextMsg, nextCtx).then(resolve52, reject);
|
|
620105
|
+
} catch (err) {
|
|
620106
|
+
reject(err);
|
|
620107
|
+
}
|
|
619836
620108
|
};
|
|
619837
620109
|
promise.then(onSettled, onSettled);
|
|
619838
620110
|
return promise;
|
|
619839
620111
|
}
|
|
620112
|
+
/**
|
|
620113
|
+
* Forcibly cancel every in-flight + trailing router-coalescer entry.
|
|
620114
|
+
* Used on bridge stop() and by the watchdog if it detects the coalescer
|
|
620115
|
+
* map has grown unboundedly. Rejects every queued caller cleanly so they
|
|
620116
|
+
* surface the cancellation rather than waiting forever.
|
|
620117
|
+
*/
|
|
620118
|
+
cancelTelegramRouterSessionState(reason) {
|
|
620119
|
+
const err = new Error(`router-coalescer cancelled: ${reason}`);
|
|
620120
|
+
for (const [, state] of this.telegramRouterSessionState) {
|
|
620121
|
+
if (state.trailing) {
|
|
620122
|
+
try {
|
|
620123
|
+
state.trailing.reject(err);
|
|
620124
|
+
} catch {
|
|
620125
|
+
}
|
|
620126
|
+
}
|
|
620127
|
+
}
|
|
620128
|
+
this.telegramRouterSessionState.clear();
|
|
620129
|
+
}
|
|
620130
|
+
// ─────────────────────────────────────────────────────────────────
|
|
620131
|
+
// Sub-agent staleness watchdog
|
|
620132
|
+
// ─────────────────────────────────────────────────────────────────
|
|
620133
|
+
/** Interval handle for the periodic stale-sub-agent reaper. */
|
|
620134
|
+
telegramSubAgentWatchdogTimer = null;
|
|
620135
|
+
/**
|
|
620136
|
+
* Maximum wall-clock time a sub-agent may go without a visible-edit
|
|
620137
|
+
* progress event before the watchdog declares it stale and tears it
|
|
620138
|
+
* down. Tuned to be comfortably longer than the slowest healthy turn
|
|
620139
|
+
* (which is bounded by request_timeout = 5-15min per turn) but short
|
|
620140
|
+
* enough that a wedged sub-agent doesn't pin a chat for an entire day.
|
|
620141
|
+
*
|
|
620142
|
+
* Override with env var OMNIUS_TG_SUBAGENT_MAX_IDLE_MS for ops tuning.
|
|
620143
|
+
*/
|
|
620144
|
+
telegramSubAgentMaxIdleMs() {
|
|
620145
|
+
const raw = Number.parseInt(process.env["OMNIUS_TG_SUBAGENT_MAX_IDLE_MS"] ?? "", 10);
|
|
620146
|
+
if (Number.isFinite(raw) && raw >= 3e4 && raw <= 36e5) return raw;
|
|
620147
|
+
return 6e5;
|
|
620148
|
+
}
|
|
620149
|
+
/** Watchdog tick period — checked every 30s. */
|
|
620150
|
+
telegramSubAgentWatchdogIntervalMs() {
|
|
620151
|
+
return 3e4;
|
|
620152
|
+
}
|
|
620153
|
+
/**
|
|
620154
|
+
* Start the periodic stale-sub-agent reaper. Idempotent — safe to call
|
|
620155
|
+
* multiple times (no-op if already running). Stopped by stop() and on
|
|
620156
|
+
* SIGTERM via the cleanup chain.
|
|
620157
|
+
*/
|
|
620158
|
+
startTelegramSubAgentWatchdog() {
|
|
620159
|
+
if (this.telegramSubAgentWatchdogTimer) return;
|
|
620160
|
+
const tick = () => {
|
|
620161
|
+
try {
|
|
620162
|
+
this.reapStaleTelegramSubAgents();
|
|
620163
|
+
} catch (err) {
|
|
620164
|
+
this.tuiWrite(() => renderTelegramSubAgentError(
|
|
620165
|
+
"watchdog",
|
|
620166
|
+
`tick failed: ${err instanceof Error ? err.message : String(err)}`
|
|
620167
|
+
));
|
|
620168
|
+
}
|
|
620169
|
+
};
|
|
620170
|
+
this.telegramSubAgentWatchdogTimer = setInterval(tick, this.telegramSubAgentWatchdogIntervalMs());
|
|
620171
|
+
if (typeof this.telegramSubAgentWatchdogTimer.unref === "function") {
|
|
620172
|
+
this.telegramSubAgentWatchdogTimer.unref();
|
|
620173
|
+
}
|
|
620174
|
+
}
|
|
620175
|
+
/** Stop the periodic stale-sub-agent reaper. */
|
|
620176
|
+
stopTelegramSubAgentWatchdog() {
|
|
620177
|
+
if (this.telegramSubAgentWatchdogTimer) {
|
|
620178
|
+
clearInterval(this.telegramSubAgentWatchdogTimer);
|
|
620179
|
+
this.telegramSubAgentWatchdogTimer = null;
|
|
620180
|
+
}
|
|
620181
|
+
}
|
|
620182
|
+
/**
|
|
620183
|
+
* One watchdog pass: walk the sub-agent map; for each entry where the
|
|
620184
|
+
* last visible-edit progress event is older than maxIdle AND no completion
|
|
620185
|
+
* boundary has been seen, abort the runner and remove the entry. This is
|
|
620186
|
+
* the load-bearing fix for the runaway-sub-agent steady-state leak: a
|
|
620187
|
+
* runner that hangs (qwen3 think-stall, Ollama TCP wedge, lost stream)
|
|
620188
|
+
* otherwise pins the chat forever, because the finally{} in runSubAgent
|
|
620189
|
+
* never fires.
|
|
620190
|
+
*/
|
|
620191
|
+
reapStaleTelegramSubAgents() {
|
|
620192
|
+
const maxIdleMs = this.telegramSubAgentMaxIdleMs();
|
|
620193
|
+
const now = Date.now();
|
|
620194
|
+
const stale = [];
|
|
620195
|
+
for (const [sessionKey, agent] of this.subAgents) {
|
|
620196
|
+
if (agent.aborted) continue;
|
|
620197
|
+
const idle = agent.lastEditMs > 0 ? now - agent.lastEditMs : 0;
|
|
620198
|
+
if (idle <= maxIdleMs) continue;
|
|
620199
|
+
if (agent.completionBoundarySeen) continue;
|
|
620200
|
+
stale.push(sessionKey);
|
|
620201
|
+
}
|
|
620202
|
+
for (const sessionKey of stale) {
|
|
620203
|
+
const agent = this.subAgents.get(sessionKey);
|
|
620204
|
+
if (!agent) continue;
|
|
620205
|
+
agent.aborted = true;
|
|
620206
|
+
if (agent.typingInterval) {
|
|
620207
|
+
clearInterval(agent.typingInterval);
|
|
620208
|
+
agent.typingInterval = null;
|
|
620209
|
+
}
|
|
620210
|
+
try {
|
|
620211
|
+
agent.runner?.abort?.();
|
|
620212
|
+
} catch {
|
|
620213
|
+
}
|
|
620214
|
+
this.subAgents.delete(sessionKey);
|
|
620215
|
+
this.refreshActiveTelegramInteractionCount();
|
|
620216
|
+
this.tuiWrite(() => renderTelegramSubAgentEvent(
|
|
620217
|
+
agent.username,
|
|
620218
|
+
`watchdog: aborted stale sub-agent (idle ${Math.round((now - agent.lastEditMs) / 1e3)}s without completion)`
|
|
620219
|
+
));
|
|
620220
|
+
this.subAgentViewCallbacks?.onWrite(
|
|
620221
|
+
agent.viewId,
|
|
620222
|
+
`watchdog: sub-agent retired after ${Math.round((now - agent.lastEditMs) / 1e3)}s without a progress event`
|
|
620223
|
+
);
|
|
620224
|
+
this.subAgentViewCallbacks?.onStatus(agent.viewId, "failed");
|
|
620225
|
+
this.subAgentViewCallbacks?.onComplete(agent.viewId);
|
|
620226
|
+
}
|
|
620227
|
+
}
|
|
619840
620228
|
async inferTelegramInteractionDecision(msg, toolContext) {
|
|
619841
620229
|
const config = this.agentConfig;
|
|
619842
620230
|
const forcedRoute = this.interactionMode === "chat" || this.interactionMode === "action" ? this.interactionMode : null;
|
|
@@ -620434,6 +620822,7 @@ ${TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT}`);
|
|
|
620434
620822
|
this.polling = true;
|
|
620435
620823
|
this.pollFatalNotified = false;
|
|
620436
620824
|
this.abortController = new AbortController();
|
|
620825
|
+
this.startTelegramSubAgentWatchdog();
|
|
620437
620826
|
await this.prepareTelegramLongPolling();
|
|
620438
620827
|
try {
|
|
620439
620828
|
mkdirSync66(this.mediaCacheDir, { recursive: true });
|
|
@@ -620509,7 +620898,14 @@ ${TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT}`);
|
|
|
620509
620898
|
for (const [, agent] of this.subAgents) {
|
|
620510
620899
|
agent.aborted = true;
|
|
620511
620900
|
if (agent.typingInterval) clearInterval(agent.typingInterval);
|
|
620901
|
+
try {
|
|
620902
|
+
agent.runner?.abort?.();
|
|
620903
|
+
} catch {
|
|
620904
|
+
}
|
|
620512
620905
|
}
|
|
620906
|
+
this.stopTelegramSubAgentWatchdog();
|
|
620907
|
+
this.cancelTelegramRouterSessionState("bridge stop");
|
|
620908
|
+
this.telegramActiveInferences.clear();
|
|
620513
620909
|
if (this.telegramSqliteDb && this.telegramSqliteDb !== false) {
|
|
620514
620910
|
try {
|
|
620515
620911
|
this.telegramSqliteDb.close();
|
|
@@ -621326,35 +621722,55 @@ ${conversationStream}`
|
|
|
621326
621722
|
});
|
|
621327
621723
|
let accumulated = "";
|
|
621328
621724
|
let streamError;
|
|
621725
|
+
const sessionKey = this.sessionKeyForMessage(msg);
|
|
621726
|
+
const inferenceId = this.registerTelegramInference("chat-fast-path", sessionKey, config.model);
|
|
621329
621727
|
const streamable = backend;
|
|
621330
621728
|
const stream = typeof streamable.chatCompletionStream === "function" ? streamable.chatCompletionStream(request) : null;
|
|
621331
|
-
|
|
621332
|
-
|
|
621333
|
-
|
|
621334
|
-
|
|
621335
|
-
|
|
621336
|
-
|
|
621729
|
+
try {
|
|
621730
|
+
if (stream && typeof stream[Symbol.asyncIterator] === "function") {
|
|
621731
|
+
try {
|
|
621732
|
+
for await (const chunk of stream) {
|
|
621733
|
+
if (chunk.type !== "content") continue;
|
|
621734
|
+
const piece = chunk.content;
|
|
621735
|
+
if (!piece) continue;
|
|
621736
|
+
if (chunk.thinking) {
|
|
621737
|
+
this.bumpTelegramInferenceTokens(inferenceId, 0, 1);
|
|
621738
|
+
if (this.telegramThinkingVisible) {
|
|
621739
|
+
const preview = piece.slice(0, 120);
|
|
621740
|
+
this.tuiWrite(() => renderTelegramSubAgentEvent(
|
|
621741
|
+
msg.username,
|
|
621742
|
+
`chat-fast-path thinking: ${JSON.stringify(preview)}`
|
|
621743
|
+
));
|
|
621744
|
+
}
|
|
621745
|
+
} else {
|
|
621746
|
+
this.bumpTelegramInferenceTokens(inferenceId, 1, 0);
|
|
621747
|
+
accumulated += piece;
|
|
621748
|
+
await onToken(accumulated);
|
|
621749
|
+
}
|
|
621337
621750
|
}
|
|
621751
|
+
} catch (err) {
|
|
621752
|
+
streamError = err;
|
|
621753
|
+
accumulated = "";
|
|
621338
621754
|
}
|
|
621339
|
-
} catch (err) {
|
|
621340
|
-
streamError = err;
|
|
621341
|
-
accumulated = "";
|
|
621342
621755
|
}
|
|
621343
|
-
|
|
621344
|
-
|
|
621345
|
-
|
|
621346
|
-
|
|
621347
|
-
|
|
621348
|
-
|
|
621349
|
-
|
|
621350
|
-
|
|
621351
|
-
|
|
621352
|
-
|
|
621756
|
+
if (!accumulated.trim()) {
|
|
621757
|
+
let result;
|
|
621758
|
+
try {
|
|
621759
|
+
result = await backend.chatCompletion(request);
|
|
621760
|
+
} catch (err) {
|
|
621761
|
+
if (streamError) {
|
|
621762
|
+
const streamMsg = streamError instanceof Error ? streamError.message : String(streamError);
|
|
621763
|
+
const retryMsg = err instanceof Error ? err.message : String(err);
|
|
621764
|
+
throw new Error(`streaming failed (${streamMsg}); non-stream retry failed (${retryMsg})`);
|
|
621765
|
+
}
|
|
621766
|
+
throw err;
|
|
621353
621767
|
}
|
|
621354
|
-
|
|
621768
|
+
this.updateTelegramInferenceFinal(inferenceId, result);
|
|
621769
|
+
accumulated = result.choices[0]?.message?.content ?? "";
|
|
621770
|
+
if (accumulated) await onToken(accumulated);
|
|
621355
621771
|
}
|
|
621356
|
-
|
|
621357
|
-
|
|
621772
|
+
} finally {
|
|
621773
|
+
this.deregisterTelegramInference(inferenceId);
|
|
621358
621774
|
}
|
|
621359
621775
|
return stripTelegramHiddenThinking(accumulated).trim();
|
|
621360
621776
|
}
|
|
@@ -621509,6 +621925,13 @@ ${conversationStream}`
|
|
|
621509
621925
|
if (event.type === "stream_token" && event.streamKind === "content" && event.content) {
|
|
621510
621926
|
subAgent.accumulated += event.content;
|
|
621511
621927
|
}
|
|
621928
|
+
if (event.type === "stream_token" && event.streamKind === "thinking" && event.content && this.telegramThinkingVisible) {
|
|
621929
|
+
const trimmed = event.content.replace(/\s+/g, " ").slice(0, 200);
|
|
621930
|
+
this.subAgentViewCallbacks?.onWrite(
|
|
621931
|
+
subAgent.viewId,
|
|
621932
|
+
`thinking: ${trimmed}`
|
|
621933
|
+
);
|
|
621934
|
+
}
|
|
621512
621935
|
const intermediateLine = formatTelegramProgressEvent(event);
|
|
621513
621936
|
if (intermediateLine && (isAdminDM || event.type !== "status")) {
|
|
621514
621937
|
subAgent.intermediateLines.push(intermediateLine);
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "omnius",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.116",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "omnius",
|
|
9
|
-
"version": "1.0.
|
|
9
|
+
"version": "1.0.116",
|
|
10
10
|
"bundleDependencies": [
|
|
11
11
|
"image-to-ascii"
|
|
12
12
|
],
|
package/package.json
CHANGED