omnius 1.0.113 → 1.0.115

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -614455,6 +614455,28 @@ function telegramRouterTimeoutMs(configTimeoutMs, minMs = 12e4, _legacyMaxMs) {
614455
614455
  const configured = Number.isFinite(configTimeoutMs) && (configTimeoutMs ?? 0) > 0 ? configTimeoutMs : 3e5;
614456
614456
  return Math.max(configured, minMs, 12e4);
614457
614457
  }
614458
/**
 * Build a copy of a chat-completion request with model "thinking" switched off.
 *
 * Two things are done on the copy (the caller's request object and its
 * `messages` array are never mutated):
 *   1. `think: false` is set on the returned request.
 *   2. A `/no_think` marker is guaranteed to be present in the most recent
 *      user turn.
 *
 * Only the LAST message with `role === "user"` is inspected:
 *   - string content that already contains `/no_think` (case-insensitive) is
 *     left as-is;
 *   - other string content gets `/no_think` appended, separated by a blank
 *     line unless the content already ends with a newline;
 *   - non-string content (for example an array of content parts) is left
 *     untouched, and a standalone `/no_think` user message is appended
 *     instead, so structured payloads are never overwritten.
 * If there is no user message at all, a standalone `/no_think` user message
 * is appended as well.
 *
 * @param {object} request - Chat-completion request; `request.messages` is
 *   used when it is an array and treated as empty otherwise.
 * @returns {object} Shallow copy of `request` with the adjusted `messages`
 *   array and `think: false`.
 */
function telegramThinkSuppressedRequest(request) {
  const messages2 = Array.isArray(request.messages) ? request.messages.slice() : [];
  let handled = false;
  for (let i2 = messages2.length - 1; i2 >= 0; i2--) {
    const m2 = messages2[i2];
    if (!m2 || m2.role !== "user") continue;
    if (typeof m2.content === "string") {
      const content = m2.content;
      if (!/\/no_think\b/i.test(content)) {
        const separator = content.endsWith("\n") ? "" : "\n\n";
        // Replace the slot with a new object; the caller's message stays intact.
        messages2[i2] = { ...m2, content: `${content}${separator}/no_think` };
      }
      handled = true;
    }
    // Non-string content falls through with handled === false so the marker
    // is delivered as its own message rather than clobbering the payload.
    break;
  }
  if (!handled) {
    messages2.push({ role: "user", content: "/no_think" });
  }
  return { ...request, messages: messages2, think: false };
}
614458
614480
  function parseTelegramInteractionDecision(text, forcedRoute, options2 = {}) {
614459
614481
  for (const jsonText of telegramDecisionJsonCandidates(text)) {
614460
614482
  try {
@@ -616308,6 +616330,23 @@ External acquisition contract:
616308
616330
  stimulation = new StimulationController();
616309
616331
  /** Throttles noisy "skipped group chatter" waterfall logs */
616310
616332
  groupSkipLogAt = /* @__PURE__ */ new Map();
616333
+ /**
616334
+ * Per-chat router-call coalescing state. Bounds concurrent router (attention
616335
+ * decision) inferences to at most 2 per sessionKey regardless of incoming
616336
+ * message rate. Without this, a burst of N messages in a single chat fires
616337
+ * N concurrent Ollama calls; over hours of bursts this saturates the GPU
616338
+ * and produces the 10+ minute reply latencies observed after 24h uptime.
616339
+ *
616340
+ * Shape per sessionKey:
616341
+ * - inFlight: the currently-running router call for this chat
616342
+ * - trailing: a queued "next" call. If multiple messages arrive while
616343
+ * inFlight is running, they collapse into a single trailing call
616344
+ * that uses the most-recent message. All callers that arrived during
616345
+ * the in-flight window receive the trailing decision.
616346
+ *
616347
+ * Disabled with OMNIUS_TG_ROUTER_DEBOUNCE=off (emergency bypass).
616348
+ */
616349
+ telegramRouterSessionState = /* @__PURE__ */ new Map();
616311
616350
  /** Telegram interaction routing profile */
616312
616351
  interactionMode = "auto";
616313
616352
  /** Actual model context window discovered by the main TUI. */
@@ -617943,7 +617982,7 @@ ${mediaContext}` : ""
617943
617982
  this.agentConfig.model,
617944
617983
  this.agentConfig.apiKey
617945
617984
  );
617946
- const result = await backend.chatCompletion({
617985
+ const result = await backend.chatCompletion(telegramThinkSuppressedRequest({
617947
617986
  messages: [
617948
617987
  { role: "system", content: "You are a Telegram public-follow-up discretion model. Output strict JSON only." },
617949
617988
  { role: "user", content: prompt }
@@ -617951,9 +617990,8 @@ ${mediaContext}` : ""
617951
617990
  tools: [],
617952
617991
  temperature: 0.2,
617953
617992
  maxTokens: 300,
617954
- timeoutMs: Math.min(Math.max(this.agentConfig.timeoutMs ?? 3e4, 5e3), 2e4),
617955
- think: false
617956
- });
617993
+ timeoutMs: Math.min(Math.max(this.agentConfig.timeoutMs ?? 3e4, 5e3), 2e4)
617994
+ }));
617957
617995
  const decision2 = parseTelegramReflectionFollowupDecision(result.choices[0]?.message?.content ?? "");
617958
617996
  state.lastFollowupArtifactAt = artifact.generatedAt;
617959
617997
  if (!decision2) {
@@ -619577,9 +619615,10 @@ ${lines.join("\n")}`);
619577
619615
  async telegramRouterJsonCompletion(backend, request, diagnostics) {
619578
619616
  let jsonModeResult;
619579
619617
  let jsonModeError;
619618
+ const suppressed = telegramThinkSuppressedRequest(request);
619580
619619
  try {
619581
619620
  jsonModeResult = await backend.chatCompletion({
619582
- ...request,
619621
+ ...suppressed,
619583
619622
  responseFormat: TELEGRAM_INTERACTION_DECISION_RESPONSE_FORMAT
619584
619623
  });
619585
619624
  const visible = jsonModeResult.choices.some(
@@ -619598,7 +619637,7 @@ ${lines.join("\n")}`);
619598
619637
  }
619599
619638
  }
619600
619639
  try {
619601
- const plainResult = await backend.chatCompletion(request);
619640
+ const plainResult = await backend.chatCompletion(suppressed);
619602
619641
  if (diagnostics) {
619603
619642
  const plainVisible = plainResult.choices.some(
619604
619643
  (choice) => stripTelegramHiddenThinking(choice.message.content ?? "").trim().length > 0
@@ -619751,6 +619790,202 @@ ${retryText}`,
619751
619790
  return null;
619752
619791
  }
619753
619792
  }
619793
+ /**
619794
+ * Coalesced wrapper around inferTelegramInteractionDecision. Bounds
619795
+ * concurrent router calls per chat to 2: one in-flight plus one trailing
619796
+ * that absorbs every message arriving during the in-flight window. The
619797
+ * trailing call uses the most-recent message and serves all queued
619798
+ * callers. Bypass with OMNIUS_TG_ROUTER_DEBOUNCE=off.
619799
+ */
619800
+ inferTelegramInteractionDecisionCoalesced(msg, toolContext) {
619801
+ if (process.env["OMNIUS_TG_ROUTER_DEBOUNCE"] === "off") {
619802
+ return this.inferTelegramInteractionDecision(msg, toolContext);
619803
+ }
619804
+ const sessionKey = this.sessionKeyForMessage(msg);
619805
+ const existing = this.telegramRouterSessionState.get(sessionKey);
619806
+ if (!existing) {
619807
+ return this.startCoalescedTelegramRouterCall(sessionKey, msg, toolContext);
619808
+ }
619809
+ if (existing.trailing) {
619810
+ existing.trailing.msg = msg;
619811
+ existing.trailing.toolContext = toolContext;
619812
+ return existing.trailing.promise;
619813
+ }
619814
+ let resolve52;
619815
+ let reject;
619816
+ const promise = new Promise((res, rej) => {
619817
+ resolve52 = res;
619818
+ reject = rej;
619819
+ });
619820
+ existing.trailing = { msg, toolContext, promise, resolve: resolve52, reject };
619821
+ return promise;
619822
+ }
619823
+ /**
619824
+ * Internal: start an actual router inference for a sessionKey, store its
619825
+ * in-flight promise, and on completion fire any queued trailing call.
619826
+ */
619827
+ startCoalescedTelegramRouterCall(sessionKey, msg, toolContext) {
619828
+ const HARD_DEADLINE_MS = 18e4;
619829
+ const inner = this.inferTelegramInteractionDecision(msg, toolContext);
619830
+ const promise = new Promise((resolve52, reject) => {
619831
+ let settled = false;
619832
+ const guard = setTimeout(() => {
619833
+ if (settled) return;
619834
+ settled = true;
619835
+ reject(new Error("router-coalescer: hard deadline exceeded (180s); inner inference did not settle"));
619836
+ }, HARD_DEADLINE_MS);
619837
+ if (typeof guard.unref === "function") guard.unref();
619838
+ inner.then(
619839
+ (v) => {
619840
+ if (settled) return;
619841
+ settled = true;
619842
+ clearTimeout(guard);
619843
+ resolve52(v);
619844
+ },
619845
+ (e2) => {
619846
+ if (settled) return;
619847
+ settled = true;
619848
+ clearTimeout(guard);
619849
+ reject(e2);
619850
+ }
619851
+ );
619852
+ });
619853
+ this.telegramRouterSessionState.set(sessionKey, { inFlight: promise });
619854
+ const onSettled = () => {
619855
+ let state;
619856
+ try {
619857
+ state = this.telegramRouterSessionState.get(sessionKey);
619858
+ this.telegramRouterSessionState.delete(sessionKey);
619859
+ } catch {
619860
+ state = void 0;
619861
+ }
619862
+ if (!state?.trailing) return;
619863
+ const { msg: nextMsg, toolContext: nextCtx, resolve: resolve52, reject } = state.trailing;
619864
+ try {
619865
+ this.startCoalescedTelegramRouterCall(sessionKey, nextMsg, nextCtx).then(resolve52, reject);
619866
+ } catch (err) {
619867
+ reject(err);
619868
+ }
619869
+ };
619870
+ promise.then(onSettled, onSettled);
619871
+ return promise;
619872
+ }
619873
+ /**
619874
+ * Forcibly cancel every in-flight + trailing router-coalescer entry.
619875
+ * Used on bridge stop() and by the watchdog if it detects the coalescer
619876
+ * map has grown unboundedly. Rejects every queued caller cleanly so they
619877
+ * surface the cancellation rather than waiting forever.
619878
+ */
619879
+ cancelTelegramRouterSessionState(reason) {
619880
+ const err = new Error(`router-coalescer cancelled: ${reason}`);
619881
+ for (const [, state] of this.telegramRouterSessionState) {
619882
+ if (state.trailing) {
619883
+ try {
619884
+ state.trailing.reject(err);
619885
+ } catch {
619886
+ }
619887
+ }
619888
+ }
619889
+ this.telegramRouterSessionState.clear();
619890
+ }
619891
+ // ─────────────────────────────────────────────────────────────────
619892
+ // Sub-agent staleness watchdog
619893
+ // ─────────────────────────────────────────────────────────────────
619894
+ /** Interval handle for the periodic stale-sub-agent reaper. */
619895
+ telegramSubAgentWatchdogTimer = null;
619896
+ /**
619897
+ * Maximum wall-clock time a sub-agent may go without a visible-edit
619898
+ * progress event before the watchdog declares it stale and tears it
619899
+ * down. Tuned to be comfortably longer than the slowest healthy turn
619900
+ * (which is bounded by request_timeout = 5-15min per turn) but short
619901
+ * enough that a wedged sub-agent doesn't pin a chat for an entire day.
619902
+ *
619903
+ * Override with env var OMNIUS_TG_SUBAGENT_MAX_IDLE_MS for ops tuning.
619904
+ */
619905
+ telegramSubAgentMaxIdleMs() {
619906
+ const raw = Number.parseInt(process.env["OMNIUS_TG_SUBAGENT_MAX_IDLE_MS"] ?? "", 10);
619907
+ if (Number.isFinite(raw) && raw >= 3e4 && raw <= 36e5) return raw;
619908
+ return 6e5;
619909
+ }
619910
+ /** Watchdog tick period — checked every 30s. */
619911
+ telegramSubAgentWatchdogIntervalMs() {
619912
+ return 3e4;
619913
+ }
619914
+ /**
619915
+ * Start the periodic stale-sub-agent reaper. Idempotent — safe to call
619916
+ * multiple times (no-op if already running). Stopped by stop() and on
619917
+ * SIGTERM via the cleanup chain.
619918
+ */
619919
+ startTelegramSubAgentWatchdog() {
619920
+ if (this.telegramSubAgentWatchdogTimer) return;
619921
+ const tick = () => {
619922
+ try {
619923
+ this.reapStaleTelegramSubAgents();
619924
+ } catch (err) {
619925
+ this.tuiWrite(() => renderTelegramSubAgentError(
619926
+ "watchdog",
619927
+ `tick failed: ${err instanceof Error ? err.message : String(err)}`
619928
+ ));
619929
+ }
619930
+ };
619931
+ this.telegramSubAgentWatchdogTimer = setInterval(tick, this.telegramSubAgentWatchdogIntervalMs());
619932
+ if (typeof this.telegramSubAgentWatchdogTimer.unref === "function") {
619933
+ this.telegramSubAgentWatchdogTimer.unref();
619934
+ }
619935
+ }
619936
+ /** Stop the periodic stale-sub-agent reaper. */
619937
+ stopTelegramSubAgentWatchdog() {
619938
+ if (this.telegramSubAgentWatchdogTimer) {
619939
+ clearInterval(this.telegramSubAgentWatchdogTimer);
619940
+ this.telegramSubAgentWatchdogTimer = null;
619941
+ }
619942
+ }
619943
+ /**
619944
+ * One watchdog pass: walk the sub-agent map; for each entry where the
619945
+ * last visible-edit progress event is older than maxIdle AND no completion
619946
+ * boundary has been seen, abort the runner and remove the entry. This is
619947
+ * the load-bearing fix for the runaway-sub-agent steady-state leak: a
619948
+ * runner that hangs (qwen3 think-stall, Ollama TCP wedge, lost stream)
619949
+ * otherwise pins the chat forever, because the finally{} in runSubAgent
619950
+ * never fires.
619951
+ */
619952
+ reapStaleTelegramSubAgents() {
619953
+ const maxIdleMs = this.telegramSubAgentMaxIdleMs();
619954
+ const now = Date.now();
619955
+ const stale = [];
619956
+ for (const [sessionKey, agent] of this.subAgents) {
619957
+ if (agent.aborted) continue;
619958
+ const idle = agent.lastEditMs > 0 ? now - agent.lastEditMs : 0;
619959
+ if (idle <= maxIdleMs) continue;
619960
+ if (agent.completionBoundarySeen) continue;
619961
+ stale.push(sessionKey);
619962
+ }
619963
+ for (const sessionKey of stale) {
619964
+ const agent = this.subAgents.get(sessionKey);
619965
+ if (!agent) continue;
619966
+ agent.aborted = true;
619967
+ if (agent.typingInterval) {
619968
+ clearInterval(agent.typingInterval);
619969
+ agent.typingInterval = null;
619970
+ }
619971
+ try {
619972
+ agent.runner?.abort?.();
619973
+ } catch {
619974
+ }
619975
+ this.subAgents.delete(sessionKey);
619976
+ this.refreshActiveTelegramInteractionCount();
619977
+ this.tuiWrite(() => renderTelegramSubAgentEvent(
619978
+ agent.username,
619979
+ `watchdog: aborted stale sub-agent (idle ${Math.round((now - agent.lastEditMs) / 1e3)}s without completion)`
619980
+ ));
619981
+ this.subAgentViewCallbacks?.onWrite(
619982
+ agent.viewId,
619983
+ `watchdog: sub-agent retired after ${Math.round((now - agent.lastEditMs) / 1e3)}s without a progress event`
619984
+ );
619985
+ this.subAgentViewCallbacks?.onStatus(agent.viewId, "failed");
619986
+ this.subAgentViewCallbacks?.onComplete(agent.viewId);
619987
+ }
619988
+ }
619754
619989
  async inferTelegramInteractionDecision(msg, toolContext) {
619755
619990
  const config = this.agentConfig;
619756
619991
  const forcedRoute = this.interactionMode === "chat" || this.interactionMode === "action" ? this.interactionMode : null;
@@ -620348,6 +620583,7 @@ ${TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT}`);
620348
620583
  this.polling = true;
620349
620584
  this.pollFatalNotified = false;
620350
620585
  this.abortController = new AbortController();
620586
+ this.startTelegramSubAgentWatchdog();
620351
620587
  await this.prepareTelegramLongPolling();
620352
620588
  try {
620353
620589
  mkdirSync66(this.mediaCacheDir, { recursive: true });
@@ -620423,7 +620659,13 @@ ${TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT}`);
620423
620659
  for (const [, agent] of this.subAgents) {
620424
620660
  agent.aborted = true;
620425
620661
  if (agent.typingInterval) clearInterval(agent.typingInterval);
620662
+ try {
620663
+ agent.runner?.abort?.();
620664
+ } catch {
620665
+ }
620426
620666
  }
620667
+ this.stopTelegramSubAgentWatchdog();
620668
+ this.cancelTelegramRouterSessionState("bridge stop");
620427
620669
  if (this.telegramSqliteDb && this.telegramSqliteDb !== false) {
620428
620670
  try {
620429
620671
  this.telegramSqliteDb.close();
@@ -620754,7 +620996,7 @@ Join: ${newUrl}`);
620754
620996
  const isGroup = msg.chatType !== "private";
620755
620997
  if (isGroup) {
620756
620998
  const attentionViewId2 = this.registerTelegramAttentionView(msg, existing.toolContext || toolContext, "active Telegram thread");
620757
- const decision3 = await this.inferTelegramInteractionDecision(msg, existing.toolContext || toolContext);
620999
+ const decision3 = await this.inferTelegramInteractionDecisionCoalesced(msg, existing.toolContext || toolContext);
620758
621000
  this.deliverTelegramAttentionDecision(
620759
621001
  sessionKey,
620760
621002
  msg,
@@ -620798,7 +621040,7 @@ Join: ${newUrl}`);
620798
621040
  return;
620799
621041
  }
620800
621042
  const attentionViewId = this.registerTelegramAttentionView(msg, toolContext);
620801
- const decision2 = await this.inferTelegramInteractionDecision(msg, toolContext);
621043
+ const decision2 = await this.inferTelegramInteractionDecisionCoalesced(msg, toolContext);
620802
621044
  this.deliverTelegramAttentionDecision(
620803
621045
  sessionKey,
620804
621046
  msg,
@@ -621231,14 +621473,13 @@ ${conversationStream}`
621231
621473
  config.model,
621232
621474
  config.apiKey
621233
621475
  );
621234
- const request = {
621476
+ const request = telegramThinkSuppressedRequest({
621235
621477
  messages: this.buildTelegramChatMessages(msg, toolContext, mediaContext),
621236
621478
  tools: [],
621237
621479
  temperature: 0.4,
621238
621480
  maxTokens: 700,
621239
- timeoutMs: Math.max(config.timeoutMs ?? 3e5, 12e4),
621240
- think: false
621241
- };
621481
+ timeoutMs: Math.max(config.timeoutMs ?? 3e5, 12e4)
621482
+ });
621242
621483
  let accumulated = "";
621243
621484
  let streamError;
621244
621485
  const streamable = backend;
@@ -621331,6 +621572,15 @@ ${conversationStream}`
621331
621572
  disablePersistentMemory: false,
621332
621573
  disableCodebaseMap: !isAdminDM,
621333
621574
  subAgent: !isAdminDM,
621575
+ // Telegram sub-agents run tool-heavy workflows where qwen3 <think>
621576
+ // reasoning is notorious for stalling: the model burns its token
621577
+ // budget inside <think>...</think> and never closes the tag, producing
621578
+ // empty content or 10+ minute replies after 24h. The runner's
621579
+ // computeEffectiveThink() already kills thinking when hasTools=true,
621580
+ // but we set it explicitly here as well so a future no-tools turn
621581
+ // (compaction window, recovery prompt, watchdog probe) inherits the
621582
+ // off default rather than the global config's value.
621583
+ thinking: false,
621334
621584
  // Telegram sub-agent runs must be bounded. Brute-force re-engagement and
621335
621585
  // the Littleman near-cap turn extension are appropriate for the full TUI
621336
621586
  // session but cause Telegram to silently outgrow its nominal maxTurns,
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.113",
3
+ "version": "1.0.115",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "omnius",
9
- "version": "1.0.113",
9
+ "version": "1.0.115",
10
10
  "bundleDependencies": [
11
11
  "image-to-ascii"
12
12
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.113",
3
+ "version": "1.0.115",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",