omnius 1.0.113 → 1.0.114

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -614455,6 +614455,28 @@ function telegramRouterTimeoutMs(configTimeoutMs, minMs = 12e4, _legacyMaxMs) {
614455
614455
  const configured = Number.isFinite(configTimeoutMs) && (configTimeoutMs ?? 0) > 0 ? configTimeoutMs : 3e5;
614456
614456
  return Math.max(configured, minMs, 12e4);
614457
614457
  }
614458
/**
 * Build a copy of a chat-completion request with model "thinking" switched off.
 *
 * Two things are done to the copy:
 *   1. `think` is forced to `false`.
 *   2. A `/no_think` marker is made visible to the model. The newest user
 *      message is located by scanning from the end of `messages`:
 *        - string (or missing) content without the marker gets it appended,
 *          separated by a blank line unless the text already ends in "\n";
 *        - content that already contains the marker is left as is;
 *        - structured content (anything that is neither a string nor nullish,
 *          e.g. an array of content parts) is never rewritten, because
 *          flattening it to a string would discard the original payload. The
 *          marker is then carried by a trailing user message instead.
 *      If there is no user message at all, a standalone `/no_think` user
 *      message is appended.
 *
 * Neither `request` nor its `messages` array nor any message object is
 * mutated; changed entries are replaced with shallow copies.
 *
 * @param {object} [request] Chat-completion request. `messages` is expected to
 *   be an array of `{ role, content }` objects; any other value is treated as
 *   an empty conversation.
 * @returns {object} Shallow copy of `request` with the adjusted `messages`
 *   array and `think: false`.
 */
function telegramThinkSuppressedRequest(request) {
  const marker = "/no_think";
  const markerPattern = /\/no_think\b/i;
  const messages = Array.isArray(request?.messages) ? [...request.messages] : [];
  let markerPlaced = false;
  for (let index = messages.length - 1; index >= 0; index--) {
    const message = messages[index];
    if (!message || message.role !== "user") continue;
    const { content } = message;
    if (typeof content === "string" || content == null) {
      const text = content ?? "";
      if (!markerPattern.test(text)) {
        const separator = text.endsWith("\n") ? "" : "\n\n";
        messages[index] = { ...message, content: `${text}${separator}${marker}` };
      }
      markerPlaced = true;
    }
    // Only the newest user message is considered. Structured content falls
    // through with markerPlaced === false so the marker is appended below
    // without touching the original parts.
    break;
  }
  if (!markerPlaced) {
    messages.push({ role: "user", content: marker });
  }
  return { ...request, messages, think: false };
}
614458
614480
  function parseTelegramInteractionDecision(text, forcedRoute, options2 = {}) {
614459
614481
  for (const jsonText of telegramDecisionJsonCandidates(text)) {
614460
614482
  try {
@@ -616308,6 +616330,23 @@ External acquisition contract:
616308
616330
  stimulation = new StimulationController();
616309
616331
  /** Throttles noisy "skipped group chatter" waterfall logs */
616310
616332
  groupSkipLogAt = /* @__PURE__ */ new Map();
616333
+ /**
616334
+ * Per-chat router-call coalescing state. Bounds concurrent router (attention
616335
+ * decision) inferences to at most 2 per sessionKey regardless of incoming
616336
+ * message rate. Without this, a burst of N messages in a single chat fires
616337
+ * N concurrent Ollama calls; over hours of bursts this saturates the GPU
616338
+ * and produces the 10+ minute reply latencies observed after 24h uptime.
616339
+ *
616340
+ * Shape per sessionKey:
616341
+ * - inFlight: the currently-running router call for this chat
616342
+ * - trailing: a queued "next" call. If multiple messages arrive while
616343
+ * inFlight is running, they collapse into a single trailing call
616344
+ * that uses the most-recent message. All callers that arrived during
616345
+ * the in-flight window receive the trailing decision.
616346
+ *
616347
+ * Disabled with OMNIUS_TG_ROUTER_DEBOUNCE=off (emergency bypass).
616348
+ */
616349
+ telegramRouterSessionState = /* @__PURE__ */ new Map();
616311
616350
  /** Telegram interaction routing profile */
616312
616351
  interactionMode = "auto";
616313
616352
  /** Actual model context window discovered by the main TUI. */
@@ -617943,7 +617982,7 @@ ${mediaContext}` : ""
617943
617982
  this.agentConfig.model,
617944
617983
  this.agentConfig.apiKey
617945
617984
  );
617946
- const result = await backend.chatCompletion({
617985
+ const result = await backend.chatCompletion(telegramThinkSuppressedRequest({
617947
617986
  messages: [
617948
617987
  { role: "system", content: "You are a Telegram public-follow-up discretion model. Output strict JSON only." },
617949
617988
  { role: "user", content: prompt }
@@ -617951,9 +617990,8 @@ ${mediaContext}` : ""
617951
617990
  tools: [],
617952
617991
  temperature: 0.2,
617953
617992
  maxTokens: 300,
617954
- timeoutMs: Math.min(Math.max(this.agentConfig.timeoutMs ?? 3e4, 5e3), 2e4),
617955
- think: false
617956
- });
617993
+ timeoutMs: Math.min(Math.max(this.agentConfig.timeoutMs ?? 3e4, 5e3), 2e4)
617994
+ }));
617957
617995
  const decision2 = parseTelegramReflectionFollowupDecision(result.choices[0]?.message?.content ?? "");
617958
617996
  state.lastFollowupArtifactAt = artifact.generatedAt;
617959
617997
  if (!decision2) {
@@ -619577,9 +619615,10 @@ ${lines.join("\n")}`);
619577
619615
  async telegramRouterJsonCompletion(backend, request, diagnostics) {
619578
619616
  let jsonModeResult;
619579
619617
  let jsonModeError;
619618
+ const suppressed = telegramThinkSuppressedRequest(request);
619580
619619
  try {
619581
619620
  jsonModeResult = await backend.chatCompletion({
619582
- ...request,
619621
+ ...suppressed,
619583
619622
  responseFormat: TELEGRAM_INTERACTION_DECISION_RESPONSE_FORMAT
619584
619623
  });
619585
619624
  const visible = jsonModeResult.choices.some(
@@ -619598,7 +619637,7 @@ ${lines.join("\n")}`);
619598
619637
  }
619599
619638
  }
619600
619639
  try {
619601
- const plainResult = await backend.chatCompletion(request);
619640
+ const plainResult = await backend.chatCompletion(suppressed);
619602
619641
  if (diagnostics) {
619603
619642
  const plainVisible = plainResult.choices.some(
619604
619643
  (choice) => stripTelegramHiddenThinking(choice.message.content ?? "").trim().length > 0
@@ -619751,6 +619790,53 @@ ${retryText}`,
619751
619790
  return null;
619752
619791
  }
619753
619792
  }
619793
+ /**
619794
+ * Coalesced wrapper around inferTelegramInteractionDecision. Bounds
619795
+ * concurrent router calls per chat to 2: one in-flight plus one trailing
619796
+ * that absorbs every message arriving during the in-flight window. The
619797
+ * trailing call uses the most-recent message and serves all queued
619798
+ * callers. Bypass with OMNIUS_TG_ROUTER_DEBOUNCE=off.
619799
+ */
619800
+ inferTelegramInteractionDecisionCoalesced(msg, toolContext) {
619801
+ if (process.env["OMNIUS_TG_ROUTER_DEBOUNCE"] === "off") {
619802
+ return this.inferTelegramInteractionDecision(msg, toolContext);
619803
+ }
619804
+ const sessionKey = this.sessionKeyForMessage(msg);
619805
+ const existing = this.telegramRouterSessionState.get(sessionKey);
619806
+ if (!existing) {
619807
+ return this.startCoalescedTelegramRouterCall(sessionKey, msg, toolContext);
619808
+ }
619809
+ if (existing.trailing) {
619810
+ existing.trailing.msg = msg;
619811
+ existing.trailing.toolContext = toolContext;
619812
+ return existing.trailing.promise;
619813
+ }
619814
+ let resolve52;
619815
+ let reject;
619816
+ const promise = new Promise((res, rej) => {
619817
+ resolve52 = res;
619818
+ reject = rej;
619819
+ });
619820
+ existing.trailing = { msg, toolContext, promise, resolve: resolve52, reject };
619821
+ return promise;
619822
+ }
619823
+ /**
619824
+ * Internal: start an actual router inference for a sessionKey, store its
619825
+ * in-flight promise, and on completion fire any queued trailing call.
619826
+ */
619827
+ startCoalescedTelegramRouterCall(sessionKey, msg, toolContext) {
619828
+ const promise = this.inferTelegramInteractionDecision(msg, toolContext);
619829
+ this.telegramRouterSessionState.set(sessionKey, { inFlight: promise });
619830
+ const onSettled = () => {
619831
+ const state = this.telegramRouterSessionState.get(sessionKey);
619832
+ this.telegramRouterSessionState.delete(sessionKey);
619833
+ if (!state?.trailing) return;
619834
+ const { msg: nextMsg, toolContext: nextCtx, resolve: resolve52, reject } = state.trailing;
619835
+ this.startCoalescedTelegramRouterCall(sessionKey, nextMsg, nextCtx).then(resolve52, reject);
619836
+ };
619837
+ promise.then(onSettled, onSettled);
619838
+ return promise;
619839
+ }
619754
619840
  async inferTelegramInteractionDecision(msg, toolContext) {
619755
619841
  const config = this.agentConfig;
619756
619842
  const forcedRoute = this.interactionMode === "chat" || this.interactionMode === "action" ? this.interactionMode : null;
@@ -620754,7 +620840,7 @@ Join: ${newUrl}`);
620754
620840
  const isGroup = msg.chatType !== "private";
620755
620841
  if (isGroup) {
620756
620842
  const attentionViewId2 = this.registerTelegramAttentionView(msg, existing.toolContext || toolContext, "active Telegram thread");
620757
- const decision3 = await this.inferTelegramInteractionDecision(msg, existing.toolContext || toolContext);
620843
+ const decision3 = await this.inferTelegramInteractionDecisionCoalesced(msg, existing.toolContext || toolContext);
620758
620844
  this.deliverTelegramAttentionDecision(
620759
620845
  sessionKey,
620760
620846
  msg,
@@ -620798,7 +620884,7 @@ Join: ${newUrl}`);
620798
620884
  return;
620799
620885
  }
620800
620886
  const attentionViewId = this.registerTelegramAttentionView(msg, toolContext);
620801
- const decision2 = await this.inferTelegramInteractionDecision(msg, toolContext);
620887
+ const decision2 = await this.inferTelegramInteractionDecisionCoalesced(msg, toolContext);
620802
620888
  this.deliverTelegramAttentionDecision(
620803
620889
  sessionKey,
620804
620890
  msg,
@@ -621231,14 +621317,13 @@ ${conversationStream}`
621231
621317
  config.model,
621232
621318
  config.apiKey
621233
621319
  );
621234
- const request = {
621320
+ const request = telegramThinkSuppressedRequest({
621235
621321
  messages: this.buildTelegramChatMessages(msg, toolContext, mediaContext),
621236
621322
  tools: [],
621237
621323
  temperature: 0.4,
621238
621324
  maxTokens: 700,
621239
- timeoutMs: Math.max(config.timeoutMs ?? 3e5, 12e4),
621240
- think: false
621241
- };
621325
+ timeoutMs: Math.max(config.timeoutMs ?? 3e5, 12e4)
621326
+ });
621242
621327
  let accumulated = "";
621243
621328
  let streamError;
621244
621329
  const streamable = backend;
@@ -621331,6 +621416,15 @@ ${conversationStream}`
621331
621416
  disablePersistentMemory: false,
621332
621417
  disableCodebaseMap: !isAdminDM,
621333
621418
  subAgent: !isAdminDM,
621419
+ // Telegram sub-agents run tool-heavy workflows where qwen3 <think>
621420
+ // reasoning is notorious for stalling: the model burns its token
621421
+ // budget inside <think>...</think> and never closes the tag, producing
621422
+ // empty content or 10+ minute replies after 24h. The runner's
621423
+ // computeEffectiveThink() already kills thinking when hasTools=true,
621424
+ // but we set it explicitly here as well so a future no-tools turn
621425
+ // (compaction window, recovery prompt, watchdog probe) inherits the
621426
+ // off default rather than the global config's value.
621427
+ thinking: false,
621334
621428
  // Telegram sub-agent runs must be bounded. Brute-force re-engagement and
621335
621429
  // the Littleman near-cap turn extension are appropriate for the full TUI
621336
621430
  // session but cause Telegram to silently outgrow its nominal maxTurns,
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.113",
3
+ "version": "1.0.114",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "omnius",
9
- "version": "1.0.113",
9
+ "version": "1.0.114",
10
10
  "bundleDependencies": [
11
11
  "image-to-ascii"
12
12
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.113",
3
+ "version": "1.0.114",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",