omnius 1.0.181 → 1.0.183

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -550132,11 +550132,17 @@ function injectNoThinkDirective(messages2) {
550132
550132
  const target = messages2[lastUserIdx];
550133
550133
  if (!target || typeof target.content !== "string")
550134
550134
  return messages2;
550135
- if (/\/no_think\b/i.test(target.content))
550135
+ const hasOllamaNoThink = /\/nothink\b/i.test(target.content);
550136
+ const hasQwenNoThink = /\/no[_-]think\b/i.test(target.content);
550137
+ if (hasOllamaNoThink && hasQwenNoThink)
550136
550138
  return messages2;
550139
+ const suffix = [
550140
+ hasOllamaNoThink ? null : "/nothink",
550141
+ hasQwenNoThink ? null : "/no_think"
550142
+ ].filter(Boolean).join("\n");
550137
550143
  const annotated = `${target.content}
550138
550144
 
550139
- /no_think`;
550145
+ ${suffix}`;
550140
550146
  return messages2.map((m2, i2) => i2 === lastUserIdx ? { ...m2, content: annotated } : m2);
550141
550147
  }
550142
550148
  function backendHttpErrorDetail(text) {
@@ -550154,6 +550160,8 @@ function isOllamaModelNotFoundResponse(status, text, model) {
550154
550160
  function computeEffectiveThink(params) {
550155
550161
  if (process.env["OMNIUS_FORCE_NO_THINK"] === "1")
550156
550162
  return false;
550163
+ if (process.env["OMNIUS_ENABLE_THINKING"] !== "1")
550164
+ return false;
550157
550165
  if (params.suppressed)
550158
550166
  return false;
550159
550167
  if (params.hasTools)
@@ -550172,18 +550180,9 @@ function computeEffectiveThink(params) {
550172
550180
  return params.defaultThink;
550173
550181
  }
550174
550182
  function sanitizeHistoryThink(messages2) {
550175
- let lastAsstIdx = -1;
550176
- for (let i2 = messages2.length - 1; i2 >= 0; i2--) {
550177
- if (messages2[i2]?.role === "assistant") {
550178
- lastAsstIdx = i2;
550179
- break;
550180
- }
550181
- }
550182
- return messages2.map((m2, i2) => {
550183
+ return messages2.map((m2) => {
550183
550184
  if (m2.role !== "assistant" || typeof m2.content !== "string")
550184
550185
  return m2;
550185
- if (i2 === lastAsstIdx)
550186
- return m2;
550187
550186
  return { ...m2, content: stripThinkBlocks(m2.content) };
550188
550187
  });
550189
550188
  }
@@ -563608,10 +563607,11 @@ ${description}`
563608
563607
  if (effectiveThink === true && (effectiveMaxTokens ?? 0) < 4096) {
563609
563608
  effectiveMaxTokens = 4096;
563610
563609
  }
563610
+ const requestMessages = effectiveThink ? cleanedMessages : injectNoThinkDirective(cleanedMessages);
563611
563611
  const responseFormat = request.responseFormat ?? request.response_format;
563612
563612
  const body = {
563613
563613
  model: this.model,
563614
- messages: cleanedMessages,
563614
+ messages: requestMessages,
563615
563615
  tools: request.tools,
563616
563616
  temperature: request.temperature,
563617
563617
  max_tokens: effectiveMaxTokens,
@@ -563620,7 +563620,7 @@ ${description}`
563620
563620
  if (responseFormat !== void 0) {
563621
563621
  body["response_format"] = responseFormat;
563622
563622
  }
563623
- const reqNumCtx = request.numCtx;
563623
+ const reqNumCtx = request.numCtx ?? request.num_ctx;
563624
563624
  if (Number.isFinite(reqNumCtx) && (reqNumCtx ?? 0) > 0) {
563625
563625
  const opts = body["options"] ?? {};
563626
563626
  opts["num_ctx"] = reqNumCtx;
@@ -563701,11 +563701,11 @@ ${description}`
563701
563701
  const responseText = firstChoice ? String(firstChoice.message?.content ?? "") : "";
563702
563702
  const outcome = this.recordThinkOutcome(responseText, effectiveThink === true);
563703
563703
  const independentOutcome = effectiveThink !== true ? classifyThinkOutcome(responseText) : null;
563704
- const shouldRecoverFromEmpty = responseFormat !== void 0 && independentOutcome !== null && (independentOutcome === "empty_after_strip" || independentOutcome === "unclosed_think");
563704
+ const shouldRecoverFromEmpty = request.disableEmptyContentRecovery !== true && responseFormat !== void 0 && independentOutcome !== null && (independentOutcome === "empty_after_strip" || independentOutcome === "unclosed_think");
563705
563705
  const justSuppressed = this._thinkSuppressed && this._thinkFailStreak === _OllamaAgenticBackend._thinkFailThreshold;
563706
563706
  const shouldRetryThinkGuard = outcome !== null && effectiveThink === true && (justSuppressed || outcome === "empty_after_strip" || outcome === "unclosed_think");
563707
563707
  if (shouldRetryThinkGuard || shouldRecoverFromEmpty) {
563708
- const retryMessages = injectNoThinkDirective(cleanedMessages);
563708
+ const retryMessages = injectNoThinkDirective(requestMessages);
563709
563709
  const retryBody = {
563710
563710
  model: this.model,
563711
563711
  messages: retryMessages,
@@ -563892,7 +563892,7 @@ ${description}`
563892
563892
  * Ollama pool routing as non-stream completions.
563893
563893
  */
563894
563894
  async *chatCompletionStream(request) {
563895
- const cleanedMessages = normalizeMessagesForStrictOpenAI(request.messages.map((m2) => m2.role === "assistant" && typeof m2.content === "string" ? { ...m2, content: stripThinkBlocks(m2.content) } : m2));
563895
+ const cleanedMessages = normalizeMessagesForStrictOpenAI(sanitizeHistoryThink(request.messages));
563896
563896
  let effectiveThink = computeEffectiveThink({
563897
563897
  requestThink: request.think,
563898
563898
  defaultThink: this.thinking,
@@ -563907,10 +563907,11 @@ ${description}`
563907
563907
  if (effectiveThink === true && (effectiveMaxTokens ?? 0) < 4096) {
563908
563908
  effectiveMaxTokens = 4096;
563909
563909
  }
563910
+ const requestMessages = effectiveThink ? cleanedMessages : injectNoThinkDirective(cleanedMessages);
563910
563911
  const responseFormat = request.responseFormat ?? request.response_format;
563911
563912
  const body = {
563912
563913
  model: this.model,
563913
- messages: cleanedMessages,
563914
+ messages: requestMessages,
563914
563915
  tools: request.tools,
563915
563916
  temperature: request.temperature,
563916
563917
  max_tokens: effectiveMaxTokens,
@@ -563921,7 +563922,7 @@ ${description}`
563921
563922
  if (responseFormat !== void 0) {
563922
563923
  body["response_format"] = responseFormat;
563923
563924
  }
563924
- const reqNumCtx = request.numCtx;
563925
+ const reqNumCtx = request.numCtx ?? request.num_ctx;
563925
563926
  if (Number.isFinite(reqNumCtx) && (reqNumCtx ?? 0) > 0) {
563926
563927
  const opts = body["options"] ?? {};
563927
563928
  opts["num_ctx"] = reqNumCtx;
@@ -564176,6 +564177,57 @@ var init_nexusBackend = __esm({
564176
564177
  this.authKey = authKey || "";
564177
564178
  this.thinking = thinking ?? false;
564178
564179
  }
564180
+ effectiveThink(request) {
564181
+ if (process.env["OMNIUS_FORCE_NO_THINK"] === "1")
564182
+ return false;
564183
+ if (process.env["OMNIUS_ENABLE_THINKING"] !== "1")
564184
+ return false;
564185
+ if (Array.isArray(request.tools) && request.tools.length > 0)
564186
+ return false;
564187
+ if (request.think === true)
564188
+ return true;
564189
+ if (request.think === false)
564190
+ return false;
564191
+ return this.thinking === true;
564192
+ }
564193
+ noThinkMessages(messages2) {
564194
+ let lastUserIdx = -1;
564195
+ for (let i2 = messages2.length - 1; i2 >= 0; i2--) {
564196
+ if (messages2[i2]?.role === "user") {
564197
+ lastUserIdx = i2;
564198
+ break;
564199
+ }
564200
+ }
564201
+ if (lastUserIdx < 0)
564202
+ return messages2;
564203
+ const target = messages2[lastUserIdx];
564204
+ if (!target || typeof target.content !== "string")
564205
+ return messages2;
564206
+ const hasOllamaNoThink = /\/nothink\b/i.test(target.content);
564207
+ const hasQwenNoThink = /\/no[_-]think\b/i.test(target.content);
564208
+ if (hasOllamaNoThink && hasQwenNoThink)
564209
+ return messages2;
564210
+ const suffix = [
564211
+ hasOllamaNoThink ? null : "/nothink",
564212
+ hasQwenNoThink ? null : "/no_think"
564213
+ ].filter(Boolean).join("\n");
564214
+ return messages2.map((m2, i2) => i2 === lastUserIdx ? { ...m2, content: `${target.content}
564215
+
564216
+ ${suffix}` } : m2);
564217
+ }
564218
+ requestMessages(request, effectiveThink) {
564219
+ return effectiveThink ? request.messages : this.noThinkMessages(request.messages);
564220
+ }
564221
+ applyOptionalRequestFields(daemonArgs, request) {
564222
+ const responseFormat = request.responseFormat ?? request.response_format;
564223
+ if (responseFormat !== void 0) {
564224
+ daemonArgs.response_format = JSON.stringify(responseFormat);
564225
+ }
564226
+ const numCtx = request.numCtx ?? request.num_ctx;
564227
+ if (Number.isFinite(numCtx) && (numCtx ?? 0) > 0) {
564228
+ daemonArgs.num_ctx = String(numCtx);
564229
+ }
564230
+ }
564179
564231
  /** Reset the consecutive failure counter (called on endpoint switch / reconnect) */
564180
564232
  resetFailures() {
564181
564233
  this.consecutiveFailures = 0;
@@ -564191,9 +564243,10 @@ var init_nexusBackend = __esm({
564191
564243
  err.fatal = true;
564192
564244
  throw err;
564193
564245
  }
564246
+ const effectiveThink = this.effectiveThink(request);
564194
564247
  const daemonArgs = {
564195
564248
  model: this.model,
564196
- messages: JSON.stringify(request.messages),
564249
+ messages: JSON.stringify(this.requestMessages(request, effectiveThink)),
564197
564250
  tools: JSON.stringify(request.tools),
564198
564251
  temperature: String(request.temperature),
564199
564252
  max_tokens: String(request.maxTokens)
@@ -564204,7 +564257,8 @@ var init_nexusBackend = __esm({
564204
564257
  if (this.authKey) {
564205
564258
  daemonArgs.auth_key = this.authKey;
564206
564259
  }
564207
- daemonArgs.think = String(this.thinking);
564260
+ daemonArgs.think = String(effectiveThink);
564261
+ this.applyOptionalRequestFields(daemonArgs, request);
564208
564262
  let rawResult;
564209
564263
  try {
564210
564264
  rawResult = await this.sendFn("remote_infer", daemonArgs, request.timeoutMs || 12e4);
@@ -564303,9 +564357,10 @@ var init_nexusBackend = __esm({
564303
564357
  async *chatCompletionStream(request) {
564304
564358
  const streamFile = join97(tmpdir18(), `nexus-stream-${randomBytes19(6).toString("hex")}.jsonl`);
564305
564359
  writeFileSync38(streamFile, "", "utf8");
564360
+ const effectiveThink = this.effectiveThink(request);
564306
564361
  const daemonArgs = {
564307
564362
  model: this.model,
564308
- messages: JSON.stringify(request.messages),
564363
+ messages: JSON.stringify(this.requestMessages(request, effectiveThink)),
564309
564364
  tools: JSON.stringify(request.tools),
564310
564365
  temperature: String(request.temperature),
564311
564366
  max_tokens: String(request.maxTokens),
@@ -564315,7 +564370,8 @@ var init_nexusBackend = __esm({
564315
564370
  daemonArgs.target_peer = this.targetPeer;
564316
564371
  if (this.authKey)
564317
564372
  daemonArgs.auth_key = this.authKey;
564318
- daemonArgs.think = String(this.thinking);
564373
+ daemonArgs.think = String(effectiveThink);
564374
+ this.applyOptionalRequestFields(daemonArgs, request);
564319
564375
  let rawResult;
564320
564376
  try {
564321
564377
  rawResult = await this.sendFn("remote_infer", daemonArgs, request.timeoutMs || 12e4);
@@ -629145,12 +629201,36 @@ function telegramRouterTimeoutMs(configTimeoutMs, minMs = 1e4, maxMs) {
629145
629201
  10
629146
629202
  );
629147
629203
  const floor = Number.isFinite(minMs) && minMs > 0 ? minMs : 1e4;
629148
- const configuredCap = Number.isFinite(envRaw) && envRaw >= floor ? envRaw : 9e4;
629204
+ const configuredCap = Number.isFinite(envRaw) && envRaw >= floor ? envRaw : 3e4;
629149
629205
  const callerCap = Number.isFinite(maxMs) && (maxMs ?? 0) >= floor ? maxMs : configuredCap;
629150
629206
  const cap = Math.max(floor, Math.min(configuredCap, callerCap));
629151
629207
  const requested = Number.isFinite(configTimeoutMs) && (configTimeoutMs ?? 0) > 0 ? configTimeoutMs : cap;
629152
629208
  return Math.max(floor, Math.min(requested, cap));
629153
629209
  }
629210
+ function telegramRouterErrorText(err) {
629211
+ return err instanceof Error ? err.message : String(err);
629212
+ }
629213
+ function compactTelegramRouterDiagnosticText(text, maxLength = 220) {
629214
+ const compact3 = text.replace(/\s+/g, " ").trim();
629215
+ return compact3.length > maxLength ? `${compact3.slice(0, Math.max(0, maxLength - 3))}...` : compact3;
629216
+ }
629217
+ function telegramRouterErrorLooksLikeTimeout(err) {
629218
+ const text = telegramRouterErrorText(err);
629219
+ return /\b(timeout|timed out|ETIMEDOUT)\b|aborted due to timeout|stream timeout/i.test(text);
629220
+ }
629221
+ function telegramRouterErrorLooksLikeBackendLiveness(err) {
629222
+ const text = telegramRouterErrorText(err);
629223
+ return telegramRouterErrorLooksLikeTimeout(err) || /fetch failed|ECONNREFUSED|ECONNRESET|EHOSTUNREACH|ENETUNREACH|ECONNABORTED|socket hang up|terminated|network error/i.test(text);
629224
+ }
629225
+ function telegramRouterDiagnosticAttemptLooksLikeTimeout(attempt) {
629226
+ return attempt.status === "threw" && telegramRouterErrorLooksLikeTimeout(attempt.error ?? "");
629227
+ }
629228
+ function telegramRouterDiagnosticAttemptLooksLikeBackendLiveness(attempt) {
629229
+ return attempt.status === "threw" && telegramRouterErrorLooksLikeBackendLiveness(attempt.error ?? "");
629230
+ }
629231
+ function telegramRouterDiagnosticIsDualEmptyVisible(diag) {
629232
+ return diag.jsonModeStatus === "empty-after-strip" && diag.plainStatus === "empty-after-strip";
629233
+ }
629154
629234
  function telegramThinkSuppressedRequest(request) {
629155
629235
  const messages2 = Array.isArray(request.messages) ? request.messages.slice() : [];
629156
629236
  let appended = false;
@@ -629158,18 +629238,24 @@ function telegramThinkSuppressedRequest(request) {
629158
629238
  const m2 = messages2[i2];
629159
629239
  if (!m2 || m2.role !== "user") continue;
629160
629240
  const content = typeof m2.content === "string" ? m2.content : "";
629161
- if (/\/no_think\b/i.test(content)) {
629241
+ const hasOllamaNoThink = /\/nothink\b/i.test(content);
629242
+ const hasQwenNoThink = /\/no[_-]think\b/i.test(content);
629243
+ if (hasOllamaNoThink && hasQwenNoThink) {
629162
629244
  appended = true;
629163
629245
  break;
629164
629246
  }
629165
- messages2[i2] = { ...m2, content: content.endsWith("\n") ? `${content}/no_think` : `${content}
629247
+ const suffix = [
629248
+ hasOllamaNoThink ? null : "/nothink",
629249
+ hasQwenNoThink ? null : "/no_think"
629250
+ ].filter(Boolean).join("\n");
629251
+ messages2[i2] = { ...m2, content: content.endsWith("\n") ? `${content}${suffix}` : `${content}
629166
629252
 
629167
- /no_think` };
629253
+ ${suffix}` };
629168
629254
  appended = true;
629169
629255
  break;
629170
629256
  }
629171
629257
  if (!appended) {
629172
- messages2.push({ role: "user", content: "/no_think" });
629258
+ messages2.push({ role: "user", content: "/nothink\n/no_think" });
629173
629259
  }
629174
629260
  return { ...request, messages: messages2, think: false };
629175
629261
  }
@@ -631267,10 +631353,14 @@ Telegram link integrity contract:
631267
631353
  * capacity and flood the TUI.
631268
631354
  */
631269
631355
  telegramActiveWorkSessions = /* @__PURE__ */ new Set();
631356
+ telegramActiveWorkGenerations = /* @__PURE__ */ new Map();
631357
+ telegramActiveWorkStartedAtMs = /* @__PURE__ */ new Map();
631270
631358
  /** Queued Telegram sessions waiting for a global work slot. */
631271
631359
  telegramQueuedSessionWork = /* @__PURE__ */ new Map();
631272
631360
  telegramDispatchQueuedTimer = null;
631273
631361
  telegramDispatchQueuedAtMs = 0;
631362
+ telegramQueueDiagnosticLastAtMs = 0;
631363
+ telegramPollWarningLastAtMs = 0;
631274
631364
  /** Lightweight chat history by chat/guest session key */
631275
631365
  chatHistory = /* @__PURE__ */ new Map();
631276
631366
  /** Participant and tone state by chat/guest session key */
@@ -631906,6 +631996,63 @@ No scoped reflection artifact exists yet for this chat. Use <code>/reflect</code
631906
631996
  if (!Number.isFinite(parsed)) return 350;
631907
631997
  return Math.max(0, Math.min(2e3, Math.floor(parsed)));
631908
631998
  }
631999
+ telegramQueueDiagnosticIntervalMs() {
632000
+ const raw = Number.parseInt(process.env["OMNIUS_TG_QUEUE_DIAGNOSTIC_MS"] ?? "", 10);
632001
+ if (Number.isFinite(raw) && raw >= 5e3 && raw <= 3e5) return raw;
632002
+ return 3e4;
632003
+ }
632004
+ maybeLogTelegramQueueDiagnostic(reason) {
632005
+ if (this.telegramQueuedSessionWork.size === 0) return;
632006
+ const now = Date.now();
632007
+ const interval = this.telegramQueueDiagnosticIntervalMs();
632008
+ if (now - this.telegramQueueDiagnosticLastAtMs < interval) return;
632009
+ this.telegramQueueDiagnosticLastAtMs = now;
632010
+ const queued = [...this.telegramQueuedSessionWork.values()].sort((a2, b) => a2.enqueuedAtMs - b.enqueuedAtMs).slice(0, 4).map((work) => {
632011
+ const age = formatTelegramPipelineDuration(now - work.enqueuedAtMs);
632012
+ const live = this.telegramSessionIsLive(work.sessionKey) ? "blocked:same-session-live" : "ready";
632013
+ return `${work.sessionKey} age=${age} bundled=${work.messageCount} ${live}`;
632014
+ });
632015
+ const active = [...this.activeTelegramInteractionSessionKeys()].slice(0, 6);
632016
+ const inferences = this.getTelegramActiveInferences().slice(0, 4).map((inf) => `${inf.id}/${inf.kind}/${inf.model} elapsed=${inf.elapsedSec.toFixed(1)}s ttfb=${inf.ttfbSec === void 0 ? "waiting" : `${inf.ttfbSec.toFixed(1)}s`}`);
632017
+ this.tuiWrite(() => renderTelegramSubAgentEvent(
632018
+ "queue",
632019
+ `queue diagnostic (${reason}): active ${this.activeTelegramInteractionCount()}/${this.getSubAgentLimit()} [${active.join(", ") || "none"}]; queued ${this.telegramQueuedSessionWork.size} [${queued.join(" | ")}]; inferences [${inferences.join(" | ") || "none"}]`
632020
+ ));
632021
+ }
632022
+ nextTelegramWorkGeneration(sessionKey) {
632023
+ const generation = (this.telegramActiveWorkGenerations.get(sessionKey) ?? 0) + 1;
632024
+ this.telegramActiveWorkGenerations.set(sessionKey, generation);
632025
+ return generation;
632026
+ }
632027
+ telegramWorkGenerationIsCurrent(sessionKey, generation) {
632028
+ return this.telegramActiveWorkGenerations.get(sessionKey) === generation;
632029
+ }
632030
+ telegramPreAgentWorkMaxIdleMs() {
632031
+ const routerMs = telegramRouterTimeoutMs(this.agentConfig?.timeoutMs);
632032
+ const raw = Number.parseInt(process.env["OMNIUS_TG_PRE_AGENT_MAX_IDLE_MS"] ?? "", 10);
632033
+ if (Number.isFinite(raw) && raw >= 3e4 && raw <= 9e5) return raw;
632034
+ return Math.max(12e4, routerMs + 3e4);
632035
+ }
632036
+ reapStaleTelegramPreAgentWork() {
632037
+ const now = Date.now();
632038
+ const maxIdleMs = this.telegramPreAgentWorkMaxIdleMs();
632039
+ for (const sessionKey of [...this.telegramActiveWorkSessions]) {
632040
+ if (this.subAgents.has(sessionKey) || this.activeChatSessions.has(sessionKey)) continue;
632041
+ const startedAt2 = this.telegramActiveWorkStartedAtMs.get(sessionKey);
632042
+ if (!startedAt2) continue;
632043
+ const idleMs = now - startedAt2;
632044
+ if (idleMs <= maxIdleMs) continue;
632045
+ const generation = this.telegramActiveWorkGenerations.get(sessionKey) ?? 0;
632046
+ this.telegramActiveWorkGenerations.set(sessionKey, generation + 1);
632047
+ this.telegramActiveWorkSessions.delete(sessionKey);
632048
+ this.telegramActiveWorkStartedAtMs.delete(sessionKey);
632049
+ this.refreshActiveTelegramInteractionCount();
632050
+ this.tuiWrite(() => renderTelegramSubAgentEvent(
632051
+ "queue",
632052
+ `watchdog: released stale pre-agent Telegram work pin for ${sessionKey} after ${Math.round(idleMs / 1e3)}s; queued messages may dispatch now`
632053
+ ));
632054
+ }
632055
+ }
631909
632056
  dispatchQueuedTelegramSessionWorkSoon(delayMs = 0) {
631910
632057
  const dueAt = Date.now() + Math.max(0, delayMs);
631911
632058
  if (this.telegramDispatchQueuedTimer && this.telegramDispatchQueuedAtMs <= dueAt) return;
@@ -631935,6 +632082,9 @@ No scoped reflection artifact exists yet for this chat. Use <code>/reflect</code
631935
632082
  this.dispatchQueuedTelegramSessionWorkSoon(Math.max(0, nextDue - Date.now()));
631936
632083
  }
631937
632084
  }
632085
+ if (this.telegramQueuedSessionWork.size > 0) {
632086
+ this.maybeLogTelegramQueueDiagnostic("dispatch");
632087
+ }
631938
632088
  this.refreshActiveTelegramInteractionCount();
631939
632089
  }
631940
632090
  buildTelegramQueuedSessionWork(sessionKey, msg, toolContext, now) {
@@ -631978,11 +632128,16 @@ No scoped reflection artifact exists yet for this chat. Use <code>/reflect</code
631978
632128
  return;
631979
632129
  }
631980
632130
  this.telegramActiveWorkSessions.add(work.sessionKey);
632131
+ this.telegramActiveWorkStartedAtMs.set(work.sessionKey, Date.now());
632132
+ const generation = this.nextTelegramWorkGeneration(work.sessionKey);
631981
632133
  this.refreshActiveTelegramInteractionCount();
631982
- void this.processTelegramMessageWork(work).catch((err) => {
632134
+ void this.processTelegramMessageWork(work, generation).catch((err) => {
631983
632135
  this.tuiWrite(() => renderWarning(`Telegram sub-agent error: ${err instanceof Error ? err.message : String(err)}`));
631984
632136
  }).finally(() => {
631985
- this.telegramActiveWorkSessions.delete(work.sessionKey);
632137
+ if (this.telegramWorkGenerationIsCurrent(work.sessionKey, generation)) {
632138
+ this.telegramActiveWorkSessions.delete(work.sessionKey);
632139
+ this.telegramActiveWorkStartedAtMs.delete(work.sessionKey);
632140
+ }
631986
632141
  this.refreshActiveTelegramInteractionCount();
631987
632142
  this.dispatchQueuedTelegramSessionWorkSoon();
631988
632143
  });
@@ -635217,7 +635372,7 @@ ${lines.join("\n")}`);
635217
635372
  `Current Telegram message text (untrusted user data):
635218
635373
  ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
635219
635374
  "",
635220
- "/no_think"
635375
+ "/nothink\n/no_think"
635221
635376
  ].filter(Boolean).join("\n");
635222
635377
  try {
635223
635378
  const result = await this.telegramRouterJsonCompletion(
@@ -635251,11 +635406,25 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
635251
635406
  async telegramRouterJsonCompletion(backend, request, diagnostics, inferenceKind = "router", sessionKey = "__router__") {
635252
635407
  let jsonModeResult;
635253
635408
  let jsonModeError;
635409
+ const recordAttempt = (attempt) => {
635410
+ if (!diagnostics) return;
635411
+ diagnostics.attempts ??= [];
635412
+ diagnostics.attempts.push({
635413
+ stage: inferenceKind,
635414
+ ...attempt
635415
+ });
635416
+ };
635254
635417
  const suppressed = telegramThinkSuppressedRequest(request);
635418
+ const requestTimeoutMs = Number.isFinite(suppressed.timeoutMs) && (suppressed.timeoutMs ?? 0) > 0 ? suppressed.timeoutMs : void 0;
635419
+ const jsonStartMs = Date.now();
635255
635420
  try {
635256
635421
  jsonModeResult = await this.telegramObservableInference(
635257
635422
  backend,
635258
- { ...suppressed, responseFormat: TELEGRAM_INTERACTION_DECISION_RESPONSE_FORMAT },
635423
+ {
635424
+ ...suppressed,
635425
+ responseFormat: TELEGRAM_INTERACTION_DECISION_RESPONSE_FORMAT,
635426
+ disableEmptyContentRecovery: true
635427
+ },
635259
635428
  inferenceKind,
635260
635429
  sessionKey,
635261
635430
  { stream: false, reason: "router-json" }
@@ -635265,16 +635434,49 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
635265
635434
  );
635266
635435
  if (visible) {
635267
635436
  if (diagnostics) diagnostics.jsonModeStatus = "visible";
635437
+ recordAttempt({
635438
+ mode: "json-mode",
635439
+ status: "visible",
635440
+ elapsedMs: Date.now() - jsonStartMs,
635441
+ timeoutMs: requestTimeoutMs
635442
+ });
635268
635443
  return jsonModeResult;
635269
635444
  }
635270
635445
  if (diagnostics) diagnostics.jsonModeStatus = "empty-after-strip";
635446
+ recordAttempt({
635447
+ mode: "json-mode",
635448
+ status: "empty-after-strip",
635449
+ elapsedMs: Date.now() - jsonStartMs,
635450
+ timeoutMs: requestTimeoutMs
635451
+ });
635271
635452
  } catch (err) {
635272
635453
  jsonModeError = err;
635273
635454
  if (diagnostics) {
635274
635455
  diagnostics.jsonModeStatus = "threw";
635275
- diagnostics.jsonModeError = err instanceof Error ? err.message : String(err);
635456
+ diagnostics.jsonModeError = telegramRouterErrorText(err);
635457
+ }
635458
+ recordAttempt({
635459
+ mode: "json-mode",
635460
+ status: "threw",
635461
+ error: telegramRouterErrorText(err),
635462
+ elapsedMs: Date.now() - jsonStartMs,
635463
+ timeoutMs: requestTimeoutMs
635464
+ });
635465
+ if (telegramRouterErrorLooksLikeBackendLiveness(err)) {
635466
+ if (diagnostics) {
635467
+ diagnostics.plainStatus = "skipped";
635468
+ diagnostics.plainError = "skipped because json-mode backend liveness failed";
635469
+ }
635470
+ recordAttempt({
635471
+ mode: "plain-retry",
635472
+ status: "skipped",
635473
+ error: "skipped because json-mode backend liveness failed",
635474
+ timeoutMs: requestTimeoutMs
635475
+ });
635476
+ throw err;
635276
635477
  }
635277
635478
  }
635479
+ const plainStartMs = Date.now();
635278
635480
  try {
635279
635481
  const plainResult = await this.telegramObservableInference(
635280
635482
  backend,
@@ -635288,13 +635490,26 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
635288
635490
  (choice) => stripTelegramHiddenThinking(choice.message.content ?? "").trim().length > 0
635289
635491
  );
635290
635492
  diagnostics.plainStatus = plainVisible ? "visible" : "empty-after-strip";
635493
+ recordAttempt({
635494
+ mode: "plain-retry",
635495
+ status: plainVisible ? "visible" : "empty-after-strip",
635496
+ elapsedMs: Date.now() - plainStartMs,
635497
+ timeoutMs: requestTimeoutMs
635498
+ });
635291
635499
  }
635292
635500
  return plainResult;
635293
635501
  } catch (err) {
635294
635502
  if (diagnostics) {
635295
635503
  diagnostics.plainStatus = "threw";
635296
- diagnostics.plainError = err instanceof Error ? err.message : String(err);
635504
+ diagnostics.plainError = telegramRouterErrorText(err);
635297
635505
  }
635506
+ recordAttempt({
635507
+ mode: "plain-retry",
635508
+ status: "threw",
635509
+ error: telegramRouterErrorText(err),
635510
+ elapsedMs: Date.now() - plainStartMs,
635511
+ timeoutMs: requestTimeoutMs
635512
+ });
635298
635513
  if (jsonModeError instanceof Error && !(err instanceof Error)) throw jsonModeError;
635299
635514
  throw err;
635300
635515
  }
@@ -635624,7 +635839,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
635624
635839
  `Original router output:`,
635625
635840
  rawPreview,
635626
635841
  ``,
635627
- `/no_think`
635842
+ `/nothink
635843
+ /no_think`
635628
635844
  ].join("\n");
635629
635845
  try {
635630
635846
  const result = await this.telegramRouterJsonCompletion(backend, {
@@ -635637,8 +635853,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
635637
635853
  ],
635638
635854
  tools: [],
635639
635855
  temperature: 0,
635640
- maxTokens: 800,
635641
- timeoutMs: telegramRouterTimeoutMs(timeoutMs, 8e3, 2e4),
635856
+ maxTokens: 500,
635857
+ timeoutMs: telegramRouterTimeoutMs(timeoutMs, 8e3, 15e3),
635642
635858
  think: false
635643
635859
  }, diagnostics, "router-repair", sessionKey);
635644
635860
  const repairedText = result.choices[0]?.message?.content ?? "";
@@ -635667,7 +635883,7 @@ ${repairedText}`,
635667
635883
  } catch (err) {
635668
635884
  if (diagnostics) {
635669
635885
  diagnostics.repairStatus = "threw";
635670
- diagnostics.repairError = err instanceof Error ? err.message : String(err);
635886
+ diagnostics.repairError = telegramRouterErrorText(err);
635671
635887
  }
635672
635888
  return null;
635673
635889
  }
@@ -635691,7 +635907,8 @@ ${userPrompt.slice(-4e3)}` : userPrompt;
635691
635907
  `Router context (trailing-window):`,
635692
635908
  trimmedUserPrompt,
635693
635909
  ``,
635694
- `/no_think`
635910
+ `/nothink
635911
+ /no_think`
635695
635912
  ].join("\n");
635696
635913
  try {
635697
635914
  const result = await this.telegramRouterJsonCompletion(backend, {
@@ -635704,8 +635921,8 @@ ${userPrompt.slice(-4e3)}` : userPrompt;
635704
635921
  ],
635705
635922
  tools: [],
635706
635923
  temperature: 0,
635707
- maxTokens: 1e3,
635708
- timeoutMs: telegramRouterTimeoutMs(timeoutMs, 1e4, 3e4),
635924
+ maxTokens: 500,
635925
+ timeoutMs: telegramRouterTimeoutMs(timeoutMs, 8e3, 15e3),
635709
635926
  think: false
635710
635927
  }, diagnostics, "router-strict-retry", sessionKey);
635711
635928
  const retryText = result.choices[0]?.message?.content ?? "";
@@ -635733,7 +635950,7 @@ ${retryText}`,
635733
635950
  } catch (err) {
635734
635951
  if (diagnostics) {
635735
635952
  diagnostics.strictRetryStatus = "threw";
635736
- diagnostics.strictRetryError = err instanceof Error ? err.message : String(err);
635953
+ diagnostics.strictRetryError = telegramRouterErrorText(err);
635737
635954
  }
635738
635955
  return null;
635739
635956
  }
@@ -635899,6 +636116,7 @@ ${retryText}`,
635899
636116
  * never fires.
635900
636117
  */
635901
636118
  reapStaleTelegramSubAgents() {
636119
+ this.reapStaleTelegramPreAgentWork();
635902
636120
  const maxIdleMs = this.telegramSubAgentMaxIdleMs();
635903
636121
  const now = Date.now();
635904
636122
  const stale = [];
@@ -635919,6 +636137,7 @@ ${retryText}`,
635919
636137
  clearInterval(agent.typingInterval);
635920
636138
  agent.typingInterval = null;
635921
636139
  }
636140
+ this.stopTelegramPublicProgressMessage(agent);
635922
636141
  try {
635923
636142
  agent.runner?.abort?.();
635924
636143
  } catch {
@@ -635938,6 +636157,10 @@ ${retryText}`,
635938
636157
  this.subAgentViewCallbacks?.onStatus(agent.viewId, "failed");
635939
636158
  this.subAgentViewCallbacks?.onComplete(agent.viewId);
635940
636159
  }
636160
+ if (this.telegramQueuedSessionWork.size > 0) {
636161
+ this.maybeLogTelegramQueueDiagnostic("watchdog");
636162
+ this.dispatchQueuedTelegramSessionWorkSoon();
636163
+ }
635941
636164
  }
635942
636165
  async inferTelegramInteractionDecision(msg, toolContext) {
635943
636166
  const config = this.agentConfig;
@@ -636124,10 +636347,10 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
636124
636347
  ],
636125
636348
  tools: [],
636126
636349
  temperature: 0,
636127
- // Minimal route JSON should fit comfortably; keeping this small avoids
636128
- // reintroducing truncated-note repair cascades.
636129
- maxTokens: 900,
636130
- timeoutMs: telegramRouterTimeoutMs(config.timeoutMs),
636350
+ // Router JSON is tiny. Keep the answer budget tight so Qwen-class
636351
+ // models cannot spend a minute producing hidden <think>-only output.
636352
+ maxTokens: 360,
636353
+ timeoutMs: telegramRouterTimeoutMs(config.timeoutMs, 8e3, 3e4),
636131
636354
  think: false
636132
636355
  }, diagnostics, "router", sessionKey);
636133
636356
  const text = result.choices[0]?.message?.content ?? "";
@@ -636182,8 +636405,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
636182
636405
  ],
636183
636406
  tools: [],
636184
636407
  temperature: 0,
636185
- maxTokens: 1400,
636186
- timeoutMs: telegramRouterTimeoutMs(config.timeoutMs),
636408
+ maxTokens: 700,
636409
+ timeoutMs: telegramRouterTimeoutMs(config.timeoutMs, 8e3, 3e4),
636187
636410
  think: false
636188
636411
  }, diagnostics, "router", sessionKey);
636189
636412
  const reissuedText = reissued.choices[0]?.message?.content ?? "";
@@ -636196,7 +636419,14 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
636196
636419
  } catch {
636197
636420
  }
636198
636421
  }
636199
- const repaired = await this.repairTelegramInteractionDecision(
636422
+ const dualEmptyVisible = telegramRouterDiagnosticIsDualEmptyVisible(diagnostics) && !telegramRouterRawPreview(text);
636423
+ if (dualEmptyVisible) {
636424
+ if (diagnostics.repairStatus === void 0) {
636425
+ diagnostics.repairStatus = "skipped";
636426
+ diagnostics.repairError = "router returned no visible text in json-mode or plain retry; repair/strict retry would only burn more inference";
636427
+ }
636428
+ }
636429
+ const repaired = dualEmptyVisible ? null : await this.repairTelegramInteractionDecision(
636200
636430
  backend,
636201
636431
  text,
636202
636432
  forcedRoute,
@@ -636207,7 +636437,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
636207
636437
  if (repaired) {
636208
636438
  return withRouterTelemetry(this.applyTelegramSilentReflectionNotes(repaired, reflectionNotes));
636209
636439
  }
636210
- const strictRetry = await this.retryTelegramInteractionDecisionStrict(
636440
+ const strictRetry = dualEmptyVisible ? null : await this.retryTelegramInteractionDecisionStrict(
636211
636441
  backend,
636212
636442
  userPrompt,
636213
636443
  text,
@@ -636221,20 +636451,21 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
636221
636451
  }
636222
636452
  const invalidRouterPreview = telegramRouterRawPreview(text);
636223
636453
  const failureNarrative = this.summarizeTelegramRouterFailure(diagnostics);
636454
+ const backendLivenessFailure = (diagnostics.attempts ?? []).some(telegramRouterDiagnosticAttemptLooksLikeBackendLiveness) || telegramRouterErrorLooksLikeBackendLiveness(diagnostics.repairError ?? "") || telegramRouterErrorLooksLikeBackendLiveness(diagnostics.strictRetryError ?? "");
636224
636455
  const fallback = this.applyTelegramSilentReflectionNotes(this.buildTelegramRouterUnavailableDecision(msg, toolContext, {
636225
- reason: "router output was not valid decision JSON after repair/retry; no model-derived reply decision",
636456
+ reason: backendLivenessFailure ? "router recovery hit a backend liveness failure; no model-derived reply decision" : dualEmptyVisible ? "router returned no visible decision content in JSON or plain mode; no model-derived reply decision" : "router output was not valid decision JSON after repair/retry; no model-derived reply decision",
636226
636457
  silentDisposition: reflectionNotes.silentDisposition,
636227
636458
  diagnosticNote: this.composeTelegramRouterDiagnosticNote(
636228
636459
  invalidRouterPreview,
636229
636460
  failureNarrative,
636230
- invalidRouterPreview ? "router produced an invalid attention decision payload; repair and strict retry did not recover it" : "router produced an empty attention decision payload; strict retry did not recover it"
636461
+ backendLivenessFailure ? "router backend failed during attention-decision recovery; no usable router decision was available" : dualEmptyVisible ? "router returned no visible decision content in JSON or plain mode; repair/strict retry skipped" : invalidRouterPreview ? "router produced an invalid attention decision payload; repair and strict retry did not recover it" : "router produced an empty attention decision payload; strict retry did not recover it"
636231
636462
  ),
636232
636463
  raw: text
636233
636464
  }), reflectionNotes);
636234
636465
  return withRouterTelemetry(fallback);
636235
636466
  } catch (err) {
636236
636467
  const failureNarrative = this.summarizeTelegramRouterFailure(diagnostics);
636237
- const errMsg = err instanceof Error ? err.message : String(err);
636468
+ const errMsg = telegramRouterErrorText(err);
636238
636469
  const fallback = this.applyTelegramSilentReflectionNotes(this.buildTelegramRouterUnavailableDecision(msg, toolContext, {
636239
636470
  reason: `router inference failed; no model-derived reply decision (${errMsg.slice(0, 160)})`,
636240
636471
  silentDisposition: reflectionNotes.silentDisposition,
@@ -636247,46 +636478,93 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
636247
636478
  return withRouterTelemetry(fallback);
636248
636479
  }
636249
636480
  }
636481
+ formatTelegramRouterAttemptTrace(attempt) {
636482
+ const elapsed = Number.isFinite(attempt.elapsedMs) ? ` elapsedMs=${Math.max(0, Math.round(attempt.elapsedMs ?? 0))}` : "";
636483
+ const timeout2 = Number.isFinite(attempt.timeoutMs) ? ` timeoutMs=${Math.max(0, Math.round(attempt.timeoutMs ?? 0))}` : "";
636484
+ const error = attempt.error ? ` (${compactTelegramRouterDiagnosticText(attempt.error, 180)})` : "";
636485
+ return `${attempt.stage}/${attempt.mode}: ${attempt.status}${error}${elapsed}${timeout2}`;
636486
+ }
636487
+ telegramRouterBackendLivenessHint(timeoutSeen) {
636488
+ const backendType = this.agentConfig?.backendType ?? "backend";
636489
+ if (backendType === "ollama") {
636490
+ return timeoutSeen ? "local Ollama did not return router tokens within the liveness window; likely queue backlog, cold model load, GPU/RAM contention, or a wedged runner. JSON repair cannot recover until the backend responds" : "local Ollama request failed before router content was available; check Ollama reachability, model availability, and runner health";
636491
+ }
636492
+ return timeoutSeen ? `${backendType} backend did not return router tokens within the liveness window; check provider latency, rate limits, and request queue depth` : `${backendType} backend request failed before router content was available; check provider reachability and rate limits`;
636493
+ }
636250
636494
  /**
636251
636495
  * Reduce captured per-step diagnostics into:
636252
636496
  * - `summary`: a short outcome-level diagnostic
636253
636497
  * - `detail`: a longer ordered trace for operator debugging
636254
- * - `operatorHint`: an operational hint (e.g. "ollama backend appears to be injecting <think> tags despite think:false")
636498
+ * - `operatorHint`: an operational hint with backend/model context
636255
636499
  */
636256
636500
  summarizeTelegramRouterFailure(diag) {
636257
636501
  const parts = [];
636258
636502
  const detailParts = [];
636259
636503
  let thinkInjectionSuspected = false;
636260
636504
  let networkErrorSeen = false;
636261
- if (diag.jsonModeStatus === "threw") {
636262
- parts.push(`json-mode call threw`);
636263
- detailParts.push(`json-mode: threw (${diag.jsonModeError ?? "no detail"})`);
636264
- networkErrorSeen = true;
636265
- } else if (diag.jsonModeStatus === "empty-after-strip") {
636266
- parts.push(`json-mode returned empty content (likely <think>-only)`);
636267
- detailParts.push(`json-mode: empty-after-strip`);
636268
- thinkInjectionSuspected = true;
636269
- } else if (diag.jsonModeStatus === "visible") {
636270
- detailParts.push(`json-mode: visible`);
636271
- }
636272
- if (diag.plainStatus === "threw") {
636273
- parts.push(`plain call threw`);
636274
- detailParts.push(`plain: threw (${diag.plainError ?? "no detail"})`);
636275
- networkErrorSeen = true;
636276
- } else if (diag.plainStatus === "empty-after-strip") {
636277
- parts.push(`plain call returned empty content`);
636278
- detailParts.push(`plain: empty-after-strip`);
636279
- thinkInjectionSuspected = true;
636280
- } else if (diag.plainStatus === "visible") {
636281
- detailParts.push(`plain: visible-but-unparseable`);
636505
+ let timeoutSeen = false;
636506
+ const attempts = diag.attempts ?? [];
636507
+ if (attempts.length > 0) {
636508
+ const livenessAttempts = attempts.filter(telegramRouterDiagnosticAttemptLooksLikeBackendLiveness);
636509
+ const timeoutAttempts = attempts.filter(telegramRouterDiagnosticAttemptLooksLikeTimeout);
636510
+ const emptyAttempts = attempts.filter((attempt) => attempt.status === "empty-after-strip");
636511
+ const visibleAttempts = attempts.filter((attempt) => attempt.status === "visible");
636512
+ const skippedAttempts = attempts.filter((attempt) => attempt.status === "skipped");
636513
+ networkErrorSeen = livenessAttempts.length > 0;
636514
+ timeoutSeen = timeoutAttempts.length > 0;
636515
+ thinkInjectionSuspected = emptyAttempts.length > 0;
636516
+ if (livenessAttempts.length > 0) {
636517
+ const affected = livenessAttempts.map((attempt) => `${attempt.stage}/${attempt.mode}`).join(", ");
636518
+ parts.push(timeoutSeen ? `backend timeout/liveness failure during ${affected}` : `backend liveness failure during ${affected}`);
636519
+ }
636520
+ if (emptyAttempts.length > 0) {
636521
+ const affected = emptyAttempts.map((attempt) => `${attempt.stage}/${attempt.mode}`).join(", ");
636522
+ parts.push(`empty visible content after <think> strip during ${affected}`);
636523
+ }
636524
+ if (visibleAttempts.length > 0 && diag.repairStatus !== "recovered" && diag.strictRetryStatus !== "recovered") {
636525
+ parts.push(`visible router text remained unparseable`);
636526
+ }
636527
+ if (skippedAttempts.length > 0) {
636528
+ const affected = skippedAttempts.map((attempt) => `${attempt.stage}/${attempt.mode}`).join(", ");
636529
+ parts.push(`fallback attempt skipped during ${affected}`);
636530
+ }
636531
+ detailParts.push(...attempts.map((attempt) => this.formatTelegramRouterAttemptTrace(attempt)));
636532
+ } else {
636533
+ if (diag.jsonModeStatus === "threw") {
636534
+ parts.push(`json-mode call threw`);
636535
+ detailParts.push(`json-mode: threw (${diag.jsonModeError ?? "no detail"})`);
636536
+ networkErrorSeen = true;
636537
+ timeoutSeen = telegramRouterErrorLooksLikeTimeout(diag.jsonModeError ?? "");
636538
+ } else if (diag.jsonModeStatus === "empty-after-strip") {
636539
+ parts.push(`json-mode returned empty content (likely <think>-only)`);
636540
+ detailParts.push(`json-mode: empty-after-strip`);
636541
+ thinkInjectionSuspected = true;
636542
+ } else if (diag.jsonModeStatus === "visible") {
636543
+ detailParts.push(`json-mode: visible`);
636544
+ }
636545
+ if (diag.plainStatus === "threw") {
636546
+ parts.push(`plain call threw`);
636547
+ detailParts.push(`plain: threw (${diag.plainError ?? "no detail"})`);
636548
+ networkErrorSeen = true;
636549
+ timeoutSeen ||= telegramRouterErrorLooksLikeTimeout(diag.plainError ?? "");
636550
+ } else if (diag.plainStatus === "empty-after-strip") {
636551
+ parts.push(`plain call returned empty content`);
636552
+ detailParts.push(`plain: empty-after-strip`);
636553
+ thinkInjectionSuspected = true;
636554
+ } else if (diag.plainStatus === "visible") {
636555
+ detailParts.push(`plain: visible-but-unparseable`);
636556
+ }
636282
636557
  }
636283
636558
  if (diag.repairStatus === "skipped") {
636284
636559
  detailParts.push(`repair: skipped (${diag.repairError ?? "no recoverable input"})`);
636560
+ } else if (diag.repairStatus === "skipped-truncation-rerun") {
636561
+ detailParts.push(`repair: skipped for truncation rerun`);
636285
636562
  } else if (diag.repairStatus === "no-recoverable-output") {
636286
636563
  detailParts.push(`repair: returned non-recoverable JSON`);
636287
636564
  } else if (diag.repairStatus === "threw") {
636288
636565
  detailParts.push(`repair: threw (${diag.repairError ?? "no detail"})`);
636289
636566
  networkErrorSeen = true;
636567
+ timeoutSeen ||= telegramRouterErrorLooksLikeTimeout(diag.repairError ?? "");
636290
636568
  } else if (diag.repairStatus === "recovered") {
636291
636569
  detailParts.push(`repair: recovered`);
636292
636570
  }
@@ -636298,14 +636576,15 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
636298
636576
  } else if (diag.strictRetryStatus === "threw") {
636299
636577
  detailParts.push(`strict-retry: threw (${diag.strictRetryError ?? "no detail"})`);
636300
636578
  networkErrorSeen = true;
636579
+ timeoutSeen ||= telegramRouterErrorLooksLikeTimeout(diag.strictRetryError ?? "");
636301
636580
  } else if (diag.strictRetryStatus === "recovered") {
636302
636581
  detailParts.push(`strict-retry: recovered`);
636303
636582
  }
636304
636583
  let operatorHint;
636305
636584
  if (networkErrorSeen) {
636306
- operatorHint = "router backend appears unreachable or rate-limited; continued conversation depends on recovery";
636585
+ operatorHint = this.telegramRouterBackendLivenessHint(timeoutSeen);
636307
636586
  } else if (thinkInjectionSuspected) {
636308
- operatorHint = "router model emitted <think>-only or unclosed-think output; conversation continuity preserved but inference is degraded";
636587
+ operatorHint = "router model emitted <think>-only or unclosed-think output despite think suppression; visible decision content was empty after stripping hidden reasoning";
636309
636588
  }
636310
636589
  return {
636311
636590
  summary: parts.join("; "),
@@ -636321,7 +636600,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
636321
636600
  if (invalidRouterPreview) segments.push(`invalid router output preview: ${invalidRouterPreview}`);
636322
636601
  if (failureNarrative.detail) segments.push(`router-failure trace: ${failureNarrative.detail}`);
636323
636602
  if (failureNarrative.operatorHint) segments.push(failureNarrative.operatorHint);
636324
- return segments.join(" | ").slice(0, 900);
636603
+ return segments.join(" | ").slice(0, 1400);
636325
636604
  }
636326
636605
  buildTelegramWorkspaceContext(modelTier, budget = 14e3) {
636327
636606
  if (!this.repoRoot) return "";
@@ -636754,6 +637033,7 @@ ${TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT}`);
636754
637033
  for (const [, agent] of this.subAgents) {
636755
637034
  agent.aborted = true;
636756
637035
  if (agent.typingInterval) clearInterval(agent.typingInterval);
637036
+ this.stopTelegramPublicProgressMessage(agent);
636757
637037
  try {
636758
637038
  agent.runner?.abort?.();
636759
637039
  } catch {
@@ -636769,6 +637049,8 @@ ${TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT}`);
636769
637049
  }
636770
637050
  this.telegramQueuedSessionWork.clear();
636771
637051
  this.telegramActiveWorkSessions.clear();
637052
+ this.telegramActiveWorkGenerations.clear();
637053
+ this.telegramActiveWorkStartedAtMs.clear();
636772
637054
  this.telegramAdminLivePanels.clear();
636773
637055
  this.flushTelegramViewWrites();
636774
637056
  this.flushTelegramTuiWrites();
@@ -636955,6 +637237,62 @@ ${TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT}`);
636955
637237
  }
636956
637238
  }
636957
637239
  }
637240
+ shouldUseTelegramPublicProgressMessage(msg, toolContext) {
637241
+ return toolContext === "telegram-public" && msg.chatType !== "private" && !msg.guestQueryId;
637242
+ }
637243
+ renderTelegramPublicProgressHTML(subAgent, msg, phase) {
637244
+ const elapsedSec = Math.max(0, Math.floor((Date.now() - subAgent.startedAtMs) / 1e3));
637245
+ const sessionKey = this.sessionKeyForMessage(msg);
637246
+ const activeInference = this.getTelegramActiveInferences().find((inf) => inf.sessionKey === sessionKey);
637247
+ const status = activeInference ? activeInference.ttfbSec === void 0 ? `model request active; waiting for first token (${activeInference.kind}, ${activeInference.elapsedSec.toFixed(1)}s)` : `streaming ${activeInference.kind}; content=${activeInference.contentTokens}t thinking=${activeInference.thinkingTokens}t` : phase;
637248
+ const width = 12;
637249
+ const filled = Math.min(width, Math.floor(elapsedSec % 60 / 60 * width));
637250
+ const bar = `[${"#".repeat(filled)}${"-".repeat(width - filled)}]`;
637251
+ return [
637252
+ `<b>Working</b>`,
637253
+ `<code>${bar}</code> ${elapsedSec}s`,
637254
+ `<i>${escapeTelegramHTML(status)}</i>`
637255
+ ].join("\n");
637256
+ }
637257
+ startTelegramPublicProgressMessage(subAgent, msg, phase) {
637258
+ if (!this.shouldUseTelegramPublicProgressMessage(msg, subAgent.toolContext)) return;
637259
+ if (subAgent.publicProgressTimer) return;
637260
+ const update2 = () => {
637261
+ if (subAgent.aborted) return;
637262
+ if (!this.subAgents.has(this.sessionKeyForMessage(msg))) return;
637263
+ const html = this.renderTelegramPublicProgressHTML(subAgent, msg, phase);
637264
+ if (subAgent.liveMessageId) {
637265
+ const now = Date.now();
637266
+ if (now - subAgent.lastEditMs < 3e3) return;
637267
+ subAgent.lastEditMs = now;
637268
+ void this.editLiveMessage(msg.chatId, subAgent.liveMessageId, html).catch(() => {
637269
+ });
637270
+ return;
637271
+ }
637272
+ if (subAgent.liveMessagePromise) return;
637273
+ subAgent.liveMessagePromise = this.sendLiveMessage(
637274
+ msg.chatId,
637275
+ html,
637276
+ msg.chatType !== "private" ? msg.messageId : void 0
637277
+ ).then((id) => {
637278
+ subAgent.liveMessageId = id;
637279
+ subAgent.lastEditMs = Date.now();
637280
+ }).catch(() => {
637281
+ }).finally(() => {
637282
+ subAgent.liveMessagePromise = null;
637283
+ });
637284
+ };
637285
+ update2();
637286
+ subAgent.publicProgressTimer = setInterval(update2, 5e3);
637287
+ if (typeof subAgent.publicProgressTimer.unref === "function") {
637288
+ subAgent.publicProgressTimer.unref();
637289
+ }
637290
+ }
637291
+ stopTelegramPublicProgressMessage(subAgent) {
637292
+ if (!subAgent.publicProgressTimer) return;
637293
+ clearInterval(subAgent.publicProgressTimer);
637294
+ subAgent.publicProgressTimer = null;
637295
+ }
636958
637296
  ensureTelegramAdminLivePanel(subAgent, msg) {
636959
637297
  const existing = subAgent.adminLivePanelNonce ? this.telegramAdminLivePanels.get(subAgent.adminLivePanelNonce) : void 0;
636960
637298
  if (existing) return existing;
@@ -637213,11 +637551,12 @@ Join: ${newUrl}`);
637213
637551
  }
637214
637552
  this.scheduleTelegramSessionWork(msg, toolContext);
637215
637553
  }
637216
- async processTelegramMessageWork(work) {
637554
+ async processTelegramMessageWork(work, workGeneration) {
637217
637555
  const msg = work.msg;
637218
637556
  const toolContext = work.toolContext;
637219
637557
  const sessionKey = this.sessionKeyForMessage(msg);
637220
637558
  const isAdminDM = toolContext === "telegram-admin-dm";
637559
+ if (!this.telegramWorkGenerationIsCurrent(sessionKey, workGeneration)) return;
637221
637560
  const existing = this.subAgents.get(sessionKey);
637222
637561
  if (existing && !existing.aborted) {
637223
637562
  await this.enqueueTelegramQueuedSessionWorkForExistingSubAgent(work, existing);
@@ -637235,6 +637574,13 @@ Join: ${newUrl}`);
637235
637574
  } catch (err) {
637236
637575
  decision2 = this.fallbackTelegramRouterDecision(msg, toolContext, err);
637237
637576
  }
637577
+ if (!this.telegramWorkGenerationIsCurrent(sessionKey, workGeneration)) {
637578
+ this.tuiWrite(() => renderTelegramSubAgentEvent(
637579
+ msg.username,
637580
+ `discarded stale Telegram work result after queue pin release for ${sessionKey}`
637581
+ ));
637582
+ return;
637583
+ }
637238
637584
  const storedPreference = this.applyTelegramReplyPreferenceUpdate(
637239
637585
  sessionKey,
637240
637586
  msg,
@@ -637352,6 +637698,7 @@ Join: ${newUrl}`);
637352
637698
  if (replyEdge) {
637353
637699
  this.tuiWrite(() => renderTelegramSubAgentEvent(msg.username, replyEdge));
637354
637700
  }
637701
+ this.startTelegramPublicProgressMessage(subAgent, msg, "taking notes and preparing tools");
637355
637702
  try {
637356
637703
  let mediaContext = "";
637357
637704
  if (msg.media || msg.replyToMedia) {
@@ -637368,6 +637715,7 @@ Join: ${newUrl}`);
637368
637715
  clearInterval(subAgent.typingInterval);
637369
637716
  subAgent.typingInterval = null;
637370
637717
  }
637718
+ this.stopTelegramPublicProgressMessage(subAgent);
637371
637719
  const finalText = cleanTelegramVisibleReply(result || "");
637372
637720
  if (isAdminDM && !this.telegramAdminRunCompleted(subAgent)) {
637373
637721
  const incompleteText = this.telegramAdminIncompleteRunText(subAgent, finalText);
@@ -637436,6 +637784,7 @@ Join: ${newUrl}`);
637436
637784
  clearInterval(subAgent.typingInterval);
637437
637785
  subAgent.typingInterval = null;
637438
637786
  }
637787
+ this.stopTelegramPublicProgressMessage(subAgent);
637439
637788
  const errMsg = err instanceof Error ? err.message : String(err);
637440
637789
  this.tuiWrite(() => renderTelegramSubAgentError(msg.username, errMsg));
637441
637790
  this.subAgentViewCallbacks?.onWrite(subAgent.viewId, `error: ${errMsg}`);
@@ -637452,6 +637801,7 @@ Join: ${newUrl}`);
637452
637801
  });
637453
637802
  }
637454
637803
  } finally {
637804
+ this.stopTelegramPublicProgressMessage(subAgent);
637455
637805
  this.clearTelegramSubAgentContextBuffer(sessionKey);
637456
637806
  this.subAgents.delete(sessionKey);
637457
637807
  this.refreshActiveTelegramInteractionCount();
@@ -637625,6 +637975,24 @@ Join: ${newUrl}`);
637625
637975
  typingInterval = this.startTypingIndicator(msg.chatId);
637626
637976
  }
637627
637977
  this.tuiWrite(() => renderTelegramSubAgentEvent(msg.username, `live inference: chat reply (${this.interactionMode})`));
637978
+ if (this.shouldUseTelegramPublicProgressMessage(msg, toolContext)) {
637979
+ const initialHtml = [
637980
+ `<b>Working</b>`,
637981
+ `<code>[------------]</code> 0s`,
637982
+ `<i>preparing a concise reply</i>`
637983
+ ].join("\n");
637984
+ liveMessagePromise = this.sendLiveMessage(
637985
+ msg.chatId,
637986
+ initialHtml,
637987
+ msg.chatType !== "private" ? msg.messageId : void 0
637988
+ ).then((id) => {
637989
+ liveMessageId = id;
637990
+ lastEditMs = Date.now();
637991
+ }).catch(() => {
637992
+ }).finally(() => {
637993
+ liveMessagePromise = null;
637994
+ });
637995
+ }
637628
637996
  try {
637629
637997
  const mediaContext = msg.media || msg.replyToMedia || msg.livePhoto ? await this.processMediaContextForMessage(msg) : "";
637630
637998
  const contextualPayload = [mediaContext, additionalContext].filter(Boolean).join("\n\n");
@@ -641503,11 +641871,23 @@ ${caption}\r
641503
641871
  }
641504
641872
  } catch (err) {
641505
641873
  if (this.polling) {
641874
+ const now = Date.now();
641875
+ if (now - this.telegramPollWarningLastAtMs > 3e4) {
641876
+ this.telegramPollWarningLastAtMs = now;
641877
+ this.tuiWrite(() => renderWarning(
641878
+ `Telegram polling warning: getUpdates failed (${err instanceof Error ? err.message : String(err)}); retrying`
641879
+ ));
641880
+ }
641506
641881
  await new Promise((r2) => setTimeout(r2, 5e3));
641507
641882
  }
641508
641883
  }
641509
641884
  }
641510
641885
  }
641886
+ telegramLongPollClientTimeoutMs() {
641887
+ const raw = Number.parseInt(process.env["OMNIUS_TG_LONG_POLL_CLIENT_TIMEOUT_MS"] ?? "", 10);
641888
+ if (Number.isFinite(raw) && raw >= 35e3 && raw <= 3e5) return raw;
641889
+ return 45e3;
641890
+ }
641511
641891
  /** Make a Telegram Bot API call with rate-limit retry */
641512
641892
  async apiCall(method, body, _retryDepth = 0) {
641513
641893
  const url = `https://api.telegram.org/bot${this.botToken}/${method}`;
@@ -641520,7 +641900,13 @@ ${caption}\r
641520
641900
  }
641521
641901
  const isLongPoll = method === "getUpdates";
641522
641902
  if (isLongPoll && this.abortController) {
641523
- options2.signal = this.abortController.signal;
641903
+ const timeoutFn = AbortSignal.timeout;
641904
+ const anyFn = AbortSignal.any;
641905
+ const signals = [
641906
+ this.abortController.signal,
641907
+ typeof timeoutFn === "function" ? timeoutFn(this.telegramLongPollClientTimeoutMs()) : void 0
641908
+ ].filter((signal) => signal instanceof AbortSignal);
641909
+ options2.signal = typeof anyFn === "function" && signals.length > 1 ? anyFn(signals) : signals[0];
641524
641910
  } else if (!isLongPoll) {
641525
641911
  options2.signal = AbortSignal.timeout(3e4);
641526
641912
  }
@@ -659477,6 +659863,30 @@ function sanitizeChatContent(raw) {
659477
659863
  }
659478
659864
  return cleaned.join("\n").trim();
659479
659865
  }
659866
/**
 * Append "no think" directives to the last user message of a chat history.
 *
 * Two spellings are handled independently: `/nothink` and `/no_think`
 * (`/no-think` also counts as the latter being present). Whichever spelling
 * is missing from the last user message is appended after a blank line,
 * one directive per line. The input array and its message objects are never
 * mutated; a new array is returned only when something was appended.
 *
 * The input is returned unchanged when it is not an array, when it has no
 * message with role "user", when the last user message's `content` is not a
 * string, or when both directives are already present.
 *
 * @param {Array<object>} messages2 - Chat messages with `role` and `content`.
 * @returns {Array<object>} The original input, or a copy whose last user
 *   message carries the missing directive(s).
 */
function appendNoThinkDirectivesToMessages(messages2) {
  // Malformed payloads pass through untouched rather than throwing here, so
  // every call site gets the same tolerance without its own Array.isArray guard.
  if (!Array.isArray(messages2)) return messages2;

  let lastUserIdx = -1;
  for (let i2 = messages2.length - 1; i2 >= 0; i2--) {
    if (messages2[i2]?.role === "user") {
      lastUserIdx = i2;
      break;
    }
  }
  if (lastUserIdx < 0) return messages2;

  const target = messages2[lastUserIdx];
  if (typeof target.content !== "string") return messages2;

  const missing = [];
  if (!/\/nothink\b/i.test(target.content)) missing.push("/nothink");
  if (!/\/no[_-]think\b/i.test(target.content)) missing.push("/no_think");
  if (missing.length === 0) return messages2;

  const content = `${target.content}\n\n${missing.join("\n")}`;
  return messages2.map((m2, i2) => (i2 === lastUserIdx ? { ...m2, content } : m2));
}
659480
659890
  async function directChatBackend(opts) {
659481
659891
  const { model, messages: messages2, stream, res, sessionId, ollamaUrl, extraFields } = opts;
659482
659892
  const cfg = loadConfig();
@@ -659565,13 +659975,12 @@ async function directChatBackend(opts) {
659565
659975
  if (Array.isArray(ef["stop"]) || typeof ef["stop"] === "string") ollamaOpts["stop"] = ef["stop"];
659566
659976
  const hasTools = Array.isArray(ef["tools"]) && ef["tools"].length > 0;
659567
659977
  const ollamaFormat = ollamaFormatFromOpenAIResponseFormat(ef["response_format"]);
659978
+ const ollamaMessages = appendNoThinkDirectivesToMessages(messages2);
659568
659979
  const reqBody = JSON.stringify({
659569
659980
  model: cleanModel,
659570
- messages: messages2,
659981
+ messages: ollamaMessages,
659571
659982
  stream,
659572
- // Don't force think:false when the caller is using tool calling —
659573
- // thinking models often need their reasoning chain to choose a tool.
659574
- ...hasTools ? {} : { think: false },
659983
+ think: false,
659575
659984
  ...hasTools ? { tools: ef["tools"] } : {},
659576
659985
  ...ef["tool_choice"] !== void 0 ? { tool_choice: ef["tool_choice"] } : {},
659577
659986
  ...ollamaFormat !== void 0 ? { format: ollamaFormat } : {},
@@ -659801,13 +660210,18 @@ async function completeRealtimeTextOnly(opts) {
659801
660210
  if (!requestedModel) {
659802
660211
  originalModel = realtimeOllamaFallbackCache.get(realtimeFallbackCacheKey(targetUrl, originalModel)) ?? originalModel;
659803
660212
  }
659804
- const makeOllamaChatBody = (modelName) => JSON.stringify({
659805
- model: modelName,
659806
- messages: requestBody["messages"],
659807
- stream: false,
659808
- think: false,
659809
- options: { temperature, num_predict: maxTokens }
659810
- });
660213
+ const makeOllamaChatBody = (modelName) => {
660214
+ const rtMessages = Array.isArray(requestBody["messages"]) ? appendNoThinkDirectivesToMessages(
660215
+ requestBody["messages"]
660216
+ ) : requestBody["messages"];
660217
+ return JSON.stringify({
660218
+ model: modelName,
660219
+ messages: rtMessages,
660220
+ stream: false,
660221
+ think: false,
660222
+ options: { temperature, num_predict: maxTokens }
660223
+ });
660224
+ };
659811
660225
  let result = await ollamaRequest(targetUrl, "/api/chat", "POST", makeOllamaChatBody(originalModel), timeoutMs, route?.endpoint);
659812
660226
  if (result.status >= 400 && !requestedModel && isOllamaMissingModelError(result.body)) {
659813
660227
  const fallbackModel = await resolveRealtimeOllamaFallbackModel(targetUrl, timeoutMs, originalModel);
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.181",
3
+ "version": "1.0.183",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "omnius",
9
- "version": "1.0.181",
9
+ "version": "1.0.183",
10
10
  "bundleDependencies": [
11
11
  "image-to-ascii"
12
12
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.181",
3
+ "version": "1.0.183",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",