omnius 1.0.182 → 1.0.184

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -550132,11 +550132,17 @@ function injectNoThinkDirective(messages2) {
550132
550132
  const target = messages2[lastUserIdx];
550133
550133
  if (!target || typeof target.content !== "string")
550134
550134
  return messages2;
550135
- if (/\/no_think\b/i.test(target.content))
550135
+ const hasOllamaNoThink = /\/nothink\b/i.test(target.content);
550136
+ const hasQwenNoThink = /\/no[_-]think\b/i.test(target.content);
550137
+ if (hasOllamaNoThink && hasQwenNoThink)
550136
550138
  return messages2;
550139
+ const suffix = [
550140
+ hasOllamaNoThink ? null : "/nothink",
550141
+ hasQwenNoThink ? null : "/no_think"
550142
+ ].filter(Boolean).join("\n");
550137
550143
  const annotated = `${target.content}
550138
550144
 
550139
- /no_think`;
550145
+ ${suffix}`;
550140
550146
  return messages2.map((m2, i2) => i2 === lastUserIdx ? { ...m2, content: annotated } : m2);
550141
550147
  }
550142
550148
  function backendHttpErrorDetail(text) {
@@ -550154,6 +550160,8 @@ function isOllamaModelNotFoundResponse(status, text, model) {
550154
550160
  function computeEffectiveThink(params) {
550155
550161
  if (process.env["OMNIUS_FORCE_NO_THINK"] === "1")
550156
550162
  return false;
550163
+ if (process.env["OMNIUS_ENABLE_THINKING"] !== "1")
550164
+ return false;
550157
550165
  if (params.suppressed)
550158
550166
  return false;
550159
550167
  if (params.hasTools)
@@ -550172,18 +550180,9 @@ function computeEffectiveThink(params) {
550172
550180
  return params.defaultThink;
550173
550181
  }
550174
550182
  function sanitizeHistoryThink(messages2) {
550175
- let lastAsstIdx = -1;
550176
- for (let i2 = messages2.length - 1; i2 >= 0; i2--) {
550177
- if (messages2[i2]?.role === "assistant") {
550178
- lastAsstIdx = i2;
550179
- break;
550180
- }
550181
- }
550182
- return messages2.map((m2, i2) => {
550183
+ return messages2.map((m2) => {
550183
550184
  if (m2.role !== "assistant" || typeof m2.content !== "string")
550184
550185
  return m2;
550185
- if (i2 === lastAsstIdx)
550186
- return m2;
550187
550186
  return { ...m2, content: stripThinkBlocks(m2.content) };
550188
550187
  });
550189
550188
  }
@@ -563608,10 +563607,11 @@ ${description}`
563608
563607
  if (effectiveThink === true && (effectiveMaxTokens ?? 0) < 4096) {
563609
563608
  effectiveMaxTokens = 4096;
563610
563609
  }
563610
+ const requestMessages = effectiveThink ? cleanedMessages : injectNoThinkDirective(cleanedMessages);
563611
563611
  const responseFormat = request.responseFormat ?? request.response_format;
563612
563612
  const body = {
563613
563613
  model: this.model,
563614
- messages: cleanedMessages,
563614
+ messages: requestMessages,
563615
563615
  tools: request.tools,
563616
563616
  temperature: request.temperature,
563617
563617
  max_tokens: effectiveMaxTokens,
@@ -563620,7 +563620,7 @@ ${description}`
563620
563620
  if (responseFormat !== void 0) {
563621
563621
  body["response_format"] = responseFormat;
563622
563622
  }
563623
- const reqNumCtx = request.numCtx;
563623
+ const reqNumCtx = request.numCtx ?? request.num_ctx;
563624
563624
  if (Number.isFinite(reqNumCtx) && (reqNumCtx ?? 0) > 0) {
563625
563625
  const opts = body["options"] ?? {};
563626
563626
  opts["num_ctx"] = reqNumCtx;
@@ -563705,7 +563705,7 @@ ${description}`
563705
563705
  const justSuppressed = this._thinkSuppressed && this._thinkFailStreak === _OllamaAgenticBackend._thinkFailThreshold;
563706
563706
  const shouldRetryThinkGuard = outcome !== null && effectiveThink === true && (justSuppressed || outcome === "empty_after_strip" || outcome === "unclosed_think");
563707
563707
  if (shouldRetryThinkGuard || shouldRecoverFromEmpty) {
563708
- const retryMessages = injectNoThinkDirective(cleanedMessages);
563708
+ const retryMessages = injectNoThinkDirective(requestMessages);
563709
563709
  const retryBody = {
563710
563710
  model: this.model,
563711
563711
  messages: retryMessages,
@@ -563892,7 +563892,7 @@ ${description}`
563892
563892
  * Ollama pool routing as non-stream completions.
563893
563893
  */
563894
563894
  async *chatCompletionStream(request) {
563895
- const cleanedMessages = normalizeMessagesForStrictOpenAI(request.messages.map((m2) => m2.role === "assistant" && typeof m2.content === "string" ? { ...m2, content: stripThinkBlocks(m2.content) } : m2));
563895
+ const cleanedMessages = normalizeMessagesForStrictOpenAI(sanitizeHistoryThink(request.messages));
563896
563896
  let effectiveThink = computeEffectiveThink({
563897
563897
  requestThink: request.think,
563898
563898
  defaultThink: this.thinking,
@@ -563907,10 +563907,11 @@ ${description}`
563907
563907
  if (effectiveThink === true && (effectiveMaxTokens ?? 0) < 4096) {
563908
563908
  effectiveMaxTokens = 4096;
563909
563909
  }
563910
+ const requestMessages = effectiveThink ? cleanedMessages : injectNoThinkDirective(cleanedMessages);
563910
563911
  const responseFormat = request.responseFormat ?? request.response_format;
563911
563912
  const body = {
563912
563913
  model: this.model,
563913
- messages: cleanedMessages,
563914
+ messages: requestMessages,
563914
563915
  tools: request.tools,
563915
563916
  temperature: request.temperature,
563916
563917
  max_tokens: effectiveMaxTokens,
@@ -563921,7 +563922,7 @@ ${description}`
563921
563922
  if (responseFormat !== void 0) {
563922
563923
  body["response_format"] = responseFormat;
563923
563924
  }
563924
- const reqNumCtx = request.numCtx;
563925
+ const reqNumCtx = request.numCtx ?? request.num_ctx;
563925
563926
  if (Number.isFinite(reqNumCtx) && (reqNumCtx ?? 0) > 0) {
563926
563927
  const opts = body["options"] ?? {};
563927
563928
  opts["num_ctx"] = reqNumCtx;
@@ -564176,6 +564177,57 @@ var init_nexusBackend = __esm({
564176
564177
  this.authKey = authKey || "";
564177
564178
  this.thinking = thinking ?? false;
564178
564179
  }
564180
+ effectiveThink(request) {
564181
+ if (process.env["OMNIUS_FORCE_NO_THINK"] === "1")
564182
+ return false;
564183
+ if (process.env["OMNIUS_ENABLE_THINKING"] !== "1")
564184
+ return false;
564185
+ if (Array.isArray(request.tools) && request.tools.length > 0)
564186
+ return false;
564187
+ if (request.think === true)
564188
+ return true;
564189
+ if (request.think === false)
564190
+ return false;
564191
+ return this.thinking === true;
564192
+ }
564193
+ noThinkMessages(messages2) {
564194
+ let lastUserIdx = -1;
564195
+ for (let i2 = messages2.length - 1; i2 >= 0; i2--) {
564196
+ if (messages2[i2]?.role === "user") {
564197
+ lastUserIdx = i2;
564198
+ break;
564199
+ }
564200
+ }
564201
+ if (lastUserIdx < 0)
564202
+ return messages2;
564203
+ const target = messages2[lastUserIdx];
564204
+ if (!target || typeof target.content !== "string")
564205
+ return messages2;
564206
+ const hasOllamaNoThink = /\/nothink\b/i.test(target.content);
564207
+ const hasQwenNoThink = /\/no[_-]think\b/i.test(target.content);
564208
+ if (hasOllamaNoThink && hasQwenNoThink)
564209
+ return messages2;
564210
+ const suffix = [
564211
+ hasOllamaNoThink ? null : "/nothink",
564212
+ hasQwenNoThink ? null : "/no_think"
564213
+ ].filter(Boolean).join("\n");
564214
+ return messages2.map((m2, i2) => i2 === lastUserIdx ? { ...m2, content: `${target.content}
564215
+
564216
+ ${suffix}` } : m2);
564217
+ }
564218
+ requestMessages(request, effectiveThink) {
564219
+ return effectiveThink ? request.messages : this.noThinkMessages(request.messages);
564220
+ }
564221
+ applyOptionalRequestFields(daemonArgs, request) {
564222
+ const responseFormat = request.responseFormat ?? request.response_format;
564223
+ if (responseFormat !== void 0) {
564224
+ daemonArgs.response_format = JSON.stringify(responseFormat);
564225
+ }
564226
+ const numCtx = request.numCtx ?? request.num_ctx;
564227
+ if (Number.isFinite(numCtx) && (numCtx ?? 0) > 0) {
564228
+ daemonArgs.num_ctx = String(numCtx);
564229
+ }
564230
+ }
564179
564231
  /** Reset the consecutive failure counter (called on endpoint switch / reconnect) */
564180
564232
  resetFailures() {
564181
564233
  this.consecutiveFailures = 0;
@@ -564191,9 +564243,10 @@ var init_nexusBackend = __esm({
564191
564243
  err.fatal = true;
564192
564244
  throw err;
564193
564245
  }
564246
+ const effectiveThink = this.effectiveThink(request);
564194
564247
  const daemonArgs = {
564195
564248
  model: this.model,
564196
- messages: JSON.stringify(request.messages),
564249
+ messages: JSON.stringify(this.requestMessages(request, effectiveThink)),
564197
564250
  tools: JSON.stringify(request.tools),
564198
564251
  temperature: String(request.temperature),
564199
564252
  max_tokens: String(request.maxTokens)
@@ -564204,7 +564257,8 @@ var init_nexusBackend = __esm({
564204
564257
  if (this.authKey) {
564205
564258
  daemonArgs.auth_key = this.authKey;
564206
564259
  }
564207
- daemonArgs.think = String(this.thinking);
564260
+ daemonArgs.think = String(effectiveThink);
564261
+ this.applyOptionalRequestFields(daemonArgs, request);
564208
564262
  let rawResult;
564209
564263
  try {
564210
564264
  rawResult = await this.sendFn("remote_infer", daemonArgs, request.timeoutMs || 12e4);
@@ -564303,9 +564357,10 @@ var init_nexusBackend = __esm({
564303
564357
  async *chatCompletionStream(request) {
564304
564358
  const streamFile = join97(tmpdir18(), `nexus-stream-${randomBytes19(6).toString("hex")}.jsonl`);
564305
564359
  writeFileSync38(streamFile, "", "utf8");
564360
+ const effectiveThink = this.effectiveThink(request);
564306
564361
  const daemonArgs = {
564307
564362
  model: this.model,
564308
- messages: JSON.stringify(request.messages),
564363
+ messages: JSON.stringify(this.requestMessages(request, effectiveThink)),
564309
564364
  tools: JSON.stringify(request.tools),
564310
564365
  temperature: String(request.temperature),
564311
564366
  max_tokens: String(request.maxTokens),
@@ -564315,7 +564370,8 @@ var init_nexusBackend = __esm({
564315
564370
  daemonArgs.target_peer = this.targetPeer;
564316
564371
  if (this.authKey)
564317
564372
  daemonArgs.auth_key = this.authKey;
564318
- daemonArgs.think = String(this.thinking);
564373
+ daemonArgs.think = String(effectiveThink);
564374
+ this.applyOptionalRequestFields(daemonArgs, request);
564319
564375
  let rawResult;
564320
564376
  try {
564321
564377
  rawResult = await this.sendFn("remote_infer", daemonArgs, request.timeoutMs || 12e4);
@@ -591098,6 +591154,7 @@ __export(setup_exports, {
591098
591154
  checkOllamaUpdate: () => checkOllamaUpdate,
591099
591155
  checkPythonVenv: () => checkPythonVenv,
591100
591156
  checkToolSupport: () => checkToolSupport,
591157
+ classifyOllamaThinkingTreatment: () => classifyOllamaThinkingTreatment,
591101
591158
  computeInferenceScore: () => computeInferenceScore,
591102
591159
  createExpandedVariant: () => createExpandedVariant,
591103
591160
  createExpandedVariantAsync: () => createExpandedVariantAsync,
@@ -591125,6 +591182,7 @@ __export(setup_exports, {
591125
591182
  repairAllExpandedVariants: () => repairAllExpandedVariants,
591126
591183
  runElevatedCommand: () => runElevatedCommand,
591127
591184
  runSetupWizard: () => runSetupWizard,
591185
+ shouldBakeNoThinkIntoOllamaModelfile: () => shouldBakeNoThinkIntoOllamaModelfile,
591128
591186
  updateOllama: () => updateOllama
591129
591187
  });
591130
591188
  import * as readline from "node:readline";
@@ -592840,23 +592898,26 @@ ${c3.cyan(OMNIUS_FIRST_RUN_BANNER)}
592840
592898
  const createModelfile = await ask(rl, ` Create optimized model "${c3.bold(customName)}" with ${ctx3.label} context? (Y/n) `);
592841
592899
  if (createModelfile.toLowerCase() !== "n") {
592842
592900
  try {
592843
- const numPredict = Math.min(16384, Math.max(2048, Math.floor(ctx3.numCtx * 0.25)));
592844
- const modelfileContent = [
592845
- `FROM ${selectedVariant.tag}`,
592846
- `PARAMETER num_ctx ${ctx3.numCtx}`,
592847
- `PARAMETER temperature 0`,
592848
- `PARAMETER num_predict ${numPredict}`,
592849
- `PARAMETER stop "<|endoftext|>"`
592850
- ].join("\n");
592901
+ const modelfileCandidates = expandedVariantContentCandidates(selectedVariant.tag, ctx3.numCtx);
592851
592902
  const modelDir2 = join115(homedir37(), ".omnius", "models");
592852
592903
  mkdirSync56(modelDir2, { recursive: true });
592853
592904
  const modelfilePath = join115(modelDir2, `Modelfile.${customName}`);
592854
- writeFileSync51(modelfilePath, modelfileContent + "\n", "utf8");
592855
592905
  process.stdout.write(` ${c3.dim("Creating model...")} `);
592856
- execSync51(`ollama create ${customName} -f ${modelfilePath}`, {
592857
- stdio: "pipe",
592858
- timeout: 12e4
592859
- });
592906
+ for (let i2 = 0; i2 < modelfileCandidates.length; i2++) {
592907
+ writeFileSync51(modelfilePath, modelfileCandidates[i2] + "\n", "utf8");
592908
+ try {
592909
+ execSync51(`ollama create ${customName} -f ${modelfilePath}`, {
592910
+ stdio: "pipe",
592911
+ timeout: 12e4
592912
+ });
592913
+ break;
592914
+ } catch (err) {
592915
+ if (i2 === 0 && modelfileCandidates.length > 1 && ollamaCreateNothinkRejected(err)) {
592916
+ continue;
592917
+ }
592918
+ throw err;
592919
+ }
592920
+ }
592860
592921
  process.stdout.write(`${c3.green("✔")}
592861
592922
  `);
592862
592923
  setConfigValue("model", customName);
@@ -593439,6 +593500,29 @@ function parseShowNumCtx2(show) {
593439
593500
  }
593440
593501
  return 0;
593441
593502
  }
593503
+ function classifyOllamaThinkingTreatment(modelName) {
593504
+ const normalized = modelName.replace(/^omnius-/i, "").replace(/:latest$/i, "").toLowerCase();
593505
+ if (/\bgpt[-_]?oss\b/.test(normalized)) return "gpt-oss-levels";
593506
+ if (/(?:^|[-_/:])(?:qwq|qvq)(?:[-_/:]|$)/.test(normalized) || /(?:^|[-_/:])thinking(?:[-_/:]|$)/.test(normalized) || /[-_]thinking(?:[-_/:]|$)/.test(normalized)) {
593507
+ return "thinking-only";
593508
+ }
593509
+ if (/(?:^|[-_/:])qwen3(?:[._-]?\d+)?(?:[-_/:]|$)/.test(normalized) || /(?:^|[-_/:])qwen3(?:vl|omni)(?:[-_/:]|$)/.test(normalized) || /deepseek[-_]?r1/.test(normalized) || /deepseek[-_]?v?3[._-]1/.test(normalized)) {
593510
+ return "toggleable";
593511
+ }
593512
+ return "none";
593513
+ }
593514
+ function shouldBakeNoThinkIntoOllamaModelfile(modelName) {
593515
+ return classifyOllamaThinkingTreatment(modelName) === "toggleable";
593516
+ }
593517
+ function parseShowNoThink(show) {
593518
+ const sources = [show.parameters, show.modelfile];
593519
+ for (const source of sources) {
593520
+ if (!source) continue;
593521
+ if (/\b(?:PARAMETER\s+)?nothink\s+(?:true|1|on|yes)\b/i.test(source)) return true;
593522
+ if (/\b(?:PARAMETER\s+)?think\s+(?:false|0|off|no)\b/i.test(source)) return true;
593523
+ }
593524
+ return false;
593525
+ }
593442
593526
  async function checkExpandedVariant(modelName, backendUrl2) {
593443
593527
  if (modelName.startsWith("omnius-")) return null;
593444
593528
  try {
@@ -593518,7 +593602,7 @@ async function readExpandedVariantState(backendUrl2, modelName) {
593518
593602
  if (baseModel && (baseModel.startsWith("/") || /blobs\/sha256[-:]/.test(baseModel))) {
593519
593603
  baseModel = null;
593520
593604
  }
593521
- return { currentNumCtx, baseModel };
593605
+ return { currentNumCtx, baseModel, hasNoThink: parseShowNoThink(showData) };
593522
593606
  } catch {
593523
593607
  return null;
593524
593608
  }
@@ -593526,50 +593610,93 @@ async function readExpandedVariantState(backendUrl2, modelName) {
593526
593610
  function stripVariantTag(modelName) {
593527
593611
  return modelName.replace(/:latest$/i, "");
593528
593612
  }
593529
- function createExpandedVariantContent(baseModel, numCtx) {
593613
+ function createExpandedVariantContent(baseModel, numCtx, options2 = {}) {
593530
593614
  if (baseModel.startsWith("/") || /blobs\/sha256[-:]/.test(baseModel)) {
593531
593615
  throw new Error(
593532
593616
  `createExpandedVariantContent: refusing to use blob-path base "${baseModel}". Pass the user-facing model name (e.g. "qwen3.6:latest") instead.`
593533
593617
  );
593534
593618
  }
593535
593619
  const numPredict = Math.min(16384, Math.max(2048, Math.floor(numCtx * 0.25)));
593536
- return [
593620
+ const lines = [
593537
593621
  `FROM ${baseModel}`,
593538
593622
  `PARAMETER num_ctx ${numCtx}`,
593623
+ ...options2.includeNoThink ? [
593624
+ `# Keep toggleable reasoning models in direct-answer mode by default.`,
593625
+ `PARAMETER nothink true`
593626
+ ] : [],
593539
593627
  `PARAMETER temperature 0`,
593540
593628
  `PARAMETER num_predict ${numPredict}`,
593541
593629
  `PARAMETER stop "<|endoftext|>"`
593630
+ ];
593631
+ return lines.join("\n");
593632
+ }
593633
+ function expandedVariantContentCandidates(baseModel, numCtx) {
593634
+ if (!shouldBakeNoThinkIntoOllamaModelfile(baseModel)) {
593635
+ return [createExpandedVariantContent(baseModel, numCtx)];
593636
+ }
593637
+ return [
593638
+ createExpandedVariantContent(baseModel, numCtx, { includeNoThink: true }),
593639
+ createExpandedVariantContent(baseModel, numCtx, { includeNoThink: false })
593640
+ ];
593641
+ }
593642
+ function ollamaCreateNothinkRejected(err) {
593643
+ const anyErr = err;
593644
+ const text = [
593645
+ anyErr?.stderr?.toString?.() ?? "",
593646
+ anyErr?.stdout?.toString?.() ?? "",
593647
+ anyErr?.message ?? ""
593542
593648
  ].join("\n");
593649
+ return /nothink|unknown parameter|invalid parameter|unsupported parameter/i.test(text);
593543
593650
  }
593544
593651
  function createExpandedVariantNamed(targetModel, baseModel, specs, sizeGB, kvBytesPerToken, archMax) {
593545
593652
  const ctx3 = calculateExpandedVariantContextWindow(specs, sizeGB, kvBytesPerToken, archMax);
593546
- const modelfileContent = createExpandedVariantContent(baseModel, ctx3.numCtx);
593653
+ const modelfileCandidates = expandedVariantContentCandidates(baseModel, ctx3.numCtx);
593547
593654
  try {
593548
593655
  const modelDir2 = join115(homedir37(), ".omnius", "models");
593549
593656
  mkdirSync56(modelDir2, { recursive: true });
593550
593657
  const modelfilePath = join115(modelDir2, `Modelfile.${targetModel}`);
593551
- writeFileSync51(modelfilePath, modelfileContent + "\n", "utf8");
593552
- execSync51(`ollama create ${targetModel} -f ${modelfilePath}`, {
593553
- stdio: "pipe",
593554
- timeout: 12e4
593555
- });
593556
- return targetModel;
593658
+ for (let i2 = 0; i2 < modelfileCandidates.length; i2++) {
593659
+ writeFileSync51(modelfilePath, modelfileCandidates[i2] + "\n", "utf8");
593660
+ try {
593661
+ execSync51(`ollama create ${targetModel} -f ${modelfilePath}`, {
593662
+ stdio: "pipe",
593663
+ timeout: 12e4
593664
+ });
593665
+ return targetModel;
593666
+ } catch (err) {
593667
+ if (i2 === 0 && modelfileCandidates.length > 1 && ollamaCreateNothinkRejected(err)) {
593668
+ continue;
593669
+ }
593670
+ throw err;
593671
+ }
593672
+ }
593673
+ return null;
593557
593674
  } catch {
593558
593675
  return null;
593559
593676
  }
593560
593677
  }
593561
593678
  async function createExpandedVariantNamedAsync(targetModel, baseModel, specs, sizeGB, kvBytesPerToken, archMax) {
593562
593679
  const ctx3 = calculateExpandedVariantContextWindow(specs, sizeGB, kvBytesPerToken, archMax);
593563
- const modelfileContent = createExpandedVariantContent(baseModel, ctx3.numCtx);
593680
+ const modelfileCandidates = expandedVariantContentCandidates(baseModel, ctx3.numCtx);
593564
593681
  try {
593565
593682
  const modelDir2 = join115(homedir37(), ".omnius", "models");
593566
593683
  mkdirSync56(modelDir2, { recursive: true });
593567
593684
  const modelfilePath = join115(modelDir2, `Modelfile.${targetModel}`);
593568
- writeFileSync51(modelfilePath, modelfileContent + "\n", "utf8");
593569
- await execAsync2(`ollama create ${targetModel} -f ${modelfilePath}`, {
593570
- timeout: 12e4
593571
- });
593572
- return targetModel;
593685
+ for (let i2 = 0; i2 < modelfileCandidates.length; i2++) {
593686
+ writeFileSync51(modelfilePath, modelfileCandidates[i2] + "\n", "utf8");
593687
+ try {
593688
+ await execAsync2(`ollama create ${targetModel} -f ${modelfilePath}`, {
593689
+ timeout: 12e4
593690
+ });
593691
+ return targetModel;
593692
+ } catch (err) {
593693
+ if (i2 === 0 && modelfileCandidates.length > 1 && ollamaCreateNothinkRejected(err)) {
593694
+ continue;
593695
+ }
593696
+ throw err;
593697
+ }
593698
+ }
593699
+ return null;
593573
593700
  } catch {
593574
593701
  return null;
593575
593702
  }
@@ -606896,6 +607023,10 @@ Clone a new voice: /voice clone <wav-file> [name]`);
606896
607023
  renderWarning(
606897
607024
  "OMNIUS_FORCE_NO_THINK=1 forces off regardless of /think setting"
606898
607025
  );
607026
+ else if (cur && process.env["OMNIUS_ENABLE_THINKING"] !== "1")
607027
+ renderWarning(
607028
+ "OMNIUS_ENABLE_THINKING is not set; /think is saved but backend requests remain direct-answer mode."
607029
+ );
606899
607030
  return "handled";
606900
607031
  }
606901
607032
  if (token === "auto") {
@@ -606934,6 +607065,11 @@ Clone a new voice: /voice clone <wav-file> [name]`);
606934
607065
  renderInfo(
606935
607066
  "Note: max_tokens will auto-raise to ≥4096 per request to prevent <think> truncation."
606936
607067
  );
607068
+ if (process.env["OMNIUS_ENABLE_THINKING"] !== "1") {
607069
+ renderWarning(
607070
+ "Thinking is hard-disabled by default. Set OMNIUS_ENABLE_THINKING=1 before launch for /think on or /think auto to affect backend requests."
607071
+ );
607072
+ }
606937
607073
  }
606938
607074
  return "handled";
606939
607075
  }
@@ -629145,7 +629281,7 @@ function telegramRouterTimeoutMs(configTimeoutMs, minMs = 1e4, maxMs) {
629145
629281
  10
629146
629282
  );
629147
629283
  const floor = Number.isFinite(minMs) && minMs > 0 ? minMs : 1e4;
629148
- const configuredCap = Number.isFinite(envRaw) && envRaw >= floor ? envRaw : 9e4;
629284
+ const configuredCap = Number.isFinite(envRaw) && envRaw >= floor ? envRaw : 3e4;
629149
629285
  const callerCap = Number.isFinite(maxMs) && (maxMs ?? 0) >= floor ? maxMs : configuredCap;
629150
629286
  const cap = Math.max(floor, Math.min(configuredCap, callerCap));
629151
629287
  const requested = Number.isFinite(configTimeoutMs) && (configTimeoutMs ?? 0) > 0 ? configTimeoutMs : cap;
@@ -629172,6 +629308,9 @@ function telegramRouterDiagnosticAttemptLooksLikeTimeout(attempt) {
629172
629308
  function telegramRouterDiagnosticAttemptLooksLikeBackendLiveness(attempt) {
629173
629309
  return attempt.status === "threw" && telegramRouterErrorLooksLikeBackendLiveness(attempt.error ?? "");
629174
629310
  }
629311
+ function telegramRouterDiagnosticIsDualEmptyVisible(diag) {
629312
+ return diag.jsonModeStatus === "empty-after-strip" && diag.plainStatus === "empty-after-strip";
629313
+ }
629175
629314
  function telegramThinkSuppressedRequest(request) {
629176
629315
  const messages2 = Array.isArray(request.messages) ? request.messages.slice() : [];
629177
629316
  let appended = false;
@@ -629179,18 +629318,24 @@ function telegramThinkSuppressedRequest(request) {
629179
629318
  const m2 = messages2[i2];
629180
629319
  if (!m2 || m2.role !== "user") continue;
629181
629320
  const content = typeof m2.content === "string" ? m2.content : "";
629182
- if (/\/no_think\b/i.test(content)) {
629321
+ const hasOllamaNoThink = /\/nothink\b/i.test(content);
629322
+ const hasQwenNoThink = /\/no[_-]think\b/i.test(content);
629323
+ if (hasOllamaNoThink && hasQwenNoThink) {
629183
629324
  appended = true;
629184
629325
  break;
629185
629326
  }
629186
- messages2[i2] = { ...m2, content: content.endsWith("\n") ? `${content}/no_think` : `${content}
629327
+ const suffix = [
629328
+ hasOllamaNoThink ? null : "/nothink",
629329
+ hasQwenNoThink ? null : "/no_think"
629330
+ ].filter(Boolean).join("\n");
629331
+ messages2[i2] = { ...m2, content: content.endsWith("\n") ? `${content}${suffix}` : `${content}
629187
629332
 
629188
- /no_think` };
629333
+ ${suffix}` };
629189
629334
  appended = true;
629190
629335
  break;
629191
629336
  }
629192
629337
  if (!appended) {
629193
- messages2.push({ role: "user", content: "/no_think" });
629338
+ messages2.push({ role: "user", content: "/nothink\n/no_think" });
629194
629339
  }
629195
629340
  return { ...request, messages: messages2, think: false };
629196
629341
  }
@@ -630779,7 +630924,7 @@ function renderTelegramSubAgentError(username, error) {
630779
630924
  process.stdout.write(` ${c3.dim("│")} ${c3.magenta("✘")} @${username}: ${c3.dim(preview)}
630780
630925
  `);
630781
630926
  }
630782
- var TELEGRAM_TOOL_ACTION_GROUPS, TELEGRAM_TOOL_ACTION_GROUP, TELEGRAM_TOOL_MUTATING_GROUPS, DEFAULT_TELEGRAM_TOOL_GROUP_POLICY, TELEGRAM_TOOL_BUTTON_LABELS, TELEGRAM_SAFETY_PROMPT, ADMIN_DM_PROMPT, ADMIN_GROUP_PROMPT, TELEGRAM_PUBLIC_SOUL_PROFILE, TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT, TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT, TELEGRAM_PUBLIC_VISION_STACK_CONTRACT, GROUP_REPLY_DISCRETION_PROMPT, TELEGRAM_CHAT_MODE_PROMPT, ADMIN_CHAT_PROFILE_PROMPT, TELEGRAM_ACTION_RESPONSE_CONTRACT, TELEGRAM_EXTERNAL_ACQUISITION_CONTRACT, TELEGRAM_LINK_INTEGRITY_CONTRACT, TELEGRAM_INTERACTION_DECISION_RESPONSE_FORMAT, TELEGRAM_INTERACTION_DECISION_MINIMAL_SCHEMA, TELEGRAM_INTERACTION_DECISION_REPAIR_SCHEMA, TELEGRAM_CHAT_REPLY_RESPONSE_FORMAT, TELEGRAM_SPACED_URL_RE, TELEGRAM_HTTP_URL_RE, TELEGRAM_STUCK_SELF_TALK_PREFIXES, TELEGRAM_CHAT_HISTORY_LIMIT, TELEGRAM_CONTEXT_RECENT_DEFAULT, TELEGRAM_CONTEXT_LINE_LIMIT, TELEGRAM_CONTEXT_SAMPLE_LIMIT, TELEGRAM_MEMORY_CARD_LIMIT, TELEGRAM_MEMORY_NOTE_LIMIT, TELEGRAM_ASSOCIATIVE_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_USER_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_ACTION_LIMIT, TELEGRAM_ASSOCIATIVE_RELATION_LIMIT, TELEGRAM_MEMORY_STOPWORDS, TELEGRAM_MEMORY_GENERIC_QUERY_TOKENS, TELEGRAM_SUB_AGENT_BOUNDED_OPTIONS, TELEGRAM_SUB_AGENT_DEFAULT_LIMIT, TELEGRAM_SUB_AGENT_MAX_LIMIT, TELEGRAM_SUB_AGENT_BURST_CONTEXT_LIMIT, TELEGRAM_ADMIN_LIVE_PANEL_PAGES, TELEGRAM_ADMIN_LIVE_MUTATION_TOOLS, TELEGRAM_PUBLIC_HELP_COMMANDS2, TELEGRAM_REMINDER_SLASH_COMMANDS, TELEGRAM_REFLECTION_SLASH_COMMANDS, TELEGRAM_PUBLIC_BOT_COMMAND_NAMES, TELEGRAM_IMAGE_EXTENSIONS, MEDIA_CACHE_TTL_MS, TELEGRAM_CHANNEL_DMN_SWEEP_MS, TELEGRAM_CHANNEL_DMN_IDLE_AFTER_MS, TELEGRAM_CHANNEL_DMN_MIN_INTERVAL_MS, TELEGRAM_CHANNEL_DMN_MIN_MESSAGES, TELEGRAM_ALLOWED_UPDATES, TELEGRAM_PUBLIC_TOOL_QUOTAS, TelegramBridge;
630927
+ var TELEGRAM_TOOL_ACTION_GROUPS, TELEGRAM_TOOL_ACTION_GROUP, TELEGRAM_TOOL_MUTATING_GROUPS, DEFAULT_TELEGRAM_TOOL_GROUP_POLICY, TELEGRAM_TOOL_BUTTON_LABELS, TELEGRAM_SAFETY_PROMPT, ADMIN_DM_PROMPT, ADMIN_GROUP_PROMPT, TELEGRAM_PUBLIC_SOUL_PROFILE, TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT, TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT, TELEGRAM_PUBLIC_VISION_STACK_CONTRACT, GROUP_REPLY_DISCRETION_PROMPT, TELEGRAM_CHAT_MODE_PROMPT, ADMIN_CHAT_PROFILE_PROMPT, TELEGRAM_ACTION_RESPONSE_CONTRACT, TELEGRAM_EXTERNAL_ACQUISITION_CONTRACT, TELEGRAM_LINK_INTEGRITY_CONTRACT, TELEGRAM_INTERACTION_DECISION_RESPONSE_FORMAT, TELEGRAM_INTERACTION_DECISION_MINIMAL_SCHEMA, TELEGRAM_INTERACTION_DECISION_REPAIR_SCHEMA, TELEGRAM_CHAT_REPLY_RESPONSE_FORMAT, TELEGRAM_SPACED_URL_RE, TELEGRAM_HTTP_URL_RE, TELEGRAM_STUCK_SELF_TALK_PREFIXES, TELEGRAM_CHAT_HISTORY_LIMIT, TELEGRAM_CONTEXT_RECENT_DEFAULT, TELEGRAM_CONTEXT_LINE_LIMIT, TELEGRAM_CONTEXT_SAMPLE_LIMIT, TELEGRAM_MEMORY_CARD_LIMIT, TELEGRAM_MEMORY_NOTE_LIMIT, TELEGRAM_ASSOCIATIVE_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_USER_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_ACTION_LIMIT, TELEGRAM_ASSOCIATIVE_RELATION_LIMIT, TELEGRAM_MEMORY_STOPWORDS, TELEGRAM_MEMORY_GENERIC_QUERY_TOKENS, TELEGRAM_SUB_AGENT_BOUNDED_OPTIONS, TELEGRAM_SUB_AGENT_DEFAULT_LIMIT, TELEGRAM_SUB_AGENT_MAX_LIMIT, TELEGRAM_SUB_AGENT_BURST_CONTEXT_LIMIT, TELEGRAM_ADMIN_LIVE_PANEL_PAGES, TELEGRAM_ADMIN_LIVE_MUTATION_TOOLS, TELEGRAM_PUBLIC_HELP_COMMANDS2, TELEGRAM_REMINDER_SLASH_COMMANDS, TELEGRAM_REFLECTION_SLASH_COMMANDS, TELEGRAM_PUBLIC_BOT_COMMAND_NAMES, TELEGRAM_IMAGE_EXTENSIONS, MEDIA_CACHE_TTL_MS, TELEGRAM_CHANNEL_DMN_SWEEP_MS, TELEGRAM_CHANNEL_DMN_IDLE_AFTER_MS, TELEGRAM_CHANNEL_DMN_MIN_INTERVAL_MS, TELEGRAM_CHANNEL_DMN_MIN_MESSAGES, TELEGRAM_ALLOWED_UPDATES, TELEGRAM_DEFAULT_LONG_POLL_TIMEOUT_SECONDS, TELEGRAM_DEFAULT_ROUTER_MODEL_CANDIDATES, TELEGRAM_PUBLIC_TOOL_QUOTAS, TelegramBridge;
630783
630928
  var init_telegram_bridge = __esm({
630784
630929
  "packages/cli/src/tui/telegram-bridge.ts"() {
630785
630930
  "use strict";
@@ -631223,6 +631368,21 @@ Telegram link integrity contract:
631223
631368
  TELEGRAM_CHANNEL_DMN_MIN_INTERVAL_MS = 20 * 60 * 1e3;
631224
631369
  TELEGRAM_CHANNEL_DMN_MIN_MESSAGES = 4;
631225
631370
  TELEGRAM_ALLOWED_UPDATES = ["message", "guest_message", "callback_query", "poll", "message_reaction", "message_reaction_count"];
631371
+ TELEGRAM_DEFAULT_LONG_POLL_TIMEOUT_SECONDS = 50;
631372
+ TELEGRAM_DEFAULT_ROUTER_MODEL_CANDIDATES = [
631373
+ "qwen3:0.6b",
631374
+ "qwen3:1.7b",
631375
+ "qwen3:4b",
631376
+ "qwen3:8b",
631377
+ "qwen2.5:3b",
631378
+ "qwen2.5:7b",
631379
+ "llama3.2:1b",
631380
+ "llama3.2:3b",
631381
+ "gemma3:1b",
631382
+ "gemma3:4b",
631383
+ "phi3:mini",
631384
+ "phi4-mini:latest"
631385
+ ];
631226
631386
  TELEGRAM_PUBLIC_TOOL_QUOTAS = {
631227
631387
  web: { limit: 20, windowMs: 60 * 6e4 },
631228
631388
  media: { limit: 30, windowMs: 60 * 6e4 },
@@ -631256,6 +631416,7 @@ Telegram link integrity contract:
631256
631416
  pollLoopPromise = null;
631257
631417
  pollFatalNotified = false;
631258
631418
  lastUpdateId = 0;
631419
+ telegramRouterModelCache = null;
631259
631420
  state = {
631260
631421
  active: false,
631261
631422
  botUserId: void 0,
@@ -631288,10 +631449,14 @@ Telegram link integrity contract:
631288
631449
  * capacity and flood the TUI.
631289
631450
  */
631290
631451
  telegramActiveWorkSessions = /* @__PURE__ */ new Set();
631452
+ telegramActiveWorkGenerations = /* @__PURE__ */ new Map();
631453
+ telegramActiveWorkStartedAtMs = /* @__PURE__ */ new Map();
631291
631454
  /** Queued Telegram sessions waiting for a global work slot. */
631292
631455
  telegramQueuedSessionWork = /* @__PURE__ */ new Map();
631293
631456
  telegramDispatchQueuedTimer = null;
631294
631457
  telegramDispatchQueuedAtMs = 0;
631458
+ telegramQueueDiagnosticLastAtMs = 0;
631459
+ telegramPollWarningLastAtMs = 0;
631295
631460
  /** Lightweight chat history by chat/guest session key */
631296
631461
  chatHistory = /* @__PURE__ */ new Map();
631297
631462
  /** Participant and tone state by chat/guest session key */
@@ -631927,6 +632092,63 @@ No scoped reflection artifact exists yet for this chat. Use <code>/reflect</code
631927
632092
  if (!Number.isFinite(parsed)) return 350;
631928
632093
  return Math.max(0, Math.min(2e3, Math.floor(parsed)));
631929
632094
  }
632095
+ telegramQueueDiagnosticIntervalMs() {
632096
+ const raw = Number.parseInt(process.env["OMNIUS_TG_QUEUE_DIAGNOSTIC_MS"] ?? "", 10);
632097
+ if (Number.isFinite(raw) && raw >= 5e3 && raw <= 3e5) return raw;
632098
+ return 3e4;
632099
+ }
632100
+ maybeLogTelegramQueueDiagnostic(reason) {
632101
+ if (this.telegramQueuedSessionWork.size === 0) return;
632102
+ const now = Date.now();
632103
+ const interval = this.telegramQueueDiagnosticIntervalMs();
632104
+ if (now - this.telegramQueueDiagnosticLastAtMs < interval) return;
632105
+ this.telegramQueueDiagnosticLastAtMs = now;
632106
+ const queued = [...this.telegramQueuedSessionWork.values()].sort((a2, b) => a2.enqueuedAtMs - b.enqueuedAtMs).slice(0, 4).map((work) => {
632107
+ const age = formatTelegramPipelineDuration(now - work.enqueuedAtMs);
632108
+ const live = this.telegramSessionIsLive(work.sessionKey) ? "blocked:same-session-live" : "ready";
632109
+ return `${work.sessionKey} age=${age} bundled=${work.messageCount} ${live}`;
632110
+ });
632111
+ const active = [...this.activeTelegramInteractionSessionKeys()].slice(0, 6);
632112
+ const inferences = this.getTelegramActiveInferences().slice(0, 4).map((inf) => `${inf.id}/${inf.kind}/${inf.model} elapsed=${inf.elapsedSec.toFixed(1)}s ttfb=${inf.ttfbSec === void 0 ? "waiting" : `${inf.ttfbSec.toFixed(1)}s`}`);
632113
+ this.tuiWrite(() => renderTelegramSubAgentEvent(
632114
+ "queue",
632115
+ `queue diagnostic (${reason}): active ${this.activeTelegramInteractionCount()}/${this.getSubAgentLimit()} [${active.join(", ") || "none"}]; queued ${this.telegramQueuedSessionWork.size} [${queued.join(" | ")}]; inferences [${inferences.join(" | ") || "none"}]`
632116
+ ));
632117
+ }
632118
+ nextTelegramWorkGeneration(sessionKey) {
632119
+ const generation = (this.telegramActiveWorkGenerations.get(sessionKey) ?? 0) + 1;
632120
+ this.telegramActiveWorkGenerations.set(sessionKey, generation);
632121
+ return generation;
632122
+ }
632123
+ telegramWorkGenerationIsCurrent(sessionKey, generation) {
632124
+ return this.telegramActiveWorkGenerations.get(sessionKey) === generation;
632125
+ }
632126
+ telegramPreAgentWorkMaxIdleMs() {
632127
+ const routerMs = telegramRouterTimeoutMs(this.agentConfig?.timeoutMs);
632128
+ const raw = Number.parseInt(process.env["OMNIUS_TG_PRE_AGENT_MAX_IDLE_MS"] ?? "", 10);
632129
+ if (Number.isFinite(raw) && raw >= 3e4 && raw <= 9e5) return raw;
632130
+ return Math.max(12e4, routerMs + 3e4);
632131
+ }
632132
+ reapStaleTelegramPreAgentWork() {
632133
+ const now = Date.now();
632134
+ const maxIdleMs = this.telegramPreAgentWorkMaxIdleMs();
632135
+ for (const sessionKey of [...this.telegramActiveWorkSessions]) {
632136
+ if (this.subAgents.has(sessionKey) || this.activeChatSessions.has(sessionKey)) continue;
632137
+ const startedAt2 = this.telegramActiveWorkStartedAtMs.get(sessionKey);
632138
+ if (!startedAt2) continue;
632139
+ const idleMs = now - startedAt2;
632140
+ if (idleMs <= maxIdleMs) continue;
632141
+ const generation = this.telegramActiveWorkGenerations.get(sessionKey) ?? 0;
632142
+ this.telegramActiveWorkGenerations.set(sessionKey, generation + 1);
632143
+ this.telegramActiveWorkSessions.delete(sessionKey);
632144
+ this.telegramActiveWorkStartedAtMs.delete(sessionKey);
632145
+ this.refreshActiveTelegramInteractionCount();
632146
+ this.tuiWrite(() => renderTelegramSubAgentEvent(
632147
+ "queue",
632148
+ `watchdog: released stale pre-agent Telegram work pin for ${sessionKey} after ${Math.round(idleMs / 1e3)}s; queued messages may dispatch now`
632149
+ ));
632150
+ }
632151
+ }
631930
632152
  dispatchQueuedTelegramSessionWorkSoon(delayMs = 0) {
631931
632153
  const dueAt = Date.now() + Math.max(0, delayMs);
631932
632154
  if (this.telegramDispatchQueuedTimer && this.telegramDispatchQueuedAtMs <= dueAt) return;
@@ -631956,6 +632178,9 @@ No scoped reflection artifact exists yet for this chat. Use <code>/reflect</code
631956
632178
  this.dispatchQueuedTelegramSessionWorkSoon(Math.max(0, nextDue - Date.now()));
631957
632179
  }
631958
632180
  }
632181
+ if (this.telegramQueuedSessionWork.size > 0) {
632182
+ this.maybeLogTelegramQueueDiagnostic("dispatch");
632183
+ }
631959
632184
  this.refreshActiveTelegramInteractionCount();
631960
632185
  }
631961
632186
  buildTelegramQueuedSessionWork(sessionKey, msg, toolContext, now) {
@@ -631999,11 +632224,16 @@ No scoped reflection artifact exists yet for this chat. Use <code>/reflect</code
631999
632224
  return;
632000
632225
  }
632001
632226
  this.telegramActiveWorkSessions.add(work.sessionKey);
632227
+ this.telegramActiveWorkStartedAtMs.set(work.sessionKey, Date.now());
632228
+ const generation = this.nextTelegramWorkGeneration(work.sessionKey);
632002
632229
  this.refreshActiveTelegramInteractionCount();
632003
- void this.processTelegramMessageWork(work).catch((err) => {
632230
+ void this.processTelegramMessageWork(work, generation).catch((err) => {
632004
632231
  this.tuiWrite(() => renderWarning(`Telegram sub-agent error: ${err instanceof Error ? err.message : String(err)}`));
632005
632232
  }).finally(() => {
632006
- this.telegramActiveWorkSessions.delete(work.sessionKey);
632233
+ if (this.telegramWorkGenerationIsCurrent(work.sessionKey, generation)) {
632234
+ this.telegramActiveWorkSessions.delete(work.sessionKey);
632235
+ this.telegramActiveWorkStartedAtMs.delete(work.sessionKey);
632236
+ }
632007
632237
  this.refreshActiveTelegramInteractionCount();
632008
632238
  this.dispatchQueuedTelegramSessionWorkSoon();
632009
632239
  });
@@ -635238,7 +635468,7 @@ ${lines.join("\n")}`);
635238
635468
  `Current Telegram message text (untrusted user data):
635239
635469
  ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
635240
635470
  "",
635241
- "/no_think"
635471
+ "/nothink\n/no_think"
635242
635472
  ].filter(Boolean).join("\n");
635243
635473
  try {
635244
635474
  const result = await this.telegramRouterJsonCompletion(
@@ -635293,7 +635523,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
635293
635523
  },
635294
635524
  inferenceKind,
635295
635525
  sessionKey,
635296
- { stream: false, reason: "router-json" }
635526
+ { stream: false, reason: "router-json", modelName: diagnostics?.backendModel }
635297
635527
  );
635298
635528
  const visible = jsonModeResult.choices.some(
635299
635529
  (choice) => stripTelegramHiddenThinking(choice.message.content ?? "").trim().length > 0
@@ -635349,7 +635579,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
635349
635579
  suppressed,
635350
635580
  inferenceKind,
635351
635581
  sessionKey,
635352
- { stream: false, reason: "router-plain-retry" }
635582
+ { stream: false, reason: "router-plain-retry", modelName: diagnostics?.backendModel }
635353
635583
  );
635354
635584
  if (diagnostics) {
635355
635585
  const plainVisible = plainResult.choices.some(
@@ -635402,7 +635632,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
635402
635632
  * hard-deadline retire path becomes diagnosable instead of opaque
635403
635633
  */
635404
635634
  async telegramObservableInference(backend, request, kind, sessionKey, options2 = {}) {
635405
- const model = this.agentConfig?.model ?? "?";
635635
+ const model = options2.modelName ?? this.agentConfig?.model ?? "?";
635406
635636
  const promptTokens = estimatePromptTokensFromRequest(request);
635407
635637
  const broker = getModelBroker();
635408
635638
  const trainCtx = await broker.getNctxTrain(model).catch(() => null);
@@ -635436,7 +635666,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
635436
635666
  if (!streamAllowed && process.env["OMNIUS_BROKER_TRACE"] === "1") {
635437
635667
  this.tuiWrite(() => renderTelegramSubAgentEvent(
635438
635668
  sessionKey,
635439
- `inference ${id}: non-stream direct (${options2.reason ?? "requested"}) ${this.telegramInferenceRequestDiagnostic(requestWithCtx)}`
635669
+ `inference ${id}: non-stream direct (${options2.reason ?? "requested"}) ${this.telegramInferenceRequestDiagnostic(requestWithCtx, model)}`
635440
635670
  ));
635441
635671
  }
635442
635672
  } else if (typeof streamFn !== "function") {
@@ -635469,18 +635699,22 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
635469
635699
  this.deregisterTelegramInference(id);
635470
635700
  }
635471
635701
  }
635472
- telegramBackendDiagnostic() {
635702
+ telegramBackendDiagnostic(modelOverride, routerModelSource, routerModelDetail) {
635473
635703
  const config = this.agentConfig;
635474
635704
  if (!config) return "backend=unconfigured model=?";
635475
- return `backend=${config.backendType} url=${config.backendUrl} model=${config.model}`;
635705
+ const model = modelOverride || config.model;
635706
+ const source = routerModelSource ? ` router_model_source=${routerModelSource}` : "";
635707
+ const detail = routerModelDetail ? ` router_model_detail=${compactTelegramRouterDiagnosticText(routerModelDetail, 180)}` : "";
635708
+ const main2 = model !== config.model ? ` main_model=${config.model}` : "";
635709
+ return `backend=${config.backendType} url=${config.backendUrl} model=${model}${main2}${source}${detail}`;
635476
635710
  }
635477
- telegramInferenceRequestDiagnostic(request) {
635711
+ telegramInferenceRequestDiagnostic(request, modelOverride) {
635478
635712
  const responseFormat = request.responseFormat ?? request.response_format;
635479
635713
  const responseFormatType = responseFormat && typeof responseFormat["type"] === "string" ? responseFormat["type"] : responseFormat ? "present" : "none";
635480
635714
  const numCtx = request.numCtx;
635481
635715
  const think = request.think;
635482
635716
  const tools = Array.isArray(request.tools) ? request.tools.length : 0;
635483
- return `${this.telegramBackendDiagnostic()} response_format=${responseFormatType} num_ctx=${Number.isFinite(numCtx) ? numCtx : "unset"} think=${think === void 0 ? "default" : String(think)} tools=${tools} timeoutMs=${Number.isFinite(request.timeoutMs) ? request.timeoutMs : "unset"}`;
635717
+ return `${this.telegramBackendDiagnostic(modelOverride)} response_format=${responseFormatType} num_ctx=${Number.isFinite(numCtx) ? numCtx : "unset"} think=${think === void 0 ? "default" : String(think)} tools=${tools} timeoutMs=${Number.isFinite(request.timeoutMs) ? request.timeoutMs : "unset"}`;
635484
635718
  }
635485
635719
  telegramStreamInactivityDiagnostic(request, inferenceId, inactivityMs, contentChars, thinkingChars) {
635486
635720
  const entry = this.telegramActiveInferences.get(inferenceId);
@@ -635489,7 +635723,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
635489
635723
  const idle = entry ? `${((now - entry.lastTokenAt) / 1e3).toFixed(1)}s` : "unknown";
635490
635724
  const ttfb = entry?.firstChunkAt !== void 0 ? `${((entry.firstChunkAt - entry.startTs) / 1e3).toFixed(1)}s` : "never";
635491
635725
  const phase = entry?.firstChunkAt === void 0 ? "before-first-chunk" : "mid-stream";
635492
- return `stream-inactivity: no chunks for ${(inactivityMs / 1e3).toFixed(0)}s (phase=${phase}; elapsed=${elapsed}; idle=${idle}; ttfb=${ttfb}; content=${contentChars}c thinking=${thinkingChars}c; ${this.telegramInferenceRequestDiagnostic(request)}; stream_endpoint=no-sse-chunk)`;
635726
+ return `stream-inactivity: no chunks for ${(inactivityMs / 1e3).toFixed(0)}s (phase=${phase}; elapsed=${elapsed}; idle=${idle}; ttfb=${ttfb}; content=${contentChars}c thinking=${thinkingChars}c; ${this.telegramInferenceRequestDiagnostic(request, entry?.model)}; stream_endpoint=no-sse-chunk)`;
635493
635727
  }
635494
635728
  /**
635495
635729
  * Drive a chatCompletionStream to exhaustion, accumulating tokens into a
@@ -635705,7 +635939,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
635705
635939
  `Original router output:`,
635706
635940
  rawPreview,
635707
635941
  ``,
635708
- `/no_think`
635942
+ `/nothink
635943
+ /no_think`
635709
635944
  ].join("\n");
635710
635945
  try {
635711
635946
  const result = await this.telegramRouterJsonCompletion(backend, {
@@ -635718,8 +635953,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
635718
635953
  ],
635719
635954
  tools: [],
635720
635955
  temperature: 0,
635721
- maxTokens: 800,
635722
- timeoutMs: telegramRouterTimeoutMs(timeoutMs, 8e3, 2e4),
635956
+ maxTokens: 500,
635957
+ timeoutMs: telegramRouterTimeoutMs(timeoutMs, 8e3, 15e3),
635723
635958
  think: false
635724
635959
  }, diagnostics, "router-repair", sessionKey);
635725
635960
  const repairedText = result.choices[0]?.message?.content ?? "";
@@ -635772,7 +636007,8 @@ ${userPrompt.slice(-4e3)}` : userPrompt;
635772
636007
  `Router context (trailing-window):`,
635773
636008
  trimmedUserPrompt,
635774
636009
  ``,
635775
- `/no_think`
636010
+ `/nothink
636011
+ /no_think`
635776
636012
  ].join("\n");
635777
636013
  try {
635778
636014
  const result = await this.telegramRouterJsonCompletion(backend, {
@@ -635785,8 +636021,8 @@ ${userPrompt.slice(-4e3)}` : userPrompt;
635785
636021
  ],
635786
636022
  tools: [],
635787
636023
  temperature: 0,
635788
- maxTokens: 1e3,
635789
- timeoutMs: telegramRouterTimeoutMs(timeoutMs, 1e4, 3e4),
636024
+ maxTokens: 500,
636025
+ timeoutMs: telegramRouterTimeoutMs(timeoutMs, 8e3, 15e3),
635790
636026
  think: false
635791
636027
  }, diagnostics, "router-strict-retry", sessionKey);
635792
636028
  const retryText = result.choices[0]?.message?.content ?? "";
@@ -635980,6 +636216,7 @@ ${retryText}`,
635980
636216
  * never fires.
635981
636217
  */
635982
636218
  reapStaleTelegramSubAgents() {
636219
+ this.reapStaleTelegramPreAgentWork();
635983
636220
  const maxIdleMs = this.telegramSubAgentMaxIdleMs();
635984
636221
  const now = Date.now();
635985
636222
  const stale = [];
@@ -636000,6 +636237,7 @@ ${retryText}`,
636000
636237
  clearInterval(agent.typingInterval);
636001
636238
  agent.typingInterval = null;
636002
636239
  }
636240
+ this.stopTelegramPublicProgressMessage(agent);
636003
636241
  try {
636004
636242
  agent.runner?.abort?.();
636005
636243
  } catch {
@@ -636019,6 +636257,118 @@ ${retryText}`,
636019
636257
  this.subAgentViewCallbacks?.onStatus(agent.viewId, "failed");
636020
636258
  this.subAgentViewCallbacks?.onComplete(agent.viewId);
636021
636259
  }
636260
+ if (this.telegramQueuedSessionWork.size > 0) {
636261
+ this.maybeLogTelegramQueueDiagnostic("watchdog");
636262
+ this.dispatchQueuedTelegramSessionWorkSoon();
636263
+ }
636264
+ }
636265
+ telegramRouterAutoModelEnabled() {
636266
+ const raw = (process.env["OMNIUS_TG_ROUTER_AUTO_MODEL"] ?? "").trim().toLowerCase();
636267
+ return raw !== "0" && raw !== "false" && raw !== "off";
636268
+ }
636269
+ telegramRouterCandidateModels() {
636270
+ const raw = (process.env["OMNIUS_TG_ROUTER_MODEL_CANDIDATES"] ?? "").trim();
636271
+ const candidates = raw ? raw.split(/[,\s]+/).map((part) => part.trim()).filter(Boolean) : TELEGRAM_DEFAULT_ROUTER_MODEL_CANDIDATES;
636272
+ return Array.from(new Set(candidates));
636273
+ }
636274
+ normalizeOllamaModelNameForMatch(name10) {
636275
+ return name10.trim().toLowerCase().replace(/:latest$/, "");
636276
+ }
636277
+ async fetchOllamaInstalledModelNames(baseUrl) {
636278
+ const url = `${baseUrl.replace(/\/+$/, "")}/api/tags`;
636279
+ const timeoutFn = AbortSignal.timeout;
636280
+ const res = await fetch(url, {
636281
+ signal: typeof timeoutFn === "function" ? timeoutFn(2e3) : void 0
636282
+ });
636283
+ if (!res.ok) throw new Error(`ollama /api/tags returned HTTP ${res.status}`);
636284
+ const data = await res.json();
636285
+ return Array.isArray(data.models) ? data.models.map((model) => typeof model.name === "string" ? model.name : "").filter(Boolean) : [];
636286
+ }
636287
+ async resolveTelegramRouterBackend(config) {
636288
+ const explicit = (process.env["OMNIUS_TG_ROUTER_MODEL"] ?? "").trim();
636289
+ if (explicit && !/^(?:0|false|off|same|main)$/i.test(explicit)) {
636290
+ return {
636291
+ backend: new OllamaAgenticBackend(config.backendUrl, explicit, config.apiKey),
636292
+ model: explicit,
636293
+ source: "env",
636294
+ detail: "OMNIUS_TG_ROUTER_MODEL"
636295
+ };
636296
+ }
636297
+ if (config.backendType !== "ollama" || !this.telegramRouterAutoModelEnabled()) {
636298
+ return {
636299
+ backend: new OllamaAgenticBackend(config.backendUrl, config.model, config.apiKey),
636300
+ model: config.model,
636301
+ source: "main"
636302
+ };
636303
+ }
636304
+ const candidates = this.telegramRouterCandidateModels();
636305
+ const cacheKey = `${config.backendUrl}
636306
+ ${config.model}
636307
+ ${candidates.join(",")}`;
636308
+ const now = Date.now();
636309
+ if (this.telegramRouterModelCache && this.telegramRouterModelCache.cacheKey === cacheKey && now - this.telegramRouterModelCache.atMs < 6e4) {
636310
+ const cached = this.telegramRouterModelCache;
636311
+ return {
636312
+ backend: new OllamaAgenticBackend(config.backendUrl, cached.model, config.apiKey),
636313
+ model: cached.model,
636314
+ source: cached.source,
636315
+ detail: cached.detail
636316
+ };
636317
+ }
636318
+ try {
636319
+ const installed = await this.fetchOllamaInstalledModelNames(config.backendUrl);
636320
+ const installedByNormalized = /* @__PURE__ */ new Map();
636321
+ for (const name10 of installed) {
636322
+ installedByNormalized.set(this.normalizeOllamaModelNameForMatch(name10), name10);
636323
+ }
636324
+ for (const candidate of candidates) {
636325
+ const selected = installedByNormalized.get(this.normalizeOllamaModelNameForMatch(candidate));
636326
+ if (!selected) continue;
636327
+ const resolved = {
636328
+ cacheKey,
636329
+ atMs: now,
636330
+ model: selected,
636331
+ source: "auto-small",
636332
+ detail: "selected first installed OMNIUS_TG_ROUTER_MODEL_CANDIDATES entry from Ollama /api/tags"
636333
+ };
636334
+ this.telegramRouterModelCache = resolved;
636335
+ return {
636336
+ backend: new OllamaAgenticBackend(config.backendUrl, selected, config.apiKey),
636337
+ model: selected,
636338
+ source: "auto-small",
636339
+ detail: resolved.detail
636340
+ };
636341
+ }
636342
+ } catch (err) {
636343
+ const detail2 = `router model auto-detect failed: ${err instanceof Error ? err.message : String(err)}`;
636344
+ this.telegramRouterModelCache = {
636345
+ cacheKey,
636346
+ atMs: now,
636347
+ model: config.model,
636348
+ source: "main",
636349
+ detail: detail2
636350
+ };
636351
+ return {
636352
+ backend: new OllamaAgenticBackend(config.backendUrl, config.model, config.apiKey),
636353
+ model: config.model,
636354
+ source: "main",
636355
+ detail: detail2
636356
+ };
636357
+ }
636358
+ const detail = "no configured small router model was installed; using main model";
636359
+ this.telegramRouterModelCache = {
636360
+ cacheKey,
636361
+ atMs: now,
636362
+ model: config.model,
636363
+ source: "main",
636364
+ detail
636365
+ };
636366
+ return {
636367
+ backend: new OllamaAgenticBackend(config.backendUrl, config.model, config.apiKey),
636368
+ model: config.model,
636369
+ source: "main",
636370
+ detail
636371
+ };
636022
636372
  }
636023
636373
  async inferTelegramInteractionDecision(msg, toolContext) {
636024
636374
  const config = this.agentConfig;
@@ -636048,11 +636398,8 @@ ${retryText}`,
636048
636398
  };
636049
636399
  return fallback;
636050
636400
  }
636051
- const backend = new OllamaAgenticBackend(
636052
- config.backendUrl,
636053
- config.model,
636054
- config.apiKey
636055
- );
636401
+ const routerBackend = await this.resolveTelegramRouterBackend(config);
636402
+ const backend = routerBackend.backend;
636056
636403
  const forcedLine = forcedRoute ? `The operator selected Telegram mode "${forcedRoute}". The route field must be "${forcedRoute}", but should_reply must still be inferred live from context.` : `The operator selected Telegram mode "auto". Infer route live from context.`;
636057
636404
  const context2 = this.buildTelegramConversationContextStream(sessionKey, msg, isGroup ? 36 : 20, identitySalienceSignals);
636058
636405
  const currentReplyContext = this.buildTelegramCurrentReplyContext(sessionKey, msg);
@@ -636192,7 +636539,13 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
636192
636539
  } catch {
636193
636540
  }
636194
636541
  }
636195
- const diagnostics = {};
636542
+ const diagnostics = {
636543
+ backendType: config.backendType,
636544
+ backendUrl: config.backendUrl,
636545
+ backendModel: routerBackend.model,
636546
+ routerModelSource: routerBackend.source,
636547
+ routerModelDetail: routerBackend.detail
636548
+ };
636196
636549
  const routerStartMs = Date.now();
636197
636550
  try {
636198
636551
  const result = await this.telegramRouterJsonCompletion(backend, {
@@ -636205,17 +636558,17 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
636205
636558
  ],
636206
636559
  tools: [],
636207
636560
  temperature: 0,
636208
- // Minimal route JSON should fit comfortably; keeping this small avoids
636209
- // reintroducing truncated-note repair cascades.
636210
- maxTokens: 900,
636211
- timeoutMs: telegramRouterTimeoutMs(config.timeoutMs),
636561
+ // Router JSON is tiny. Keep the answer budget tight so Qwen-class
636562
+ // models cannot spend a minute producing hidden <think>-only output.
636563
+ maxTokens: 360,
636564
+ timeoutMs: telegramRouterTimeoutMs(config.timeoutMs, 8e3, 3e4),
636212
636565
  think: false
636213
636566
  }, diagnostics, "router", sessionKey);
636214
636567
  const text = result.choices[0]?.message?.content ?? "";
636215
636568
  const routerLatencyMs = Date.now() - routerStartMs;
636216
636569
  try {
636217
636570
  const pidReg = getPidRegistry();
636218
- const modelKey = this.agentConfig?.model ?? "?";
636571
+ const modelKey = routerBackend.model ?? this.agentConfig?.model ?? "?";
636219
636572
  pidReg.sample(`tier1.${modelKey}`, routerLatencyMs);
636220
636573
  pidReg.sample(`tier2.${modelKey}`, routerLatencyMs);
636221
636574
  } catch {
@@ -636242,7 +636595,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
636242
636595
  diagnosticNote: this.composeTelegramRouterDiagnosticNote(
636243
636596
  void 0,
636244
636597
  failureNarrative2,
636245
- "router produced no visible attention decision content; repair/strict retry skipped for direct private/admin fail-open"
636598
+ "router produced no visible attention decision content; repair/strict retry skipped for direct private/admin fail-open",
636599
+ diagnostics
636246
636600
  ),
636247
636601
  raw: text
636248
636602
  }),
@@ -636263,8 +636617,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
636263
636617
  ],
636264
636618
  tools: [],
636265
636619
  temperature: 0,
636266
- maxTokens: 1400,
636267
- timeoutMs: telegramRouterTimeoutMs(config.timeoutMs),
636620
+ maxTokens: 700,
636621
+ timeoutMs: telegramRouterTimeoutMs(config.timeoutMs, 8e3, 3e4),
636268
636622
  think: false
636269
636623
  }, diagnostics, "router", sessionKey);
636270
636624
  const reissuedText = reissued.choices[0]?.message?.content ?? "";
@@ -636277,7 +636631,14 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
636277
636631
  } catch {
636278
636632
  }
636279
636633
  }
636280
- const repaired = await this.repairTelegramInteractionDecision(
636634
+ const dualEmptyVisible = telegramRouterDiagnosticIsDualEmptyVisible(diagnostics) && !telegramRouterRawPreview(text);
636635
+ if (dualEmptyVisible) {
636636
+ if (diagnostics.repairStatus === void 0) {
636637
+ diagnostics.repairStatus = "skipped";
636638
+ diagnostics.repairError = "router returned no visible text in json-mode or plain retry; repair/strict retry would only burn more inference";
636639
+ }
636640
+ }
636641
+ const repaired = dualEmptyVisible ? null : await this.repairTelegramInteractionDecision(
636281
636642
  backend,
636282
636643
  text,
636283
636644
  forcedRoute,
@@ -636288,7 +636649,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
636288
636649
  if (repaired) {
636289
636650
  return withRouterTelemetry(this.applyTelegramSilentReflectionNotes(repaired, reflectionNotes));
636290
636651
  }
636291
- const strictRetry = await this.retryTelegramInteractionDecisionStrict(
636652
+ const strictRetry = dualEmptyVisible ? null : await this.retryTelegramInteractionDecisionStrict(
636292
636653
  backend,
636293
636654
  userPrompt,
636294
636655
  text,
@@ -636304,12 +636665,13 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
636304
636665
  const failureNarrative = this.summarizeTelegramRouterFailure(diagnostics);
636305
636666
  const backendLivenessFailure = (diagnostics.attempts ?? []).some(telegramRouterDiagnosticAttemptLooksLikeBackendLiveness) || telegramRouterErrorLooksLikeBackendLiveness(diagnostics.repairError ?? "") || telegramRouterErrorLooksLikeBackendLiveness(diagnostics.strictRetryError ?? "");
636306
636667
  const fallback = this.applyTelegramSilentReflectionNotes(this.buildTelegramRouterUnavailableDecision(msg, toolContext, {
636307
- reason: backendLivenessFailure ? "router recovery hit a backend liveness failure; no model-derived reply decision" : "router output was not valid decision JSON after repair/retry; no model-derived reply decision",
636668
+ reason: backendLivenessFailure ? "router recovery hit a backend liveness failure; no model-derived reply decision" : dualEmptyVisible ? "router returned no visible decision content in JSON or plain mode; no model-derived reply decision" : "router output was not valid decision JSON after repair/retry; no model-derived reply decision",
636308
636669
  silentDisposition: reflectionNotes.silentDisposition,
636309
636670
  diagnosticNote: this.composeTelegramRouterDiagnosticNote(
636310
636671
  invalidRouterPreview,
636311
636672
  failureNarrative,
636312
- backendLivenessFailure ? "router backend failed during attention-decision recovery; no usable router decision was available" : invalidRouterPreview ? "router produced an invalid attention decision payload; repair and strict retry did not recover it" : "router produced an empty attention decision payload; strict retry did not recover it"
636673
+ backendLivenessFailure ? "router backend failed during attention-decision recovery; no usable router decision was available" : dualEmptyVisible ? "router returned no visible decision content in JSON or plain mode; repair/strict retry skipped" : invalidRouterPreview ? "router produced an invalid attention decision payload; repair and strict retry did not recover it" : "router produced an empty attention decision payload; strict retry did not recover it",
636674
+ diagnostics
636313
636675
  ),
636314
636676
  raw: text
636315
636677
  }), reflectionNotes);
@@ -636323,7 +636685,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
636323
636685
  diagnosticNote: this.composeTelegramRouterDiagnosticNote(
636324
636686
  void 0,
636325
636687
  failureNarrative,
636326
- `router failed before live notes were generated: ${errMsg.slice(0, 160)}`
636688
+ `router failed before live notes were generated: ${errMsg.slice(0, 160)}`,
636689
+ diagnostics
636327
636690
  )
636328
636691
  }), reflectionNotes);
636329
636692
  return withRouterTelemetry(fallback);
@@ -636443,10 +636806,14 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
636443
636806
  operatorHint
636444
636807
  };
636445
636808
  }
636446
- composeTelegramRouterDiagnosticNote(invalidRouterPreview, failureNarrative, headline) {
636809
+ composeTelegramRouterDiagnosticNote(invalidRouterPreview, failureNarrative, headline, diagnostics) {
636447
636810
  const segments = [];
636448
636811
  segments.push(headline);
636449
- segments.push(this.telegramBackendDiagnostic());
636812
+ segments.push(this.telegramBackendDiagnostic(
636813
+ diagnostics?.backendModel,
636814
+ diagnostics?.routerModelSource,
636815
+ diagnostics?.routerModelDetail
636816
+ ));
636450
636817
  if (failureNarrative.summary) segments.push(failureNarrative.summary);
636451
636818
  if (invalidRouterPreview) segments.push(`invalid router output preview: ${invalidRouterPreview}`);
636452
636819
  if (failureNarrative.detail) segments.push(`router-failure trace: ${failureNarrative.detail}`);
@@ -636884,6 +637251,7 @@ ${TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT}`);
636884
637251
  for (const [, agent] of this.subAgents) {
636885
637252
  agent.aborted = true;
636886
637253
  if (agent.typingInterval) clearInterval(agent.typingInterval);
637254
+ this.stopTelegramPublicProgressMessage(agent);
636887
637255
  try {
636888
637256
  agent.runner?.abort?.();
636889
637257
  } catch {
@@ -636899,6 +637267,8 @@ ${TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT}`);
636899
637267
  }
636900
637268
  this.telegramQueuedSessionWork.clear();
636901
637269
  this.telegramActiveWorkSessions.clear();
637270
+ this.telegramActiveWorkGenerations.clear();
637271
+ this.telegramActiveWorkStartedAtMs.clear();
636902
637272
  this.telegramAdminLivePanels.clear();
636903
637273
  this.flushTelegramViewWrites();
636904
637274
  this.flushTelegramTuiWrites();
@@ -637085,6 +637455,62 @@ ${TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT}`);
637085
637455
  }
637086
637456
  }
637087
637457
  }
637458
+ shouldUseTelegramPublicProgressMessage(msg, toolContext) {
637459
+ return toolContext === "telegram-public" && msg.chatType !== "private" && !msg.guestQueryId;
637460
+ }
637461
+ renderTelegramPublicProgressHTML(subAgent, msg, phase) {
637462
+ const elapsedSec = Math.max(0, Math.floor((Date.now() - subAgent.startedAtMs) / 1e3));
637463
+ const sessionKey = this.sessionKeyForMessage(msg);
637464
+ const activeInference = this.getTelegramActiveInferences().find((inf) => inf.sessionKey === sessionKey);
637465
+ const status = activeInference ? activeInference.ttfbSec === void 0 ? `model request active; waiting for first token (${activeInference.kind}, ${activeInference.elapsedSec.toFixed(1)}s)` : `streaming ${activeInference.kind}; content=${activeInference.contentTokens}t thinking=${activeInference.thinkingTokens}t` : phase;
637466
+ const width = 12;
637467
+ const filled = Math.min(width, Math.floor(elapsedSec % 60 / 60 * width));
637468
+ const bar = `[${"#".repeat(filled)}${"-".repeat(width - filled)}]`;
637469
+ return [
637470
+ `<b>Working</b>`,
637471
+ `<code>${bar}</code> ${elapsedSec}s`,
637472
+ `<i>${escapeTelegramHTML(status)}</i>`
637473
+ ].join("\n");
637474
+ }
637475
+ startTelegramPublicProgressMessage(subAgent, msg, phase) {
637476
+ if (!this.shouldUseTelegramPublicProgressMessage(msg, subAgent.toolContext)) return;
637477
+ if (subAgent.publicProgressTimer) return;
637478
+ const update2 = () => {
637479
+ if (subAgent.aborted) return;
637480
+ if (!this.subAgents.has(this.sessionKeyForMessage(msg))) return;
637481
+ const html = this.renderTelegramPublicProgressHTML(subAgent, msg, phase);
637482
+ if (subAgent.liveMessageId) {
637483
+ const now = Date.now();
637484
+ if (now - subAgent.lastEditMs < 3e3) return;
637485
+ subAgent.lastEditMs = now;
637486
+ void this.editLiveMessage(msg.chatId, subAgent.liveMessageId, html).catch(() => {
637487
+ });
637488
+ return;
637489
+ }
637490
+ if (subAgent.liveMessagePromise) return;
637491
+ subAgent.liveMessagePromise = this.sendLiveMessage(
637492
+ msg.chatId,
637493
+ html,
637494
+ msg.chatType !== "private" ? msg.messageId : void 0
637495
+ ).then((id) => {
637496
+ subAgent.liveMessageId = id;
637497
+ subAgent.lastEditMs = Date.now();
637498
+ }).catch(() => {
637499
+ }).finally(() => {
637500
+ subAgent.liveMessagePromise = null;
637501
+ });
637502
+ };
637503
+ update2();
637504
+ subAgent.publicProgressTimer = setInterval(update2, 5e3);
637505
+ if (typeof subAgent.publicProgressTimer.unref === "function") {
637506
+ subAgent.publicProgressTimer.unref();
637507
+ }
637508
+ }
637509
+ stopTelegramPublicProgressMessage(subAgent) {
637510
+ if (!subAgent.publicProgressTimer) return;
637511
+ clearInterval(subAgent.publicProgressTimer);
637512
+ subAgent.publicProgressTimer = null;
637513
+ }
637088
637514
  ensureTelegramAdminLivePanel(subAgent, msg) {
637089
637515
  const existing = subAgent.adminLivePanelNonce ? this.telegramAdminLivePanels.get(subAgent.adminLivePanelNonce) : void 0;
637090
637516
  if (existing) return existing;
@@ -637343,11 +637769,12 @@ Join: ${newUrl}`);
637343
637769
  }
637344
637770
  this.scheduleTelegramSessionWork(msg, toolContext);
637345
637771
  }
637346
- async processTelegramMessageWork(work) {
637772
+ async processTelegramMessageWork(work, workGeneration) {
637347
637773
  const msg = work.msg;
637348
637774
  const toolContext = work.toolContext;
637349
637775
  const sessionKey = this.sessionKeyForMessage(msg);
637350
637776
  const isAdminDM = toolContext === "telegram-admin-dm";
637777
+ if (!this.telegramWorkGenerationIsCurrent(sessionKey, workGeneration)) return;
637351
637778
  const existing = this.subAgents.get(sessionKey);
637352
637779
  if (existing && !existing.aborted) {
637353
637780
  await this.enqueueTelegramQueuedSessionWorkForExistingSubAgent(work, existing);
@@ -637365,6 +637792,13 @@ Join: ${newUrl}`);
637365
637792
  } catch (err) {
637366
637793
  decision2 = this.fallbackTelegramRouterDecision(msg, toolContext, err);
637367
637794
  }
637795
+ if (!this.telegramWorkGenerationIsCurrent(sessionKey, workGeneration)) {
637796
+ this.tuiWrite(() => renderTelegramSubAgentEvent(
637797
+ msg.username,
637798
+ `discarded stale Telegram work result after queue pin release for ${sessionKey}`
637799
+ ));
637800
+ return;
637801
+ }
637368
637802
  const storedPreference = this.applyTelegramReplyPreferenceUpdate(
637369
637803
  sessionKey,
637370
637804
  msg,
@@ -637482,6 +637916,7 @@ Join: ${newUrl}`);
637482
637916
  if (replyEdge) {
637483
637917
  this.tuiWrite(() => renderTelegramSubAgentEvent(msg.username, replyEdge));
637484
637918
  }
637919
+ this.startTelegramPublicProgressMessage(subAgent, msg, "taking notes and preparing tools");
637485
637920
  try {
637486
637921
  let mediaContext = "";
637487
637922
  if (msg.media || msg.replyToMedia) {
@@ -637498,6 +637933,7 @@ Join: ${newUrl}`);
637498
637933
  clearInterval(subAgent.typingInterval);
637499
637934
  subAgent.typingInterval = null;
637500
637935
  }
637936
+ this.stopTelegramPublicProgressMessage(subAgent);
637501
637937
  const finalText = cleanTelegramVisibleReply(result || "");
637502
637938
  if (isAdminDM && !this.telegramAdminRunCompleted(subAgent)) {
637503
637939
  const incompleteText = this.telegramAdminIncompleteRunText(subAgent, finalText);
@@ -637566,6 +638002,7 @@ Join: ${newUrl}`);
637566
638002
  clearInterval(subAgent.typingInterval);
637567
638003
  subAgent.typingInterval = null;
637568
638004
  }
638005
+ this.stopTelegramPublicProgressMessage(subAgent);
637569
638006
  const errMsg = err instanceof Error ? err.message : String(err);
637570
638007
  this.tuiWrite(() => renderTelegramSubAgentError(msg.username, errMsg));
637571
638008
  this.subAgentViewCallbacks?.onWrite(subAgent.viewId, `error: ${errMsg}`);
@@ -637582,6 +638019,7 @@ Join: ${newUrl}`);
637582
638019
  });
637583
638020
  }
637584
638021
  } finally {
638022
+ this.stopTelegramPublicProgressMessage(subAgent);
637585
638023
  this.clearTelegramSubAgentContextBuffer(sessionKey);
637586
638024
  this.subAgents.delete(sessionKey);
637587
638025
  this.refreshActiveTelegramInteractionCount();
@@ -637755,6 +638193,24 @@ Join: ${newUrl}`);
637755
638193
  typingInterval = this.startTypingIndicator(msg.chatId);
637756
638194
  }
637757
638195
  this.tuiWrite(() => renderTelegramSubAgentEvent(msg.username, `live inference: chat reply (${this.interactionMode})`));
638196
+ if (this.shouldUseTelegramPublicProgressMessage(msg, toolContext)) {
638197
+ const initialHtml = [
638198
+ `<b>Working</b>`,
638199
+ `<code>[------------]</code> 0s`,
638200
+ `<i>preparing a concise reply</i>`
638201
+ ].join("\n");
638202
+ liveMessagePromise = this.sendLiveMessage(
638203
+ msg.chatId,
638204
+ initialHtml,
638205
+ msg.chatType !== "private" ? msg.messageId : void 0
638206
+ ).then((id) => {
638207
+ liveMessageId = id;
638208
+ lastEditMs = Date.now();
638209
+ }).catch(() => {
638210
+ }).finally(() => {
638211
+ liveMessagePromise = null;
638212
+ });
638213
+ }
637758
638214
  try {
637759
638215
  const mediaContext = msg.media || msg.replyToMedia || msg.livePhoto ? await this.processMediaContextForMessage(msg) : "";
637760
638216
  const contextualPayload = [mediaContext, additionalContext].filter(Boolean).join("\n\n");
@@ -641592,10 +642048,12 @@ ${caption}\r
641592
642048
  /** Long polling loop */
641593
642049
  async pollLoop() {
641594
642050
  while (this.polling) {
642051
+ const longPollTimeoutSeconds = this.telegramLongPollTimeoutSeconds();
641595
642052
  try {
641596
642053
  const result = await this.apiCall("getUpdates", {
641597
642054
  offset: this.lastUpdateId + 1,
641598
- timeout: 30,
642055
+ timeout: longPollTimeoutSeconds,
642056
+ limit: 100,
641599
642057
  allowed_updates: TELEGRAM_ALLOWED_UPDATES
641600
642058
  });
641601
642059
  if (result.ok && Array.isArray(result.result)) {
@@ -641633,11 +642091,30 @@ ${caption}\r
641633
642091
  }
641634
642092
  } catch (err) {
641635
642093
  if (this.polling) {
642094
+ const now = Date.now();
642095
+ if (now - this.telegramPollWarningLastAtMs > 3e4) {
642096
+ this.telegramPollWarningLastAtMs = now;
642097
+ this.tuiWrite(() => renderWarning(
642098
+ `Telegram polling warning: getUpdates failed (${err instanceof Error ? err.message : String(err)}); long_poll_timeout=${longPollTimeoutSeconds}s client_deadline_ms=${this.telegramLongPollClientTimeoutMs(longPollTimeoutSeconds) ?? "none"}; retrying`
642099
+ ));
642100
+ }
641636
642101
  await new Promise((r2) => setTimeout(r2, 5e3));
641637
642102
  }
641638
642103
  }
641639
642104
  }
641640
642105
  }
642106
+ telegramLongPollTimeoutSeconds() {
642107
+ const raw = Number.parseInt(process.env["OMNIUS_TG_LONG_POLL_TIMEOUT_SECONDS"] ?? "", 10);
642108
+ if (Number.isFinite(raw) && raw >= 0 && raw <= 120) return raw;
642109
+ return TELEGRAM_DEFAULT_LONG_POLL_TIMEOUT_SECONDS;
642110
+ }
642111
+ telegramLongPollClientTimeoutMs(serverTimeoutSeconds) {
642112
+ const raw = Number.parseInt(process.env["OMNIUS_TG_LONG_POLL_CLIENT_TIMEOUT_MS"] ?? "", 10);
642113
+ if (!Number.isFinite(raw)) return null;
642114
+ const floor = Math.max(5e3, Math.floor((serverTimeoutSeconds ?? 0) * 1e3) + 5e3);
642115
+ if (raw >= floor && raw <= 3e5) return raw;
642116
+ return null;
642117
+ }
641641
642118
  /** Make a Telegram Bot API call with rate-limit retry */
641642
642119
  async apiCall(method, body, _retryDepth = 0) {
641643
642120
  const url = `https://api.telegram.org/bot${this.botToken}/${method}`;
@@ -641650,7 +642127,15 @@ ${caption}\r
641650
642127
  }
641651
642128
  const isLongPoll = method === "getUpdates";
641652
642129
  if (isLongPoll && this.abortController) {
641653
- options2.signal = this.abortController.signal;
642130
+ const timeoutFn = AbortSignal.timeout;
642131
+ const anyFn = AbortSignal.any;
642132
+ const bodyTimeout = typeof body?.["timeout"] === "number" ? body["timeout"] : void 0;
642133
+ const clientTimeoutMs = this.telegramLongPollClientTimeoutMs(bodyTimeout);
642134
+ const signals = [
642135
+ this.abortController.signal,
642136
+ clientTimeoutMs && typeof timeoutFn === "function" ? timeoutFn(clientTimeoutMs) : void 0
642137
+ ].filter((signal) => signal instanceof AbortSignal);
642138
+ options2.signal = typeof anyFn === "function" && signals.length > 1 ? anyFn(signals) : signals[0];
641654
642139
  } else if (!isLongPoll) {
641655
642140
  options2.signal = AbortSignal.timeout(3e4);
641656
642141
  }
@@ -659607,6 +660092,30 @@ function sanitizeChatContent(raw) {
659607
660092
  }
659608
660093
  return cleaned.join("\n").trim();
659609
660094
  }
660095
/**
 * Append "no thinking" soft-switch directives to the most recent user
 * message of a chat transcript.
 *
 * Two spellings are recognised: `/nothink` and `/no_think` (an existing
 * `/no-think` counts as the latter). Only the spellings not already present
 * in the message are appended, one per line, separated from the original
 * text by a blank line. Neither the input array nor its message objects
 * are mutated.
 *
 * @param {Array<{role?: string, content?: unknown}>} messages2 - Chat
 *   messages in request order.
 * @returns {Array<object>} A new array with the last user message
 *   annotated. The original value is returned unchanged when it is not an
 *   array, contains no user message, the last user message has non-string
 *   content, or both directives are already present.
 */
function appendNoThinkDirectivesToMessages(messages2) {
  // Callers may forward a request body verbatim; a missing or malformed
  // `messages` value is passed through so the backend reports the error
  // instead of this helper throwing on `.length`.
  if (!Array.isArray(messages2)) return messages2;

  let lastUserIdx = -1;
  for (let i2 = messages2.length - 1; i2 >= 0; i2--) {
    if (messages2[i2]?.role === "user") {
      lastUserIdx = i2;
      break;
    }
  }
  if (lastUserIdx < 0) return messages2;

  const target = messages2[lastUserIdx];
  // Non-string content (e.g. an array of parts) is left untouched.
  if (!target || typeof target.content !== "string") return messages2;

  const hasOllamaNoThink = /\/nothink\b/i.test(target.content);
  const hasQwenNoThink = /\/no[_-]think\b/i.test(target.content);
  if (hasOllamaNoThink && hasQwenNoThink) return messages2;

  const suffix = [
    hasOllamaNoThink ? null : "/nothink",
    hasQwenNoThink ? null : "/no_think"
  ].filter(Boolean).join("\n");
  const annotated = `${target.content}\n\n${suffix}`;

  return messages2.map(
    (m2, i2) => i2 === lastUserIdx ? { ...m2, content: annotated } : m2
  );
}
659610
660119
  async function directChatBackend(opts) {
659611
660120
  const { model, messages: messages2, stream, res, sessionId, ollamaUrl, extraFields } = opts;
659612
660121
  const cfg = loadConfig();
@@ -659695,13 +660204,12 @@ async function directChatBackend(opts) {
659695
660204
  if (Array.isArray(ef["stop"]) || typeof ef["stop"] === "string") ollamaOpts["stop"] = ef["stop"];
659696
660205
  const hasTools = Array.isArray(ef["tools"]) && ef["tools"].length > 0;
659697
660206
  const ollamaFormat = ollamaFormatFromOpenAIResponseFormat(ef["response_format"]);
660207
+ const ollamaMessages = appendNoThinkDirectivesToMessages(messages2);
659698
660208
  const reqBody = JSON.stringify({
659699
660209
  model: cleanModel,
659700
- messages: messages2,
660210
+ messages: ollamaMessages,
659701
660211
  stream,
659702
- // Don't force think:false when the caller is using tool calling —
659703
- // thinking models often need their reasoning chain to choose a tool.
659704
- ...hasTools ? {} : { think: false },
660212
+ think: false,
659705
660213
  ...hasTools ? { tools: ef["tools"] } : {},
659706
660214
  ...ef["tool_choice"] !== void 0 ? { tool_choice: ef["tool_choice"] } : {},
659707
660215
  ...ollamaFormat !== void 0 ? { format: ollamaFormat } : {},
@@ -659931,13 +660439,18 @@ async function completeRealtimeTextOnly(opts) {
659931
660439
  if (!requestedModel) {
659932
660440
  originalModel = realtimeOllamaFallbackCache.get(realtimeFallbackCacheKey(targetUrl, originalModel)) ?? originalModel;
659933
660441
  }
659934
- const makeOllamaChatBody = (modelName) => JSON.stringify({
659935
- model: modelName,
659936
- messages: requestBody["messages"],
659937
- stream: false,
659938
- think: false,
659939
- options: { temperature, num_predict: maxTokens }
659940
- });
660442
+ const makeOllamaChatBody = (modelName) => {
660443
+ const rtMessages = Array.isArray(requestBody["messages"]) ? appendNoThinkDirectivesToMessages(
660444
+ requestBody["messages"]
660445
+ ) : requestBody["messages"];
660446
+ return JSON.stringify({
660447
+ model: modelName,
660448
+ messages: rtMessages,
660449
+ stream: false,
660450
+ think: false,
660451
+ options: { temperature, num_predict: maxTokens }
660452
+ });
660453
+ };
659941
660454
  let result = await ollamaRequest(targetUrl, "/api/chat", "POST", makeOllamaChatBody(originalModel), timeoutMs, route?.endpoint);
659942
660455
  if (result.status >= 400 && !requestedModel && isOllamaMissingModelError(result.body)) {
659943
660456
  const fallbackModel = await resolveRealtimeOllamaFallbackModel(targetUrl, timeoutMs, originalModel);
@@ -661278,9 +661791,14 @@ async function handleV1ChatCompletions(req2, res, ollamaUrl) {
661278
661791
  return;
661279
661792
  }
661280
661793
  const callerProvidedThink = "think" in routedBody;
661281
- const callerProvidedTools = Array.isArray(routedBody["tools"]) && routedBody["tools"].length > 0;
661282
- const finalThink = callerProvidedThink ? routedBody["think"] : callerProvidedTools ? void 0 : false;
661794
+ const thinkingAllowed = process.env["OMNIUS_ENABLE_THINKING"] === "1" && process.env["OMNIUS_FORCE_NO_THINK"] !== "1";
661795
+ const finalThink = thinkingAllowed && callerProvidedThink ? routedBody["think"] : false;
661283
661796
  const ollamaBody = { ...routedBody };
661797
+ if (finalThink === false && Array.isArray(ollamaBody["messages"])) {
661798
+ ollamaBody["messages"] = appendNoThinkDirectivesToMessages(
661799
+ ollamaBody["messages"]
661800
+ );
661801
+ }
661284
661802
  const ollamaOptions = ollamaBody["options"] && typeof ollamaBody["options"] === "object" ? { ...ollamaBody["options"] } : {};
661285
661803
  if (typeof ollamaBody["max_tokens"] === "number") {
661286
661804
  ollamaOptions["num_predict"] = ollamaBody["max_tokens"];