omnius 1.0.183 → 1.0.184

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -591154,6 +591154,7 @@ __export(setup_exports, {
591154
591154
  checkOllamaUpdate: () => checkOllamaUpdate,
591155
591155
  checkPythonVenv: () => checkPythonVenv,
591156
591156
  checkToolSupport: () => checkToolSupport,
591157
+ classifyOllamaThinkingTreatment: () => classifyOllamaThinkingTreatment,
591157
591158
  computeInferenceScore: () => computeInferenceScore,
591158
591159
  createExpandedVariant: () => createExpandedVariant,
591159
591160
  createExpandedVariantAsync: () => createExpandedVariantAsync,
@@ -591181,6 +591182,7 @@ __export(setup_exports, {
591181
591182
  repairAllExpandedVariants: () => repairAllExpandedVariants,
591182
591183
  runElevatedCommand: () => runElevatedCommand,
591183
591184
  runSetupWizard: () => runSetupWizard,
591185
+ shouldBakeNoThinkIntoOllamaModelfile: () => shouldBakeNoThinkIntoOllamaModelfile,
591184
591186
  updateOllama: () => updateOllama
591185
591187
  });
591186
591188
  import * as readline from "node:readline";
@@ -592896,23 +592898,26 @@ ${c3.cyan(OMNIUS_FIRST_RUN_BANNER)}
592896
592898
  const createModelfile = await ask(rl, ` Create optimized model "${c3.bold(customName)}" with ${ctx3.label} context? (Y/n) `);
592897
592899
  if (createModelfile.toLowerCase() !== "n") {
592898
592900
  try {
592899
- const numPredict = Math.min(16384, Math.max(2048, Math.floor(ctx3.numCtx * 0.25)));
592900
- const modelfileContent = [
592901
- `FROM ${selectedVariant.tag}`,
592902
- `PARAMETER num_ctx ${ctx3.numCtx}`,
592903
- `PARAMETER temperature 0`,
592904
- `PARAMETER num_predict ${numPredict}`,
592905
- `PARAMETER stop "<|endoftext|>"`
592906
- ].join("\n");
592901
+ const modelfileCandidates = expandedVariantContentCandidates(selectedVariant.tag, ctx3.numCtx);
592907
592902
  const modelDir2 = join115(homedir37(), ".omnius", "models");
592908
592903
  mkdirSync56(modelDir2, { recursive: true });
592909
592904
  const modelfilePath = join115(modelDir2, `Modelfile.${customName}`);
592910
- writeFileSync51(modelfilePath, modelfileContent + "\n", "utf8");
592911
592905
  process.stdout.write(` ${c3.dim("Creating model...")} `);
592912
- execSync51(`ollama create ${customName} -f ${modelfilePath}`, {
592913
- stdio: "pipe",
592914
- timeout: 12e4
592915
- });
592906
+ for (let i2 = 0; i2 < modelfileCandidates.length; i2++) {
592907
+ writeFileSync51(modelfilePath, modelfileCandidates[i2] + "\n", "utf8");
592908
+ try {
592909
+ execSync51(`ollama create ${customName} -f ${modelfilePath}`, {
592910
+ stdio: "pipe",
592911
+ timeout: 12e4
592912
+ });
592913
+ break;
592914
+ } catch (err) {
592915
+ if (i2 === 0 && modelfileCandidates.length > 1 && ollamaCreateNothinkRejected(err)) {
592916
+ continue;
592917
+ }
592918
+ throw err;
592919
+ }
592920
+ }
592916
592921
  process.stdout.write(`${c3.green("✔")}
592917
592922
  `);
592918
592923
  setConfigValue("model", customName);
@@ -593495,6 +593500,29 @@ function parseShowNumCtx2(show) {
593495
593500
  }
593496
593501
  return 0;
593497
593502
  }
593503
+ function classifyOllamaThinkingTreatment(modelName) {
593504
+ const normalized = modelName.replace(/^omnius-/i, "").replace(/:latest$/i, "").toLowerCase();
593505
+ if (/\bgpt[-_]?oss\b/.test(normalized)) return "gpt-oss-levels";
593506
+ if (/(?:^|[-_/:])(?:qwq|qvq)(?:[-_/:]|$)/.test(normalized) || /(?:^|[-_/:])thinking(?:[-_/:]|$)/.test(normalized) || /[-_]thinking(?:[-_/:]|$)/.test(normalized)) {
593507
+ return "thinking-only";
593508
+ }
593509
+ if (/(?:^|[-_/:])qwen3(?:[._-]?\d+)?(?:[-_/:]|$)/.test(normalized) || /(?:^|[-_/:])qwen3(?:vl|omni)(?:[-_/:]|$)/.test(normalized) || /deepseek[-_]?r1/.test(normalized) || /deepseek[-_]?v?3[._-]1/.test(normalized)) {
593510
+ return "toggleable";
593511
+ }
593512
+ return "none";
593513
+ }
593514
+ function shouldBakeNoThinkIntoOllamaModelfile(modelName) {
593515
+ return classifyOllamaThinkingTreatment(modelName) === "toggleable";
593516
+ }
593517
+ function parseShowNoThink(show) {
593518
+ const sources = [show.parameters, show.modelfile];
593519
+ for (const source of sources) {
593520
+ if (!source) continue;
593521
+ if (/\b(?:PARAMETER\s+)?nothink\s+(?:true|1|on|yes)\b/i.test(source)) return true;
593522
+ if (/\b(?:PARAMETER\s+)?think\s+(?:false|0|off|no)\b/i.test(source)) return true;
593523
+ }
593524
+ return false;
593525
+ }
593498
593526
  async function checkExpandedVariant(modelName, backendUrl2) {
593499
593527
  if (modelName.startsWith("omnius-")) return null;
593500
593528
  try {
@@ -593574,7 +593602,7 @@ async function readExpandedVariantState(backendUrl2, modelName) {
593574
593602
  if (baseModel && (baseModel.startsWith("/") || /blobs\/sha256[-:]/.test(baseModel))) {
593575
593603
  baseModel = null;
593576
593604
  }
593577
- return { currentNumCtx, baseModel };
593605
+ return { currentNumCtx, baseModel, hasNoThink: parseShowNoThink(showData) };
593578
593606
  } catch {
593579
593607
  return null;
593580
593608
  }
@@ -593582,50 +593610,93 @@ async function readExpandedVariantState(backendUrl2, modelName) {
593582
593610
  function stripVariantTag(modelName) {
593583
593611
  return modelName.replace(/:latest$/i, "");
593584
593612
  }
593585
- function createExpandedVariantContent(baseModel, numCtx) {
593613
+ function createExpandedVariantContent(baseModel, numCtx, options2 = {}) {
593586
593614
  if (baseModel.startsWith("/") || /blobs\/sha256[-:]/.test(baseModel)) {
593587
593615
  throw new Error(
593588
593616
  `createExpandedVariantContent: refusing to use blob-path base "${baseModel}". Pass the user-facing model name (e.g. "qwen3.6:latest") instead.`
593589
593617
  );
593590
593618
  }
593591
593619
  const numPredict = Math.min(16384, Math.max(2048, Math.floor(numCtx * 0.25)));
593592
- return [
593620
+ const lines = [
593593
593621
  `FROM ${baseModel}`,
593594
593622
  `PARAMETER num_ctx ${numCtx}`,
593623
+ ...options2.includeNoThink ? [
593624
+ `# Keep toggleable reasoning models in direct-answer mode by default.`,
593625
+ `PARAMETER nothink true`
593626
+ ] : [],
593595
593627
  `PARAMETER temperature 0`,
593596
593628
  `PARAMETER num_predict ${numPredict}`,
593597
593629
  `PARAMETER stop "<|endoftext|>"`
593630
+ ];
593631
+ return lines.join("\n");
593632
+ }
593633
+ function expandedVariantContentCandidates(baseModel, numCtx) {
593634
+ if (!shouldBakeNoThinkIntoOllamaModelfile(baseModel)) {
593635
+ return [createExpandedVariantContent(baseModel, numCtx)];
593636
+ }
593637
+ return [
593638
+ createExpandedVariantContent(baseModel, numCtx, { includeNoThink: true }),
593639
+ createExpandedVariantContent(baseModel, numCtx, { includeNoThink: false })
593640
+ ];
593641
+ }
593642
+ function ollamaCreateNothinkRejected(err) {
593643
+ const anyErr = err;
593644
+ const text = [
593645
+ anyErr?.stderr?.toString?.() ?? "",
593646
+ anyErr?.stdout?.toString?.() ?? "",
593647
+ anyErr?.message ?? ""
593598
593648
  ].join("\n");
593649
+ return /nothink|unknown parameter|invalid parameter|unsupported parameter/i.test(text);
593599
593650
  }
593600
593651
  function createExpandedVariantNamed(targetModel, baseModel, specs, sizeGB, kvBytesPerToken, archMax) {
593601
593652
  const ctx3 = calculateExpandedVariantContextWindow(specs, sizeGB, kvBytesPerToken, archMax);
593602
- const modelfileContent = createExpandedVariantContent(baseModel, ctx3.numCtx);
593653
+ const modelfileCandidates = expandedVariantContentCandidates(baseModel, ctx3.numCtx);
593603
593654
  try {
593604
593655
  const modelDir2 = join115(homedir37(), ".omnius", "models");
593605
593656
  mkdirSync56(modelDir2, { recursive: true });
593606
593657
  const modelfilePath = join115(modelDir2, `Modelfile.${targetModel}`);
593607
- writeFileSync51(modelfilePath, modelfileContent + "\n", "utf8");
593608
- execSync51(`ollama create ${targetModel} -f ${modelfilePath}`, {
593609
- stdio: "pipe",
593610
- timeout: 12e4
593611
- });
593612
- return targetModel;
593658
+ for (let i2 = 0; i2 < modelfileCandidates.length; i2++) {
593659
+ writeFileSync51(modelfilePath, modelfileCandidates[i2] + "\n", "utf8");
593660
+ try {
593661
+ execSync51(`ollama create ${targetModel} -f ${modelfilePath}`, {
593662
+ stdio: "pipe",
593663
+ timeout: 12e4
593664
+ });
593665
+ return targetModel;
593666
+ } catch (err) {
593667
+ if (i2 === 0 && modelfileCandidates.length > 1 && ollamaCreateNothinkRejected(err)) {
593668
+ continue;
593669
+ }
593670
+ throw err;
593671
+ }
593672
+ }
593673
+ return null;
593613
593674
  } catch {
593614
593675
  return null;
593615
593676
  }
593616
593677
  }
593617
593678
  async function createExpandedVariantNamedAsync(targetModel, baseModel, specs, sizeGB, kvBytesPerToken, archMax) {
593618
593679
  const ctx3 = calculateExpandedVariantContextWindow(specs, sizeGB, kvBytesPerToken, archMax);
593619
- const modelfileContent = createExpandedVariantContent(baseModel, ctx3.numCtx);
593680
+ const modelfileCandidates = expandedVariantContentCandidates(baseModel, ctx3.numCtx);
593620
593681
  try {
593621
593682
  const modelDir2 = join115(homedir37(), ".omnius", "models");
593622
593683
  mkdirSync56(modelDir2, { recursive: true });
593623
593684
  const modelfilePath = join115(modelDir2, `Modelfile.${targetModel}`);
593624
- writeFileSync51(modelfilePath, modelfileContent + "\n", "utf8");
593625
- await execAsync2(`ollama create ${targetModel} -f ${modelfilePath}`, {
593626
- timeout: 12e4
593627
- });
593628
- return targetModel;
593685
+ for (let i2 = 0; i2 < modelfileCandidates.length; i2++) {
593686
+ writeFileSync51(modelfilePath, modelfileCandidates[i2] + "\n", "utf8");
593687
+ try {
593688
+ await execAsync2(`ollama create ${targetModel} -f ${modelfilePath}`, {
593689
+ timeout: 12e4
593690
+ });
593691
+ return targetModel;
593692
+ } catch (err) {
593693
+ if (i2 === 0 && modelfileCandidates.length > 1 && ollamaCreateNothinkRejected(err)) {
593694
+ continue;
593695
+ }
593696
+ throw err;
593697
+ }
593698
+ }
593699
+ return null;
593629
593700
  } catch {
593630
593701
  return null;
593631
593702
  }
@@ -606952,6 +607023,10 @@ Clone a new voice: /voice clone <wav-file> [name]`);
606952
607023
  renderWarning(
606953
607024
  "OMNIUS_FORCE_NO_THINK=1 forces off regardless of /think setting"
606954
607025
  );
607026
+ else if (cur && process.env["OMNIUS_ENABLE_THINKING"] !== "1")
607027
+ renderWarning(
607028
+ "OMNIUS_ENABLE_THINKING is not set; /think is saved but backend requests remain direct-answer mode."
607029
+ );
606955
607030
  return "handled";
606956
607031
  }
606957
607032
  if (token === "auto") {
@@ -606990,6 +607065,11 @@ Clone a new voice: /voice clone <wav-file> [name]`);
606990
607065
  renderInfo(
606991
607066
  "Note: max_tokens will auto-raise to ≥4096 per request to prevent <think> truncation."
606992
607067
  );
607068
+ if (process.env["OMNIUS_ENABLE_THINKING"] !== "1") {
607069
+ renderWarning(
607070
+ "Thinking is hard-disabled by default. Set OMNIUS_ENABLE_THINKING=1 before launch for /think on or /think auto to affect backend requests."
607071
+ );
607072
+ }
606993
607073
  }
606994
607074
  return "handled";
606995
607075
  }
@@ -630844,7 +630924,7 @@ function renderTelegramSubAgentError(username, error) {
630844
630924
  process.stdout.write(` ${c3.dim("│")} ${c3.magenta("✘")} @${username}: ${c3.dim(preview)}
630845
630925
  `);
630846
630926
  }
630847
- var TELEGRAM_TOOL_ACTION_GROUPS, TELEGRAM_TOOL_ACTION_GROUP, TELEGRAM_TOOL_MUTATING_GROUPS, DEFAULT_TELEGRAM_TOOL_GROUP_POLICY, TELEGRAM_TOOL_BUTTON_LABELS, TELEGRAM_SAFETY_PROMPT, ADMIN_DM_PROMPT, ADMIN_GROUP_PROMPT, TELEGRAM_PUBLIC_SOUL_PROFILE, TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT, TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT, TELEGRAM_PUBLIC_VISION_STACK_CONTRACT, GROUP_REPLY_DISCRETION_PROMPT, TELEGRAM_CHAT_MODE_PROMPT, ADMIN_CHAT_PROFILE_PROMPT, TELEGRAM_ACTION_RESPONSE_CONTRACT, TELEGRAM_EXTERNAL_ACQUISITION_CONTRACT, TELEGRAM_LINK_INTEGRITY_CONTRACT, TELEGRAM_INTERACTION_DECISION_RESPONSE_FORMAT, TELEGRAM_INTERACTION_DECISION_MINIMAL_SCHEMA, TELEGRAM_INTERACTION_DECISION_REPAIR_SCHEMA, TELEGRAM_CHAT_REPLY_RESPONSE_FORMAT, TELEGRAM_SPACED_URL_RE, TELEGRAM_HTTP_URL_RE, TELEGRAM_STUCK_SELF_TALK_PREFIXES, TELEGRAM_CHAT_HISTORY_LIMIT, TELEGRAM_CONTEXT_RECENT_DEFAULT, TELEGRAM_CONTEXT_LINE_LIMIT, TELEGRAM_CONTEXT_SAMPLE_LIMIT, TELEGRAM_MEMORY_CARD_LIMIT, TELEGRAM_MEMORY_NOTE_LIMIT, TELEGRAM_ASSOCIATIVE_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_USER_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_ACTION_LIMIT, TELEGRAM_ASSOCIATIVE_RELATION_LIMIT, TELEGRAM_MEMORY_STOPWORDS, TELEGRAM_MEMORY_GENERIC_QUERY_TOKENS, TELEGRAM_SUB_AGENT_BOUNDED_OPTIONS, TELEGRAM_SUB_AGENT_DEFAULT_LIMIT, TELEGRAM_SUB_AGENT_MAX_LIMIT, TELEGRAM_SUB_AGENT_BURST_CONTEXT_LIMIT, TELEGRAM_ADMIN_LIVE_PANEL_PAGES, TELEGRAM_ADMIN_LIVE_MUTATION_TOOLS, TELEGRAM_PUBLIC_HELP_COMMANDS2, TELEGRAM_REMINDER_SLASH_COMMANDS, TELEGRAM_REFLECTION_SLASH_COMMANDS, TELEGRAM_PUBLIC_BOT_COMMAND_NAMES, TELEGRAM_IMAGE_EXTENSIONS, MEDIA_CACHE_TTL_MS, TELEGRAM_CHANNEL_DMN_SWEEP_MS, TELEGRAM_CHANNEL_DMN_IDLE_AFTER_MS, TELEGRAM_CHANNEL_DMN_MIN_INTERVAL_MS, TELEGRAM_CHANNEL_DMN_MIN_MESSAGES, TELEGRAM_ALLOWED_UPDATES, TELEGRAM_PUBLIC_TOOL_QUOTAS, TelegramBridge;
630927
+ var TELEGRAM_TOOL_ACTION_GROUPS, TELEGRAM_TOOL_ACTION_GROUP, TELEGRAM_TOOL_MUTATING_GROUPS, DEFAULT_TELEGRAM_TOOL_GROUP_POLICY, TELEGRAM_TOOL_BUTTON_LABELS, TELEGRAM_SAFETY_PROMPT, ADMIN_DM_PROMPT, ADMIN_GROUP_PROMPT, TELEGRAM_PUBLIC_SOUL_PROFILE, TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT, TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT, TELEGRAM_PUBLIC_VISION_STACK_CONTRACT, GROUP_REPLY_DISCRETION_PROMPT, TELEGRAM_CHAT_MODE_PROMPT, ADMIN_CHAT_PROFILE_PROMPT, TELEGRAM_ACTION_RESPONSE_CONTRACT, TELEGRAM_EXTERNAL_ACQUISITION_CONTRACT, TELEGRAM_LINK_INTEGRITY_CONTRACT, TELEGRAM_INTERACTION_DECISION_RESPONSE_FORMAT, TELEGRAM_INTERACTION_DECISION_MINIMAL_SCHEMA, TELEGRAM_INTERACTION_DECISION_REPAIR_SCHEMA, TELEGRAM_CHAT_REPLY_RESPONSE_FORMAT, TELEGRAM_SPACED_URL_RE, TELEGRAM_HTTP_URL_RE, TELEGRAM_STUCK_SELF_TALK_PREFIXES, TELEGRAM_CHAT_HISTORY_LIMIT, TELEGRAM_CONTEXT_RECENT_DEFAULT, TELEGRAM_CONTEXT_LINE_LIMIT, TELEGRAM_CONTEXT_SAMPLE_LIMIT, TELEGRAM_MEMORY_CARD_LIMIT, TELEGRAM_MEMORY_NOTE_LIMIT, TELEGRAM_ASSOCIATIVE_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_USER_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_ACTION_LIMIT, TELEGRAM_ASSOCIATIVE_RELATION_LIMIT, TELEGRAM_MEMORY_STOPWORDS, TELEGRAM_MEMORY_GENERIC_QUERY_TOKENS, TELEGRAM_SUB_AGENT_BOUNDED_OPTIONS, TELEGRAM_SUB_AGENT_DEFAULT_LIMIT, TELEGRAM_SUB_AGENT_MAX_LIMIT, TELEGRAM_SUB_AGENT_BURST_CONTEXT_LIMIT, TELEGRAM_ADMIN_LIVE_PANEL_PAGES, TELEGRAM_ADMIN_LIVE_MUTATION_TOOLS, TELEGRAM_PUBLIC_HELP_COMMANDS2, TELEGRAM_REMINDER_SLASH_COMMANDS, TELEGRAM_REFLECTION_SLASH_COMMANDS, TELEGRAM_PUBLIC_BOT_COMMAND_NAMES, TELEGRAM_IMAGE_EXTENSIONS, MEDIA_CACHE_TTL_MS, TELEGRAM_CHANNEL_DMN_SWEEP_MS, TELEGRAM_CHANNEL_DMN_IDLE_AFTER_MS, TELEGRAM_CHANNEL_DMN_MIN_INTERVAL_MS, TELEGRAM_CHANNEL_DMN_MIN_MESSAGES, TELEGRAM_ALLOWED_UPDATES, TELEGRAM_DEFAULT_LONG_POLL_TIMEOUT_SECONDS, TELEGRAM_DEFAULT_ROUTER_MODEL_CANDIDATES, TELEGRAM_PUBLIC_TOOL_QUOTAS, TelegramBridge;
630848
630928
  var init_telegram_bridge = __esm({
630849
630929
  "packages/cli/src/tui/telegram-bridge.ts"() {
630850
630930
  "use strict";
@@ -631288,6 +631368,21 @@ Telegram link integrity contract:
631288
631368
  TELEGRAM_CHANNEL_DMN_MIN_INTERVAL_MS = 20 * 60 * 1e3;
631289
631369
  TELEGRAM_CHANNEL_DMN_MIN_MESSAGES = 4;
631290
631370
  TELEGRAM_ALLOWED_UPDATES = ["message", "guest_message", "callback_query", "poll", "message_reaction", "message_reaction_count"];
631371
+ TELEGRAM_DEFAULT_LONG_POLL_TIMEOUT_SECONDS = 50;
631372
+ TELEGRAM_DEFAULT_ROUTER_MODEL_CANDIDATES = [
631373
+ "qwen3:0.6b",
631374
+ "qwen3:1.7b",
631375
+ "qwen3:4b",
631376
+ "qwen3:8b",
631377
+ "qwen2.5:3b",
631378
+ "qwen2.5:7b",
631379
+ "llama3.2:1b",
631380
+ "llama3.2:3b",
631381
+ "gemma3:1b",
631382
+ "gemma3:4b",
631383
+ "phi3:mini",
631384
+ "phi4-mini:latest"
631385
+ ];
631291
631386
  TELEGRAM_PUBLIC_TOOL_QUOTAS = {
631292
631387
  web: { limit: 20, windowMs: 60 * 6e4 },
631293
631388
  media: { limit: 30, windowMs: 60 * 6e4 },
@@ -631321,6 +631416,7 @@ Telegram link integrity contract:
631321
631416
  pollLoopPromise = null;
631322
631417
  pollFatalNotified = false;
631323
631418
  lastUpdateId = 0;
631419
+ telegramRouterModelCache = null;
631324
631420
  state = {
631325
631421
  active: false,
631326
631422
  botUserId: void 0,
@@ -635427,7 +635523,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
635427
635523
  },
635428
635524
  inferenceKind,
635429
635525
  sessionKey,
635430
- { stream: false, reason: "router-json" }
635526
+ { stream: false, reason: "router-json", modelName: diagnostics?.backendModel }
635431
635527
  );
635432
635528
  const visible = jsonModeResult.choices.some(
635433
635529
  (choice) => stripTelegramHiddenThinking(choice.message.content ?? "").trim().length > 0
@@ -635483,7 +635579,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
635483
635579
  suppressed,
635484
635580
  inferenceKind,
635485
635581
  sessionKey,
635486
- { stream: false, reason: "router-plain-retry" }
635582
+ { stream: false, reason: "router-plain-retry", modelName: diagnostics?.backendModel }
635487
635583
  );
635488
635584
  if (diagnostics) {
635489
635585
  const plainVisible = plainResult.choices.some(
@@ -635536,7 +635632,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
635536
635632
  * hard-deadline retire path becomes diagnosable instead of opaque
635537
635633
  */
635538
635634
  async telegramObservableInference(backend, request, kind, sessionKey, options2 = {}) {
635539
- const model = this.agentConfig?.model ?? "?";
635635
+ const model = options2.modelName ?? this.agentConfig?.model ?? "?";
635540
635636
  const promptTokens = estimatePromptTokensFromRequest(request);
635541
635637
  const broker = getModelBroker();
635542
635638
  const trainCtx = await broker.getNctxTrain(model).catch(() => null);
@@ -635570,7 +635666,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
635570
635666
  if (!streamAllowed && process.env["OMNIUS_BROKER_TRACE"] === "1") {
635571
635667
  this.tuiWrite(() => renderTelegramSubAgentEvent(
635572
635668
  sessionKey,
635573
- `inference ${id}: non-stream direct (${options2.reason ?? "requested"}) ${this.telegramInferenceRequestDiagnostic(requestWithCtx)}`
635669
+ `inference ${id}: non-stream direct (${options2.reason ?? "requested"}) ${this.telegramInferenceRequestDiagnostic(requestWithCtx, model)}`
635574
635670
  ));
635575
635671
  }
635576
635672
  } else if (typeof streamFn !== "function") {
@@ -635603,18 +635699,22 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
635603
635699
  this.deregisterTelegramInference(id);
635604
635700
  }
635605
635701
  }
635606
- telegramBackendDiagnostic() {
635702
+ telegramBackendDiagnostic(modelOverride, routerModelSource, routerModelDetail) {
635607
635703
  const config = this.agentConfig;
635608
635704
  if (!config) return "backend=unconfigured model=?";
635609
- return `backend=${config.backendType} url=${config.backendUrl} model=${config.model}`;
635705
+ const model = modelOverride || config.model;
635706
+ const source = routerModelSource ? ` router_model_source=${routerModelSource}` : "";
635707
+ const detail = routerModelDetail ? ` router_model_detail=${compactTelegramRouterDiagnosticText(routerModelDetail, 180)}` : "";
635708
+ const main2 = model !== config.model ? ` main_model=${config.model}` : "";
635709
+ return `backend=${config.backendType} url=${config.backendUrl} model=${model}${main2}${source}${detail}`;
635610
635710
  }
635611
- telegramInferenceRequestDiagnostic(request) {
635711
+ telegramInferenceRequestDiagnostic(request, modelOverride) {
635612
635712
  const responseFormat = request.responseFormat ?? request.response_format;
635613
635713
  const responseFormatType = responseFormat && typeof responseFormat["type"] === "string" ? responseFormat["type"] : responseFormat ? "present" : "none";
635614
635714
  const numCtx = request.numCtx;
635615
635715
  const think = request.think;
635616
635716
  const tools = Array.isArray(request.tools) ? request.tools.length : 0;
635617
- return `${this.telegramBackendDiagnostic()} response_format=${responseFormatType} num_ctx=${Number.isFinite(numCtx) ? numCtx : "unset"} think=${think === void 0 ? "default" : String(think)} tools=${tools} timeoutMs=${Number.isFinite(request.timeoutMs) ? request.timeoutMs : "unset"}`;
635717
+ return `${this.telegramBackendDiagnostic(modelOverride)} response_format=${responseFormatType} num_ctx=${Number.isFinite(numCtx) ? numCtx : "unset"} think=${think === void 0 ? "default" : String(think)} tools=${tools} timeoutMs=${Number.isFinite(request.timeoutMs) ? request.timeoutMs : "unset"}`;
635618
635718
  }
635619
635719
  telegramStreamInactivityDiagnostic(request, inferenceId, inactivityMs, contentChars, thinkingChars) {
635620
635720
  const entry = this.telegramActiveInferences.get(inferenceId);
@@ -635623,7 +635723,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
635623
635723
  const idle = entry ? `${((now - entry.lastTokenAt) / 1e3).toFixed(1)}s` : "unknown";
635624
635724
  const ttfb = entry?.firstChunkAt !== void 0 ? `${((entry.firstChunkAt - entry.startTs) / 1e3).toFixed(1)}s` : "never";
635625
635725
  const phase = entry?.firstChunkAt === void 0 ? "before-first-chunk" : "mid-stream";
635626
- return `stream-inactivity: no chunks for ${(inactivityMs / 1e3).toFixed(0)}s (phase=${phase}; elapsed=${elapsed}; idle=${idle}; ttfb=${ttfb}; content=${contentChars}c thinking=${thinkingChars}c; ${this.telegramInferenceRequestDiagnostic(request)}; stream_endpoint=no-sse-chunk)`;
635726
+ return `stream-inactivity: no chunks for ${(inactivityMs / 1e3).toFixed(0)}s (phase=${phase}; elapsed=${elapsed}; idle=${idle}; ttfb=${ttfb}; content=${contentChars}c thinking=${thinkingChars}c; ${this.telegramInferenceRequestDiagnostic(request, entry?.model)}; stream_endpoint=no-sse-chunk)`;
635627
635727
  }
635628
635728
  /**
635629
635729
  * Drive a chatCompletionStream to exhaustion, accumulating tokens into a
@@ -636162,6 +636262,114 @@ ${retryText}`,
636162
636262
  this.dispatchQueuedTelegramSessionWorkSoon();
636163
636263
  }
636164
636264
  }
636265
+ telegramRouterAutoModelEnabled() {
636266
+ const raw = (process.env["OMNIUS_TG_ROUTER_AUTO_MODEL"] ?? "").trim().toLowerCase();
636267
+ return raw !== "0" && raw !== "false" && raw !== "off";
636268
+ }
636269
+ telegramRouterCandidateModels() {
636270
+ const raw = (process.env["OMNIUS_TG_ROUTER_MODEL_CANDIDATES"] ?? "").trim();
636271
+ const candidates = raw ? raw.split(/[,\s]+/).map((part) => part.trim()).filter(Boolean) : TELEGRAM_DEFAULT_ROUTER_MODEL_CANDIDATES;
636272
+ return Array.from(new Set(candidates));
636273
+ }
636274
+ normalizeOllamaModelNameForMatch(name10) {
636275
+ return name10.trim().toLowerCase().replace(/:latest$/, "");
636276
+ }
636277
+ async fetchOllamaInstalledModelNames(baseUrl) {
636278
+ const url = `${baseUrl.replace(/\/+$/, "")}/api/tags`;
636279
+ const timeoutFn = AbortSignal.timeout;
636280
+ const res = await fetch(url, {
636281
+ signal: typeof timeoutFn === "function" ? timeoutFn(2e3) : void 0
636282
+ });
636283
+ if (!res.ok) throw new Error(`ollama /api/tags returned HTTP ${res.status}`);
636284
+ const data = await res.json();
636285
+ return Array.isArray(data.models) ? data.models.map((model) => typeof model.name === "string" ? model.name : "").filter(Boolean) : [];
636286
+ }
636287
+ async resolveTelegramRouterBackend(config) {
636288
+ const explicit = (process.env["OMNIUS_TG_ROUTER_MODEL"] ?? "").trim();
636289
+ if (explicit && !/^(?:0|false|off|same|main)$/i.test(explicit)) {
636290
+ return {
636291
+ backend: new OllamaAgenticBackend(config.backendUrl, explicit, config.apiKey),
636292
+ model: explicit,
636293
+ source: "env",
636294
+ detail: "OMNIUS_TG_ROUTER_MODEL"
636295
+ };
636296
+ }
636297
+ if (config.backendType !== "ollama" || !this.telegramRouterAutoModelEnabled()) {
636298
+ return {
636299
+ backend: new OllamaAgenticBackend(config.backendUrl, config.model, config.apiKey),
636300
+ model: config.model,
636301
+ source: "main"
636302
+ };
636303
+ }
636304
+ const candidates = this.telegramRouterCandidateModels();
636305
+ const cacheKey = `${config.backendUrl}
636306
+ ${config.model}
636307
+ ${candidates.join(",")}`;
636308
+ const now = Date.now();
636309
+ if (this.telegramRouterModelCache && this.telegramRouterModelCache.cacheKey === cacheKey && now - this.telegramRouterModelCache.atMs < 6e4) {
636310
+ const cached = this.telegramRouterModelCache;
636311
+ return {
636312
+ backend: new OllamaAgenticBackend(config.backendUrl, cached.model, config.apiKey),
636313
+ model: cached.model,
636314
+ source: cached.source,
636315
+ detail: cached.detail
636316
+ };
636317
+ }
636318
+ try {
636319
+ const installed = await this.fetchOllamaInstalledModelNames(config.backendUrl);
636320
+ const installedByNormalized = /* @__PURE__ */ new Map();
636321
+ for (const name10 of installed) {
636322
+ installedByNormalized.set(this.normalizeOllamaModelNameForMatch(name10), name10);
636323
+ }
636324
+ for (const candidate of candidates) {
636325
+ const selected = installedByNormalized.get(this.normalizeOllamaModelNameForMatch(candidate));
636326
+ if (!selected) continue;
636327
+ const resolved = {
636328
+ cacheKey,
636329
+ atMs: now,
636330
+ model: selected,
636331
+ source: "auto-small",
636332
+ detail: "selected first installed OMNIUS_TG_ROUTER_MODEL_CANDIDATES entry from Ollama /api/tags"
636333
+ };
636334
+ this.telegramRouterModelCache = resolved;
636335
+ return {
636336
+ backend: new OllamaAgenticBackend(config.backendUrl, selected, config.apiKey),
636337
+ model: selected,
636338
+ source: "auto-small",
636339
+ detail: resolved.detail
636340
+ };
636341
+ }
636342
+ } catch (err) {
636343
+ const detail2 = `router model auto-detect failed: ${err instanceof Error ? err.message : String(err)}`;
636344
+ this.telegramRouterModelCache = {
636345
+ cacheKey,
636346
+ atMs: now,
636347
+ model: config.model,
636348
+ source: "main",
636349
+ detail: detail2
636350
+ };
636351
+ return {
636352
+ backend: new OllamaAgenticBackend(config.backendUrl, config.model, config.apiKey),
636353
+ model: config.model,
636354
+ source: "main",
636355
+ detail: detail2
636356
+ };
636357
+ }
636358
+ const detail = "no configured small router model was installed; using main model";
636359
+ this.telegramRouterModelCache = {
636360
+ cacheKey,
636361
+ atMs: now,
636362
+ model: config.model,
636363
+ source: "main",
636364
+ detail
636365
+ };
636366
+ return {
636367
+ backend: new OllamaAgenticBackend(config.backendUrl, config.model, config.apiKey),
636368
+ model: config.model,
636369
+ source: "main",
636370
+ detail
636371
+ };
636372
+ }
636165
636373
  async inferTelegramInteractionDecision(msg, toolContext) {
636166
636374
  const config = this.agentConfig;
636167
636375
  const forcedRoute = this.interactionMode === "chat" || this.interactionMode === "action" ? this.interactionMode : null;
@@ -636190,11 +636398,8 @@ ${retryText}`,
636190
636398
  };
636191
636399
  return fallback;
636192
636400
  }
636193
- const backend = new OllamaAgenticBackend(
636194
- config.backendUrl,
636195
- config.model,
636196
- config.apiKey
636197
- );
636401
+ const routerBackend = await this.resolveTelegramRouterBackend(config);
636402
+ const backend = routerBackend.backend;
636198
636403
  const forcedLine = forcedRoute ? `The operator selected Telegram mode "${forcedRoute}". The route field must be "${forcedRoute}", but should_reply must still be inferred live from context.` : `The operator selected Telegram mode "auto". Infer route live from context.`;
636199
636404
  const context2 = this.buildTelegramConversationContextStream(sessionKey, msg, isGroup ? 36 : 20, identitySalienceSignals);
636200
636405
  const currentReplyContext = this.buildTelegramCurrentReplyContext(sessionKey, msg);
@@ -636334,7 +636539,13 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
636334
636539
  } catch {
636335
636540
  }
636336
636541
  }
636337
- const diagnostics = {};
636542
+ const diagnostics = {
636543
+ backendType: config.backendType,
636544
+ backendUrl: config.backendUrl,
636545
+ backendModel: routerBackend.model,
636546
+ routerModelSource: routerBackend.source,
636547
+ routerModelDetail: routerBackend.detail
636548
+ };
636338
636549
  const routerStartMs = Date.now();
636339
636550
  try {
636340
636551
  const result = await this.telegramRouterJsonCompletion(backend, {
@@ -636357,7 +636568,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
636357
636568
  const routerLatencyMs = Date.now() - routerStartMs;
636358
636569
  try {
636359
636570
  const pidReg = getPidRegistry();
636360
- const modelKey = this.agentConfig?.model ?? "?";
636571
+ const modelKey = routerBackend.model ?? this.agentConfig?.model ?? "?";
636361
636572
  pidReg.sample(`tier1.${modelKey}`, routerLatencyMs);
636362
636573
  pidReg.sample(`tier2.${modelKey}`, routerLatencyMs);
636363
636574
  } catch {
@@ -636384,7 +636595,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
636384
636595
  diagnosticNote: this.composeTelegramRouterDiagnosticNote(
636385
636596
  void 0,
636386
636597
  failureNarrative2,
636387
- "router produced no visible attention decision content; repair/strict retry skipped for direct private/admin fail-open"
636598
+ "router produced no visible attention decision content; repair/strict retry skipped for direct private/admin fail-open",
636599
+ diagnostics
636388
636600
  ),
636389
636601
  raw: text
636390
636602
  }),
@@ -636458,7 +636670,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
636458
636670
  diagnosticNote: this.composeTelegramRouterDiagnosticNote(
636459
636671
  invalidRouterPreview,
636460
636672
  failureNarrative,
636461
- backendLivenessFailure ? "router backend failed during attention-decision recovery; no usable router decision was available" : dualEmptyVisible ? "router returned no visible decision content in JSON or plain mode; repair/strict retry skipped" : invalidRouterPreview ? "router produced an invalid attention decision payload; repair and strict retry did not recover it" : "router produced an empty attention decision payload; strict retry did not recover it"
636673
+ backendLivenessFailure ? "router backend failed during attention-decision recovery; no usable router decision was available" : dualEmptyVisible ? "router returned no visible decision content in JSON or plain mode; repair/strict retry skipped" : invalidRouterPreview ? "router produced an invalid attention decision payload; repair and strict retry did not recover it" : "router produced an empty attention decision payload; strict retry did not recover it",
636674
+ diagnostics
636462
636675
  ),
636463
636676
  raw: text
636464
636677
  }), reflectionNotes);
@@ -636472,7 +636685,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
636472
636685
  diagnosticNote: this.composeTelegramRouterDiagnosticNote(
636473
636686
  void 0,
636474
636687
  failureNarrative,
636475
- `router failed before live notes were generated: ${errMsg.slice(0, 160)}`
636688
+ `router failed before live notes were generated: ${errMsg.slice(0, 160)}`,
636689
+ diagnostics
636476
636690
  )
636477
636691
  }), reflectionNotes);
636478
636692
  return withRouterTelemetry(fallback);
@@ -636592,10 +636806,14 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
636592
636806
  operatorHint
636593
636807
  };
636594
636808
  }
636595
- composeTelegramRouterDiagnosticNote(invalidRouterPreview, failureNarrative, headline) {
636809
+ composeTelegramRouterDiagnosticNote(invalidRouterPreview, failureNarrative, headline, diagnostics) {
636596
636810
  const segments = [];
636597
636811
  segments.push(headline);
636598
- segments.push(this.telegramBackendDiagnostic());
636812
+ segments.push(this.telegramBackendDiagnostic(
636813
+ diagnostics?.backendModel,
636814
+ diagnostics?.routerModelSource,
636815
+ diagnostics?.routerModelDetail
636816
+ ));
636599
636817
  if (failureNarrative.summary) segments.push(failureNarrative.summary);
636600
636818
  if (invalidRouterPreview) segments.push(`invalid router output preview: ${invalidRouterPreview}`);
636601
636819
  if (failureNarrative.detail) segments.push(`router-failure trace: ${failureNarrative.detail}`);
@@ -641830,10 +642048,12 @@ ${caption}\r
641830
642048
  /** Long polling loop */
641831
642049
  async pollLoop() {
641832
642050
  while (this.polling) {
642051
+ const longPollTimeoutSeconds = this.telegramLongPollTimeoutSeconds();
641833
642052
  try {
641834
642053
  const result = await this.apiCall("getUpdates", {
641835
642054
  offset: this.lastUpdateId + 1,
641836
- timeout: 30,
642055
+ timeout: longPollTimeoutSeconds,
642056
+ limit: 100,
641837
642057
  allowed_updates: TELEGRAM_ALLOWED_UPDATES
641838
642058
  });
641839
642059
  if (result.ok && Array.isArray(result.result)) {
@@ -641875,7 +642095,7 @@ ${caption}\r
641875
642095
  if (now - this.telegramPollWarningLastAtMs > 3e4) {
641876
642096
  this.telegramPollWarningLastAtMs = now;
641877
642097
  this.tuiWrite(() => renderWarning(
641878
- `Telegram polling warning: getUpdates failed (${err instanceof Error ? err.message : String(err)}); retrying`
642098
+ `Telegram polling warning: getUpdates failed (${err instanceof Error ? err.message : String(err)}); long_poll_timeout=${longPollTimeoutSeconds}s client_deadline_ms=${this.telegramLongPollClientTimeoutMs(longPollTimeoutSeconds) ?? "none"}; retrying`
641879
642099
  ));
641880
642100
  }
641881
642101
  await new Promise((r2) => setTimeout(r2, 5e3));
@@ -641883,10 +642103,17 @@ ${caption}\r
641883
642103
  }
641884
642104
  }
641885
642105
  }
641886
- telegramLongPollClientTimeoutMs() {
642106
+ telegramLongPollTimeoutSeconds() {
642107
+ const raw = Number.parseInt(process.env["OMNIUS_TG_LONG_POLL_TIMEOUT_SECONDS"] ?? "", 10);
642108
+ if (Number.isFinite(raw) && raw >= 0 && raw <= 120) return raw;
642109
+ return TELEGRAM_DEFAULT_LONG_POLL_TIMEOUT_SECONDS;
642110
+ }
642111
+ telegramLongPollClientTimeoutMs(serverTimeoutSeconds) {
641887
642112
  const raw = Number.parseInt(process.env["OMNIUS_TG_LONG_POLL_CLIENT_TIMEOUT_MS"] ?? "", 10);
641888
- if (Number.isFinite(raw) && raw >= 35e3 && raw <= 3e5) return raw;
641889
- return 45e3;
642113
+ if (!Number.isFinite(raw)) return null;
642114
+ const floor = Math.max(5e3, Math.floor((serverTimeoutSeconds ?? 0) * 1e3) + 5e3);
642115
+ if (raw >= floor && raw <= 3e5) return raw;
642116
+ return null;
641890
642117
  }
641891
642118
  /** Make a Telegram Bot API call with rate-limit retry */
641892
642119
  async apiCall(method, body, _retryDepth = 0) {
@@ -641902,9 +642129,11 @@ ${caption}\r
641902
642129
  if (isLongPoll && this.abortController) {
641903
642130
  const timeoutFn = AbortSignal.timeout;
641904
642131
  const anyFn = AbortSignal.any;
642132
+ const bodyTimeout = typeof body?.["timeout"] === "number" ? body["timeout"] : void 0;
642133
+ const clientTimeoutMs = this.telegramLongPollClientTimeoutMs(bodyTimeout);
641905
642134
  const signals = [
641906
642135
  this.abortController.signal,
641907
- typeof timeoutFn === "function" ? timeoutFn(this.telegramLongPollClientTimeoutMs()) : void 0
642136
+ clientTimeoutMs && typeof timeoutFn === "function" ? timeoutFn(clientTimeoutMs) : void 0
641908
642137
  ].filter((signal) => signal instanceof AbortSignal);
641909
642138
  options2.signal = typeof anyFn === "function" && signals.length > 1 ? anyFn(signals) : signals[0];
641910
642139
  } else if (!isLongPoll) {
@@ -661562,9 +661791,14 @@ async function handleV1ChatCompletions(req2, res, ollamaUrl) {
661562
661791
  return;
661563
661792
  }
661564
661793
  const callerProvidedThink = "think" in routedBody;
661565
- const callerProvidedTools = Array.isArray(routedBody["tools"]) && routedBody["tools"].length > 0;
661566
- const finalThink = callerProvidedThink ? routedBody["think"] : callerProvidedTools ? void 0 : false;
661794
+ const thinkingAllowed = process.env["OMNIUS_ENABLE_THINKING"] === "1" && process.env["OMNIUS_FORCE_NO_THINK"] !== "1";
661795
+ const finalThink = thinkingAllowed && callerProvidedThink ? routedBody["think"] : false;
661567
661796
  const ollamaBody = { ...routedBody };
661797
+ if (finalThink === false && Array.isArray(ollamaBody["messages"])) {
661798
+ ollamaBody["messages"] = appendNoThinkDirectivesToMessages(
661799
+ ollamaBody["messages"]
661800
+ );
661801
+ }
661568
661802
  const ollamaOptions = ollamaBody["options"] && typeof ollamaBody["options"] === "object" ? { ...ollamaBody["options"] } : {};
661569
661803
  if (typeof ollamaBody["max_tokens"] === "number") {
661570
661804
  ollamaOptions["num_predict"] = ollamaBody["max_tokens"];
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.183",
3
+ "version": "1.0.184",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "omnius",
9
- "version": "1.0.183",
9
+ "version": "1.0.184",
10
10
  "bundleDependencies": [
11
11
  "image-to-ascii"
12
12
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.183",
3
+ "version": "1.0.184",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",