omnius 1.0.83 → 1.0.85

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -524968,8 +524968,11 @@ async function findFreePort(start2) {
524968
524968
  }
524969
524969
  function resolveDefaultPoolConfig() {
524970
524970
  const baseInstanceUrl = process.env["OMNIUS_OLLAMA_BASE_URL"] || process.env["OLLAMA_HOST"]?.replace(/^([^:/]+:[0-9]+)$/, "http://$1") || "http://127.0.0.1:11434";
524971
- const maxParallelPerInstance = Number(process.env["OMNIUS_OLLAMA_MAX_PARALLEL"]) || 4;
524971
+ const maxParallelPerInstance = Number(process.env["OMNIUS_OLLAMA_MAX_PARALLEL"]) || 1;
524972
524972
  const maxSpawnedInstances = Number(process.env["OMNIUS_OLLAMA_MAX_INSTANCES"]) || 0;
524973
+ const targetGpuInstances = Number(process.env["OMNIUS_OLLAMA_TARGET_GPU_INSTANCES"]) || 0;
524974
+ const gpuPlacementRaw = (process.env["OMNIUS_OLLAMA_GPU_PLACEMENT"] ?? "auto").toLowerCase();
524975
+ const gpuPlacement = gpuPlacementRaw === "dedicated" || gpuPlacementRaw === "elastic" || gpuPlacementRaw === "auto" ? gpuPlacementRaw : "auto";
524973
524976
  const idleMs = Number(process.env["OMNIUS_OLLAMA_IDLE_MS"]) || 5 * 60 * 1e3;
524974
524977
  const reaperIntervalMs = Number(process.env["OMNIUS_OLLAMA_REAPER_MS"]) || 3e4;
524975
524978
  const spawnPortStart = Number(process.env["OMNIUS_OLLAMA_SPAWN_PORT"]) || 11435;
@@ -524983,6 +524986,8 @@ function resolveDefaultPoolConfig() {
524983
524986
  baseInstanceUrl: baseInstanceUrl.replace(/\/+$/, ""),
524984
524987
  maxParallelPerInstance,
524985
524988
  maxSpawnedInstances,
524989
+ targetGpuInstances,
524990
+ gpuPlacement,
524986
524991
  idleMs,
524987
524992
  reaperIntervalMs,
524988
524993
  spawnPortStart,
@@ -525110,15 +525115,17 @@ var init_ollama_pool = __esm({
525110
525115
  this.proc = null;
525111
525116
  }
525112
525117
  };
525113
- realInstanceSpawner = async ({ port, gpuUuid, config }) => {
525118
+ realInstanceSpawner = async ({ port, gpuUuid, gpuIndex, config }) => {
525114
525119
  const env2 = { ...process.env };
525115
525120
  env2["OLLAMA_HOST"] = `127.0.0.1:${port}`;
525116
525121
  if (config.sharedModelStore) {
525117
525122
  env2["OLLAMA_MODELS"] = config.sharedModelStore;
525118
525123
  }
525119
525124
  env2["OLLAMA_NUM_PARALLEL"] = String(config.maxParallelPerInstance);
525120
- if (gpuUuid)
525125
+ if (gpuUuid) {
525121
525126
  env2["CUDA_VISIBLE_DEVICES"] = gpuUuid;
525127
+ env2["GPU_DEVICE_ORDINAL"] = gpuIndex === null ? "" : String(gpuIndex);
525128
+ }
525122
525129
  const child = spawn21(config.ollamaBinary, ["serve"], {
525123
525130
  env: env2,
525124
525131
  stdio: ["ignore", "pipe", "pipe"],
@@ -525160,17 +525167,27 @@ var init_ollama_pool = __esm({
525160
525167
  instances = [];
525161
525168
  reaperHandle = null;
525162
525169
  spawner;
525170
+ gpuDetector;
525171
+ portAllocator;
525163
525172
  /** Serializes concurrent spawn requests so two callers don't both create instance N+1. */
525164
525173
  spawnGate = Promise.resolve();
525174
+ /** True after dedicated mode has successfully started at least one pool-owned GPU runner. */
525175
+ dedicatedGpuPoolActive = false;
525176
+ activePlacementMode = "constrained";
525177
+ gpuCache = null;
525178
+ slotWaiters = [];
525165
525179
  constructor(config, opts) {
525166
525180
  super();
525167
525181
  this.config = { ...resolveDefaultPoolConfig(), ...config };
525168
525182
  this.spawner = opts?.spawner ?? realInstanceSpawner;
525183
+ this.gpuDetector = opts?.gpuDetector ?? detectGpus;
525184
+ this.portAllocator = opts?.portAllocator ?? findFreePort;
525169
525185
  this.instances.push(new OllamaInstance({
525170
525186
  id: "omnius-ollama-base",
525171
525187
  baseUrl: this.config.baseInstanceUrl,
525172
525188
  port: this.portFromUrl(this.config.baseInstanceUrl),
525173
525189
  gpuUuid: null,
525190
+ gpuIndex: null,
525174
525191
  poolOwned: false,
525175
525192
  inflight: 0,
525176
525193
  peakInflight: 0,
@@ -525191,32 +525208,42 @@ var init_ollama_pool = __esm({
525191
525208
  * 2. Any instance with free slots (least-loaded first).
525192
525209
  * 3. Spawn a new instance pinned to the least-utilized GPU, when the
525193
525210
  * pool hasn't hit `maxSpawnedInstances`.
525194
- * 4. Fall back to the least-loaded instance even if saturated the
525195
- * caller will block inside Ollama's internal queue rather than fail.
525211
+ * 4. Queue at the pool boundary when all allowed lanes are busy.
525196
525212
  */
525197
525213
  async acquire(opts) {
525214
+ const gpus = await this.getGpusForPlacement();
525215
+ let placementMode = this.placementModeFor(gpus);
525216
+ this.activePlacementMode = placementMode;
525217
+ if (placementMode === "dedicated") {
525218
+ await this.ensureDedicatedGpuPool(opts.model, gpus);
525219
+ if (!this.instances.some((i2) => i2.state.poolOwned)) {
525220
+ placementMode = "constrained";
525221
+ this.activePlacementMode = placementMode;
525222
+ }
525223
+ }
525198
525224
  const pick = this.pickInstance(opts);
525199
525225
  if (pick) {
525200
525226
  pick.acquire(opts.model);
525201
525227
  return this.buildSlot(pick);
525202
525228
  }
525203
- const spawned = await this.maybeSpawnInstance(opts.model);
525204
- if (spawned) {
525229
+ if (placementMode === "constrained") {
525230
+ return this.acquireQueued(opts);
525231
+ }
525232
+ const spawned = placementMode === "elastic" ? await this.maybeSpawnInstance(opts.model) : null;
525233
+ if (spawned && !spawned.isSaturated()) {
525205
525234
  spawned.acquire(opts.model);
525206
525235
  return this.buildSlot(spawned);
525207
525236
  }
525208
- const fallback = this.instances.slice().sort((a2, b) => a2.state.inflight - b.state.inflight)[0];
525209
- fallback.acquire(opts.model);
525210
- return this.buildSlot(fallback);
525237
+ return this.acquireQueued(opts);
525211
525238
  }
525212
525239
  /** Synchronous routing decision; returns the instance or null if every one is saturated. */
525213
525240
  pickInstance(opts) {
525214
- const candidates = this.instances.filter((inst) => !inst.isSaturated());
525241
+ const candidates = this.instances.filter((inst) => !this.isEffectivelySaturated(inst) && !(this.activePlacementMode === "dedicated" && this.dedicatedGpuPoolActive && !inst.state.poolOwned && !opts.preferBaseInstance));
525215
525242
  if (candidates.length === 0)
525216
525243
  return null;
525217
525244
  const scored = candidates.map((inst) => ({
525218
525245
  inst,
525219
- score: (inst.state.knownModels.has(opts.model) ? 100 : 0) + (opts.preferBaseInstance && !inst.state.poolOwned ? 25 : 0) + inst.freeSlots() * 10 - inst.state.inflight
525246
+ score: (inst.state.knownModels.has(opts.model) ? 100 : 0) + (opts.preferBaseInstance && !inst.state.poolOwned ? 25 : 0) + this.effectiveFreeSlots(inst) * 10 - inst.state.inflight
525220
525247
  }));
525221
525248
  scored.sort((a2, b) => b.score - a2.score);
525222
525249
  return scored[0].inst;
@@ -525227,9 +525254,39 @@ var init_ollama_pool = __esm({
525227
525254
  baseUrl: inst.state.baseUrl,
525228
525255
  poolOwned: inst.state.poolOwned,
525229
525256
  gpuUuid: inst.state.gpuUuid,
525230
- release: (success) => inst.release(success)
525257
+ gpuIndex: inst.state.gpuIndex,
525258
+ release: (success) => {
525259
+ inst.release(success);
525260
+ this.wakeNextSlotWaiter();
525261
+ }
525231
525262
  };
525232
525263
  }
525264
+ async acquireQueued(opts) {
525265
+ for (; ; ) {
525266
+ const pick = this.pickInstance(opts);
525267
+ if (pick) {
525268
+ pick.acquire(opts.model);
525269
+ return this.buildSlot(pick);
525270
+ }
525271
+ await new Promise((resolve52) => this.slotWaiters.push(resolve52));
525272
+ }
525273
+ }
525274
+ wakeNextSlotWaiter() {
525275
+ const waiter = this.slotWaiters.shift();
525276
+ if (waiter)
525277
+ waiter();
525278
+ }
525279
+ effectiveMaxParallel(inst) {
525280
+ if (this.activePlacementMode === "constrained")
525281
+ return 1;
525282
+ return Math.max(1, inst.state.maxParallel);
525283
+ }
525284
+ isEffectivelySaturated(inst) {
525285
+ return inst.state.inflight >= this.effectiveMaxParallel(inst);
525286
+ }
525287
+ effectiveFreeSlots(inst) {
525288
+ return Math.max(0, this.effectiveMaxParallel(inst) - inst.state.inflight);
525289
+ }
525233
525290
  /**
525234
525291
  * Spawn a new instance pinned to a GPU when policy allows. Returns the
525235
525292
  * spawned instance or null when:
@@ -525241,6 +525298,42 @@ var init_ollama_pool = __esm({
525241
525298
  * over-allocate.
525242
525299
  */
525243
525300
  async maybeSpawnInstance(model) {
525301
+ return this.withSpawnGate(async () => {
525302
+ if (!this.canSpawnWithSharedModelStore(model))
525303
+ return null;
525304
+ const poolOwnedCount = this.instances.filter((i2) => i2.state.poolOwned).length;
525305
+ const gpus = await this.getGpusForPlacement();
525306
+ const cap = this.elasticSpawnCap(gpus);
525307
+ if (poolOwnedCount >= cap)
525308
+ return null;
525309
+ const freedPick = this.pickInstance({ model });
525310
+ if (freedPick)
525311
+ return freedPick;
525312
+ const gpu = this.pickGpuForSpawn(gpus);
525313
+ return this.spawnInstance(model, gpu);
525314
+ });
525315
+ }
525316
+ async ensureDedicatedGpuPool(model, gpus) {
525317
+ if (this.placementModeFor(gpus) !== "dedicated")
525318
+ return;
525319
+ await this.withSpawnGate(async () => {
525320
+ if (!this.canSpawnWithSharedModelStore(model))
525321
+ return;
525322
+ const target = this.dedicatedTargetCount(gpus);
525323
+ while (this.instances.filter((i2) => i2.state.poolOwned).length < target) {
525324
+ const gpu = this.pickGpuForSpawn(gpus);
525325
+ if (!gpu)
525326
+ break;
525327
+ const inst = await this.spawnInstance(model, gpu);
525328
+ if (!inst)
525329
+ break;
525330
+ }
525331
+ if (this.instances.some((i2) => i2.state.poolOwned)) {
525332
+ this.dedicatedGpuPoolActive = true;
525333
+ }
525334
+ });
525335
+ }
525336
+ async withSpawnGate(fn) {
525244
525337
  let resolveGate = () => {
525245
525338
  };
525246
525339
  const myTurn = new Promise((r2) => {
@@ -525250,55 +525343,90 @@ var init_ollama_pool = __esm({
525250
525343
  this.spawnGate = myTurn;
525251
525344
  await prev;
525252
525345
  try {
525253
- if (!this.config.sharedModelStore && !this.config.allowUnsharedModelStore) {
525254
- this.emit("spawn-skipped", {
525255
- reason: "missing-shared-model-store",
525256
- model,
525257
- baseInstanceUrl: this.config.baseInstanceUrl
525258
- });
525259
- return null;
525260
- }
525261
- const poolOwnedCount = this.instances.filter((i2) => i2.state.poolOwned).length;
525262
- const gpus = await detectGpus();
525263
- const cap = this.config.maxSpawnedInstances > 0 ? this.config.maxSpawnedInstances : Math.max(0, gpus.length - 1);
525264
- if (poolOwnedCount >= cap)
525265
- return null;
525266
- const freedPick = this.pickInstance({ model });
525267
- if (freedPick)
525268
- return freedPick;
525269
- const port = await findFreePort(this.config.spawnPortStart);
525270
- const gpuUuid = this.pickGpuForSpawn(gpus);
525271
- const { proc, ready } = await this.spawner({ port, gpuUuid, config: this.config });
525272
- try {
525273
- await ready;
525274
- } catch (err) {
525275
- try {
525276
- proc.kill();
525277
- } catch {
525278
- }
525279
- this.emit("spawn-failed", { port, gpuUuid, error: err });
525280
- return null;
525281
- }
525282
- const inst = new OllamaInstance({
525283
- id: `omnius-ollama-${port}`,
525284
- baseUrl: `http://127.0.0.1:${port}`,
525285
- port,
525286
- gpuUuid,
525287
- poolOwned: true,
525288
- inflight: 0,
525289
- peakInflight: 0,
525290
- lastUsedMs: Date.now(),
525291
- knownModels: /* @__PURE__ */ new Set(),
525292
- maxParallel: this.config.maxParallelPerInstance,
525293
- totalRequests: 0
525294
- }, proc);
525295
- this.instances.push(inst);
525296
- this.emit("instance-spawned", { id: inst.state.id, port, gpuUuid });
525297
- return inst;
525346
+ return await fn();
525298
525347
  } finally {
525299
525348
  resolveGate();
525300
525349
  }
525301
525350
  }
525351
+ canSpawnWithSharedModelStore(model) {
525352
+ if (this.config.sharedModelStore || this.config.allowUnsharedModelStore)
525353
+ return true;
525354
+ this.emit("spawn-skipped", {
525355
+ reason: "missing-shared-model-store",
525356
+ model,
525357
+ baseInstanceUrl: this.config.baseInstanceUrl
525358
+ });
525359
+ return false;
525360
+ }
525361
+ placementModeFor(gpus) {
525362
+ const canShareModelStore = Boolean(this.config.sharedModelStore) || this.config.allowUnsharedModelStore;
525363
+ if (!canShareModelStore || gpus.length < 2)
525364
+ return "constrained";
525365
+ if (this.config.gpuPlacement === "elastic")
525366
+ return "elastic";
525367
+ return "dedicated";
525368
+ }
525369
+ dedicatedTargetCount(gpus) {
525370
+ const requested = this.config.targetGpuInstances > 0 ? this.config.targetGpuInstances : gpus.length;
525371
+ const cappedByGpuCount = Math.min(requested, gpus.length);
525372
+ return this.config.maxSpawnedInstances > 0 ? Math.min(cappedByGpuCount, this.config.maxSpawnedInstances) : cappedByGpuCount;
525373
+ }
525374
+ elasticSpawnCap(gpus) {
525375
+ return this.config.maxSpawnedInstances > 0 ? this.config.maxSpawnedInstances : Math.max(0, gpus.length - 1);
525376
+ }
525377
+ async getGpusForPlacement(maxAgeMs = 3e3) {
525378
+ const now = Date.now();
525379
+ if (this.gpuCache && now - this.gpuCache.takenAtMs <= maxAgeMs) {
525380
+ return this.gpuCache.gpus;
525381
+ }
525382
+ const gpus = await this.gpuDetector();
525383
+ this.gpuCache = { gpus, takenAtMs: now };
525384
+ return gpus;
525385
+ }
525386
+ async spawnInstance(model, gpu) {
525387
+ let port;
525388
+ try {
525389
+ port = await this.portAllocator(this.config.spawnPortStart);
525390
+ } catch (err) {
525391
+ this.emit("spawn-failed", {
525392
+ reason: "port-allocation-failed",
525393
+ gpuUuid: gpu?.uuid ?? null,
525394
+ gpuIndex: gpu?.index ?? null,
525395
+ error: err
525396
+ });
525397
+ return null;
525398
+ }
525399
+ const gpuUuid = gpu?.uuid || null;
525400
+ const gpuIndex = gpu?.index ?? null;
525401
+ const { proc, ready } = await this.spawner({ port, gpuUuid, gpuIndex, config: this.config });
525402
+ try {
525403
+ await ready;
525404
+ } catch (err) {
525405
+ try {
525406
+ proc.kill();
525407
+ } catch {
525408
+ }
525409
+ this.emit("spawn-failed", { port, gpuUuid, gpuIndex, error: err });
525410
+ return null;
525411
+ }
525412
+ const inst = new OllamaInstance({
525413
+ id: `omnius-ollama-${port}`,
525414
+ baseUrl: `http://127.0.0.1:${port}`,
525415
+ port,
525416
+ gpuUuid,
525417
+ gpuIndex,
525418
+ poolOwned: true,
525419
+ inflight: 0,
525420
+ peakInflight: 0,
525421
+ lastUsedMs: Date.now(),
525422
+ knownModels: /* @__PURE__ */ new Set([model]),
525423
+ maxParallel: this.config.maxParallelPerInstance,
525424
+ totalRequests: 0
525425
+ }, proc);
525426
+ this.instances.push(inst);
525427
+ this.emit("instance-spawned", { id: inst.state.id, port, gpuUuid, gpuIndex });
525428
+ return inst;
525429
+ }
525302
525430
  /**
525303
525431
  * Pick a GPU for a freshly-spawned instance. Prefers GPUs that no
525304
525432
  * pool-owned instance is already pinned to, then most free VRAM. Returns
@@ -525313,7 +525441,7 @@ var init_ollama_pool = __esm({
525313
525441
  pool3.sort((a2, b) => b.vramFreeMB - a2.vramFreeMB);
525314
525442
  const best = pool3[_gpuCursor % pool3.length];
525315
525443
  _gpuCursor++;
525316
- return best.uuid;
525444
+ return best;
525317
525445
  }
525318
525446
  /**
525319
525447
  * Periodically reap pool-owned instances that have been idle past the
@@ -525360,13 +525488,24 @@ var init_ollama_pool = __esm({
525360
525488
  }
525361
525489
  async status() {
525362
525490
  const hardware = await getHardwareSnapshot();
525491
+ const placementGpus = this.gpuCache?.gpus ?? hardware.gpus;
525492
+ const placementMode = this.placementModeFor(placementGpus);
525493
+ const targetGpuInstances = placementMode === "dedicated" ? this.dedicatedTargetCount(placementGpus) : placementMode === "elastic" ? this.elasticSpawnCap(placementGpus) : 1;
525494
+ const readyGpuInstances = this.instances.filter((inst) => inst.state.poolOwned).length;
525363
525495
  return {
525364
525496
  config: this.config,
525497
+ placement: {
525498
+ mode: placementMode,
525499
+ targetGpuInstances,
525500
+ readyGpuInstances,
525501
+ sharedModelStore: this.config.sharedModelStore
525502
+ },
525365
525503
  instances: this.instances.map((inst) => ({
525366
525504
  id: inst.state.id,
525367
525505
  baseUrl: inst.state.baseUrl,
525368
525506
  poolOwned: inst.state.poolOwned,
525369
525507
  gpuUuid: inst.state.gpuUuid,
525508
+ gpuIndex: inst.state.gpuIndex,
525370
525509
  inflight: inst.state.inflight,
525371
525510
  peakInflight: inst.state.peakInflight,
525372
525511
  maxParallel: inst.state.maxParallel,
@@ -569697,29 +569836,60 @@ async function collectNetworkMetrics() {
569697
569836
  return { rxBytesPerSec: 0, txBytesPerSec: 0 };
569698
569837
  }
569699
569838
  async function collectGpuMetrics() {
569700
- const noGpu = { available: false, name: "", utilization: 0, vramUsedMB: 0, vramTotalMB: 0, vramUtilization: 0 };
569839
+ const noGpu = {
569840
+ available: false,
569841
+ count: 0,
569842
+ name: "",
569843
+ utilization: 0,
569844
+ vramUsedMB: 0,
569845
+ vramTotalMB: 0,
569846
+ vramUtilization: 0,
569847
+ devices: []
569848
+ };
569701
569849
  if (_nvidiaSmiAvailable2 === false) return noGpu;
569702
569850
  try {
569703
569851
  const smi = await new Promise((resolve52, reject) => {
569704
569852
  exec3(
569705
- "nvidia-smi --query-gpu=utilization.gpu,memory.used,memory.total,name --format=csv,noheader,nounits 2>/dev/null",
569853
+ "nvidia-smi --query-gpu=index,uuid,utilization.gpu,memory.used,memory.total,name --format=csv,noheader,nounits 2>/dev/null",
569706
569854
  { encoding: "utf8", timeout: 3e3 },
569707
569855
  (err, stdout) => err ? reject(err) : resolve52(stdout)
569708
569856
  );
569709
569857
  });
569710
569858
  _nvidiaSmiAvailable2 = true;
569711
- const line = smi.trim().split("\n")[0];
569712
- if (!line) return noGpu;
569713
- const parts = line.split(",").map((s2) => s2.trim());
569714
- const vramUsed = parseInt(parts[1] ?? "0", 10) || 0;
569715
- const vramTotal = parseInt(parts[2] ?? "0", 10) || 0;
569859
+ const devices = [];
569860
+ for (const line of smi.trim().split("\n")) {
569861
+ if (!line.trim()) continue;
569862
+ const parts = line.split(",").map((s2) => s2.trim());
569863
+ const index = parseInt(parts[0] ?? "-1", 10);
569864
+ const utilization = parseInt(parts[2] ?? "0", 10) || 0;
569865
+ const vramUsed2 = parseInt(parts[3] ?? "0", 10) || 0;
569866
+ const vramTotal2 = parseInt(parts[4] ?? "0", 10) || 0;
569867
+ if (!Number.isFinite(index) || index < 0) continue;
569868
+ devices.push({
569869
+ index,
569870
+ uuid: parts[1] ?? "",
569871
+ utilization,
569872
+ vramUsedMB: vramUsed2,
569873
+ vramTotalMB: vramTotal2,
569874
+ name: parts.slice(5).join(", ") || "",
569875
+ vramUtilization: vramTotal2 > 0 ? Math.round(vramUsed2 / vramTotal2 * 100) : 0
569876
+ });
569877
+ }
569878
+ if (devices.length === 0) return noGpu;
569879
+ const vramUsed = devices.reduce((sum, gpu) => sum + gpu.vramUsedMB, 0);
569880
+ const vramTotal = devices.reduce((sum, gpu) => sum + gpu.vramTotalMB, 0);
569881
+ const avgUtil = Math.round(devices.reduce((sum, gpu) => sum + gpu.utilization, 0) / devices.length);
569882
+ const firstName = devices[0]?.name ?? "";
569883
+ const allSameName = devices.every((gpu) => gpu.name === firstName);
569716
569884
  return {
569717
569885
  available: true,
569718
- utilization: parseInt(parts[0] ?? "0", 10) || 0,
569886
+ count: devices.length,
569887
+ utilization: avgUtil,
569719
569888
  vramUsedMB: vramUsed,
569720
569889
  vramTotalMB: vramTotal,
569721
- name: parts[3] ?? "",
569722
- vramUtilization: vramTotal > 0 ? Math.round(vramUsed / vramTotal * 100) : 0
569890
+ name: devices.length > 1 && allSameName ? `${devices.length}x ${firstName}` : firstName,
569891
+ vramUtilization: vramTotal > 0 ? Math.round(vramUsed / vramTotal * 100) : 0,
569892
+ devices
569723
569893
  };
569724
569894
  } catch {
569725
569895
  _nvidiaSmiAvailable2 = false;
@@ -569736,7 +569906,9 @@ function getInstantSnapshot() {
569736
569906
  cpuCores: cr.cpuCores,
569737
569907
  cpuModel: cr.cpuModel,
569738
569908
  gpuUtil: -1,
569909
+ gpuCount: 0,
569739
569910
  gpuName: "",
569911
+ gpuDevices: [],
569740
569912
  vramUtil: -1,
569741
569913
  vramUsedMB: 0,
569742
569914
  vramTotalMB: 0,
@@ -569794,10 +569966,11 @@ function collectCpuRam() {
569794
569966
  }
569795
569967
  async function collectLocalMetrics() {
569796
569968
  const cpuRam = collectCpuRam();
569797
- const [gpu, disk, network] = await Promise.all([
569969
+ const [gpu, disk, network, ollamaPool] = await Promise.all([
569798
569970
  collectGpuMetrics(),
569799
569971
  collectDiskMetrics(),
569800
- collectNetworkMetrics()
569972
+ collectNetworkMetrics(),
569973
+ collectOllamaPoolMetrics()
569801
569974
  ]);
569802
569975
  return {
569803
569976
  source: "local",
@@ -569806,7 +569979,9 @@ async function collectLocalMetrics() {
569806
569979
  cpuCores: cpuRam.cpuCores,
569807
569980
  cpuModel: cpuRam.cpuModel,
569808
569981
  gpuUtil: gpu.available ? gpu.utilization : -1,
569982
+ gpuCount: gpu.count,
569809
569983
  gpuName: gpu.name,
569984
+ gpuDevices: gpu.devices,
569810
569985
  vramUtil: gpu.available ? gpu.vramUtilization : -1,
569811
569986
  vramUsedMB: gpu.vramUsedMB,
569812
569987
  vramTotalMB: gpu.vramTotalMB,
@@ -569817,15 +569992,43 @@ async function collectLocalMetrics() {
569817
569992
  diskUsedGB: disk.usedGB,
569818
569993
  diskTotalGB: disk.totalGB,
569819
569994
  diskFreeGB: disk.freeGB,
569820
- diskPath: disk.path
569995
+ diskPath: disk.path,
569996
+ ollamaPool
569821
569997
  },
569822
569998
  network
569823
569999
  };
569824
570000
  }
570001
/**
 * Summarize the Ollama pool for the metrics snapshot.
 *
 * Best effort: returns null when the pool is not used for the configured base
 * URL or when anything throws while reading the pool status.
 *
 * @returns An object with placement info and per-instance counters, or null.
 */
async function collectOllamaPoolMetrics() {
  try {
    const { baseInstanceUrl } = resolveDefaultPoolConfig();
    if (!shouldUseOllamaPoolForBaseUrl(baseInstanceUrl)) {
      return null;
    }
    const pool = getOllamaPool({ baseInstanceUrl });
    const { placement, instances } = await pool.status();
    // Copy only the fields the metrics consumer needs from each instance.
    const summarize = ({ id, baseUrl, poolOwned, gpuUuid, gpuIndex, inflight, maxParallel, totalRequests }) => ({
      id,
      baseUrl,
      poolOwned,
      gpuUuid,
      gpuIndex,
      inflight,
      maxParallel,
      totalRequests
    });
    return {
      enabled: true,
      mode: placement.mode,
      targetGpuInstances: placement.targetGpuInstances,
      readyGpuInstances: placement.readyGpuInstances,
      sharedModelStore: placement.sharedModelStore,
      instances: instances.map((inst) => summarize(inst))
    };
  } catch {
    return null;
  }
}
569825
570027
  var _lastNetSnapshot, _nvidiaSmiAvailable2, _cpuPrevSnapshot, SystemMetricsCollector;
569826
570028
  var init_system_metrics = __esm({
569827
570029
  "packages/cli/src/tui/system-metrics.ts"() {
569828
570030
  "use strict";
570031
+ init_dist8();
569829
570032
  init_disk_monitor();
569830
570033
  _lastNetSnapshot = null;
569831
570034
  _nvidiaSmiAvailable2 = null;
@@ -569881,7 +570084,9 @@ var init_system_metrics = __esm({
569881
570084
  cpuCores: hw.cpuCores ?? 0,
569882
570085
  cpuModel: hw.cpuModel ?? "",
569883
570086
  gpuUtil: hw.gpuUtil ?? -1,
570087
+ gpuCount: hw.gpuCount ?? 0,
569884
570088
  gpuName: hw.gpuName ?? "",
570089
+ gpuDevices: hw.gpuDevices ?? [],
569885
570090
  vramUtil: hw.vramUtil ?? -1,
569886
570091
  vramUsedMB: hw.vramUsedMB ?? 0,
569887
570092
  vramTotalMB: hw.vramTotalMB ?? 0,
@@ -569892,7 +570097,8 @@ var init_system_metrics = __esm({
569892
570097
  diskUsedGB: hw.diskUsedGB ?? 0,
569893
570098
  diskTotalGB: hw.diskTotalGB ?? 0,
569894
570099
  diskFreeGB: hw.diskFreeGB ?? 0,
569895
- diskPath: hw.diskPath ?? ""
570100
+ diskPath: hw.diskPath ?? "",
570101
+ ollamaPool: hw.ollamaPool ?? null
569896
570102
  };
569897
570103
  this._latest = {
569898
570104
  source: "remote",
@@ -573751,6 +573957,19 @@ ${CONTENT_BG_SEQ}`);
573751
573957
  hwExpW += 6 + `${rm4.vramUtil}%`.length + vramDetail.length;
573752
573958
  hwCompW += 6 + `${rm4.vramUtil}%`.length;
573753
573959
  }
573960
+ if (rm4.ollamaPool?.enabled) {
573961
+ const pool3 = rm4.ollamaPool;
573962
+ const ready = pool3.readyGpuInstances;
573963
+ const target = pool3.targetGpuInstances;
573964
+ const poolColor = pool3.mode === "constrained" ? c3.yellow : target > 0 && ready < target ? c3.yellow : c3.green;
573965
+ const poolDetail = pool3.mode === "constrained" ? "queue" : `${ready}/${target}`;
573966
+ const poolText = ` OLLAMA ${poolColor(`${pool3.mode}:${poolDetail}`)}`;
573967
+ const compactText = ` OLLAMA ${poolColor(pool3.mode === "constrained" ? "queue" : `${ready}/${target}`)}`;
573968
+ hwExpStr += poolText;
573969
+ hwCompStr += compactText;
573970
+ hwExpW += 8 + `${pool3.mode}:${poolDetail}`.length;
573971
+ hwCompW += 8 + (pool3.mode === "constrained" ? "queue".length : `${ready}/${target}`.length);
573972
+ }
573754
573973
  if (!isLocal && hwExpW === 0) {
573755
573974
  const statusMsg = rm4.gpuName && rm4.gpuName !== "peer" ? rm4.gpuName : "awaiting metrics...";
573756
573975
  hwExpStr = c3.dim(statusMsg);
@@ -607467,6 +607686,271 @@ var init_tool_policy = __esm({
607467
607686
  }
607468
607687
  });
607469
607688
 
607689
+ // packages/cli/src/tui/telegram-help-menu.ts
607690
/**
 * Help entries for Telegram-only commands, listed ahead of the entries taken
 * from the command registry.
 *
 * @returns A fresh array of `{ signature, description }` objects.
 */
function telegramSyntheticHelpSignatures() {
  const rows = [
    ["/help", "Show interactive command help"],
    ["/start", "Show Telegram bridge status and authentication instructions"],
    ["/auth <code>", "Authenticate this Telegram user as bot admin using the TUI code"],
    ["/call", "Get the active voice call link when a call session is running"],
    ["/reflect", "Run scoped Telegram chat reflection over retained chat history"],
    ["/reflect status", "Show the latest scoped Telegram reflection artifact"],
    ["/reflect now", "Run reflection and let the model decide whether to post a public follow-up"],
    ["/reflect auto on|off", "Enable or disable model-gated idle follow-ups for this chat"],
    ["/reflection", "Alias for /reflect"],
    ["/daydream", "Alias for /reflect in Telegram chats"]
  ];
  return rows.map(([signature, description]) => ({ signature, description }));
}
607704
/**
 * Decide whether a registry command appears in the Telegram help menu.
 *
 * The command named "dream" is always hidden. In "admin" scope a command is
 * shown when its `implementationStatus` is "implemented"; in any other scope
 * it must be listed in `TELEGRAM_PUBLIC_HELP_COMMANDS`.
 *
 * @param cmd Registry command; `name` and `implementationStatus` are read.
 * @param scope Help scope; "admin" selects the admin rule.
 * @returns True when the command should be listed.
 */
function telegramHelpCommandAllowed(cmd, scope) {
  const { name } = cmd;
  if (name === "dream") {
    return false;
  }
  return scope === "admin" ? cmd.implementationStatus === "implemented" : TELEGRAM_PUBLIC_HELP_COMMANDS.has(name);
}
607709
/**
 * Extract argument descriptors from a hint such as "<code> <name?>".
 *
 * Only `<word>` and `<word?>` tokens are recognized; a trailing `?` marks the
 * argument as optional. Every descriptor has type "string" and an empty
 * description.
 *
 * @param argsHint Hint text; any falsy value yields an empty list.
 * @returns Array of `{ name, type, required, description }`.
 */
function parseArgsHint(argsHint) {
  const parsed = [];
  if (!argsHint) {
    return parsed;
  }
  const tokenPattern = /<(\w+)(\?)?>/g;
  for (let hit = tokenPattern.exec(argsHint); hit !== null; hit = tokenPattern.exec(argsHint)) {
    const [, argName, optionalMark] = hit;
    parsed.push({
      name: argName,
      type: "string",
      required: optionalMark !== "?",
      description: ""
    });
  }
  return parsed;
}
607726
/**
 * Build the help entries visible in `scope`: the synthetic Telegram entries
 * first, then the signatures of every allowed registry command, with
 * duplicate signature strings dropped (first occurrence wins).
 *
 * Each entry's `arguments` are parsed from the `argsHint` of the first
 * allowed command that declares the same signature string; entries without
 * such a command get an empty argument list.
 *
 * @param scope Help scope forwarded to `telegramHelpCommandAllowed`.
 * @returns Array of `{ name, signature, description, arguments }`.
 */
function buildScopedToolList(scope) {
  const commands = listCommandRegistry({ includePlanned: false }).filter((cmd) => telegramHelpCommandAllowed(cmd, scope));
  // Index signature text -> first declaring command once, instead of
  // re-scanning every command's signatures for every entry.
  const ownerBySignature = /* @__PURE__ */ new Map();
  const commandSigs = [];
  for (const cmd of commands) {
    for (const sig of cmd.signatures) {
      commandSigs.push(sig);
      if (!ownerBySignature.has(sig.signature)) {
        ownerBySignature.set(sig.signature, cmd);
      }
    }
  }
  const seen = /* @__PURE__ */ new Set();
  const entries = [];
  for (const sig of [...telegramSyntheticHelpSignatures(), ...commandSigs]) {
    if (seen.has(sig.signature)) continue;
    seen.add(sig.signature);
    const owner = ownerBySignature.get(sig.signature);
    entries.push({
      // First whitespace-delimited word of the signature, without the leading slash.
      name: sig.signature.replace(/^\//, "").split(/\s+/)[0] ?? sig.signature,
      signature: sig.signature,
      description: sig.description,
      arguments: owner ? parseArgsHint(owner.argsHint) : []
    });
  }
  return entries;
}
607753
/**
 * Serialize a help-menu action as "<prefix>:<action>:<value>", cut to
 * `MAX_CALLBACK_DATA` characters.
 *
 * NOTE(review): the cut counts UTF-16 code units and silently shortens long
 * values — confirm it matches the byte limit Telegram applies to callback data.
 *
 * @param action Action keyword placed after the prefix.
 * @param value2 Payload; stringified by template interpolation.
 * @returns The encoded, possibly truncated callback string.
 */
function encodeHelpCallback(action, value2) {
  return `${CALLBACK_PREFIX}:${action}:${value2}`.slice(0, MAX_CALLBACK_DATA);
}
607757
/**
 * Parse callback data produced by `encodeHelpCallback`.
 *
 * @param data Raw callback string.
 * @returns `{ action, value }` when `data` starts with the help prefix, has at
 *   least three ":"-separated parts and the action is "page", "detail" or
 *   "back"; otherwise null. Colons inside the value are preserved.
 */
function decodeHelpCallback(data) {
  if (!data.startsWith(CALLBACK_PREFIX + ":")) {
    return null;
  }
  const [, action, ...valueParts] = data.split(":");
  if (valueParts.length === 0) {
    return null;
  }
  switch (action) {
    case "page":
    case "detail":
    case "back":
      return { action, value: valueParts.join(":") };
    default:
      return null;
  }
}
607766
/**
 * Build the inline keyboard for one page of the command list.
 *
 * Command buttons are laid out in rows of `GRID_COLS`, followed by one
 * navigation row: a back button (not on the first page), a page indicator and
 * a next button (not on the last page).
 *
 * @param tools Full list of help entries.
 * @param page2 Zero-based page index.
 * @param scope Accepted for signature parity; not read here.
 * @returns Array of button rows.
 */
function buildToolListKeyboard(tools, page2, scope) {
  const pageCount = Math.max(1, Math.ceil(tools.length / TOOLS_PER_PAGE));
  const offset = page2 * TOOLS_PER_PAGE;
  const rows = [];
  let row = [];
  tools.slice(offset, offset + TOOLS_PER_PAGE).forEach((tool) => {
    const { name } = tool;
    // Long names are cut to 17 characters plus an ellipsis.
    const text = name.length > 18 ? name.slice(0, 17) + "…" : name;
    row.push({ text, callback_data: encodeHelpCallback("detail", name) });
    if (row.length >= GRID_COLS) {
      rows.push(row);
      row = [];
    }
  });
  if (row.length > 0) {
    rows.push(row);
  }
  const nav = [];
  if (page2 > 0) {
    nav.push({ text: "◀️ Back", callback_data: encodeHelpCallback("page", page2 - 1) });
  }
  // The indicator button points at the page that is already displayed.
  nav.push({
    text: `${page2 + 1}/${pageCount}`,
    callback_data: encodeHelpCallback("page", page2.toString())
  });
  if (page2 < pageCount - 1) {
    nav.push({ text: "▶️ Next", callback_data: encodeHelpCallback("page", page2 + 1) });
  }
  rows.push(nav);
  return rows;
}
607801
/**
 * Build the inline keyboard for a command's detail view: one single-button
 * row per argument, then a row that returns to the list page `page2`.
 *
 * @param tool Help entry; `arguments` and `name` are read.
 * @param page2 Page index encoded into the back button.
 * @returns Array of button rows.
 */
function buildToolDetailKeyboard(tool, page2) {
  const argRows = Array.from(tool.arguments, (arg) => {
    const marker = arg.required ? "●" : "○";
    return [{
      text: `${marker} ${arg.name}`.slice(0, 30),
      // Argument buttons point back at this same detail view.
      callback_data: encodeHelpCallback("detail", tool.name)
    }];
  });
  const backRow = [{
    text: "🔙 Back to tools",
    callback_data: encodeHelpCallback("back", page2.toString())
  }];
  return [...argRows, backRow];
}
607818
/**
 * Escape `&`, `<` and `>` so `text` can be embedded in HTML-formatted messages.
 *
 * @param text String to escape.
 * @returns The escaped string.
 */
function escapeHTML(text) {
  const entities = { "&": "&amp;", "<": "&lt;", ">": "&gt;" };
  // One pass: characters introduced by a replacement are never re-escaped.
  return text.replace(/[&<>]/g, (ch) => entities[ch]);
}
607821
/**
 * Render the HTML text for one page of the command list: a header with the
 * scope label and page counter, two lines per command (signature, then
 * description), and a navigation hint.
 *
 * @param tools Full list of help entries.
 * @param page2 Zero-based page index.
 * @param scope "admin" selects the admin label; anything else the public one.
 * @returns The message text, lines joined with "\n".
 */
function buildToolListText(tools, page2, scope) {
  const pageCount = Math.max(1, Math.ceil(tools.length / TOOLS_PER_PAGE));
  const offset = page2 * TOOLS_PER_PAGE;
  const scopeLabel = scope === "admin" ? "admin full scope" : "public secure scope";
  const header = [
    `<b>🛠 Commands (${scopeLabel})</b>`,
    `<i>Page ${page2 + 1} of ${pageCount} — ${tools.length} commands</i>`,
    ""
  ];
  const body = tools.slice(offset, offset + TOOLS_PER_PAGE).flatMap((tool) => [
    `<code>${escapeHTML(tool.signature)}</code>`,
    ` ${escapeHTML(tool.description)}`
  ]);
  const footer = ["", "<i>Tap a command for details, or use ◀️▶️ to navigate.</i>"];
  return [...header, ...body, ...footer].join("\n");
}
607838
/**
 * Render the HTML text body for a single tool's detail view: signature,
 * description, then either a per-argument listing or a "no arguments" note.
 *
 * @param {object} tool - Tool entry; reads `signature`, `description` and
 *   `arguments` (each with `name`, `type`, `required`, optional `description`).
 * @returns {string} Newline-joined HTML message text.
 */
function buildToolDetailText(tool) {
  const out = [
    `<b>📌 ${escapeHTML(tool.signature)}</b>`,
    "",
    escapeHTML(tool.description),
    ""
  ];
  const hasArguments = tool.arguments.length > 0;
  if (!hasArguments) {
    out.push("<i>No arguments.</i>");
  }
  if (hasArguments) {
    out.push("<b>Arguments:</b>", "");
    for (const argument of tool.arguments) {
      const badge = argument.required ? "<b>[required]</b>" : "<i>[optional]</i>";
      out.push(`<code>${escapeHTML(argument.name)}</code> ${badge} <i>${escapeHTML(argument.type)}</i>`);
      // The description line is optional; the blank separator is not.
      if (argument.description) {
        out.push(` ${escapeHTML(argument.description)}`);
      }
      out.push("");
    }
  }
  out.push("🔙 <i>Tap Back to return to the tool list.</i>");
  return out.join("\n");
}
607862
/**
 * Render the first page of the interactive help menu for a scope.
 *
 * @param {string} scope - Scope passed through to the tool-list builders.
 * @returns {{text: string, replyMarkup: {inline_keyboard: Array}}} Message
 *   text plus its inline keyboard.
 */
function renderHelpMenu(scope) {
  const FIRST_PAGE = 0;
  const scopedTools = buildScopedToolList(scope);
  return {
    text: buildToolListText(scopedTools, FIRST_PAGE, scope),
    replyMarkup: {
      inline_keyboard: buildToolListKeyboard(scopedTools, FIRST_PAGE, scope)
    }
  };
}
607868
/**
 * Render an arbitrary page of the interactive help menu.
 *
 * The requested page is clamped into `[0, totalPages - 1]` before rendering,
 * so out-of-range requests show the nearest valid page.
 *
 * @param {string} scope - Scope passed through to the tool-list builders.
 * @param {number} page2 - Requested zero-based page index.
 * @returns {{text: string, replyMarkup: {inline_keyboard: Array}}} Message
 *   text plus its inline keyboard.
 */
function renderHelpMenuPage(scope, page2) {
  const scopedTools = buildScopedToolList(scope);
  const lastPage = Math.max(1, Math.ceil(scopedTools.length / TOOLS_PER_PAGE)) - 1;
  const safePage = Math.max(0, Math.min(page2, lastPage));
  return {
    text: buildToolListText(scopedTools, safePage, scope),
    replyMarkup: {
      inline_keyboard: buildToolListKeyboard(scopedTools, safePage, scope)
    }
  };
}
607876
/**
 * Render the detail view for one tool, looked up by exact name within the
 * tool list for the given scope.
 *
 * @param {string} scope - Scope used to build the candidate tool list.
 * @param {string} toolName - Name to match against each tool's `name`.
 * @param {number} returnPage - List page the detail view's "back" button
 *   should return to.
 * @returns {{text: string, replyMarkup: {inline_keyboard: Array}} | null}
 *   The rendered view, or `null` when no tool with that name is in scope.
 */
function renderHelpToolDetail(scope, toolName, returnPage) {
  const match = buildScopedToolList(scope).find((candidate) => candidate.name === toolName);
  if (!match) {
    return null;
  }
  return {
    text: buildToolDetailText(match),
    replyMarkup: {
      inline_keyboard: buildToolDetailKeyboard(match, returnPage)
    }
  };
}
607884
/**
 * Translate a help-menu callback payload into the next view and menu state.
 *
 * Supported actions:
 *  - `"page"` / `"back"`: show the list page whose index is the payload value.
 *  - `"detail"`: show the detail view for the tool named by the payload value.
 *
 * @param {string} callbackData - Raw callback payload.
 * @param {object} currentState - Current menu state; reads `scope` and `page`
 *   and is shallow-copied into the returned state.
 * @returns {{render: object, newState: object} | null} The view to display
 *   and the state to store, or `null` when the payload cannot be decoded,
 *   the page value is not a number, the tool is not found, or the action is
 *   unknown.
 */
function handleHelpCallback(callbackData, currentState) {
  const decoded = decodeHelpCallback(callbackData);
  if (!decoded) return null;
  const { action, value: value2 } = decoded;

  // "page" and "back" both land on a list page and differ only in name.
  if (action === "page" || action === "back") {
    const targetPage = parseInt(value2, 10);
    if (Number.isNaN(targetPage)) return null;
    return {
      render: renderHelpMenuPage(currentState.scope, targetPage),
      newState: { ...currentState, page: targetPage, view: "list", detailToolName: null }
    };
  }

  if (action === "detail") {
    // The stored list page is kept so the detail view can navigate back to it.
    const detailView = renderHelpToolDetail(currentState.scope, value2, currentState.page);
    if (!detailView) return null;
    return {
      render: detailView,
      newState: { ...currentState, view: "detail", detailToolName: value2 }
    };
  }

  return null;
}
607917
var TOOLS_PER_PAGE, GRID_COLS, CALLBACK_PREFIX, MAX_CALLBACK_DATA, TELEGRAM_PUBLIC_HELP_COMMANDS, HelpMenuStateStore;
// Lazy initializer for the Telegram help-menu module: assigns the module
// constants and defines the per-message menu state store.
var init_telegram_help_menu = __esm({
  "packages/cli/src/tui/telegram-help-menu.ts"() {
    "use strict";
    init_command_registry();
    // Number of tools listed on one help page.
    TOOLS_PER_PAGE = 10;
    // Number of buttons per keyboard row in the tool list.
    GRID_COLS = 5;
    // NOTE(review): presumably the prefix of every help callback payload and
    // the maximum payload size; their use is outside this block, so confirm.
    CALLBACK_PREFIX = "help";
    MAX_CALLBACK_DATA = 64;
    TELEGRAM_PUBLIC_HELP_COMMANDS = /* @__PURE__ */ new Set(["help", "start", "auth", "call"]);
    /**
     * In-memory store of help-menu state, keyed by `chatId:messageId`.
     *
     * Entries expire `TTL_MS` after their `createdAt` timestamp. Expiry is
     * enforced on every read and stale entries are swept on every write, so
     * the store stays bounded even if nothing calls `prune()` on a timer.
     */
    HelpMenuStateStore = class {
      states = /* @__PURE__ */ new Map();
      TTL_MS = 30 * 60 * 1e3;
      // 30 min
      /** Build the map key for a menu message. */
      key(chatId, messageId) {
        return `${chatId}:${messageId}`;
      }
      /**
       * Whether a state's age exceeds the TTL. A state without a numeric
       * `createdAt` compares as not expired.
       */
      isExpired(state, now = Date.now()) {
        return now - state.createdAt > this.TTL_MS;
      }
      /** Store (or overwrite) the state for `state.chatId` / `state.messageId`. */
      set(state) {
        // Sweep here so memory is reclaimed without relying on an external
        // timer; the map only holds menus opened within the TTL window.
        this.prune();
        this.states.set(this.key(state.chatId, state.messageId), state);
      }
      /**
       * Look up the state for a menu message.
       * @returns The stored state, or `undefined` when absent or expired.
       */
      get(chatId, messageId) {
        const mapKey = this.key(chatId, messageId);
        const state = this.states.get(mapKey);
        if (state !== void 0 && this.isExpired(state)) {
          // Expired menus are dropped on read so they behave like missing ones.
          this.states.delete(mapKey);
          return void 0;
        }
        return state;
      }
      /** Remove the state for a menu message, if any. */
      delete(chatId, messageId) {
        this.states.delete(this.key(chatId, messageId));
      }
      /** Prune expired states */
      prune() {
        const now = Date.now();
        for (const [mapKey, state] of this.states) {
          if (this.isExpired(state, now)) this.states.delete(mapKey);
        }
      }
    };
  }
});
607953
+
607470
607954
  // packages/cli/src/tui/telegram-creative-tools.ts
607471
607955
  import { createCipheriv as createCipheriv4, createDecipheriv as createDecipheriv4, randomBytes as randomBytes21 } from "node:crypto";
607472
607956
  import {
@@ -611136,7 +611620,7 @@ function splitTelegramReminderDue(raw) {
611136
611620
  if (suffixMatch) return { due: suffixMatch[2].trim(), message: suffixMatch[1].trim() };
611137
611621
  return { message: text };
611138
611622
  }
611139
- function telegramSyntheticHelpSignatures() {
611623
+ function telegramSyntheticHelpSignatures2() {
611140
611624
  return [
611141
611625
  { signature: "/help", description: "Show Telegram command help" },
611142
611626
  { signature: "/start", description: "Show Telegram bridge status and authentication instructions" },
@@ -611150,15 +611634,15 @@ function telegramSyntheticHelpSignatures() {
611150
611634
  { signature: "/daydream", description: "Alias for /reflect in Telegram chats" }
611151
611635
  ];
611152
611636
  }
611153
/**
 * Decide whether a registry command may be listed in Telegram help.
 *
 * - `dream` is never listed.
 * - Admin scope lists every command whose status is `"implemented"`.
 * - Any other scope lists only commands named in one of the two public
 *   allow-lists.
 *
 * @param {object} cmd - Registry entry; reads `name` and `implementationStatus`.
 * @param {string} scope - `"admin"` or any other value for public scope.
 * @returns {boolean} `true` when the command should be shown.
 */
function telegramHelpCommandAllowed2(cmd, scope) {
  const { name } = cmd;
  if (name === "dream") {
    return false;
  }
  return scope === "admin"
    ? cmd.implementationStatus === "implemented"
    : TELEGRAM_PUBLIC_HELP_COMMANDS2.has(name) || TELEGRAM_PUBLIC_BOT_COMMAND_NAMES.has(name);
}
611158
611642
  function buildTelegramHelpHTML(scope, maxPublicCommands = 24) {
611159
- const commands = listCommandRegistry({ includePlanned: false }).filter((cmd) => telegramHelpCommandAllowed(cmd, scope));
611643
+ const commands = listCommandRegistry({ includePlanned: false }).filter((cmd) => telegramHelpCommandAllowed2(cmd, scope));
611160
611644
  const signatures = [
611161
- ...telegramSyntheticHelpSignatures(),
611645
+ ...telegramSyntheticHelpSignatures2(),
611162
611646
  ...commands.flatMap((cmd) => cmd.signatures)
611163
611647
  ];
611164
611648
  const seen = /* @__PURE__ */ new Set();
@@ -611872,7 +612356,7 @@ function renderTelegramSubAgentError(username, error) {
611872
612356
  process.stdout.write(` ${c3.dim("│")} ${c3.red("✘")} @${username}: ${c3.dim(preview)}
611873
612357
  `);
611874
612358
  }
611875
- var TELEGRAM_TOOL_ACTION_GROUPS, TELEGRAM_TOOL_ACTION_GROUP, TELEGRAM_TOOL_MUTATING_GROUPS, DEFAULT_TELEGRAM_TOOL_GROUP_POLICY, TELEGRAM_TOOL_BUTTON_LABELS, TELEGRAM_SAFETY_PROMPT, ADMIN_DM_PROMPT, ADMIN_GROUP_PROMPT, TELEGRAM_PUBLIC_SOUL_PROFILE, TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT, TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT, TELEGRAM_PUBLIC_VISION_STACK_CONTRACT, GROUP_REPLY_DISCRETION_PROMPT, TELEGRAM_CHAT_MODE_PROMPT, ADMIN_CHAT_PROFILE_PROMPT, TELEGRAM_ACTION_RESPONSE_CONTRACT, TELEGRAM_EXTERNAL_ACQUISITION_CONTRACT, TELEGRAM_INTERACTION_DECISION_RESPONSE_FORMAT, TELEGRAM_STUCK_SELF_TALK_PREFIXES, TELEGRAM_CHAT_HISTORY_LIMIT, TELEGRAM_CONTEXT_RECENT_DEFAULT, TELEGRAM_CONTEXT_LINE_LIMIT, TELEGRAM_CONTEXT_SAMPLE_LIMIT, TELEGRAM_MEMORY_CARD_LIMIT, TELEGRAM_MEMORY_NOTE_LIMIT, TELEGRAM_ASSOCIATIVE_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_USER_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_ACTION_LIMIT, TELEGRAM_ASSOCIATIVE_RELATION_LIMIT, TELEGRAM_MEMORY_STOPWORDS, TELEGRAM_MEMORY_GENERIC_QUERY_TOKENS, TELEGRAM_SUB_AGENT_BOUNDED_OPTIONS, TELEGRAM_PUBLIC_HELP_COMMANDS, TELEGRAM_REMINDER_SLASH_COMMANDS, TELEGRAM_REFLECTION_SLASH_COMMANDS, TELEGRAM_PUBLIC_BOT_COMMAND_NAMES, TELEGRAM_IMAGE_EXTENSIONS, MEDIA_CACHE_TTL_MS, TELEGRAM_CHANNEL_DMN_SWEEP_MS, TELEGRAM_CHANNEL_DMN_IDLE_AFTER_MS, TELEGRAM_CHANNEL_DMN_MIN_INTERVAL_MS, TELEGRAM_CHANNEL_DMN_MIN_MESSAGES, TELEGRAM_ALLOWED_UPDATES, TELEGRAM_PUBLIC_TOOL_QUOTAS, TelegramBridge;
612359
+ var TELEGRAM_TOOL_ACTION_GROUPS, TELEGRAM_TOOL_ACTION_GROUP, TELEGRAM_TOOL_MUTATING_GROUPS, DEFAULT_TELEGRAM_TOOL_GROUP_POLICY, TELEGRAM_TOOL_BUTTON_LABELS, TELEGRAM_SAFETY_PROMPT, ADMIN_DM_PROMPT, ADMIN_GROUP_PROMPT, TELEGRAM_PUBLIC_SOUL_PROFILE, TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT, TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT, TELEGRAM_PUBLIC_VISION_STACK_CONTRACT, GROUP_REPLY_DISCRETION_PROMPT, TELEGRAM_CHAT_MODE_PROMPT, ADMIN_CHAT_PROFILE_PROMPT, TELEGRAM_ACTION_RESPONSE_CONTRACT, TELEGRAM_EXTERNAL_ACQUISITION_CONTRACT, TELEGRAM_INTERACTION_DECISION_RESPONSE_FORMAT, TELEGRAM_STUCK_SELF_TALK_PREFIXES, TELEGRAM_CHAT_HISTORY_LIMIT, TELEGRAM_CONTEXT_RECENT_DEFAULT, TELEGRAM_CONTEXT_LINE_LIMIT, TELEGRAM_CONTEXT_SAMPLE_LIMIT, TELEGRAM_MEMORY_CARD_LIMIT, TELEGRAM_MEMORY_NOTE_LIMIT, TELEGRAM_ASSOCIATIVE_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_USER_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_ACTION_LIMIT, TELEGRAM_ASSOCIATIVE_RELATION_LIMIT, TELEGRAM_MEMORY_STOPWORDS, TELEGRAM_MEMORY_GENERIC_QUERY_TOKENS, TELEGRAM_SUB_AGENT_BOUNDED_OPTIONS, TELEGRAM_PUBLIC_HELP_COMMANDS2, TELEGRAM_REMINDER_SLASH_COMMANDS, TELEGRAM_REFLECTION_SLASH_COMMANDS, TELEGRAM_PUBLIC_BOT_COMMAND_NAMES, TELEGRAM_IMAGE_EXTENSIONS, MEDIA_CACHE_TTL_MS, TELEGRAM_CHANNEL_DMN_SWEEP_MS, TELEGRAM_CHANNEL_DMN_IDLE_AFTER_MS, TELEGRAM_CHANNEL_DMN_MIN_INTERVAL_MS, TELEGRAM_CHANNEL_DMN_MIN_MESSAGES, TELEGRAM_ALLOWED_UPDATES, TELEGRAM_PUBLIC_TOOL_QUOTAS, TelegramBridge;
611876
612360
  var init_telegram_bridge = __esm({
611877
612361
  "packages/cli/src/tui/telegram-bridge.ts"() {
611878
612362
  "use strict";
@@ -611883,6 +612367,7 @@ var init_telegram_bridge = __esm({
611883
612367
  init_tool_policy();
611884
612368
  init_media_routing();
611885
612369
  init_command_registry();
612370
+ init_telegram_help_menu();
611886
612371
  init_scoped_personality();
611887
612372
  init_telegram_creative_tools();
611888
612373
  init_omnius_directory();
@@ -612249,7 +612734,7 @@ External acquisition contract:
612249
612734
  bruteForceMaxCycles: 0,
612250
612735
  allowTurnExtension: false
612251
612736
  };
612252
- TELEGRAM_PUBLIC_HELP_COMMANDS = /* @__PURE__ */ new Set(["help", "start", "auth", "call"]);
612737
+ TELEGRAM_PUBLIC_HELP_COMMANDS2 = /* @__PURE__ */ new Set(["help", "start", "auth", "call"]);
612253
612738
  TELEGRAM_REMINDER_SLASH_COMMANDS = /* @__PURE__ */ new Set(["remind", "reminder", "reminders"]);
612254
612739
  TELEGRAM_REFLECTION_SLASH_COMMANDS = /* @__PURE__ */ new Set(["reflect", "reflection", "daydream", "dream"]);
612255
612740
  TELEGRAM_PUBLIC_BOT_COMMAND_NAMES = new Set(
@@ -612343,6 +612828,10 @@ External acquisition contract:
612343
612828
  telegramBotRightsCache = /* @__PURE__ */ new Map();
612344
612829
  /** Short-lived Telegram inline button state directory */
612345
612830
  telegramToolButtonDir;
612831
+ /** Interactive help menu state store (inline keyboard navigation) */
612832
+ helpMenuStates = new HelpMenuStateStore();
612833
+ /** Prune expired help menu states every 5 minutes */
612834
+ helpMenuPruneTimer = null;
612346
612835
  /** Command handler for admin DM slash commands (wired from interactive.ts) */
612347
612836
  commandHandler = null;
612348
612837
  /** Callback fired after a Telegram user completes the TUI-only admin auth challenge */
@@ -612764,21 +613253,30 @@ No scoped reflection artifact exists yet for this chat. Use <code>/reflect</code
612764
613253
  }
612765
613254
  async replyWithTelegramHelp(msg, isAdmin) {
612766
613255
  const scope = isAdmin ? "admin" : "public";
612767
- const chunks = splitTelegramHTMLMessage(buildTelegramHelpHTML(scope));
613256
+ const menu = renderHelpMenu(scope);
612768
613257
  if (msg.guestQueryId) {
613258
+ const chunks = splitTelegramHTMLMessage(buildTelegramHelpHTML(isAdmin ? "admin" : "public"));
612769
613259
  await this.answerGuestQuery(msg.guestQueryId, chunks[0] ?? "", { parseMode: "HTML" });
612770
613260
  return;
612771
613261
  }
612772
- for (let i2 = 0; i2 < chunks.length; i2++) {
612773
- const chunk = chunks[i2];
612774
- if (i2 === 0) {
612775
- await this.replyToTelegramMessage(msg, chunk, {
612776
- html: true,
612777
- replyToMessageId: msg.chatType !== "private" ? msg.messageId : void 0
612778
- });
612779
- } else {
612780
- await this.sendMessageHTML(msg.chatId, chunk);
612781
- }
613262
+ const sent = await this.apiCall("sendMessage", {
613263
+ chat_id: msg.chatId,
613264
+ text: menu.text,
613265
+ parse_mode: "HTML",
613266
+ reply_markup: JSON.stringify(menu.replyMarkup),
613267
+ ...msg.chatType !== "private" ? { reply_to_message_id: msg.messageId } : {}
613268
+ });
613269
+ if (sent.ok && sent.result?.message_id) {
613270
+ this.helpMenuStates.set({
613271
+ chatId: msg.chatId,
613272
+ messageId: sent.result.message_id,
613273
+ scope,
613274
+ page: 0,
613275
+ view: "list",
613276
+ detailToolName: null,
613277
+ fromUserId: msg.fromUserId ?? 0,
613278
+ createdAt: Date.now()
613279
+ });
612782
613280
  }
612783
613281
  }
612784
613282
  recordChatHistory(sessionKey, entry) {
@@ -619091,6 +619589,57 @@ Scoped workspace: ${scopedRoot}`,
619091
619589
  return Boolean(result.ok);
619092
619590
  }
619093
619591
  async handleTelegramCallbackQuery(callback) {
619592
+ const helpDecoded = decodeHelpCallback(callback.data);
619593
+ if (helpDecoded) {
619594
+ let answerText2 = "";
619595
+ let alert2 = false;
619596
+ try {
619597
+ const chatId = callback.chatId;
619598
+ const messageId = callback.messageId;
619599
+ if (!chatId || !messageId) {
619600
+ answerText2 = "Cannot identify menu message.";
619601
+ alert2 = true;
619602
+ return;
619603
+ }
619604
+ const menuState = this.helpMenuStates.get(chatId, messageId);
619605
+ if (!menuState) {
619606
+ answerText2 = "This help menu expired. Send /help for a fresh one.";
619607
+ alert2 = true;
619608
+ return;
619609
+ }
619610
+ const isAdmin = this.isAdminActor(callback.fromUserId, callback.username);
619611
+ if (callback.fromUserId !== menuState.fromUserId && !isAdmin) {
619612
+ answerText2 = "Only the user who opened this menu can navigate it.";
619613
+ alert2 = true;
619614
+ return;
619615
+ }
619616
+ const result = handleHelpCallback(callback.data, menuState);
619617
+ if (!result) {
619618
+ answerText2 = "Unknown menu action.";
619619
+ alert2 = true;
619620
+ return;
619621
+ }
619622
+ this.helpMenuStates.set(result.newState);
619623
+ await this.apiCall("editMessageText", {
619624
+ chat_id: chatId,
619625
+ message_id: messageId,
619626
+ text: result.render.text,
619627
+ parse_mode: "HTML",
619628
+ reply_markup: JSON.stringify(result.render.replyMarkup)
619629
+ });
619630
+ answerText2 = "";
619631
+ } catch (err) {
619632
+ answerText2 = err instanceof Error ? err.message : String(err);
619633
+ alert2 = true;
619634
+ } finally {
619635
+ if (answerText2) {
619636
+ await this.answerCallbackQuery(callback.id, answerText2.slice(0, 180), alert2).catch(() => false);
619637
+ } else {
619638
+ await this.answerCallbackQuery(callback.id).catch(() => false);
619639
+ }
619640
+ }
619641
+ return;
619642
+ }
619094
619643
  let answerText = "Updated.";
619095
619644
  let alert = false;
619096
619645
  try {
@@ -627073,17 +627622,50 @@ async function handleAimsResources(ctx3) {
627073
627622
  try {
627074
627623
  const os9 = __require("node:os");
627075
627624
  const config = loadConfig();
627625
+ let ollamaPool = null;
627626
+ let hardware = null;
627627
+ try {
627628
+ const {
627629
+ getHardwareSnapshot: getHardwareSnapshot2,
627630
+ getOllamaPool: getOllamaPool2,
627631
+ resolveDefaultPoolConfig: resolveDefaultPoolConfig2,
627632
+ shouldUseOllamaPoolForBaseUrl: shouldUseOllamaPoolForBaseUrl2
627633
+ } = await Promise.resolve().then(() => (init_dist8(), dist_exports3));
627634
+ hardware = await getHardwareSnapshot2();
627635
+ const poolConfig = resolveDefaultPoolConfig2();
627636
+ if (shouldUseOllamaPoolForBaseUrl2(poolConfig.baseInstanceUrl)) {
627637
+ const status = await getOllamaPool2({ baseInstanceUrl: poolConfig.baseInstanceUrl }).status();
627638
+ ollamaPool = {
627639
+ placement: status.placement,
627640
+ instances: status.instances.map((inst) => ({
627641
+ id: inst.id,
627642
+ base_url: inst.baseUrl,
627643
+ pool_owned: inst.poolOwned,
627644
+ gpu_uuid: inst.gpuUuid,
627645
+ gpu_index: inst.gpuIndex,
627646
+ inflight: inst.inflight,
627647
+ max_parallel: inst.maxParallel,
627648
+ total_requests: inst.totalRequests
627649
+ }))
627650
+ };
627651
+ }
627652
+ } catch {
627653
+ hardware = null;
627654
+ ollamaPool = null;
627655
+ }
627076
627656
  sendJson(res, 200, {
627077
627657
  compute: {
627078
627658
  cpu: os9.cpus()[0]?.model ?? "unknown",
627079
627659
  cores: os9.cpus().length,
627080
627660
  ram_gb: Math.round(os9.totalmem() / 1024 ** 3),
627081
- platform: process.platform
627661
+ platform: process.platform,
627662
+ hardware
627082
627663
  },
627083
627664
  backend: {
627084
627665
  type: config.backendType,
627085
627666
  url: config.backendUrl,
627086
- model: config.model
627667
+ model: config.model,
627668
+ ollama_pool: ollamaPool
627087
627669
  },
627088
627670
  "aims:control": "A.4"
627089
627671
  });
@@ -641443,6 +642025,32 @@ async function handleRequest(req2, res, ollamaUrl, verbose) {
641443
642025
  }
641444
642026
  } catch {
641445
642027
  }
642028
+ let ollamaPool = null;
642029
+ try {
642030
+ const {
642031
+ getOllamaPool: getOllamaPool2,
642032
+ resolveDefaultPoolConfig: resolveDefaultPoolConfig2,
642033
+ shouldUseOllamaPoolForBaseUrl: shouldUseOllamaPoolForBaseUrl2
642034
+ } = await Promise.resolve().then(() => (init_dist8(), dist_exports3));
642035
+ const poolConfig = resolveDefaultPoolConfig2();
642036
+ if (shouldUseOllamaPoolForBaseUrl2(poolConfig.baseInstanceUrl)) {
642037
+ const status2 = await getOllamaPool2({ baseInstanceUrl: poolConfig.baseInstanceUrl }).status();
642038
+ ollamaPool = {
642039
+ placement: status2.placement,
642040
+ instances: status2.instances.map((inst) => ({
642041
+ id: inst.id,
642042
+ base_url: inst.baseUrl,
642043
+ pool_owned: inst.poolOwned,
642044
+ gpu_uuid: inst.gpuUuid,
642045
+ gpu_index: inst.gpuIndex,
642046
+ inflight: inst.inflight,
642047
+ max_parallel: inst.maxParallel,
642048
+ total_requests: inst.totalRequests
642049
+ }))
642050
+ };
642051
+ }
642052
+ } catch {
642053
+ }
641446
642054
  let latestVersion = null;
641447
642055
  try {
641448
642056
  const ver = es("npm view omnius version 2>/dev/null", { encoding: "utf8", timeout: 5e3, stdio: "pipe" }).trim();
@@ -641452,6 +642060,7 @@ async function handleRequest(req2, res, ollamaUrl, verbose) {
641452
642060
  jsonResponse(res, 200, {
641453
642061
  gpu: gpus,
641454
642062
  gpu_utilization: gpuUtil,
642063
+ ollama_pool: ollamaPool,
641455
642064
  total_vram_gb: totalVram,
641456
642065
  ram_gb: Math.round(totalMem / 1024 ** 3),
641457
642066
  ram_used_pct: ramUsedPct,
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.83",
3
+ "version": "1.0.85",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "omnius",
9
- "version": "1.0.83",
9
+ "version": "1.0.85",
10
10
  "bundleDependencies": [
11
11
  "image-to-ascii"
12
12
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.83",
3
+ "version": "1.0.85",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",