npm - omnius - Versions diffs - 1.0.83 → 1.0.85 - Mend

omnius 1.0.83 → 1.0.85

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/dist/index.js CHANGED

@@ -524968,8 +524968,11 @@ async function findFreePort(start2) {
 }
 function resolveDefaultPoolConfig() {
   const baseInstanceUrl = process.env["OMNIUS_OLLAMA_BASE_URL"] || process.env["OLLAMA_HOST"]?.replace(/^([^:/]+:[0-9]+)$/, "http://$1") || "http://127.0.0.1:11434";
-  const maxParallelPerInstance = Number(process.env["OMNIUS_OLLAMA_MAX_PARALLEL"]) || 4;
+  const maxParallelPerInstance = Number(process.env["OMNIUS_OLLAMA_MAX_PARALLEL"]) || 1;
   const maxSpawnedInstances = Number(process.env["OMNIUS_OLLAMA_MAX_INSTANCES"]) || 0;
+  const targetGpuInstances = Number(process.env["OMNIUS_OLLAMA_TARGET_GPU_INSTANCES"]) || 0;
+  const gpuPlacementRaw = (process.env["OMNIUS_OLLAMA_GPU_PLACEMENT"] ?? "auto").toLowerCase();
+  const gpuPlacement = gpuPlacementRaw === "dedicated" || gpuPlacementRaw === "elastic" || gpuPlacementRaw === "auto" ? gpuPlacementRaw : "auto";
   const idleMs = Number(process.env["OMNIUS_OLLAMA_IDLE_MS"]) || 5 * 60 * 1e3;
   const reaperIntervalMs = Number(process.env["OMNIUS_OLLAMA_REAPER_MS"]) || 3e4;
   const spawnPortStart = Number(process.env["OMNIUS_OLLAMA_SPAWN_PORT"]) || 11435;
@@ -524983,6 +524986,8 @@ function resolveDefaultPoolConfig() {
     baseInstanceUrl: baseInstanceUrl.replace(/\/+$/, ""),
     maxParallelPerInstance,
     maxSpawnedInstances,
+    targetGpuInstances,
+    gpuPlacement,
     idleMs,
     reaperIntervalMs,
     spawnPortStart,
@@ -525110,15 +525115,17 @@ var init_ollama_pool = __esm({
         this.proc = null;
       }
     };
-    realInstanceSpawner = async ({ port, gpuUuid, config }) => {
+    realInstanceSpawner = async ({ port, gpuUuid, gpuIndex, config }) => {
       const env2 = { ...process.env };
       env2["OLLAMA_HOST"] = `127.0.0.1:${port}`;
       if (config.sharedModelStore) {
         env2["OLLAMA_MODELS"] = config.sharedModelStore;
       }
       env2["OLLAMA_NUM_PARALLEL"] = String(config.maxParallelPerInstance);
-      if (gpuUuid)
+      if (gpuUuid) {
         env2["CUDA_VISIBLE_DEVICES"] = gpuUuid;
+        env2["GPU_DEVICE_ORDINAL"] = gpuIndex === null ? "" : String(gpuIndex);
+      }
       const child = spawn21(config.ollamaBinary, ["serve"], {
         env: env2,
         stdio: ["ignore", "pipe", "pipe"],
@@ -525160,17 +525167,27 @@ var init_ollama_pool = __esm({
       instances = [];
       reaperHandle = null;
       spawner;
+      gpuDetector;
+      portAllocator;
       /** Serializes concurrent spawn requests so two callers don't both create instance N+1. */
       spawnGate = Promise.resolve();
+      /** True after dedicated mode has successfully started at least one pool-owned GPU runner. */
+      dedicatedGpuPoolActive = false;
+      activePlacementMode = "constrained";
+      gpuCache = null;
+      slotWaiters = [];
       constructor(config, opts) {
         super();
         this.config = { ...resolveDefaultPoolConfig(), ...config };
         this.spawner = opts?.spawner ?? realInstanceSpawner;
+        this.gpuDetector = opts?.gpuDetector ?? detectGpus;
+        this.portAllocator = opts?.portAllocator ?? findFreePort;
         this.instances.push(new OllamaInstance({
           id: "omnius-ollama-base",
           baseUrl: this.config.baseInstanceUrl,
           port: this.portFromUrl(this.config.baseInstanceUrl),
           gpuUuid: null,
+          gpuIndex: null,
           poolOwned: false,
           inflight: 0,
           peakInflight: 0,
@@ -525191,32 +525208,42 @@ var init_ollama_pool = __esm({
        *   2. Any instance with free slots (least-loaded first).
        *   3. Spawn a new instance pinned to the least-utilized GPU, when the
        *      pool hasn't hit `maxSpawnedInstances`.
-       *   4. Fall back to the least-loaded instance even if saturated — the
-       *      caller will block inside Ollama's internal queue rather than fail.
+       *   4. Queue at the pool boundary when all allowed lanes are busy.
        */
       async acquire(opts) {
+        const gpus = await this.getGpusForPlacement();
+        let placementMode = this.placementModeFor(gpus);
+        this.activePlacementMode = placementMode;
+        if (placementMode === "dedicated") {
+          await this.ensureDedicatedGpuPool(opts.model, gpus);
+          if (!this.instances.some((i2) => i2.state.poolOwned)) {
+            placementMode = "constrained";
+            this.activePlacementMode = placementMode;
+          }
+        }
         const pick = this.pickInstance(opts);
         if (pick) {
           pick.acquire(opts.model);
           return this.buildSlot(pick);
         }
-        const spawned = await this.maybeSpawnInstance(opts.model);
-        if (spawned) {
+        if (placementMode === "constrained") {
+          return this.acquireQueued(opts);
+        }
+        const spawned = placementMode === "elastic" ? await this.maybeSpawnInstance(opts.model) : null;
+        if (spawned && !spawned.isSaturated()) {
           spawned.acquire(opts.model);
           return this.buildSlot(spawned);
         }
-        const fallback = this.instances.slice().sort((a2, b) => a2.state.inflight - b.state.inflight)[0];
-        fallback.acquire(opts.model);
-        return this.buildSlot(fallback);
+        return this.acquireQueued(opts);
       }
       /** Synchronous routing decision; returns the instance or null if every one is saturated. */
       pickInstance(opts) {
-        const candidates = this.instances.filter((inst) => !inst.isSaturated());
+        const candidates = this.instances.filter((inst) => !this.isEffectivelySaturated(inst) && !(this.activePlacementMode === "dedicated" && this.dedicatedGpuPoolActive && !inst.state.poolOwned && !opts.preferBaseInstance));
         if (candidates.length === 0)
           return null;
         const scored = candidates.map((inst) => ({
           inst,
-          score: (inst.state.knownModels.has(opts.model) ? 100 : 0) + (opts.preferBaseInstance && !inst.state.poolOwned ? 25 : 0) + inst.freeSlots() * 10 - inst.state.inflight
+          score: (inst.state.knownModels.has(opts.model) ? 100 : 0) + (opts.preferBaseInstance && !inst.state.poolOwned ? 25 : 0) + this.effectiveFreeSlots(inst) * 10 - inst.state.inflight
         }));
         scored.sort((a2, b) => b.score - a2.score);
         return scored[0].inst;
@@ -525227,9 +525254,39 @@ var init_ollama_pool = __esm({
           baseUrl: inst.state.baseUrl,
           poolOwned: inst.state.poolOwned,
           gpuUuid: inst.state.gpuUuid,
-          release: (success) => inst.release(success)
+          gpuIndex: inst.state.gpuIndex,
+          release: (success) => {
+            inst.release(success);
+            this.wakeNextSlotWaiter();
+          }
         };
       }
+      async acquireQueued(opts) {
+        for (; ; ) {
+          const pick = this.pickInstance(opts);
+          if (pick) {
+            pick.acquire(opts.model);
+            return this.buildSlot(pick);
+          }
+          await new Promise((resolve52) => this.slotWaiters.push(resolve52));
+        }
+      }
+      wakeNextSlotWaiter() {
+        const waiter = this.slotWaiters.shift();
+        if (waiter)
+          waiter();
+      }
+      effectiveMaxParallel(inst) {
+        if (this.activePlacementMode === "constrained")
+          return 1;
+        return Math.max(1, inst.state.maxParallel);
+      }
+      isEffectivelySaturated(inst) {
+        return inst.state.inflight >= this.effectiveMaxParallel(inst);
+      }
+      effectiveFreeSlots(inst) {
+        return Math.max(0, this.effectiveMaxParallel(inst) - inst.state.inflight);
+      }
       /**
        * Spawn a new instance pinned to a GPU when policy allows. Returns the
        * spawned instance or null when:
@@ -525241,6 +525298,42 @@ var init_ollama_pool = __esm({
        * over-allocate.
        */
       async maybeSpawnInstance(model) {
+        return this.withSpawnGate(async () => {
+          if (!this.canSpawnWithSharedModelStore(model))
+            return null;
+          const poolOwnedCount = this.instances.filter((i2) => i2.state.poolOwned).length;
+          const gpus = await this.getGpusForPlacement();
+          const cap = this.elasticSpawnCap(gpus);
+          if (poolOwnedCount >= cap)
+            return null;
+          const freedPick = this.pickInstance({ model });
+          if (freedPick)
+            return freedPick;
+          const gpu = this.pickGpuForSpawn(gpus);
+          return this.spawnInstance(model, gpu);
+        });
+      }
+      async ensureDedicatedGpuPool(model, gpus) {
+        if (this.placementModeFor(gpus) !== "dedicated")
+          return;
+        await this.withSpawnGate(async () => {
+          if (!this.canSpawnWithSharedModelStore(model))
+            return;
+          const target = this.dedicatedTargetCount(gpus);
+          while (this.instances.filter((i2) => i2.state.poolOwned).length < target) {
+            const gpu = this.pickGpuForSpawn(gpus);
+            if (!gpu)
+              break;
+            const inst = await this.spawnInstance(model, gpu);
+            if (!inst)
+              break;
+          }
+          if (this.instances.some((i2) => i2.state.poolOwned)) {
+            this.dedicatedGpuPoolActive = true;
+          }
+        });
+      }
+      async withSpawnGate(fn) {
         let resolveGate = () => {
         };
         const myTurn = new Promise((r2) => {
@@ -525250,55 +525343,90 @@ var init_ollama_pool = __esm({
         this.spawnGate = myTurn;
         await prev;
         try {
-          if (!this.config.sharedModelStore && !this.config.allowUnsharedModelStore) {
-            this.emit("spawn-skipped", {
-              reason: "missing-shared-model-store",
-              model,
-              baseInstanceUrl: this.config.baseInstanceUrl
-            });
-            return null;
-          }
-          const poolOwnedCount = this.instances.filter((i2) => i2.state.poolOwned).length;
-          const gpus = await detectGpus();
-          const cap = this.config.maxSpawnedInstances > 0 ? this.config.maxSpawnedInstances : Math.max(0, gpus.length - 1);
-          if (poolOwnedCount >= cap)
-            return null;
-          const freedPick = this.pickInstance({ model });
-          if (freedPick)
-            return freedPick;
-          const port = await findFreePort(this.config.spawnPortStart);
-          const gpuUuid = this.pickGpuForSpawn(gpus);
-          const { proc, ready } = await this.spawner({ port, gpuUuid, config: this.config });
-          try {
-            await ready;
-          } catch (err) {
-            try {
-              proc.kill();
-            } catch {
-            }
-            this.emit("spawn-failed", { port, gpuUuid, error: err });
-            return null;
-          }
-          const inst = new OllamaInstance({
-            id: `omnius-ollama-${port}`,
-            baseUrl: `http://127.0.0.1:${port}`,
-            port,
-            gpuUuid,
-            poolOwned: true,
-            inflight: 0,
-            peakInflight: 0,
-            lastUsedMs: Date.now(),
-            knownModels: /* @__PURE__ */ new Set(),
-            maxParallel: this.config.maxParallelPerInstance,
-            totalRequests: 0
-          }, proc);
-          this.instances.push(inst);
-          this.emit("instance-spawned", { id: inst.state.id, port, gpuUuid });
-          return inst;
+          return await fn();
         } finally {
           resolveGate();
         }
       }
+      canSpawnWithSharedModelStore(model) {
+        if (this.config.sharedModelStore || this.config.allowUnsharedModelStore)
+          return true;
+        this.emit("spawn-skipped", {
+          reason: "missing-shared-model-store",
+          model,
+          baseInstanceUrl: this.config.baseInstanceUrl
+        });
+        return false;
+      }
+      placementModeFor(gpus) {
+        const canShareModelStore = Boolean(this.config.sharedModelStore) || this.config.allowUnsharedModelStore;
+        if (!canShareModelStore || gpus.length < 2)
+          return "constrained";
+        if (this.config.gpuPlacement === "elastic")
+          return "elastic";
+        return "dedicated";
+      }
+      dedicatedTargetCount(gpus) {
+        const requested = this.config.targetGpuInstances > 0 ? this.config.targetGpuInstances : gpus.length;
+        const cappedByGpuCount = Math.min(requested, gpus.length);
+        return this.config.maxSpawnedInstances > 0 ? Math.min(cappedByGpuCount, this.config.maxSpawnedInstances) : cappedByGpuCount;
+      }
+      elasticSpawnCap(gpus) {
+        return this.config.maxSpawnedInstances > 0 ? this.config.maxSpawnedInstances : Math.max(0, gpus.length - 1);
+      }
+      async getGpusForPlacement(maxAgeMs = 3e3) {
+        const now = Date.now();
+        if (this.gpuCache && now - this.gpuCache.takenAtMs <= maxAgeMs) {
+          return this.gpuCache.gpus;
+        }
+        const gpus = await this.gpuDetector();
+        this.gpuCache = { gpus, takenAtMs: now };
+        return gpus;
+      }
+      async spawnInstance(model, gpu) {
+        let port;
+        try {
+          port = await this.portAllocator(this.config.spawnPortStart);
+        } catch (err) {
+          this.emit("spawn-failed", {
+            reason: "port-allocation-failed",
+            gpuUuid: gpu?.uuid ?? null,
+            gpuIndex: gpu?.index ?? null,
+            error: err
+          });
+          return null;
+        }
+        const gpuUuid = gpu?.uuid || null;
+        const gpuIndex = gpu?.index ?? null;
+        const { proc, ready } = await this.spawner({ port, gpuUuid, gpuIndex, config: this.config });
+        try {
+          await ready;
+        } catch (err) {
+          try {
+            proc.kill();
+          } catch {
+          }
+          this.emit("spawn-failed", { port, gpuUuid, gpuIndex, error: err });
+          return null;
+        }
+        const inst = new OllamaInstance({
+          id: `omnius-ollama-${port}`,
+          baseUrl: `http://127.0.0.1:${port}`,
+          port,
+          gpuUuid,
+          gpuIndex,
+          poolOwned: true,
+          inflight: 0,
+          peakInflight: 0,
+          lastUsedMs: Date.now(),
+          knownModels: /* @__PURE__ */ new Set([model]),
+          maxParallel: this.config.maxParallelPerInstance,
+          totalRequests: 0
+        }, proc);
+        this.instances.push(inst);
+        this.emit("instance-spawned", { id: inst.state.id, port, gpuUuid, gpuIndex });
+        return inst;
+      }
       /**
        * Pick a GPU for a freshly-spawned instance. Prefers GPUs that no
        * pool-owned instance is already pinned to, then most free VRAM. Returns
@@ -525313,7 +525441,7 @@ var init_ollama_pool = __esm({
         pool3.sort((a2, b) => b.vramFreeMB - a2.vramFreeMB);
         const best = pool3[_gpuCursor % pool3.length];
         _gpuCursor++;
-        return best.uuid;
+        return best;
       }
       /**
        * Periodically reap pool-owned instances that have been idle past the
@@ -525360,13 +525488,24 @@ var init_ollama_pool = __esm({
       }
       async status() {
         const hardware = await getHardwareSnapshot();
+        const placementGpus = this.gpuCache?.gpus ?? hardware.gpus;
+        const placementMode = this.placementModeFor(placementGpus);
+        const targetGpuInstances = placementMode === "dedicated" ? this.dedicatedTargetCount(placementGpus) : placementMode === "elastic" ? this.elasticSpawnCap(placementGpus) : 1;
+        const readyGpuInstances = this.instances.filter((inst) => inst.state.poolOwned).length;
         return {
           config: this.config,
+          placement: {
+            mode: placementMode,
+            targetGpuInstances,
+            readyGpuInstances,
+            sharedModelStore: this.config.sharedModelStore
+          },
           instances: this.instances.map((inst) => ({
             id: inst.state.id,
             baseUrl: inst.state.baseUrl,
             poolOwned: inst.state.poolOwned,
             gpuUuid: inst.state.gpuUuid,
+            gpuIndex: inst.state.gpuIndex,
             inflight: inst.state.inflight,
             peakInflight: inst.state.peakInflight,
             maxParallel: inst.state.maxParallel,
@@ -569697,29 +569836,60 @@ async function collectNetworkMetrics() {
   return { rxBytesPerSec: 0, txBytesPerSec: 0 };
 }
 async function collectGpuMetrics() {
-  const noGpu = { available: false, name: "", utilization: 0, vramUsedMB: 0, vramTotalMB: 0, vramUtilization: 0 };
+  const noGpu = {
+    available: false,
+    count: 0,
+    name: "",
+    utilization: 0,
+    vramUsedMB: 0,
+    vramTotalMB: 0,
+    vramUtilization: 0,
+    devices: []
+  };
   if (_nvidiaSmiAvailable2 === false) return noGpu;
   try {
     const smi = await new Promise((resolve52, reject) => {
       exec3(
-        "nvidia-smi --query-gpu=utilization.gpu,memory.used,memory.total,name --format=csv,noheader,nounits 2>/dev/null",
+        "nvidia-smi --query-gpu=index,uuid,utilization.gpu,memory.used,memory.total,name --format=csv,noheader,nounits 2>/dev/null",
         { encoding: "utf8", timeout: 3e3 },
         (err, stdout) => err ? reject(err) : resolve52(stdout)
       );
     });
     _nvidiaSmiAvailable2 = true;
-    const line = smi.trim().split("\n")[0];
-    if (!line) return noGpu;
-    const parts = line.split(",").map((s2) => s2.trim());
-    const vramUsed = parseInt(parts[1] ?? "0", 10) || 0;
-    const vramTotal = parseInt(parts[2] ?? "0", 10) || 0;
+    const devices = [];
+    for (const line of smi.trim().split("\n")) {
+      if (!line.trim()) continue;
+      const parts = line.split(",").map((s2) => s2.trim());
+      const index = parseInt(parts[0] ?? "-1", 10);
+      const utilization = parseInt(parts[2] ?? "0", 10) || 0;
+      const vramUsed2 = parseInt(parts[3] ?? "0", 10) || 0;
+      const vramTotal2 = parseInt(parts[4] ?? "0", 10) || 0;
+      if (!Number.isFinite(index) || index < 0) continue;
+      devices.push({
+        index,
+        uuid: parts[1] ?? "",
+        utilization,
+        vramUsedMB: vramUsed2,
+        vramTotalMB: vramTotal2,
+        name: parts.slice(5).join(", ") || "",
+        vramUtilization: vramTotal2 > 0 ? Math.round(vramUsed2 / vramTotal2 * 100) : 0
+      });
+    }
+    if (devices.length === 0) return noGpu;
+    const vramUsed = devices.reduce((sum, gpu) => sum + gpu.vramUsedMB, 0);
+    const vramTotal = devices.reduce((sum, gpu) => sum + gpu.vramTotalMB, 0);
+    const avgUtil = Math.round(devices.reduce((sum, gpu) => sum + gpu.utilization, 0) / devices.length);
+    const firstName = devices[0]?.name ?? "";
+    const allSameName = devices.every((gpu) => gpu.name === firstName);
     return {
       available: true,
-      utilization: parseInt(parts[0] ?? "0", 10) || 0,
+      count: devices.length,
+      utilization: avgUtil,
       vramUsedMB: vramUsed,
       vramTotalMB: vramTotal,
-      name: parts[3] ?? "",
-      vramUtilization: vramTotal > 0 ? Math.round(vramUsed / vramTotal * 100) : 0
+      name: devices.length > 1 && allSameName ? `${devices.length}x ${firstName}` : firstName,
+      vramUtilization: vramTotal > 0 ? Math.round(vramUsed / vramTotal * 100) : 0,
+      devices
     };
   } catch {
     _nvidiaSmiAvailable2 = false;
@@ -569736,7 +569906,9 @@ function getInstantSnapshot() {
       cpuCores: cr.cpuCores,
       cpuModel: cr.cpuModel,
       gpuUtil: -1,
+      gpuCount: 0,
       gpuName: "",
+      gpuDevices: [],
       vramUtil: -1,
       vramUsedMB: 0,
       vramTotalMB: 0,
@@ -569794,10 +569966,11 @@ function collectCpuRam() {
 }
 async function collectLocalMetrics() {
   const cpuRam = collectCpuRam();
-  const [gpu, disk, network] = await Promise.all([
+  const [gpu, disk, network, ollamaPool] = await Promise.all([
     collectGpuMetrics(),
     collectDiskMetrics(),
-    collectNetworkMetrics()
+    collectNetworkMetrics(),
+    collectOllamaPoolMetrics()
   ]);
   return {
     source: "local",
@@ -569806,7 +569979,9 @@ async function collectLocalMetrics() {
       cpuCores: cpuRam.cpuCores,
       cpuModel: cpuRam.cpuModel,
       gpuUtil: gpu.available ? gpu.utilization : -1,
+      gpuCount: gpu.count,
       gpuName: gpu.name,
+      gpuDevices: gpu.devices,
       vramUtil: gpu.available ? gpu.vramUtilization : -1,
       vramUsedMB: gpu.vramUsedMB,
       vramTotalMB: gpu.vramTotalMB,
@@ -569817,15 +569992,43 @@ async function collectLocalMetrics() {
       diskUsedGB: disk.usedGB,
       diskTotalGB: disk.totalGB,
       diskFreeGB: disk.freeGB,
-      diskPath: disk.path
+      diskPath: disk.path,
+      ollamaPool
     },
     network
   };
 }
+async function collectOllamaPoolMetrics() {
+  try {
+    const config = resolveDefaultPoolConfig();
+    if (!shouldUseOllamaPoolForBaseUrl(config.baseInstanceUrl)) return null;
+    const status = await getOllamaPool({ baseInstanceUrl: config.baseInstanceUrl }).status();
+    return {
+      enabled: true,
+      mode: status.placement.mode,
+      targetGpuInstances: status.placement.targetGpuInstances,
+      readyGpuInstances: status.placement.readyGpuInstances,
+      sharedModelStore: status.placement.sharedModelStore,
+      instances: status.instances.map((inst) => ({
+        id: inst.id,
+        baseUrl: inst.baseUrl,
+        poolOwned: inst.poolOwned,
+        gpuUuid: inst.gpuUuid,
+        gpuIndex: inst.gpuIndex,
+        inflight: inst.inflight,
+        maxParallel: inst.maxParallel,
+        totalRequests: inst.totalRequests
+      }))
+    };
+  } catch {
+    return null;
+  }
+}
 var _lastNetSnapshot, _nvidiaSmiAvailable2, _cpuPrevSnapshot, SystemMetricsCollector;
 var init_system_metrics = __esm({
   "packages/cli/src/tui/system-metrics.ts"() {
     "use strict";
+    init_dist8();
     init_disk_monitor();
     _lastNetSnapshot = null;
     _nvidiaSmiAvailable2 = null;
@@ -569881,7 +570084,9 @@ var init_system_metrics = __esm({
           cpuCores: hw.cpuCores ?? 0,
           cpuModel: hw.cpuModel ?? "",
           gpuUtil: hw.gpuUtil ?? -1,
+          gpuCount: hw.gpuCount ?? 0,
           gpuName: hw.gpuName ?? "",
+          gpuDevices: hw.gpuDevices ?? [],
           vramUtil: hw.vramUtil ?? -1,
           vramUsedMB: hw.vramUsedMB ?? 0,
           vramTotalMB: hw.vramTotalMB ?? 0,
@@ -569892,7 +570097,8 @@ var init_system_metrics = __esm({
           diskUsedGB: hw.diskUsedGB ?? 0,
           diskTotalGB: hw.diskTotalGB ?? 0,
           diskFreeGB: hw.diskFreeGB ?? 0,
-          diskPath: hw.diskPath ?? ""
+          diskPath: hw.diskPath ?? "",
+          ollamaPool: hw.ollamaPool ?? null
         };
         this._latest = {
           source: "remote",
@@ -573751,6 +573957,19 @@ ${CONTENT_BG_SEQ}`);
             hwExpW += 6 + `${rm4.vramUtil}%`.length + vramDetail.length;
             hwCompW += 6 + `${rm4.vramUtil}%`.length;
           }
+          if (rm4.ollamaPool?.enabled) {
+            const pool3 = rm4.ollamaPool;
+            const ready = pool3.readyGpuInstances;
+            const target = pool3.targetGpuInstances;
+            const poolColor = pool3.mode === "constrained" ? c3.yellow : target > 0 && ready < target ? c3.yellow : c3.green;
+            const poolDetail = pool3.mode === "constrained" ? "queue" : `${ready}/${target}`;
+            const poolText = ` OLLAMA ${poolColor(`${pool3.mode}:${poolDetail}`)}`;
+            const compactText = ` OLLAMA ${poolColor(pool3.mode === "constrained" ? "queue" : `${ready}/${target}`)}`;
+            hwExpStr += poolText;
+            hwCompStr += compactText;
+            hwExpW += 8 + `${pool3.mode}:${poolDetail}`.length;
+            hwCompW += 8 + (pool3.mode === "constrained" ? "queue".length : `${ready}/${target}`.length);
+          }
           if (!isLocal && hwExpW === 0) {
             const statusMsg = rm4.gpuName && rm4.gpuName !== "peer" ? rm4.gpuName : "awaiting metrics...";
             hwExpStr = c3.dim(statusMsg);
@@ -607467,6 +607686,271 @@ var init_tool_policy = __esm({
   }
 });
+// packages/cli/src/tui/telegram-help-menu.ts
+function telegramSyntheticHelpSignatures() {
+  return [
+    { signature: "/help", description: "Show interactive command help" },
+    { signature: "/start", description: "Show Telegram bridge status and authentication instructions" },
+    { signature: "/auth <code>", description: "Authenticate this Telegram user as bot admin using the TUI code" },
+    { signature: "/call", description: "Get the active voice call link when a call session is running" },
+    { signature: "/reflect", description: "Run scoped Telegram chat reflection over retained chat history" },
+    { signature: "/reflect status", description: "Show the latest scoped Telegram reflection artifact" },
+    { signature: "/reflect now", description: "Run reflection and let the model decide whether to post a public follow-up" },
+    { signature: "/reflect auto on|off", description: "Enable or disable model-gated idle follow-ups for this chat" },
+    { signature: "/reflection", description: "Alias for /reflect" },
+    { signature: "/daydream", description: "Alias for /reflect in Telegram chats" }
+  ];
+}
+function telegramHelpCommandAllowed(cmd, scope) {
+  if (cmd.name === "dream") return false;
+  if (scope === "admin") return cmd.implementationStatus === "implemented";
+  return TELEGRAM_PUBLIC_HELP_COMMANDS.has(cmd.name);
+}
+function parseArgsHint(argsHint) {
+  if (!argsHint) return [];
+  const args = [];
+  const re = /<(\w+)(\?)?>/g;
+  let match;
+  while ((match = re.exec(argsHint)) !== null) {
+    const name10 = match[1];
+    const optional2 = match[2] === "?";
+    args.push({
+      name: name10,
+      type: "string",
+      required: !optional2,
+      description: ""
+    });
+  }
+  return args;
+}
+function buildScopedToolList(scope) {
+  const commands = listCommandRegistry({ includePlanned: false }).filter((cmd) => telegramHelpCommandAllowed(cmd, scope));
+  const syntheticSigs = telegramSyntheticHelpSignatures();
+  const commandSigs = commands.flatMap((cmd) => cmd.signatures);
+  const allSigs = [...syntheticSigs, ...commandSigs];
+  const seen = /* @__PURE__ */ new Set();
+  const unique = allSigs.filter((sig) => {
+    if (seen.has(sig.signature)) return false;
+    seen.add(sig.signature);
+    return true;
+  });
+  const entries = [];
+  for (const sig of unique) {
+    const matchingCmd = commands.find(
+      (cmd) => cmd.signatures.some((s2) => s2.signature === sig.signature)
+    );
+    const args = matchingCmd ? parseArgsHint(matchingCmd.argsHint) : [];
+    const name10 = sig.signature.replace(/^\//, "").split(/\s+/)[0] ?? sig.signature;
+    entries.push({
+      name: name10,
+      signature: sig.signature,
+      description: sig.description,
+      arguments: args
+    });
+  }
+  return entries;
+}
+function encodeHelpCallback(action, value2) {
+  const raw = `${CALLBACK_PREFIX}:${action}:${value2}`;
+  return raw.slice(0, MAX_CALLBACK_DATA);
+}
+function decodeHelpCallback(data) {
+  if (!data.startsWith(CALLBACK_PREFIX + ":")) return null;
+  const parts = data.split(":");
+  if (parts.length < 3) return null;
+  const action = parts[1];
+  const value2 = parts.slice(2).join(":");
+  if (action !== "page" && action !== "detail" && action !== "back") return null;
+  return { action, value: value2 };
+}
+function buildToolListKeyboard(tools, page2, scope) {
+  const totalPages = Math.max(1, Math.ceil(tools.length / TOOLS_PER_PAGE));
+  const start2 = page2 * TOOLS_PER_PAGE;
+  const pageTools = tools.slice(start2, start2 + TOOLS_PER_PAGE);
+  const rows = [];
+  let currentRow = [];
+  for (const tool of pageTools) {
+    const label = tool.name.length > 18 ? tool.name.slice(0, 17) + "…" : tool.name;
+    currentRow.push({
+      text: label,
+      callback_data: encodeHelpCallback("detail", tool.name)
+    });
+    if (currentRow.length >= GRID_COLS) {
+      rows.push(currentRow);
+      currentRow = [];
+    }
+  }
+  if (currentRow.length > 0) {
+    rows.push(currentRow);
+  }
+  const navRow = [];
+  if (page2 > 0) {
+    navRow.push({ text: "◀️ Back", callback_data: encodeHelpCallback("page", page2 - 1) });
+  }
+  navRow.push({
+    text: `${page2 + 1}/${totalPages}`,
+    callback_data: encodeHelpCallback("page", page2.toString())
+    // re-send same page (no-op, just indicator)
+  });
+  if (page2 < totalPages - 1) {
+    navRow.push({ text: "▶️ Next", callback_data: encodeHelpCallback("page", page2 + 1) });
+  }
+  rows.push(navRow);
+  return rows;
+}
+function buildToolDetailKeyboard(tool, page2) {
+  const rows = [];
+  for (const arg of tool.arguments) {
+    const req2 = arg.required ? "●" : "○";
+    const label = `${req2} ${arg.name}`.slice(0, 30);
+    rows.push([{
+      text: label,
+      callback_data: encodeHelpCallback("detail", tool.name)
+      // stay on detail
+    }]);
+  }
+  rows.push([{
+    text: "🔙 Back to tools",
+    callback_data: encodeHelpCallback("back", page2.toString())
+  }]);
+  return rows;
+}
+function escapeHTML(text) {
+  return text.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
+}
+function buildToolListText(tools, page2, scope) {
+  const totalPages = Math.max(1, Math.ceil(tools.length / TOOLS_PER_PAGE));
+  const start2 = page2 * TOOLS_PER_PAGE;
+  const pageTools = tools.slice(start2, start2 + TOOLS_PER_PAGE);
+  const lines = [
+    `<b>🛠 Commands (${scope === "admin" ? "admin full scope" : "public secure scope"})</b>`,
+    `<i>Page ${page2 + 1} of ${totalPages} — ${tools.length} commands</i>`,
+    ""
+  ];
+  for (const tool of pageTools) {
+    lines.push(`<code>${escapeHTML(tool.signature)}</code>`);
+    lines.push(`  ${escapeHTML(tool.description)}`);
+  }
+  lines.push("");
+  lines.push("<i>Tap a command for details, or use ◀️▶️ to navigate.</i>");
+  return lines.join("\n");
+}
+function buildToolDetailText(tool) {
+  const lines = [
+    `<b>📌 ${escapeHTML(tool.signature)}</b>`,
+    "",
+    escapeHTML(tool.description),
+    ""
+  ];
+  if (tool.arguments.length > 0) {
+    lines.push("<b>Arguments:</b>");
+    lines.push("");
+    for (const arg of tool.arguments) {
+      const reqBadge = arg.required ? "<b>[required]</b>" : "<i>[optional]</i>";
+      lines.push(`<code>${escapeHTML(arg.name)}</code> ${reqBadge} <i>${escapeHTML(arg.type)}</i>`);
+      if (arg.description) {
+        lines.push(`  ${escapeHTML(arg.description)}`);
+      }
+      lines.push("");
+    }
+  } else {
+    lines.push("<i>No arguments.</i>");
+  }
+  lines.push("🔙 <i>Tap Back to return to the tool list.</i>");
+  return lines.join("\n");
+}
+function renderHelpMenu(scope) {
+  const tools = buildScopedToolList(scope);
+  const text = buildToolListText(tools, 0, scope);
+  const inline_keyboard = buildToolListKeyboard(tools, 0, scope);
+  return { text, replyMarkup: { inline_keyboard } };
+}
+function renderHelpMenuPage(scope, page2) {
+  const tools = buildScopedToolList(scope);
+  const totalPages = Math.max(1, Math.ceil(tools.length / TOOLS_PER_PAGE));
+  page2 = Math.max(0, Math.min(page2, totalPages - 1));
+  const text = buildToolListText(tools, page2, scope);
+  const inline_keyboard = buildToolListKeyboard(tools, page2, scope);
+  return { text, replyMarkup: { inline_keyboard } };
+}
+function renderHelpToolDetail(scope, toolName, returnPage) {
+  const tools = buildScopedToolList(scope);
+  const tool = tools.find((t2) => t2.name === toolName);
+  if (!tool) return null;
+  const text = buildToolDetailText(tool);
+  const inline_keyboard = buildToolDetailKeyboard(tool, returnPage);
+  return { text, replyMarkup: { inline_keyboard } };
+}
+function handleHelpCallback(callbackData, currentState) {
+  const decoded = decodeHelpCallback(callbackData);
+  if (!decoded) return null;
+  const { action, value: value2 } = decoded;
+  let newState;
+  let render2;
+  switch (action) {
+    case "page": {
+      const page2 = parseInt(value2, 10);
+      if (isNaN(page2)) return null;
+      render2 = renderHelpMenuPage(currentState.scope, page2);
+      newState = { ...currentState, page: page2, view: "list", detailToolName: null };
+      break;
+    }
+    case "detail": {
+      const detail = renderHelpToolDetail(currentState.scope, value2, currentState.page);
+      if (!detail) return null;
+      render2 = detail;
+      newState = { ...currentState, view: "detail", detailToolName: value2 };
+      break;
+    }
+    case "back": {
+      const page2 = parseInt(value2, 10);
+      if (isNaN(page2)) return null;
+      render2 = renderHelpMenuPage(currentState.scope, page2);
+      newState = { ...currentState, page: page2, view: "list", detailToolName: null };
+      break;
+    }
+    default:
+      return null;
+  }
+  return { render: render2, newState };
+}
+var TOOLS_PER_PAGE, GRID_COLS, CALLBACK_PREFIX, MAX_CALLBACK_DATA, TELEGRAM_PUBLIC_HELP_COMMANDS, HelpMenuStateStore;
+var init_telegram_help_menu = __esm({
+  "packages/cli/src/tui/telegram-help-menu.ts"() {
+    "use strict";
+    init_command_registry();
+    TOOLS_PER_PAGE = 10;
+    GRID_COLS = 5;
+    CALLBACK_PREFIX = "help";
+    MAX_CALLBACK_DATA = 64;
+    TELEGRAM_PUBLIC_HELP_COMMANDS = /* @__PURE__ */ new Set(["help", "start", "auth", "call"]);
+    HelpMenuStateStore = class {
+      states = /* @__PURE__ */ new Map();
+      TTL_MS = 30 * 60 * 1e3;
+      // 30 min
+      key(chatId, messageId) {
+        return `${chatId}:${messageId}`;
+      }
+      set(state) {
+        this.states.set(this.key(state.chatId, state.messageId), state);
+      }
+      get(chatId, messageId) {
+        return this.states.get(this.key(chatId, messageId));
+      }
+      delete(chatId, messageId) {
+        this.states.delete(this.key(chatId, messageId));
+      }
+      /** Prune expired states */
+      prune() {
+        const now = Date.now();
+        for (const [k, v] of this.states) {
+          if (now - v.createdAt > this.TTL_MS) this.states.delete(k);
+        }
+      }
+    };
+  }
+});
 // packages/cli/src/tui/telegram-creative-tools.ts
 import { createCipheriv as createCipheriv4, createDecipheriv as createDecipheriv4, randomBytes as randomBytes21 } from "node:crypto";
 import {
@@ -611136,7 +611620,7 @@ function splitTelegramReminderDue(raw) {
   if (suffixMatch) return { due: suffixMatch[2].trim(), message: suffixMatch[1].trim() };
   return { message: text };
 }
-function telegramSyntheticHelpSignatures() {
+function telegramSyntheticHelpSignatures2() {
   return [
     { signature: "/help", description: "Show Telegram command help" },
     { signature: "/start", description: "Show Telegram bridge status and authentication instructions" },
@@ -611150,15 +611634,15 @@ function telegramSyntheticHelpSignatures() {
     { signature: "/daydream", description: "Alias for /reflect in Telegram chats" }
   ];
 }
-function telegramHelpCommandAllowed(cmd, scope) {
+function telegramHelpCommandAllowed2(cmd, scope) { // true when cmd may be listed in help for scope: "dream" never, admin only if implemented
   if (cmd.name === "dream") return false;
   if (scope === "admin") return cmd.implementationStatus === "implemented";
-  return TELEGRAM_PUBLIC_HELP_COMMANDS.has(cmd.name) || TELEGRAM_PUBLIC_BOT_COMMAND_NAMES.has(cmd.name);
+  return TELEGRAM_PUBLIC_HELP_COMMANDS2.has(cmd.name) || TELEGRAM_PUBLIC_BOT_COMMAND_NAMES.has(cmd.name); // any non-admin scope: name must be in one of the two public sets
 }
 function buildTelegramHelpHTML(scope, maxPublicCommands = 24) {
-  const commands = listCommandRegistry({ includePlanned: false }).filter((cmd) => telegramHelpCommandAllowed(cmd, scope));
+  const commands = listCommandRegistry({ includePlanned: false }).filter((cmd) => telegramHelpCommandAllowed2(cmd, scope));
   const signatures = [
-    ...telegramSyntheticHelpSignatures(),
+    ...telegramSyntheticHelpSignatures2(),
     ...commands.flatMap((cmd) => cmd.signatures)
   ];
   const seen = /* @__PURE__ */ new Set();
@@ -611872,7 +612356,7 @@ function renderTelegramSubAgentError(username, error) {
   process.stdout.write(`    ${c3.dim("│")} ${c3.red("✘")} @${username}: ${c3.dim(preview)}
 `);
 }
-var TELEGRAM_TOOL_ACTION_GROUPS, TELEGRAM_TOOL_ACTION_GROUP, TELEGRAM_TOOL_MUTATING_GROUPS, DEFAULT_TELEGRAM_TOOL_GROUP_POLICY, TELEGRAM_TOOL_BUTTON_LABELS, TELEGRAM_SAFETY_PROMPT, ADMIN_DM_PROMPT, ADMIN_GROUP_PROMPT, TELEGRAM_PUBLIC_SOUL_PROFILE, TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT, TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT, TELEGRAM_PUBLIC_VISION_STACK_CONTRACT, GROUP_REPLY_DISCRETION_PROMPT, TELEGRAM_CHAT_MODE_PROMPT, ADMIN_CHAT_PROFILE_PROMPT, TELEGRAM_ACTION_RESPONSE_CONTRACT, TELEGRAM_EXTERNAL_ACQUISITION_CONTRACT, TELEGRAM_INTERACTION_DECISION_RESPONSE_FORMAT, TELEGRAM_STUCK_SELF_TALK_PREFIXES, TELEGRAM_CHAT_HISTORY_LIMIT, TELEGRAM_CONTEXT_RECENT_DEFAULT, TELEGRAM_CONTEXT_LINE_LIMIT, TELEGRAM_CONTEXT_SAMPLE_LIMIT, TELEGRAM_MEMORY_CARD_LIMIT, TELEGRAM_MEMORY_NOTE_LIMIT, TELEGRAM_ASSOCIATIVE_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_USER_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_ACTION_LIMIT, TELEGRAM_ASSOCIATIVE_RELATION_LIMIT, TELEGRAM_MEMORY_STOPWORDS, TELEGRAM_MEMORY_GENERIC_QUERY_TOKENS, TELEGRAM_SUB_AGENT_BOUNDED_OPTIONS, TELEGRAM_PUBLIC_HELP_COMMANDS, TELEGRAM_REMINDER_SLASH_COMMANDS, TELEGRAM_REFLECTION_SLASH_COMMANDS, TELEGRAM_PUBLIC_BOT_COMMAND_NAMES, TELEGRAM_IMAGE_EXTENSIONS, MEDIA_CACHE_TTL_MS, TELEGRAM_CHANNEL_DMN_SWEEP_MS, TELEGRAM_CHANNEL_DMN_IDLE_AFTER_MS, TELEGRAM_CHANNEL_DMN_MIN_INTERVAL_MS, TELEGRAM_CHANNEL_DMN_MIN_MESSAGES, TELEGRAM_ALLOWED_UPDATES, TELEGRAM_PUBLIC_TOOL_QUOTAS, TelegramBridge;
+var TELEGRAM_TOOL_ACTION_GROUPS, TELEGRAM_TOOL_ACTION_GROUP, TELEGRAM_TOOL_MUTATING_GROUPS, DEFAULT_TELEGRAM_TOOL_GROUP_POLICY, TELEGRAM_TOOL_BUTTON_LABELS, TELEGRAM_SAFETY_PROMPT, ADMIN_DM_PROMPT, ADMIN_GROUP_PROMPT, TELEGRAM_PUBLIC_SOUL_PROFILE, TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT, TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT, TELEGRAM_PUBLIC_VISION_STACK_CONTRACT, GROUP_REPLY_DISCRETION_PROMPT, TELEGRAM_CHAT_MODE_PROMPT, ADMIN_CHAT_PROFILE_PROMPT, TELEGRAM_ACTION_RESPONSE_CONTRACT, TELEGRAM_EXTERNAL_ACQUISITION_CONTRACT, TELEGRAM_INTERACTION_DECISION_RESPONSE_FORMAT, TELEGRAM_STUCK_SELF_TALK_PREFIXES, TELEGRAM_CHAT_HISTORY_LIMIT, TELEGRAM_CONTEXT_RECENT_DEFAULT, TELEGRAM_CONTEXT_LINE_LIMIT, TELEGRAM_CONTEXT_SAMPLE_LIMIT, TELEGRAM_MEMORY_CARD_LIMIT, TELEGRAM_MEMORY_NOTE_LIMIT, TELEGRAM_ASSOCIATIVE_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_USER_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_ACTION_LIMIT, TELEGRAM_ASSOCIATIVE_RELATION_LIMIT, TELEGRAM_MEMORY_STOPWORDS, TELEGRAM_MEMORY_GENERIC_QUERY_TOKENS, TELEGRAM_SUB_AGENT_BOUNDED_OPTIONS, TELEGRAM_PUBLIC_HELP_COMMANDS2, TELEGRAM_REMINDER_SLASH_COMMANDS, TELEGRAM_REFLECTION_SLASH_COMMANDS, TELEGRAM_PUBLIC_BOT_COMMAND_NAMES, TELEGRAM_IMAGE_EXTENSIONS, MEDIA_CACHE_TTL_MS, TELEGRAM_CHANNEL_DMN_SWEEP_MS, TELEGRAM_CHANNEL_DMN_IDLE_AFTER_MS, TELEGRAM_CHANNEL_DMN_MIN_INTERVAL_MS, TELEGRAM_CHANNEL_DMN_MIN_MESSAGES, TELEGRAM_ALLOWED_UPDATES, TELEGRAM_PUBLIC_TOOL_QUOTAS, TelegramBridge;
 var init_telegram_bridge = __esm({
   "packages/cli/src/tui/telegram-bridge.ts"() {
     "use strict";
@@ -611883,6 +612367,7 @@ var init_telegram_bridge = __esm({
     init_tool_policy();
     init_media_routing();
     init_command_registry();
+    init_telegram_help_menu();
     init_scoped_personality();
     init_telegram_creative_tools();
     init_omnius_directory();
@@ -612249,7 +612734,7 @@ External acquisition contract:
       bruteForceMaxCycles: 0,
       allowTurnExtension: false
     };
-    TELEGRAM_PUBLIC_HELP_COMMANDS = /* @__PURE__ */ new Set(["help", "start", "auth", "call"]);
+    TELEGRAM_PUBLIC_HELP_COMMANDS2 = /* @__PURE__ */ new Set(["help", "start", "auth", "call"]);
     TELEGRAM_REMINDER_SLASH_COMMANDS = /* @__PURE__ */ new Set(["remind", "reminder", "reminders"]);
     TELEGRAM_REFLECTION_SLASH_COMMANDS = /* @__PURE__ */ new Set(["reflect", "reflection", "daydream", "dream"]);
     TELEGRAM_PUBLIC_BOT_COMMAND_NAMES = new Set(
@@ -612343,6 +612828,10 @@ External acquisition contract:
       telegramBotRightsCache = /* @__PURE__ */ new Map();
       /** Short-lived Telegram inline button state directory */
       telegramToolButtonDir;
+      /** Interactive help menu state store (inline keyboard navigation) */
+      helpMenuStates = new HelpMenuStateStore();
+      /** Prune expired help menu states every 5 minutes */
+      helpMenuPruneTimer = null;
       /** Command handler for admin DM slash commands (wired from interactive.ts) */
       commandHandler = null;
       /** Callback fired after a Telegram user completes the TUI-only admin auth challenge */
@@ -612764,21 +613253,30 @@ No scoped reflection artifact exists yet for this chat. Use <code>/reflect</code
       }
       async replyWithTelegramHelp(msg, isAdmin) {
         const scope = isAdmin ? "admin" : "public";
-        const chunks = splitTelegramHTMLMessage(buildTelegramHelpHTML(scope));
+        const menu = renderHelpMenu(scope); // interactive menu for this scope; its text and replyMarkup are sent below
         if (msg.guestQueryId) {
+          const chunks = splitTelegramHTMLMessage(buildTelegramHelpHTML(isAdmin ? "admin" : "public")); // guest queries get the static HTML help; only chunks[0] is sent
           await this.answerGuestQuery(msg.guestQueryId, chunks[0] ?? "", { parseMode: "HTML" });
           return;
         }
-        for (let i2 = 0; i2 < chunks.length; i2++) {
-          const chunk = chunks[i2];
-          if (i2 === 0) {
-            await this.replyToTelegramMessage(msg, chunk, {
-              html: true,
-              replyToMessageId: msg.chatType !== "private" ? msg.messageId : void 0
-            });
-          } else {
-            await this.sendMessageHTML(msg.chatId, chunk);
-          }
+        const sent = await this.apiCall("sendMessage", { // send the menu text together with its reply markup in one message
+          chat_id: msg.chatId,
+          text: menu.text,
+          parse_mode: "HTML",
+          reply_markup: JSON.stringify(menu.replyMarkup), // markup is passed as a JSON string
+          ...msg.chatType !== "private" ? { reply_to_message_id: msg.messageId } : {} // outside private chats, reply to the triggering message
+        });
+        if (sent.ok && sent.result?.message_id) { // record state only when the send returned a message id to key it by
+          this.helpMenuStates.set({
+            chatId: msg.chatId,
+            messageId: sent.result.message_id,
+            scope,
+            page: 0, // the menu starts on page 0 in list view
+            view: "list",
+            detailToolName: null,
+            fromUserId: msg.fromUserId ?? 0, // 0 when the sender id is null or undefined
+            createdAt: Date.now() // read by HelpMenuStateStore.prune() to expire the entry
+          });
         }
       }
       recordChatHistory(sessionKey, entry) {
@@ -619091,6 +619589,57 @@ Scoped workspace: ${scopedRoot}`,
         return Boolean(result.ok);
       }
       async handleTelegramCallbackQuery(callback) {
+        const helpDecoded = decodeHelpCallback(callback.data);
+        if (helpDecoded) {
+          let answerText2 = "";
+          let alert2 = false;
+          try {
+            const chatId = callback.chatId;
+            const messageId = callback.messageId;
+            if (!chatId || !messageId) {
+              answerText2 = "Cannot identify menu message.";
+              alert2 = true;
+              return;
+            }
+            const menuState = this.helpMenuStates.get(chatId, messageId);
+            if (!menuState) {
+              answerText2 = "This help menu expired. Send /help for a fresh one.";
+              alert2 = true;
+              return;
+            }
+            const isAdmin = this.isAdminActor(callback.fromUserId, callback.username);
+            if (callback.fromUserId !== menuState.fromUserId && !isAdmin) {
+              answerText2 = "Only the user who opened this menu can navigate it.";
+              alert2 = true;
+              return;
+            }
+            const result = handleHelpCallback(callback.data, menuState);
+            if (!result) {
+              answerText2 = "Unknown menu action.";
+              alert2 = true;
+              return;
+            }
+            this.helpMenuStates.set(result.newState);
+            await this.apiCall("editMessageText", {
+              chat_id: chatId,
+              message_id: messageId,
+              text: result.render.text,
+              parse_mode: "HTML",
+              reply_markup: JSON.stringify(result.render.replyMarkup)
+            });
+            answerText2 = "";
+          } catch (err) {
+            answerText2 = err instanceof Error ? err.message : String(err);
+            alert2 = true;
+          } finally {
+            if (answerText2) {
+              await this.answerCallbackQuery(callback.id, answerText2.slice(0, 180), alert2).catch(() => false);
+            } else {
+              await this.answerCallbackQuery(callback.id).catch(() => false);
+            }
+          }
+          return;
+        }
         let answerText = "Updated.";
         let alert = false;
         try {
@@ -627073,17 +627622,50 @@ async function handleAimsResources(ctx3) {
   try {
     const os9 = __require("node:os");
     const config = loadConfig();
+    let ollamaPool = null;
+    let hardware = null;
+    try {
+      const {
+        getHardwareSnapshot: getHardwareSnapshot2,
+        getOllamaPool: getOllamaPool2,
+        resolveDefaultPoolConfig: resolveDefaultPoolConfig2,
+        shouldUseOllamaPoolForBaseUrl: shouldUseOllamaPoolForBaseUrl2
+      } = await Promise.resolve().then(() => (init_dist8(), dist_exports3));
+      hardware = await getHardwareSnapshot2();
+      const poolConfig = resolveDefaultPoolConfig2();
+      if (shouldUseOllamaPoolForBaseUrl2(poolConfig.baseInstanceUrl)) {
+        const status = await getOllamaPool2({ baseInstanceUrl: poolConfig.baseInstanceUrl }).status();
+        ollamaPool = {
+          placement: status.placement,
+          instances: status.instances.map((inst) => ({
+            id: inst.id,
+            base_url: inst.baseUrl,
+            pool_owned: inst.poolOwned,
+            gpu_uuid: inst.gpuUuid,
+            gpu_index: inst.gpuIndex,
+            inflight: inst.inflight,
+            max_parallel: inst.maxParallel,
+            total_requests: inst.totalRequests
+          }))
+        };
+      }
+    } catch {
+      hardware = null;
+      ollamaPool = null;
+    }
     sendJson(res, 200, {
       compute: {
         cpu: os9.cpus()[0]?.model ?? "unknown",
         cores: os9.cpus().length,
         ram_gb: Math.round(os9.totalmem() / 1024 ** 3),
-        platform: process.platform
+        platform: process.platform,
+        hardware
       },
       backend: {
         type: config.backendType,
         url: config.backendUrl,
-        model: config.model
+        model: config.model,
+        ollama_pool: ollamaPool
       },
       "aims:control": "A.4"
     });
@@ -641443,6 +642025,32 @@ async function handleRequest(req2, res, ollamaUrl, verbose) {
         }
       } catch {
       }
+      let ollamaPool = null;
+      try {
+        const {
+          getOllamaPool: getOllamaPool2,
+          resolveDefaultPoolConfig: resolveDefaultPoolConfig2,
+          shouldUseOllamaPoolForBaseUrl: shouldUseOllamaPoolForBaseUrl2
+        } = await Promise.resolve().then(() => (init_dist8(), dist_exports3));
+        const poolConfig = resolveDefaultPoolConfig2();
+        if (shouldUseOllamaPoolForBaseUrl2(poolConfig.baseInstanceUrl)) {
+          const status2 = await getOllamaPool2({ baseInstanceUrl: poolConfig.baseInstanceUrl }).status();
+          ollamaPool = {
+            placement: status2.placement,
+            instances: status2.instances.map((inst) => ({
+              id: inst.id,
+              base_url: inst.baseUrl,
+              pool_owned: inst.poolOwned,
+              gpu_uuid: inst.gpuUuid,
+              gpu_index: inst.gpuIndex,
+              inflight: inst.inflight,
+              max_parallel: inst.maxParallel,
+              total_requests: inst.totalRequests
+            }))
+          };
+        }
+      } catch {
+      }
       let latestVersion = null;
       try {
         const ver = es("npm view omnius version 2>/dev/null", { encoding: "utf8", timeout: 5e3, stdio: "pipe" }).trim();
@@ -641452,6 +642060,7 @@ async function handleRequest(req2, res, ollamaUrl, verbose) {
       jsonResponse(res, 200, {
         gpu: gpus,
         gpu_utilization: gpuUtil,
+        ollama_pool: ollamaPool,
         total_vram_gb: totalVram,
         ram_gb: Math.round(totalMem / 1024 ** 3),
         ram_used_pct: ramUsedPct,

package/npm-shrinkwrap.json CHANGED Viewed

@@ -1,12 +1,12 @@
 {
   "name": "omnius",
-  "version": "1.0.83",
+  "version": "1.0.85",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "omnius",
-      "version": "1.0.83",
+      "version": "1.0.85",
       "bundleDependencies": [
         "image-to-ascii"
       ],

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "omnius",
-  "version": "1.0.83",
+  "version": "1.0.85",
   "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
   "type": "module",
   "main": "./dist/index.js",