npm - omnius - Versions diffs - 1.0.146 → 1.0.148 - Mend

omnius 1.0.146 → 1.0.148

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (3)

package/dist/index.js (changed)

@@ -13138,6 +13138,27 @@ async function handleCmd(cmd) {
         var _csAvgLatency = _cohereStats.queriesAnswered > 0 ? Math.round(_cohereStats.totalLatencyMs / _cohereStats.queriesAnswered) : 0;
         var _csModels = Object.entries(_cohereStats.modelsUsed).sort(function(a, b) { return b[1] - a[1]; });
         var _csPeers = Object.entries(_cohereStats.peersServed).sort(function(a, b) { return b[1] - a[1]; });
+        var _csSnapshot = {
+          status: cohereActive ? 'active' : 'inactive',
+          active: cohereActive,
+          daemonPid: process.pid,
+          uptimeSec: _csUptime,
+          lastQueryAt: _cohereStats.lastQueryAt || 0,
+          queriesReceived: _cohereStats.queriesReceived,
+          queriesAnswered: _cohereStats.queriesAnswered,
+          queriesErrors: _cohereStats.queriesErrors,
+          queriesSent: _cohereStats.queriesSent,
+          avgLatencyMs: _csAvgLatency,
+          bytesIn: _cohereStats.bytesIn,
+          bytesOut: _cohereStats.bytesOut,
+          modelsUsed: _cohereStats.modelsUsed,
+          peersServed: _cohereStats.peersServed,
+          allowedModels: _cohereAllowedModels ? [..._cohereAllowedModels] : null
+        };
+        if (args.format === 'json' || args.json === true || args.json === 'true' || args.json === '1') {
+          writeResp(id, { ok: true, output: JSON.stringify(_csSnapshot) });
+          break;
+        }
         var _csLines = [
           '═══ COHERE Network Stats ═══',
           '',
@@ -16549,6 +16570,14 @@ process.on('SIGINT', () => process.emit('SIGTERM'));
           max_tokens: {
             type: "string",
             description: "For remote_infer: maximum tokens to generate (e.g. '4096'). Default: 4096"
+          },
+          format: {
+            type: "string",
+            description: "For cohere_stats: set to 'json' for structured stats"
+          },
+          json: {
+            type: "string",
+            description: "For cohere_stats: set to '1' for structured stats"
           }
         },
         required: ["action"],
@@ -16686,7 +16715,7 @@ process.on('SIGINT', () => process.emit('SIGTERM'));
               result = await this.sendDaemonCmd("cohere_disable", {});
               break;
             case "cohere_stats":
-              result = await this.sendDaemonCmd("cohere_stats", {});
+              result = await this.sendDaemonCmd("cohere_stats", { format: String(args.format ?? ""), json: String(args.json ?? "") });
               break;
             case "cohere_allow_model":
               result = await this.sendDaemonCmd("cohere_allow_model", { model: String(args.model ?? "") });
@@ -529012,38 +529041,136 @@ function inferHomeFromProcUid(pid) {
   }
   return null;
 }
+function detectPeerOmniusOllamaPool() {
+  if (!isDirectory("/proc"))
+    return false;
+  const selfPid = String(process.pid);
+  const selfPpid = String(process.ppid ?? "");
+  const peerNodePids = /* @__PURE__ */ new Set();
+  let entries;
+  try {
+    entries = readdirSync21("/proc", { withFileTypes: true }).filter((d2) => d2.isDirectory() && /^\d+$/.test(d2.name)).map((d2) => ({ name: d2.name }));
+  } catch {
+    return false;
+  }
+  for (const e2 of entries) {
+    if (e2.name === selfPid || e2.name === selfPpid)
+      continue;
+    try {
+      const cmdline = readFileSync50(`/proc/${e2.name}/cmdline`, "utf8");
+      if (!cmdline.includes("node"))
+        continue;
+      if (!/[/\\]omnius[/\\]dist[/\\]index\.js|[/\\]omnius[/\\]/i.test(cmdline))
+        continue;
+      peerNodePids.add(e2.name);
+    } catch {
+    }
+  }
+  if (peerNodePids.size === 0)
+    return false;
+  for (const e2 of entries) {
+    try {
+      const cmd = readFileSync50(`/proc/${e2.name}/cmdline`, "utf8");
+      if (!cmd.includes("ollama"))
+        continue;
+      if (!cmd.split("\0").includes("serve"))
+        continue;
+      const status = readFileSync50(`/proc/${e2.name}/status`, "utf8");
+      const ppid = status.match(/^PPid:\s+(\d+)/m)?.[1];
+      if (ppid && peerNodePids.has(ppid))
+        return true;
+    } catch {
+    }
+  }
+  return false;
+}
 async function detectGpus() {
   if (_nvidiaSmiAvailable === false)
     return [];
   return new Promise((resolve56) => {
-    exec2("nvidia-smi --query-gpu=index,uuid,name,memory.total,memory.free,utilization.gpu --format=csv,noheader,nounits 2>/dev/null", { encoding: "utf8", timeout: 3e3 }, (err, stdout) => {
+    const queryFields = "index,uuid,name,memory.total,memory.free,utilization.gpu,compute_cap";
+    exec2(`nvidia-smi --query-gpu=${queryFields} --format=csv,noheader,nounits 2>/dev/null`, { encoding: "utf8", timeout: 3e3 }, (err, stdout) => {
       if (err) {
-        _nvidiaSmiAvailable = false;
-        resolve56([]);
+        exec2("nvidia-smi --query-gpu=index,uuid,name,memory.total,memory.free,utilization.gpu --format=csv,noheader,nounits 2>/dev/null", { encoding: "utf8", timeout: 3e3 }, (err2, stdout2) => {
+          if (err2) {
+            _nvidiaSmiAvailable = false;
+            resolve56([]);
+            return;
+          }
+          _nvidiaSmiAvailable = true;
+          resolve56(parseGpuQueryOutput(
+            stdout2,
+            /* hasComputeCap */
+            false
+          ));
+        });
         return;
       }
       _nvidiaSmiAvailable = true;
-      const gpus = [];
-      for (const line of stdout.split("\n")) {
-        const parts = line.split(",").map((s2) => s2.trim());
-        if (parts.length < 6)
-          continue;
-        const idx = Number(parts[0]);
-        if (!Number.isFinite(idx))
-          continue;
-        gpus.push({
-          index: idx,
-          uuid: parts[1] ?? "",
-          name: parts[2] ?? "",
-          vramTotalMB: Number(parts[3]) || 0,
-          vramFreeMB: Number(parts[4]) || 0,
-          utilization: Number(parts[5]) || 0
-        });
-      }
-      resolve56(gpus);
+      resolve56(parseGpuQueryOutput(
+        stdout,
+        /* hasComputeCap */
+        true
+      ));
     });
   });
 }
+function parseGpuQueryOutput(stdout, hasComputeCap) {
+  const gpus = [];
+  const minFields = hasComputeCap ? 7 : 6;
+  for (const line of stdout.split("\n")) {
+    const parts = line.split(",").map((s2) => s2.trim());
+    if (parts.length < minFields)
+      continue;
+    const idx = Number(parts[0]);
+    if (!Number.isFinite(idx))
+      continue;
+    const info = {
+      index: idx,
+      uuid: parts[1] ?? "",
+      name: parts[2] ?? "",
+      vramTotalMB: Number(parts[3]) || 0,
+      vramFreeMB: Number(parts[4]) || 0,
+      utilization: Number(parts[5]) || 0
+    };
+    if (hasComputeCap) {
+      const cap = Number(parts[6]);
+      if (Number.isFinite(cap))
+        info.computeCapability = cap;
+    }
+    gpus.push(info);
+  }
+  return gpus;
+}
+function resolveMinGpuVramMB() {
+  const fromEnv = Number(process.env["OMNIUS_OLLAMA_MIN_GPU_MB"]);
+  return Number.isFinite(fromEnv) && fromEnv > 0 ? fromEnv : DEFAULT_MIN_GPU_VRAM_MB;
+}
+function resolveMinComputeCapability() {
+  const fromEnv = Number(process.env["OMNIUS_OLLAMA_MIN_COMPUTE_CAP"]);
+  return Number.isFinite(fromEnv) && fromEnv > 0 ? fromEnv : DEFAULT_MIN_COMPUTE_CAPABILITY;
+}
+function isCapableForLLM(gpu, thresholds = {}) {
+  const minVramMB = thresholds.minVramMB ?? resolveMinGpuVramMB();
+  const minComputeCap = thresholds.minComputeCap ?? resolveMinComputeCapability();
+  if (gpu.vramTotalMB < minVramMB)
+    return false;
+  if (gpu.computeCapability !== void 0 && gpu.computeCapability < minComputeCap)
+    return false;
+  return true;
+}
+function filterCapableGpus(gpus, thresholds) {
+  return gpus.filter((g) => isCapableForLLM(g, thresholds));
+}
+function recommendMaxParallelFromVram(minFreeMB) {
+  if (minFreeMB >= 60 * 1024)
+    return 8;
+  if (minFreeMB >= 40 * 1024)
+    return 4;
+  if (minFreeMB >= 24 * 1024)
+    return 2;
+  return 1;
+}
 async function getHardwareSnapshot() {
   const { totalmem: totalmem8, freemem: freemem7, cpus: cpus5 } = await import("node:os");
   const gpus = await detectGpus();
@@ -529127,10 +529254,15 @@ async function findFreePort(start2) {
 }
 function resolveDefaultPoolConfig() {
   const baseInstanceUrl = process.env["OMNIUS_OLLAMA_BASE_URL"] || process.env["OLLAMA_HOST"]?.replace(/^([^:/]+:[0-9]+)$/, "http://$1") || "http://127.0.0.1:11434";
-  const maxParallelPerInstance = Number(process.env["OMNIUS_OLLAMA_MAX_PARALLEL"]) || 1;
-  const maxSpawnedInstances = Number(process.env["OMNIUS_OLLAMA_MAX_INSTANCES"]) || 0;
+  const maxParallelExplicit = process.env["OMNIUS_OLLAMA_MAX_PARALLEL"] !== void 0;
+  const maxParallelPerInstance = maxParallelExplicit ? Number(process.env["OMNIUS_OLLAMA_MAX_PARALLEL"]) || 1 : 1;
+  const autoTuneMaxParallel = !maxParallelExplicit;
+  const gpuPlacementExplicit = process.env["OMNIUS_OLLAMA_GPU_PLACEMENT"] !== void 0;
+  const maxInstancesExplicit = process.env["OMNIUS_OLLAMA_MAX_INSTANCES"] !== void 0;
+  const peerPoolActive = !gpuPlacementExplicit && !maxInstancesExplicit && detectPeerOmniusOllamaPool();
+  const maxSpawnedInstances = maxInstancesExplicit ? Number(process.env["OMNIUS_OLLAMA_MAX_INSTANCES"]) || 0 : peerPoolActive ? 1 : 0;
   const targetGpuInstances = Number(process.env["OMNIUS_OLLAMA_TARGET_GPU_INSTANCES"]) || 0;
-  const gpuPlacementRaw = (process.env["OMNIUS_OLLAMA_GPU_PLACEMENT"] ?? "auto").toLowerCase();
+  const gpuPlacementRaw = (process.env["OMNIUS_OLLAMA_GPU_PLACEMENT"] ?? (peerPoolActive ? "elastic" : "auto")).toLowerCase();
   const gpuPlacement = gpuPlacementRaw === "dedicated" || gpuPlacementRaw === "elastic" || gpuPlacementRaw === "auto" ? gpuPlacementRaw : "auto";
   const idleMs = Number(process.env["OMNIUS_OLLAMA_IDLE_MS"]) || 3 * 60 * 60 * 1e3;
   const reaperIntervalMs = Number(process.env["OMNIUS_OLLAMA_REAPER_MS"]) || 3e4;
@@ -529155,7 +529287,8 @@ function resolveDefaultPoolConfig() {
     ollamaBinary,
     spawnReadyTimeoutSec,
     networkRxBudgetBytesPerSec,
-    networkTxBudgetBytesPerSec
+    networkTxBudgetBytesPerSec,
+    autoTuneMaxParallel
   };
 }
 function parseNullableNumber(value2) {
@@ -529223,11 +529356,13 @@ function setOllamaPool(pool3) {
     _poolByBaseUrl.set(pool3.statusConfig().baseInstanceUrl, pool3);
   }
 }
-var _nvidiaSmiAvailable, _lastNetworkSnapshot, OllamaInstance, realInstanceSpawner, _gpuCursor, OllamaPool, _poolSingleton, _poolByBaseUrl;
+var _nvidiaSmiAvailable, DEFAULT_MIN_GPU_VRAM_MB, DEFAULT_MIN_COMPUTE_CAPABILITY, _lastNetworkSnapshot, OllamaInstance, realInstanceSpawner, _gpuCursor, OllamaPool, _poolSingleton, _poolByBaseUrl;
 var init_ollama_pool = __esm({
   "packages/orchestrator/dist/ollama-pool.js"() {
     "use strict";
     _nvidiaSmiAvailable = null;
+    DEFAULT_MIN_GPU_VRAM_MB = 16 * 1024;
+    DEFAULT_MIN_COMPUTE_CAPABILITY = 7;
     _lastNetworkSnapshot = null;
     OllamaInstance = class {
       state;
@@ -529281,6 +529416,7 @@ var init_ollama_pool = __esm({
         env2["OLLAMA_MODELS"] = config.sharedModelStore;
       }
       env2["OLLAMA_NUM_PARALLEL"] = String(config.maxParallelPerInstance);
+      env2["OLLAMA_KEEP_ALIVE"] = process.env["OMNIUS_OLLAMA_SPAWN_KEEP_ALIVE"] ?? "-1";
       if (gpuUuid) {
         env2["CUDA_VISIBLE_DEVICES"] = gpuUuid;
         env2["GPU_DEVICE_ORDINAL"] = gpuIndex === null ? "" : String(gpuIndex);
@@ -529334,6 +529470,20 @@ var init_ollama_pool = __esm({
       dedicatedGpuPoolActive = false;
       activePlacementMode = "constrained";
       gpuCache = null;
+      /**
+       * Set once after the first capability-filtered GPU detection. Prevents the
+       * auto-tune from oscillating maxParallelPerInstance as free VRAM fluctuates
+       * during normal inference.
+       */
+      _autoTuned = false;
+      /** UUIDs we've already emitted gpu-excluded for. Prevents log spam. */
+      _excludedGpusReported = /* @__PURE__ */ new Set();
+      /**
+       * Cached model footprint in MiB (model name → estimated VRAM required).
+       * Populated lazily via /api/show on the base instance. null sentinel means
+       * "we tried but failed" so we don't re-probe in a tight loop.
+       */
+      _modelVramEstimateMB = /* @__PURE__ */ new Map();
       slotWaiters = [];
       /**
        * Agent → preferred instance id. Set whenever an acquire resolves an
@@ -529533,7 +529683,18 @@ var init_ollama_pool = __esm({
           const freedPick = this.pickInstance({ model });
           if (freedPick)
             return freedPick;
-          const gpu = this.pickGpuForSpawn(gpus);
+          const vramNeededMB = await this.estimateModelVramMB(model);
+          const capable = this.gpusWithCapacityForModel(gpus, vramNeededMB);
+          if (capable.length === 0 && vramNeededMB !== null) {
+            this.emit("spawn-skipped", {
+              reason: "insufficient-vram",
+              model,
+              vramNeededMB,
+              gpuFreeMBs: gpus.map((g) => g.vramFreeMB)
+            });
+            return null;
+          }
+          const gpu = this.pickGpuForSpawn(capable.length > 0 ? capable : gpus);
           return this.spawnInstance(model, gpu);
         });
       }
@@ -529544,8 +529705,20 @@ var init_ollama_pool = __esm({
           if (!this.canSpawnWithSharedModelStore(model))
             return;
           const target = this.dedicatedTargetCount(gpus);
+          const vramNeededMB = await this.estimateModelVramMB(model);
           while (this.instances.filter((i2) => i2.state.poolOwned).length < target) {
-            const gpu = this.pickGpuForSpawn(gpus);
+            const candidates = this.gpusWithCapacityForModel(gpus, vramNeededMB);
+            const pool3 = candidates.length > 0 ? candidates : vramNeededMB === null ? gpus : [];
+            if (pool3.length === 0) {
+              this.emit("spawn-skipped", {
+                reason: "insufficient-vram",
+                model,
+                vramNeededMB,
+                gpuFreeMBs: gpus.map((g) => g.vramFreeMB)
+              });
+              break;
+            }
+            const gpu = this.pickGpuForSpawn(pool3);
             if (!gpu)
               break;
             const inst = await this.spawnInstance(model, gpu);
@@ -529603,9 +529776,59 @@ var init_ollama_pool = __esm({
         if (this.gpuCache && now - this.gpuCache.takenAtMs <= maxAgeMs) {
           return this.gpuCache.gpus;
         }
-        const gpus = await this.gpuDetector();
-        this.gpuCache = { gpus, takenAtMs: now };
-        return gpus;
+        const rawGpus = await this.gpuDetector();
+        const filtered = filterCapableGpus(rawGpus);
+        const filteredUuids = new Set(filtered.map((g) => g.uuid));
+        for (const g of rawGpus) {
+          if (filteredUuids.has(g.uuid))
+            continue;
+          if (this._excludedGpusReported.has(g.uuid))
+            continue;
+          this._excludedGpusReported.add(g.uuid);
+          const reason = g.vramTotalMB < resolveMinGpuVramMB() ? "insufficient-vram" : "insufficient-compute-capability";
+          this.emit("gpu-excluded", {
+            uuid: g.uuid,
+            index: g.index,
+            name: g.name,
+            vramTotalMB: g.vramTotalMB,
+            computeCapability: g.computeCapability,
+            reason
+          });
+        }
+        this.gpuCache = { gpus: filtered, takenAtMs: now };
+        this.maybeAutoTuneMaxParallel(filtered);
+        return filtered;
+      }
+      /**
+       * One-shot: bump `maxParallelPerInstance` from the worst-case free VRAM
+       * across capable GPUs the first time we see them. We never tune down (a
+       * subsequent low-VRAM read shouldn't strip concurrency from in-flight
+       * requests), and we never tune again once successful — the recommendation
+       * ladder is stable enough that a single read at startup is correct.
+       */
+      maybeAutoTuneMaxParallel(filtered) {
+        if (!this.config.autoTuneMaxParallel)
+          return;
+        if (this._autoTuned)
+          return;
+        if (filtered.length === 0)
+          return;
+        const minFreeMB = filtered.reduce((m2, g) => Math.min(m2, g.vramFreeMB), Number.POSITIVE_INFINITY);
+        const recommended = recommendMaxParallelFromVram(minFreeMB);
+        if (recommended > this.config.maxParallelPerInstance) {
+          const previous = this.config.maxParallelPerInstance;
+          this.config.maxParallelPerInstance = recommended;
+          for (const inst of this.instances) {
+            inst.state.maxParallel = recommended;
+          }
+          this.emit("max-parallel-tuned", {
+            previous,
+            recommended,
+            minFreeMB,
+            capableGpuCount: filtered.length
+          });
+        }
+        this._autoTuned = true;
       }
       async spawnInstance(model, gpu) {
         let port;
@@ -529668,6 +529891,56 @@ var init_ollama_pool = __esm({
         });
         return inst;
       }
+      /**
+       * Best-effort: estimate the VRAM (in MiB) a model needs to be served
+       * without CPU spill. Hits the base instance's `/api/show` once per model
+       * and caches the result. Returns null when the probe fails (the caller
+       * then falls back to "no estimate" semantics — capacity check is skipped).
+       *
+       * The number returned is `disk_size * 1.15 + maxParallel * 1024` (1 GiB of
+       * KV cache per parallel slot — conservative for 30B-class models). Newer
+       * model families may exceed this margin slightly; bumps are safe via
+       * OMNIUS_OLLAMA_VRAM_SAFETY_MARGIN.
+       */
+      async estimateModelVramMB(model) {
+        if (this._modelVramEstimateMB.has(model)) {
+          return this._modelVramEstimateMB.get(model) ?? null;
+        }
+        let bytesOnDisk = null;
+        try {
+          const url = `${this.config.baseInstanceUrl.replace(/\/+$/, "")}/api/show`;
+          const resp = await fetch(url, {
+            method: "POST",
+            headers: { "Content-Type": "application/json" },
+            body: JSON.stringify({ name: model }),
+            signal: AbortSignal.timeout(2e3)
+          });
+          if (resp.ok) {
+            const data = await resp.json();
+            if (typeof data.size === "number" && data.size > 0)
+              bytesOnDisk = data.size;
+          }
+        } catch {
+        }
+        if (bytesOnDisk === null) {
+          this._modelVramEstimateMB.set(model, null);
+          return null;
+        }
+        const safetyMargin = Number(process.env["OMNIUS_OLLAMA_VRAM_SAFETY_MARGIN"]) || 1.15;
+        const kvCacheMB = this.config.maxParallelPerInstance * 1024;
+        const estimateMB = Math.ceil(bytesOnDisk / (1024 * 1024) * safetyMargin + kvCacheMB);
+        this._modelVramEstimateMB.set(model, estimateMB);
+        return estimateMB;
+      }
+      /**
+       * Filter GPUs to those with enough free VRAM for the model. Caller decides
+       * how to react to an empty list (skip spawn vs degrade to constrained).
+       */
+      gpusWithCapacityForModel(gpus, vramNeededMB) {
+        if (vramNeededMB === null)
+          return gpus;
+        return gpus.filter((g) => g.vramFreeMB >= vramNeededMB);
+      }
       /**
        * Pick a GPU for a freshly-spawned instance. Prefers GPUs that no
        * pool-owned instance is already pinned to, then most free VRAM. Returns
@@ -529704,6 +529977,28 @@ var init_ollama_pool = __esm({
             survivors.push(inst);
             continue;
           }
+          const PROBE_GRACE_MS = 3e4;
+          if (inst.state.inflight === 0 && Date.now() - inst.state.spawnedAtMs > PROBE_GRACE_MS && await this.isPartialVramSpilled(inst)) {
+            const reapedAtMs = Date.now();
+            await inst.terminate();
+            this.dropAffinityFor(inst.state.id);
+            this.emit("instance-reaped", {
+              id: inst.state.id,
+              pid: inst.state.pid,
+              reason: "partial-vram",
+              totalRequests: inst.state.totalRequests,
+              peakInflight: inst.state.peakInflight,
+              ageMs: reapedAtMs - inst.state.spawnedAtMs,
+              idleMs: reapedAtMs - inst.state.lastUsedMs,
+              provenance: {
+                entity: `urn:omnius:ollama-instance:${inst.state.id}`,
+                activity: "ollama-instance-reap-partial-vram",
+                agent: "orchestrator.ollama-pool",
+                timestampMs: reapedAtMs
+              }
+            });
+            continue;
+          }
           if (inst.isIdleLongerThan(this.config.idleMs)) {
             const reapedAtMs = Date.now();
             await inst.terminate();
@@ -529711,6 +530006,7 @@ var init_ollama_pool = __esm({
             this.emit("instance-reaped", {
               id: inst.state.id,
               pid: inst.state.pid,
+              reason: "idle",
               totalRequests: inst.state.totalRequests,
               peakInflight: inst.state.peakInflight,
               ageMs: reapedAtMs - inst.state.spawnedAtMs,
@@ -529728,6 +530024,33 @@ var init_ollama_pool = __esm({
         }
         this.instances = survivors;
       }
+      /**
+       * Probe `/api/ps` on the instance and return true if any resident model has
+       * less than 95% of its weights in VRAM — the unmistakable CPU-offload
+       * signature that produces 50× slowdowns. Defensive: any HTTP failure
+       * returns false so a transient network blip never triggers a reap.
+       */
+      async isPartialVramSpilled(inst) {
+        const PARTIAL_VRAM_THRESHOLD = 0.95;
+        try {
+          const url = `${inst.state.baseUrl.replace(/\/+$/, "")}/api/ps`;
+          const resp = await fetch(url, { signal: AbortSignal.timeout(2e3) });
+          if (!resp.ok)
+            return false;
+          const data = await resp.json();
+          if (!data.models || data.models.length === 0)
+            return false;
+          return data.models.some((m2) => {
+            const total = m2.size ?? 0;
+            const vram = m2.size_vram ?? 0;
+            if (total <= 0)
+              return false;
+            return vram / total < PARTIAL_VRAM_THRESHOLD;
+          });
+        } catch {
+          return false;
+        }
+      }
       /** Stop the reaper and terminate every spawned instance. Call on process exit. */
       async shutdown() {
         if (this.reaperHandle) {
@@ -571197,6 +571520,47 @@ var init_voice_soul = __esm({
   }
 });
+// packages/cli/src/tui/usage-bars.ts
+function formatCompactCount(value2) {
+  const n2 = Math.max(0, Math.floor(Number.isFinite(value2) ? value2 : 0));
+  if (n2 < 1e3) return String(n2);
+  if (n2 < 1e6) return `${(n2 / 1e3).toFixed(n2 < 1e4 ? 1 : 0)}K`;
+  return `${(n2 / 1e6).toFixed(n2 < 1e7 ? 1 : 0)}M`;
+}
+function formatResetDelta(resetAt, now = Date.now()) {
+  if (!Number.isFinite(resetAt) || resetAt <= now) return "";
+  const totalMinutes = Math.ceil((resetAt - now) / 6e4);
+  if (totalMinutes < 60) return ` reset ${totalMinutes}m`;
+  const hours = Math.floor(totalMinutes / 60);
+  const minutes = totalMinutes % 60;
+  return minutes > 0 ? ` reset ${hours}h ${minutes}m` : ` reset ${hours}h`;
+}
+function formatUsageBar(options2) {
+  const total = Math.max(0, Math.floor(Number.isFinite(options2.total) ? options2.total : 0));
+  const rawUsed = Math.max(0, Math.floor(Number.isFinite(options2.used) ? options2.used : 0));
+  const used = total > 0 ? Math.min(total, rawUsed) : 0;
+  const width = Math.max(4, options2.width ?? 18);
+  const labelWidth = Math.max(options2.label.length, options2.labelWidth ?? 16);
+  const pct = total > 0 ? Math.round(used / total * 100) : 0;
+  const filled = total > 0 ? Math.min(width, Math.round(pct / 100 * width)) : 0;
+  const color = pct >= 90 ? c3.red : pct >= 70 ? c3.yellow : c3.green;
+  const bar = color("█".repeat(filled)) + c3.dim("░".repeat(width - filled));
+  const reset = options2.resetAt ? c3.dim(formatResetDelta(options2.resetAt)) : "";
+  return [
+    c3.cyan(options2.label.padEnd(labelWidth)),
+    bar,
+    color(`${pct}%`.padStart(4)),
+    c3.dim(`${formatCompactCount(rawUsed)}/${formatCompactCount(total)}`),
+    reset
+  ].join(" ").trimEnd();
+}
+var init_usage_bars = __esm({
+  "packages/cli/src/tui/usage-bars.ts"() {
+    "use strict";
+    init_render();
+  }
+});
 // packages/cli/src/tui/expose.ts
 import { createServer as createServer5, request as httpRequest } from "node:http";
 import { request as httpsRequest } from "node:https";
@@ -571243,6 +571607,38 @@ function fmtTokens(n2) {
   if (n2 < 1e6) return `${(n2 / 1e3).toFixed(1)}K`;
   return `${(n2 / 1e6).toFixed(1)}M`;
 }
+function safeNonNegativeInt(value2) {
+  const n2 = Number(value2);
+  return Number.isFinite(n2) && n2 > 0 ? Math.floor(n2) : 0;
+}
+function nextSponsorDailyReset(now = Date.now()) {
+  return now + SPONSOR_DAILY_WINDOW_MS;
+}
+function readSponsorUsageState(stateDir) {
+  try {
+    const path12 = join105(stateDir, "sponsor", SPONSOR_USAGE_FILE_NAME);
+    if (!existsSync90(path12)) return null;
+    const parsed = JSON.parse(readFileSync71(path12, "utf8"));
+    const dailyTokensUsed = safeNonNegativeInt(parsed.dailyTokensUsed);
+    const dailyTokensResetAt = safeNonNegativeInt(parsed.dailyTokensResetAt);
+    if (!dailyTokensResetAt) return null;
+    return {
+      dailyTokensUsed,
+      dailyTokensResetAt,
+      updatedAt: typeof parsed.updatedAt === "string" ? parsed.updatedAt : (/* @__PURE__ */ new Date()).toISOString()
+    };
+  } catch {
+    return null;
+  }
+}
+function writeSponsorUsageState(stateDir, state) {
+  try {
+    const dir = join105(stateDir, "sponsor");
+    mkdirSync50(dir, { recursive: true });
+    writeFileSync45(join105(dir, SPONSOR_USAGE_FILE_NAME), JSON.stringify(state, null, 2));
+  } catch {
+  }
+}
 function readExposeState(stateDir) {
   try {
     const path12 = join105(stateDir, STATE_FILE_NAME);
@@ -571406,11 +571802,12 @@ function removeP2PExposeState(stateDir) {
   } catch {
   }
 }
-var HOP_BY_HOP_HEADERS, CF_HEADERS_PREFIX, DEFAULT_EXPOSE_MAX_BODY_BYTES, INTERNAL_CAPABILITIES, DEFAULT_TARGETS, STATE_FILE_NAME, ExposeGateway, P2P_STATE_FILE_NAME, ExposeP2PGateway;
+var HOP_BY_HOP_HEADERS, CF_HEADERS_PREFIX, DEFAULT_EXPOSE_MAX_BODY_BYTES, INTERNAL_CAPABILITIES, DEFAULT_TARGETS, STATE_FILE_NAME, SPONSOR_USAGE_FILE_NAME, SPONSOR_DAILY_WINDOW_MS, SPONSOR_REQUEST_WINDOW_MS, ExposeGateway, P2P_STATE_FILE_NAME, ExposeP2PGateway;
 var init_expose = __esm({
   "packages/cli/src/tui/expose.ts"() {
     "use strict";
     init_render();
+    init_usage_bars();
     init_typed_node_events();
     HOP_BY_HOP_HEADERS = /* @__PURE__ */ new Set([
       "connection",
@@ -571432,6 +571829,9 @@ var init_expose = __esm({
       custom: "http://127.0.0.1:11434"
     };
     STATE_FILE_NAME = "expose-state.json";
+    SPONSOR_USAGE_FILE_NAME = "usage.json";
+    SPONSOR_DAILY_WINDOW_MS = 864e5;
+    SPONSOR_REQUEST_WINDOW_MS = 6e4;
     ExposeGateway = class _ExposeGateway extends EventEmitter8 {
       constructor(options2) {
         super();
@@ -571449,6 +571849,8 @@ var init_expose = __esm({
         } else {
           this._authKey = options2.authKey;
         }
+        this.loadSponsorUsage();
+        this.refreshSponsorUsageStats();
       }
       options;
       server = null;
@@ -571471,6 +571873,7 @@ var init_expose = __esm({
       _dailyTokensResetAt = 0;
       /** Sponsor rate limits (set via setSponsorLimits) */
       _sponsorLimits = null;
+      _sponsorBlockedRequests = 0;
       _authKey;
       _targetUrl;
       _kind;
@@ -571489,7 +571892,8 @@ var init_expose = __esm({
         users: /* @__PURE__ */ new Map(),
         budgetTokensRemaining: 0,
         budgetTokensTotal: 0,
-        budgetResetAt: 0
+        budgetResetAt: 0,
+        sponsorUsage: null
       };
       get tunnelUrl() {
         return this._tunnelUrl;
@@ -571509,42 +571913,140 @@ var init_expose = __esm({
       /** Set sponsor rate limits — enables rate limiting middleware in the proxy */
       setSponsorLimits(limits) {
         this._sponsorLimits = limits;
+        this.ensureSponsorDailyWindow();
+        this.refreshSponsorUsageStats();
+        this.emitStats();
+      }
+      getSponsorUsageSnapshot() {
+        this.refreshSponsorUsageStats();
+        return this._stats.sponsorUsage ? { ...this._stats.sponsorUsage } : null;
+      }
+      loadSponsorUsage() {
+        if (!this._stateDir) {
+          this._dailyTokensResetAt = nextSponsorDailyReset();
+          return;
+        }
+        const saved = readSponsorUsageState(this._stateDir);
+        if (!saved) {
+          this._dailyTokensUsed = 0;
+          this._dailyTokensResetAt = nextSponsorDailyReset();
+          return;
+        }
+        const now = Date.now();
+        if (saved.dailyTokensResetAt <= now) {
+          this._dailyTokensUsed = 0;
+          this._dailyTokensResetAt = nextSponsorDailyReset(now);
+          this.saveSponsorUsage();
+        } else {
+          this._dailyTokensUsed = saved.dailyTokensUsed;
+          this._dailyTokensResetAt = saved.dailyTokensResetAt;
+        }
+      }
+      saveSponsorUsage() {
+        if (!this._stateDir) return;
+        writeSponsorUsageState(this._stateDir, {
+          dailyTokensUsed: this._dailyTokensUsed,
+          dailyTokensResetAt: this._dailyTokensResetAt,
+          updatedAt: (/* @__PURE__ */ new Date()).toISOString()
+        });
+      }
+      ensureSponsorDailyWindow(now = Date.now()) {
+        if (!this._dailyTokensResetAt || this._dailyTokensResetAt <= now) {
+          this._dailyTokensUsed = 0;
+          this._dailyTokensResetAt = nextSponsorDailyReset(now);
+          this.saveSponsorUsage();
+        }
+      }
+      pruneSponsorRequestWindows(now = Date.now()) {
+        for (const [ip, window2] of this._rateLimitWindows.entries()) {
+          while (window2.length > 0 && window2[0] < now - SPONSOR_REQUEST_WINDOW_MS) window2.shift();
+          if (window2.length === 0) this._rateLimitWindows.delete(ip);
+        }
+      }
+      sponsorRequestWindowUsage(now = Date.now()) {
+        this.pruneSponsorRequestWindows(now);
+        let count = 0;
+        let oldest = Number.POSITIVE_INFINITY;
+        for (const window2 of this._rateLimitWindows.values()) {
+          count += window2.length;
+          if (window2.length > 0) oldest = Math.min(oldest, window2[0]);
+        }
+        return {
+          count,
+          resetAt: Number.isFinite(oldest) ? oldest + SPONSOR_REQUEST_WINDOW_MS : now + SPONSOR_REQUEST_WINDOW_MS
+        };
+      }
+      refreshSponsorUsageStats(now = Date.now()) {
+        if (!this._sponsorLimits) {
+          this._stats.sponsorUsage = null;
+          return;
+        }
+        this.ensureSponsorDailyWindow(now);
+        const req2 = this.sponsorRequestWindowUsage(now);
+        this._stats.sponsorUsage = {
+          enabled: true,
+          transport: "tunnel",
+          dailyTokensUsed: this._dailyTokensUsed,
+          dailyTokensLimit: this._sponsorLimits.maxTokensPerDay,
+          dailyResetAt: this._dailyTokensResetAt,
+          requestsInWindow: req2.count,
+          requestsPerMinuteLimit: this._sponsorLimits.maxRequestsPerMinute,
+          requestWindowResetAt: req2.resetAt,
+          activeConnections: this._stats.activeConnections,
+          maxConcurrent: this._sponsorLimits.maxConcurrent,
+          blockedRequests: this._sponsorBlockedRequests,
+          allowedModels: this._sponsorLimits.allowedModels === "all" ? "all" : [...this._sponsorLimits.allowedModels]
+        };
+      }
+      markSponsorBlocked() {
+        this._sponsorBlockedRequests++;
+        this.refreshSponsorUsageStats();
       }
       /** Check rate limits for a request. Returns null if OK, or error message string if blocked. */
-      checkRateLimit(userIp, model) {
+      checkRateLimit(userIp, model, options2 = {}) {
         if (!this._sponsorLimits) return null;
         const lim = this._sponsorLimits;
+        const now = Date.now();
+        if (lim.maxRequestsPerMinute <= 0 || lim.maxTokensPerDay <= 0 || lim.maxConcurrent <= 0) {
+          this.markSponsorBlocked();
+          return "Sponsored endpoint is paused or has no quota configured.";
+        }
         if (lim.allowedModels !== "all" && model && !lim.allowedModels.includes(model)) {
+          this.markSponsorBlocked();
           return `Model '${model}' is not available on this sponsored endpoint. Available: ${lim.allowedModels.join(", ")}`;
         }
-        if (this._stats.activeConnections >= lim.maxConcurrent) {
+        if (this._stats.activeConnections > lim.maxConcurrent) {
+          this.markSponsorBlocked();
           return `Too many concurrent requests (${this._stats.activeConnections}/${lim.maxConcurrent}). Try again shortly.`;
         }
-        const now = Date.now();
-        const windowMs = 6e4;
         let window2 = this._rateLimitWindows.get(userIp);
         if (!window2) {
           window2 = [];
           this._rateLimitWindows.set(userIp, window2);
         }
-        while (window2.length > 0 && window2[0] < now - windowMs) window2.shift();
+        while (window2.length > 0 && window2[0] < now - SPONSOR_REQUEST_WINDOW_MS) window2.shift();
         if (window2.length >= lim.maxRequestsPerMinute) {
-          const retryAfterMs = window2[0] + windowMs - now;
+          this.markSponsorBlocked();
+          const retryAfterMs = window2[0] + SPONSOR_REQUEST_WINDOW_MS - now;
           return `Rate limited (${lim.maxRequestsPerMinute} req/min). Retry in ${Math.ceil(retryAfterMs / 1e3)}s.`;
         }
-        window2.push(now);
-        if (this._dailyTokensResetAt < now) {
-          this._dailyTokensUsed = 0;
-          this._dailyTokensResetAt = now + 864e5;
-        }
+        if (options2.commitRequest) window2.push(now);
+        this.ensureSponsorDailyWindow(now);
         if (this._dailyTokensUsed >= lim.maxTokensPerDay) {
+          this.markSponsorBlocked();
           return `Daily token budget exhausted (${fmtTokens(lim.maxTokensPerDay)}). Resets in ${Math.ceil((this._dailyTokensResetAt - now) / 36e5)}h.`;
         }
+        this.refreshSponsorUsageStats(now);
         return null;
       }
       /** Track token usage from a completed response */
       trackTokenUsage(tokensIn, tokensOut) {
-        this._dailyTokensUsed += tokensIn + tokensOut;
+        const total = safeNonNegativeInt(tokensIn) + safeNonNegativeInt(tokensOut);
+        if (total <= 0) return;
+        this.ensureSponsorDailyWindow();
+        this._dailyTokensUsed += total;
+        this.saveSponsorUsage();
+        this.refreshSponsorUsageStats();
       }
       // ── Lifecycle ───────────────────────────────────────────────────────────
       async start() {
@@ -571753,7 +572255,7 @@ var init_expose = __esm({
           user.activeRequests++;
           user.lastSeen = Date.now();
           this.emitStats();
-          const preRateLimitCheck = this.checkRateLimit(userIp, "");
+          const preRateLimitCheck = this.checkRateLimit(userIp, "", { commitRequest: false });
           if (preRateLimitCheck) {
             this._stats.activeConnections--;
             user.activeRequests--;
@@ -571842,8 +572344,8 @@ var init_expose = __esm({
               } catch {
               }
             }
-            if (requestModel && this._sponsorLimits) {
-              const modelCheck = this.checkRateLimit(userIp, requestModel);
+            if (this._sponsorLimits) {
+              const modelCheck = this.checkRateLimit(userIp, requestModel, { commitRequest: true });
               if (modelCheck) {
                 this._stats.activeConnections--;
                 user.activeRequests--;
@@ -572229,10 +572731,12 @@ ${this.formatConnectionInfo()}`);
         });
       }
       emitStats() {
+        this.refreshSponsorUsageStats();
         this.emit("stats", {
           ...this._stats,
           modelUsage: new Map(this._stats.modelUsage),
-          users: new Map(this._stats.users)
+          users: new Map(this._stats.users),
+          sponsorUsage: this._stats.sponsorUsage ? { ...this._stats.sponsorUsage } : null
         });
       }
       /** Format connection info for display */
@@ -572274,6 +572778,28 @@ ${this.formatConnectionInfo()}`);
           const budgetColor = pct > 50 ? c3.green : pct > 20 ? c3.yellow : c3.red;
           lines.push(`  ${c3.cyan("Budget".padEnd(18))} ${budgetColor(fmtTokens(s2.budgetTokensRemaining))}${c3.dim("/")}${fmtTokens(s2.budgetTokensTotal)} ${c3.dim(`(${pct}% left)`)}`);
         }
+        if (s2.sponsorUsage) {
+          lines.push("");
+          lines.push(`  ${c3.bold("Sponsor Quota")}`);
+          lines.push(`  ${formatUsageBar({
+            label: "Daily tokens",
+            used: s2.sponsorUsage.dailyTokensUsed,
+            total: s2.sponsorUsage.dailyTokensLimit,
+            resetAt: s2.sponsorUsage.dailyResetAt
+          })}`);
+          lines.push(`  ${formatUsageBar({
+            label: "Requests/min",
+            used: s2.sponsorUsage.requestsInWindow,
+            total: s2.sponsorUsage.requestsPerMinuteLimit,
+            resetAt: s2.sponsorUsage.requestWindowResetAt
+          })}`);
+          lines.push(`  ${formatUsageBar({
+            label: "Concurrency",
+            used: s2.sponsorUsage.activeConnections,
+            total: s2.sponsorUsage.maxConcurrent
+          })}`);
+          lines.push(`  ${c3.cyan("Blocked".padEnd(18))} ${s2.sponsorUsage.blockedRequests}`);
+        }
         const visibleModels = Array.from(s2.modelUsage.entries()).filter(([model]) => !INTERNAL_CAPABILITIES.has(model));
         if (visibleModels.length > 0) {
           lines.push("");
@@ -572335,6 +572861,11 @@ ${this.formatConnectionInfo()}`);
       _passthrough = false;
       _loadbalance = false;
       _endpointAuth;
+      _sponsorLimits = null;
+      _sponsorBlockedRequests = 0;
+      _sponsorRequestWindow = [];
+      _dailyTokensUsed = 0;
+      _dailyTokensResetAt = 0;
       _pollTimer = null;
       _activityPollTimer = null;
       /** Fast token flash timer — pulses LED at 200ms while inference is active */
@@ -572353,7 +572884,8 @@ ${this.formatConnectionInfo()}`);
         users: /* @__PURE__ */ new Map(),
         budgetTokensRemaining: 0,
         budgetTokensTotal: 0,
-        budgetResetAt: 0
+        budgetResetAt: 0,
+        sponsorUsage: null
       };
       get peerId() {
         return this._peerId;
@@ -572395,6 +572927,93 @@ ${this.formatConnectionInfo()}`);
         } else {
           this._authKey = options2.authKey;
         }
+        this.loadSponsorUsage();
+        this.refreshSponsorUsageStats();
+      }
+      setSponsorLimits(limits) {
+        this._sponsorLimits = limits;
+        this.ensureSponsorDailyWindow();
+        this.refreshSponsorUsageStats();
+        this.emitStats();
+      }
+      getSponsorUsageSnapshot() {
+        this.refreshSponsorUsageStats();
+        return this._stats.sponsorUsage ? { ...this._stats.sponsorUsage } : null;
+      }
+      loadSponsorUsage() {
+        if (!this._stateDir) {
+          this._dailyTokensResetAt = nextSponsorDailyReset();
+          return;
+        }
+        const saved = readSponsorUsageState(this._stateDir);
+        if (!saved) {
+          this._dailyTokensUsed = 0;
+          this._dailyTokensResetAt = nextSponsorDailyReset();
+          return;
+        }
+        const now = Date.now();
+        if (saved.dailyTokensResetAt <= now) {
+          this._dailyTokensUsed = 0;
+          this._dailyTokensResetAt = nextSponsorDailyReset(now);
+          this.saveSponsorUsage();
+        } else {
+          this._dailyTokensUsed = saved.dailyTokensUsed;
+          this._dailyTokensResetAt = saved.dailyTokensResetAt;
+        }
+      }
+      saveSponsorUsage() {
+        if (!this._stateDir) return;
+        writeSponsorUsageState(this._stateDir, {
+          dailyTokensUsed: this._dailyTokensUsed,
+          dailyTokensResetAt: this._dailyTokensResetAt,
+          updatedAt: (/* @__PURE__ */ new Date()).toISOString()
+        });
+      }
+      ensureSponsorDailyWindow(now = Date.now()) {
+        if (!this._dailyTokensResetAt || this._dailyTokensResetAt <= now) {
+          this._dailyTokensUsed = 0;
+          this._dailyTokensResetAt = nextSponsorDailyReset(now);
+          this.saveSponsorUsage();
+        }
+      }
+      recordSponsorRequest(now = Date.now()) {
+        this._sponsorRequestWindow.push(now);
+        this.pruneSponsorRequestWindow(now);
+      }
+      pruneSponsorRequestWindow(now = Date.now()) {
+        while (this._sponsorRequestWindow.length > 0 && this._sponsorRequestWindow[0] < now - SPONSOR_REQUEST_WINDOW_MS) {
+          this._sponsorRequestWindow.shift();
+        }
+      }
+      trackTokenUsage(tokensIn, tokensOut) {
+        const total = safeNonNegativeInt(tokensIn) + safeNonNegativeInt(tokensOut);
+        if (total <= 0) return;
+        this.ensureSponsorDailyWindow();
+        this._dailyTokensUsed += total;
+        this.saveSponsorUsage();
+        this.refreshSponsorUsageStats();
+      }
+      refreshSponsorUsageStats(now = Date.now()) {
+        if (!this._sponsorLimits) {
+          this._stats.sponsorUsage = null;
+          return;
+        }
+        this.ensureSponsorDailyWindow(now);
+        this.pruneSponsorRequestWindow(now);
+        this._stats.sponsorUsage = {
+          enabled: true,
+          transport: "libp2p",
+          dailyTokensUsed: this._dailyTokensUsed,
+          dailyTokensLimit: this._sponsorLimits.maxTokensPerDay,
+          dailyResetAt: this._dailyTokensResetAt,
+          requestsInWindow: this._sponsorRequestWindow.length,
+          requestsPerMinuteLimit: this._sponsorLimits.maxRequestsPerMinute,
+          requestWindowResetAt: this._sponsorRequestWindow[0] ? this._sponsorRequestWindow[0] + SPONSOR_REQUEST_WINDOW_MS : now + SPONSOR_REQUEST_WINDOW_MS,
+          activeConnections: this._stats.activeConnections,
+          maxConcurrent: this._sponsorLimits.maxConcurrent,
+          blockedRequests: this._sponsorBlockedRequests,
+          allowedModels: this._sponsorLimits.allowedModels === "all" ? "all" : [...this._sponsorLimits.allowedModels]
+        };
       }
       async start() {
         this._onInfo?.("Connecting to nexus P2P network...");
@@ -572653,6 +573272,8 @@ ${this.formatConnectionInfo()}`);
                     }
                     this._stats.totalTokensIn += tokIn;
                     this._stats.totalTokensOut += tokOut;
+                    this.recordSponsorRequest();
+                    this.trackTokenUsage(tokIn, tokOut);
                     const peerId = record.from || record.peerId || "unknown";
                     const shortPeer = peerId.length > 16 ? peerId.slice(0, 16) + "..." : peerId;
                     let user = this._stats.users.get(shortPeer);
@@ -572716,10 +573337,12 @@ ${this.formatConnectionInfo()}`);
         }
       }
       emitStats() {
+        this.refreshSponsorUsageStats();
         this.emit("stats", {
           ...this._stats,
           modelUsage: new Map(this._stats.modelUsage),
-          users: new Map(this._stats.users)
+          users: new Map(this._stats.users),
+          sponsorUsage: this._stats.sponsorUsage ? { ...this._stats.sponsorUsage } : null
         });
       }
       /** Format connection info for display */
@@ -572767,6 +573390,28 @@ ${this.formatConnectionInfo()}`);
           const budgetColor = pct > 50 ? c3.green : pct > 20 ? c3.yellow : c3.red;
           lines.push(`  ${c3.cyan("Budget".padEnd(18))} ${budgetColor(fmtTokens(s2.budgetTokensRemaining))}${c3.dim("/")}${fmtTokens(s2.budgetTokensTotal)} ${c3.dim(`(${pct}% left)`)}`);
         }
+        if (s2.sponsorUsage) {
+          lines.push("");
+          lines.push(`  ${c3.bold("Sponsor Quota")}`);
+          lines.push(`  ${formatUsageBar({
+            label: "Daily tokens",
+            used: s2.sponsorUsage.dailyTokensUsed,
+            total: s2.sponsorUsage.dailyTokensLimit,
+            resetAt: s2.sponsorUsage.dailyResetAt
+          })}`);
+          lines.push(`  ${formatUsageBar({
+            label: "Requests/min",
+            used: s2.sponsorUsage.requestsInWindow,
+            total: s2.sponsorUsage.requestsPerMinuteLimit,
+            resetAt: s2.sponsorUsage.requestWindowResetAt
+          })}`);
+          lines.push(`  ${formatUsageBar({
+            label: "Concurrency",
+            used: s2.sponsorUsage.activeConnections,
+            total: s2.sponsorUsage.maxConcurrent
+          })}`);
+          lines.push(`  ${c3.cyan("Blocked".padEnd(18))} ${s2.sponsorUsage.blockedRequests}`);
+        }
         const visibleModels = Array.from(s2.modelUsage.entries()).filter(([model]) => !INTERNAL_CAPABILITIES.has(model));
         if (visibleModels.length > 0) {
           lines.push("");
@@ -590439,15 +591084,52 @@ async function stepReview(config, rl, availableRows) {
   if (!result.confirmed || result.key === "cancel") return false;
   return result.key === "go_live";
 }
-async function showSponsorDashboard(config, projectDir2, rl, availableRows) {
+async function showSponsorDashboard(config, projectDir2, rl, availableRows, sponsorUsage) {
   const isPaused = config.status === "paused";
   const enabledEps = config.endpoints.filter((e2) => e2.enabled);
+  const dailyTokensLimit = sponsorUsage?.dailyTokensLimit || config.rateLimits.maxTokensPerDay;
+  const requestsPerMinuteLimit = sponsorUsage?.requestsPerMinuteLimit || config.rateLimits.maxRequestsPerMinute;
+  const maxConcurrent = sponsorUsage?.maxConcurrent || config.rateLimits.maxConcurrent;
+  const usageItems = [
+    {
+      key: "info_usage_daily",
+      label: `  ${formatUsageBar({
+        label: "Daily tokens",
+        used: sponsorUsage?.dailyTokensUsed ?? 0,
+        total: dailyTokensLimit,
+        resetAt: sponsorUsage?.dailyResetAt
+      })}`
+    },
+    {
+      key: "info_usage_rpm",
+      label: `  ${formatUsageBar({
+        label: "Requests/min",
+        used: sponsorUsage?.requestsInWindow ?? 0,
+        total: requestsPerMinuteLimit,
+        resetAt: sponsorUsage?.requestWindowResetAt
+      })}`
+    },
+    {
+      key: "info_usage_concurrent",
+      label: `  ${formatUsageBar({
+        label: "Concurrency",
+        used: sponsorUsage?.activeConnections ?? 0,
+        total: maxConcurrent
+      })}`
+    },
+    {
+      key: "info_usage_blocked",
+      label: `  Blocked: ${sponsorUsage?.blockedRequests ?? 0}`
+    }
+  ];
   const items = [
     { key: "hdr", label: "Sponsor Dashboard" },
     { key: "info_status", label: `  Status: ${isPaused ? "● PAUSED" : "● ACTIVE"}` },
     { key: "info_ep", label: `  Endpoints: ${enabledEps.map((e2) => e2.label).join(", ")}` },
     { key: "info_transport", label: `  Transport: ${[config.transport.cloudflared ? "Cloudflared" : "", config.transport.libp2p ? "libp2p" : ""].filter(Boolean).join(" + ")}` },
     { key: "info_limits", label: `  Limits: ${config.rateLimits.maxRequestsPerMinute} req/min, ${config.rateLimits.maxTokensPerDay.toLocaleString()} tokens/day` },
+    { key: "info_usage_hdr", label: "  Usage" },
+    ...usageItems,
     { key: "sep", label: "" },
     { key: "modify", label: "  [Modify Settings]" },
     { key: isPaused ? "resume" : "pause", label: isPaused ? "  [Resume Sponsorship]" : "  [Pause Sponsorship]" },
@@ -590457,7 +591139,7 @@ async function showSponsorDashboard(config, projectDir2, rl, availableRows) {
     items,
     title: "Sponsor Dashboard",
     rl,
-    skipKeys: ["hdr", "sep", "info_status", "info_ep", "info_transport", "info_limits"],
+    skipKeys: ["hdr", "sep", "info_status", "info_ep", "info_transport", "info_limits", "info_usage_hdr", "info_usage_daily", "info_usage_rpm", "info_usage_concurrent", "info_usage_blocked"],
     availableRows
   });
   if (!result.confirmed) return "close";
@@ -590522,6 +591204,7 @@ var init_sponsor_wizard = __esm({
     init_dist();
     init_tui_select();
     init_render();
+    init_usage_bars();
   }
 });
@@ -594808,6 +595491,17 @@ function stopSponsorHeartbeat() {
   }
   _lastRegisteredSponsorPayload = null;
 }
+function sponsorUsageFromGateway(gateway) {
+  if (!gateway) return null;
+  try {
+    if (typeof gateway.getSponsorUsageSnapshot === "function") {
+      return gateway.getSponsorUsageSnapshot();
+    }
+    return gateway.stats?.sponsorUsage ?? null;
+  } catch {
+    return null;
+  }
+}
 function registerCommandHelp2(items) {
   registerCommandHelp(items);
 }
@@ -598287,6 +598981,10 @@ The session corrections MUST become hard rules in the SKILL.md Rules section.`;
       return "handled";
     }
     case "cohere": {
+      if (arg === "status" || arg === "stats") {
+        await showCohereStatus(ctx3);
+        return "handled";
+      }
       await showCohereDashboard(ctx3);
       return "handled";
     }
@@ -599030,11 +599728,16 @@ sleep 1
           renderInfo("No active sponsorship. Run /sponsor to start.");
           return "handled";
         }
+        const dashboardGw = ctx3.getExposeGateway?.();
+        if (existingConfig.status === "active" && dashboardGw && "setSponsorLimits" in dashboardGw) {
+          dashboardGw.setSponsorLimits(existingConfig.rateLimits);
+        }
         const action = await showSponsorDashboard2(
           existingConfig,
           projectDir2,
           sponsorRl,
-          ctx3.availableContentRows?.()
+          ctx3.availableContentRows?.(),
+          sponsorUsageFromGateway(dashboardGw)
         );
         switch (action) {
           case "modify":
@@ -599061,6 +599764,9 @@ sleep 1
             existingConfig.status = "active";
             saveSponsorConfig2(projectDir2, existingConfig);
             const resumeGw = ctx3.getExposeGateway?.();
+            if (resumeGw && "setSponsorLimits" in resumeGw) {
+              resumeGw.setSponsorLimits(existingConfig.rateLimits);
+            }
             if (resumeGw?.tunnelUrl) {
               const resumePayload = {
                 name: existingConfig.header?.message || "Omnius Sponsor",
@@ -602969,15 +603675,65 @@ async function showHelpMenu(ctx3) {
     }
   }
 }
-async function showCohereDashboard(ctx3) {
-  const isActive = ctx3.isCohere?.() ?? false;
-  let stats = {
+function emptyCohereStats(isActive = false) {
+  return {
+    status: isActive ? "active" : "inactive",
+    active: isActive,
+    daemonPid: 0,
+    uptimeSec: 0,
+    lastQueryAt: 0,
+    queriesReceived: 0,
     queriesAnswered: 0,
+    queriesErrors: 0,
     queriesSent: 0,
-    insightsShared: 0,
-    peersConnected: 0
+    avgLatencyMs: 0,
+    bytesIn: 0,
+    bytesOut: 0,
+    modelsUsed: {},
+    peersServed: {},
+    allowedModels: null
   };
-  let modelList = [];
+}
+function numberField(value2) {
+  const n2 = Number(value2);
+  return Number.isFinite(n2) && n2 > 0 ? Math.floor(n2) : 0;
+}
+function mapNumberRecord(value2) {
+  if (!value2 || typeof value2 !== "object" || Array.isArray(value2)) return {};
+  const out = {};
+  for (const [key, raw] of Object.entries(value2)) {
+    out[key] = numberField(raw);
+  }
+  return out;
+}
+function parseCohereStatsOutput(output, isActive = false) {
+  try {
+    const parsed = JSON.parse(output);
+    const active = typeof parsed.active === "boolean" ? parsed.active : String(parsed.status ?? "").toLowerCase() === "active";
+    return {
+      status: active ? "active" : "inactive",
+      active,
+      daemonPid: numberField(parsed.daemonPid),
+      uptimeSec: numberField(parsed.uptimeSec),
+      lastQueryAt: numberField(parsed.lastQueryAt),
+      queriesReceived: numberField(parsed.queriesReceived),
+      queriesAnswered: numberField(parsed.queriesAnswered),
+      queriesErrors: numberField(parsed.queriesErrors),
+      queriesSent: numberField(parsed.queriesSent),
+      avgLatencyMs: numberField(parsed.avgLatencyMs),
+      bytesIn: numberField(parsed.bytesIn),
+      bytesOut: numberField(parsed.bytesOut),
+      modelsUsed: mapNumberRecord(parsed.modelsUsed),
+      peersServed: mapNumberRecord(parsed.peersServed),
+      allowedModels: Array.isArray(parsed.allowedModels) ? parsed.allowedModels.map(String) : null
+    };
+  } catch {
+    return emptyCohereStats(isActive);
+  }
+}
+async function fetchCohereDashboardState(ctx3) {
+  const isActive = ctx3.isCohere?.() ?? false;
+  const state = { stats: emptyCohereStats(isActive), modelList: [] };
   try {
     const nexus = new NexusTool(ctx3.repoRoot);
     try {
@@ -602989,29 +603745,52 @@ async function showCohereDashboard(ctx3) {
     } catch {
     }
     try {
-      const r2 = await nexus.execute({ action: "cohere_stats" });
-      if (r2.success) {
-        try {
-          const d2 = JSON.parse(r2.output);
-          Object.assign(stats, d2);
-        } catch {
-        }
-      }
+      const r2 = await nexus.execute({ action: "cohere_stats", format: "json" });
+      if (r2.success) state.stats = parseCohereStatsOutput(r2.output, isActive);
     } catch {
     }
     try {
       const r2 = await nexus.execute({ action: "cohere_list_models" });
       if (r2.success) {
         try {
-          modelList = JSON.parse(r2.output).models || [];
+          state.modelList = JSON.parse(r2.output).models || [];
         } catch {
-          modelList = r2.output.split("\n").filter((l2) => l2.trim());
+          state.modelList = r2.output.split("\n").map((l2) => l2.trim()).filter(Boolean);
         }
       }
     } catch {
     }
   } catch {
   }
+  return state;
+}
+function cohereStatusLines(stats, modelList) {
+  const modelEntries = Object.entries(stats.modelsUsed).sort((a2, b) => b[1] - a2[1]);
+  const peerEntries = Object.entries(stats.peersServed).sort((a2, b) => b[1] - a2[1]);
+  const uptime2 = stats.uptimeSec < 60 ? `${stats.uptimeSec}s` : stats.uptimeSec < 3600 ? `${Math.floor(stats.uptimeSec / 60)}m ${stats.uptimeSec % 60}s` : `${Math.floor(stats.uptimeSec / 3600)}h ${Math.floor(stats.uptimeSec % 3600 / 60)}m`;
+  return [
+    c3.bold("COHERE Status"),
+    `Status: ${stats.active ? c3.green("ACTIVE") : c3.dim("inactive")}`,
+    `Daemon: ${stats.daemonPid ? `pid ${stats.daemonPid}` : "not connected"} · uptime ${uptime2}`,
+    `Last query: ${stats.lastQueryAt ? new Date(stats.lastQueryAt).toISOString() : "never"}`,
+    "",
+    formatUsageBar({ label: "Answered", used: stats.queriesAnswered, total: Math.max(1, stats.queriesReceived), width: 18 }),
+    formatUsageBar({ label: "Errors", used: stats.queriesErrors, total: Math.max(1, stats.queriesReceived), width: 18 }),
+    `Sent out: ${stats.queriesSent} · avg latency ${stats.avgLatencyMs}ms`,
+    `Data: in ${formatFileSize(stats.bytesIn)} · out ${formatFileSize(stats.bytesOut)}`,
+    "",
+    `Models exposed: ${modelList.length}`,
+    `Allowlist: ${stats.allowedModels ? stats.allowedModels.join(", ") || "(empty)" : "all downloaded models"}`,
+    `Top models: ${modelEntries.length ? modelEntries.slice(0, 5).map(([m2, n2]) => `${m2} (${n2})`).join(", ") : "none yet"}`,
+    `Peers served: ${peerEntries.length ? peerEntries.slice(0, 5).map(([p2, n2]) => `${p2.slice(0, 20)} (${n2})`).join(", ") : "none yet"}`
+  ];
+}
+async function showCohereStatus(ctx3) {
+  const { stats, modelList } = await fetchCohereDashboardState(ctx3);
+  safeLog(cohereStatusLines(stats, modelList).join("\n"));
+}
+async function showCohereDashboard(ctx3) {
+  let { stats, modelList } = await fetchCohereDashboardState(ctx3);
   while (true) {
     const currentActive = ctx3.isCohere?.() ?? false;
     const toggleLabel = currentActive ? "Disable COHERE" : "Enable COHERE";
@@ -603028,7 +603807,7 @@ async function showCohereDashboard(ctx3) {
       {
         key: "stats",
         label: "Network Stats",
-        detail: `${stats.queriesAnswered} answered · ${stats.queriesSent} sent · ${stats.insightsShared} shared`
+        detail: `${stats.queriesAnswered} answered · ${stats.queriesSent} sent · ${stats.queriesErrors} errors`
       },
       {
         key: "identity",
@@ -603081,11 +603860,11 @@ async function showCohereDashboard(ctx3) {
           },
           {
             key: "insights",
-            label: `Insights shared: ${c3.bold(String(stats.insightsShared || 0))}`
+            label: `Avg latency: ${c3.bold(String(stats.avgLatencyMs || 0))}ms`
           },
           {
             key: "peers",
-            label: `Peers connected: ${c3.bold(String(stats.peersConnected || 0))}`
+            label: `Peers served: ${c3.bold(String(Object.keys(stats.peersServed || {}).length))}`
           },
           { key: "hdr2", label: selectColors.dim("─── Actions ───") },
           {
@@ -603103,17 +603882,9 @@ async function showCohereDashboard(ctx3) {
           availableRows: ctx3.availableContentRows?.()
         });
         if (statResult.key === "refresh") {
-          try {
-            const nexus = new NexusTool(ctx3.repoRoot);
-            const r2 = await nexus.execute({ action: "cohere_stats" });
-            if (r2.success) {
-              try {
-                Object.assign(stats, JSON.parse(r2.output));
-              } catch {
-              }
-            }
-          } catch {
-          }
+          const refreshed = await fetchCohereDashboardState(ctx3);
+          stats = refreshed.stats;
+          modelList = refreshed.modelList;
         }
         continue;
       }
@@ -607479,6 +608250,7 @@ var init_commands = __esm({
     init_listen();
     init_dist();
     init_tui_select();
+    init_usage_bars();
     init_overlay_lock();
     init_drop_panel();
     init_memory_menu();
@@ -617189,6 +617961,13 @@ function senderKey2(entry) {
   if (entry.role === "assistant") return entry.username || entry.speaker || "assistant";
   return String(entry.fromUserId || entry.username || entry.firstName || senderLabel(entry));
 }
+function speakerRole(entry) {
+  if (entry.role === "assistant") return "agent_self";
+  return entry.isBot ? "participant_bot" : "participant_human";
+}
+function identityBoundary(entry) {
+  return speakerRole(entry) === "agent_self" ? "this message is authored by the Telegram agent itself" : "this message is authored by another Telegram participant; first-person claims belong to that participant, not the agent";
+}
 function scopeFor(entry, options2) {
   const chatType = entry.chatType || options2.chatType || "unknown";
   return {
@@ -617202,7 +617981,7 @@ function senderFor(entry) {
     id: senderKey2(entry),
     username: entry.username,
     displayName: senderLabel(entry),
-    isBot: entry.role === "assistant"
+    isBot: entry.role === "assistant" || entry.isBot === true
   };
 }
 function messageIdFor(entry, sessionKey) {
@@ -617241,7 +618020,11 @@ function contentFor(entry, sessionKey, options2) {
     `message_id: ${messageIdFor(entry, sessionKey)}`,
     entry.messageThreadId != null ? `thread_id: ${entry.messageThreadId}` : "",
     entry.replyToMessageId != null ? `reply_to_message_id: ${entry.replyToMessageId}` : "",
+    `actor_key: ${senderKey2(entry)}`,
     `speaker: ${senderLabel(entry)}`,
+    `speaker_role: ${speakerRole(entry)}`,
+    `identity_boundary: ${identityBoundary(entry)}`,
+    entry.replyContext?.sender ? `reply_sender: ${entry.replyContext.sender.username || entry.replyContext.sender.firstName || entry.replyContext.sender.id || "unknown"} [${entry.replyContext.sender.isBot ? "participant_bot" : "participant_human"}]` : "",
     entry.mode ? `mode: ${entry.mode}` : "",
     entry.mediaSummary ? `media: ${compact(entry.mediaSummary, 260)}` : "",
     "",
@@ -617265,7 +618048,11 @@ function metadataFor(entry, sessionKey, options2) {
       username: entry.username,
       firstName: entry.firstName,
       fromUserId: entry.fromUserId,
+      isBot: entry.isBot,
       speaker: senderLabel(entry),
+      actorKey: senderKey2(entry),
+      speakerRole: speakerRole(entry),
+      identityBoundary: identityBoundary(entry),
       mediaSummary: entry.mediaSummary
     }
   };
@@ -617516,12 +618303,14 @@ function episodeLine(episode) {
   const meta = episode.metadata;
   const telegram = meta?.telegram;
   const speaker = clean4(telegram?.speaker || telegram?.username || "unknown", 80);
+  const role = clean4(telegram?.speakerRole || "participant_human", 40);
   const messageId = telegram?.messageId == null ? "unknown" : String(telegram.messageId);
   const replyTo = telegram?.replyToMessageId == null ? "" : ` reply_to=${telegram.replyToMessageId}`;
   return [
     `episode_id=${episode.id}`,
     `message_id=${messageId}${replyTo}`,
     `speaker=${speaker}`,
+    `speaker_role=${role}`,
     `modality=${episode.modality}`,
     `content=${clean4(episode.content, 700)}`
   ].join(" | ");
@@ -617542,6 +618331,9 @@ function buildTelegramReflectionExtractionPrompt(options2) {
     "- Use only the scoped Telegram corpus, graph nodes, graph edges, and source anchors below.",
     "- Preserve message_id and episode_id anchors on every item when possible.",
     "- Do not infer identity from a face, voice, or name unless the corpus explicitly says it.",
+    "- speaker_role=agent_self is the Telegram agent; speaker_role=participant_human or participant_bot is another chat participant.",
+    "- Do not assign participant first-person claims, preferences, names, or self-descriptions to the agent/self unless the source episode has speaker_role=agent_self.",
+    "- Replies between non-agent participants are social context and relationship evidence, not direct agent self-reflection.",
     "- Private DM followups may be proposed but must not be framed as already sent.",
     "- same_group followups must be concise, low-intrusion, and anchored to a source message id.",
     "- If a category has no evidence, return an empty array for that category.",
@@ -617959,6 +618751,8 @@ function formatTelegramSocialStateContext(state, input) {
   const replyKey = input.replySender ? telegramSocialActorKey(input.replySender) : void 0;
   const thread = state.threads[telegramSocialThreadKey(input)];
   const participant = state.participants[senderKey3];
+  const senderIdentity = selfKey && senderKey3 === selfKey ? "agent_self" : "participant";
+  const replyIdentity = replyKey ? selfKey && replyKey === selfKey ? "agent_self" : "participant" : "none";
   const relevantKeys = new Set([senderKey3, selfKey, replyKey].filter(Boolean));
   const edges = state.relationships.filter((edge) => relevantKeys.has(edge.fromKey) || relevantKeys.has(edge.toKey)).sort((a2, b) => b.lastSeenAt - a2.lastSeenAt).slice(0, limit);
   const outcomes = state.outcomes.filter((outcome) => outcome.senderKey === senderKey3 || outcome.chatId === String(input.chatId)).sort((a2, b) => b.ts - a2.ts).slice(0, limit);
@@ -617967,6 +618761,8 @@ function formatTelegramSocialStateContext(state, input) {
   const preferences = preferenceLines(state.preferences[senderKey3]);
   return [
     "### Telegram Structured Social State",
+    selfKey ? `Agent self node: ${selfKey}` : "Agent self node: unknown",
+    `Identity boundary: the agent is the self node only. Current actor ${senderKey3} is ${senderIdentity}; reply target ${replyKey ?? "none"} is ${replyIdentity}. Participant first-person claims belong to their actor node, not the agent, unless that actor is the self node.`,
     `Current actor node: ${senderKey3} [${participant?.actorKind || telegramSocialActorKind(input)}] messages=${participant?.messageCount ?? 0}${participant?.lastText ? ` last=${jsonLine(participant.lastText, 140)}` : ""}`,
     thread ? `Active channel/thread: ${thread.key}; messages=${thread.messageCount}; participants=${thread.participantKeys.slice(-8).join(", ") || "none"}; last_outcomes=${thread.lastOutcomeIds.slice(-5).join(", ") || "none"}` : "",
     preferences.length ? `Relevant preference vector for ${senderKey3}:
@@ -624624,6 +625420,7 @@ ${lines.join("\n")}`);
           "Classify the live scenario by inference from the full context. Do not use a fixed taxonomy, keyword list, or preset scenario enum.",
           "Create a situation-specific scenario_id and scenario_label, then summarize the active state loop that should govern the later attention decision.",
           "Use the persona docs below as binding behavioral guidance.",
+          "Maintain the Telegram identity boundary: the agent is only the bot/self actor. Other users and peer bots replying to each other are participants; their first-person claims are not the agent's identity or self-reflection.",
           "Return JSON only. No markdown. No <think> tags.",
           "",
           'Schema: {"silent_disposition":"what happens silently with this message","mental_note":"concise observation of the turn","memory_note":"what scoped memory should retain or connect","relationship_note":"relationship/thread implication","procedure_note":"active tree/branch/abort implication","voice_note":"final voice implication if a reply happens","scenario_note":"identified scenario and transition state","scenario_id":"dynamic inferred scenario id","scenario_label":"human readable dynamic scenario label","scenario_confidence":0.0-1.0,"scenario_objective":"current scenario objective","scenario_state_loop":"state loop to maintain until transition"}',