omnius 1.0.146 → 1.0.148

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -13138,6 +13138,27 @@ async function handleCmd(cmd) {
13138
13138
  var _csAvgLatency = _cohereStats.queriesAnswered > 0 ? Math.round(_cohereStats.totalLatencyMs / _cohereStats.queriesAnswered) : 0;
13139
13139
  var _csModels = Object.entries(_cohereStats.modelsUsed).sort(function(a, b) { return b[1] - a[1]; });
13140
13140
  var _csPeers = Object.entries(_cohereStats.peersServed).sort(function(a, b) { return b[1] - a[1]; });
13141
+ var _csSnapshot = {
13142
+ status: cohereActive ? 'active' : 'inactive',
13143
+ active: cohereActive,
13144
+ daemonPid: process.pid,
13145
+ uptimeSec: _csUptime,
13146
+ lastQueryAt: _cohereStats.lastQueryAt || 0,
13147
+ queriesReceived: _cohereStats.queriesReceived,
13148
+ queriesAnswered: _cohereStats.queriesAnswered,
13149
+ queriesErrors: _cohereStats.queriesErrors,
13150
+ queriesSent: _cohereStats.queriesSent,
13151
+ avgLatencyMs: _csAvgLatency,
13152
+ bytesIn: _cohereStats.bytesIn,
13153
+ bytesOut: _cohereStats.bytesOut,
13154
+ modelsUsed: _cohereStats.modelsUsed,
13155
+ peersServed: _cohereStats.peersServed,
13156
+ allowedModels: _cohereAllowedModels ? [..._cohereAllowedModels] : null
13157
+ };
13158
+ if (args.format === 'json' || args.json === true || args.json === 'true' || args.json === '1') {
13159
+ writeResp(id, { ok: true, output: JSON.stringify(_csSnapshot) });
13160
+ break;
13161
+ }
13141
13162
  var _csLines = [
13142
13163
  '═══ COHERE Network Stats ═══',
13143
13164
  '',
@@ -16549,6 +16570,14 @@ process.on('SIGINT', () => process.emit('SIGTERM'));
16549
16570
  max_tokens: {
16550
16571
  type: "string",
16551
16572
  description: "For remote_infer: maximum tokens to generate (e.g. '4096'). Default: 4096"
16573
+ },
16574
+ format: {
16575
+ type: "string",
16576
+ description: "For cohere_stats: set to 'json' for structured stats"
16577
+ },
16578
+ json: {
16579
+ type: "string",
16580
+ description: "For cohere_stats: set to '1' for structured stats"
16552
16581
  }
16553
16582
  },
16554
16583
  required: ["action"],
@@ -16686,7 +16715,7 @@ process.on('SIGINT', () => process.emit('SIGTERM'));
16686
16715
  result = await this.sendDaemonCmd("cohere_disable", {});
16687
16716
  break;
16688
16717
  case "cohere_stats":
16689
- result = await this.sendDaemonCmd("cohere_stats", {});
16718
+ result = await this.sendDaemonCmd("cohere_stats", { format: String(args.format ?? ""), json: String(args.json ?? "") });
16690
16719
  break;
16691
16720
  case "cohere_allow_model":
16692
16721
  result = await this.sendDaemonCmd("cohere_allow_model", { model: String(args.model ?? "") });
@@ -529012,38 +529041,136 @@ function inferHomeFromProcUid(pid) {
529012
529041
  }
529013
529042
  return null;
529014
529043
  }
529044
/**
 * Heuristic scan of `/proc` that reports whether some *other* omnius Node
 * process is the parent of a running `ollama serve` process.
 *
 * A "peer" is any numeric `/proc` entry, other than this process and its
 * parent, whose command line contains "node" and an `omnius` path segment.
 * The function answers true as soon as it finds a process whose command line
 * contains "ollama", has an argument that is exactly "serve", and whose
 * `PPid` (from `/proc/<pid>/status`) is one of those peers.
 *
 * @returns {boolean} false when `/proc` is missing or unreadable, when no peer
 *   is found, or when no peer owns an `ollama serve` child.
 */
function detectPeerOmniusOllamaPool() {
  if (!isDirectory("/proc"))
    return false;

  let procEntries;
  try {
    procEntries = readdirSync21("/proc", { withFileTypes: true })
      .filter((dirent) => dirent.isDirectory() && /^\d+$/.test(dirent.name))
      .map((dirent) => ({ name: dirent.name }));
  } catch {
    return false;
  }

  const ownPid = String(process.pid);
  const parentPid = String(process.ppid ?? "");
  const omniusPathPattern = /[/\\]omnius[/\\]dist[/\\]index\.js|[/\\]omnius[/\\]/i;

  // Pass 1: collect the pids of other omnius Node processes.
  const peerNodePids = new Set();
  for (const { name: pid } of procEntries) {
    if (pid === ownPid || pid === parentPid)
      continue;
    try {
      const cmdline = readFileSync50(`/proc/${pid}/cmdline`, "utf8");
      if (cmdline.includes("node") && omniusPathPattern.test(cmdline))
        peerNodePids.add(pid);
    } catch {
      // Process vanished or is not readable; ignore it.
    }
  }
  if (peerNodePids.size === 0)
    return false;

  // Pass 2: look for an `ollama serve` whose parent is one of those peers.
  for (const { name: pid } of procEntries) {
    try {
      const cmdline = readFileSync50(`/proc/${pid}/cmdline`, "utf8");
      // /proc cmdline arguments are NUL-separated.
      const isOllamaServe = cmdline.includes("ollama") && cmdline.split("\0").includes("serve");
      if (!isOllamaServe)
        continue;
      const status = readFileSync50(`/proc/${pid}/status`, "utf8");
      const parentOfOllama = status.match(/^PPid:\s+(\d+)/m)?.[1];
      if (parentOfOllama && peerNodePids.has(parentOfOllama))
        return true;
    } catch {
      // Process vanished or is not readable; ignore it.
    }
  }
  return false;
}
529015
529087
  async function detectGpus() {
529016
529088
  if (_nvidiaSmiAvailable === false)
529017
529089
  return [];
529018
529090
  return new Promise((resolve56) => {
529019
- exec2("nvidia-smi --query-gpu=index,uuid,name,memory.total,memory.free,utilization.gpu --format=csv,noheader,nounits 2>/dev/null", { encoding: "utf8", timeout: 3e3 }, (err, stdout) => {
529091
+ const queryFields = "index,uuid,name,memory.total,memory.free,utilization.gpu,compute_cap";
529092
+ exec2(`nvidia-smi --query-gpu=${queryFields} --format=csv,noheader,nounits 2>/dev/null`, { encoding: "utf8", timeout: 3e3 }, (err, stdout) => {
529020
529093
  if (err) {
529021
- _nvidiaSmiAvailable = false;
529022
- resolve56([]);
529094
+ exec2("nvidia-smi --query-gpu=index,uuid,name,memory.total,memory.free,utilization.gpu --format=csv,noheader,nounits 2>/dev/null", { encoding: "utf8", timeout: 3e3 }, (err2, stdout2) => {
529095
+ if (err2) {
529096
+ _nvidiaSmiAvailable = false;
529097
+ resolve56([]);
529098
+ return;
529099
+ }
529100
+ _nvidiaSmiAvailable = true;
529101
+ resolve56(parseGpuQueryOutput(
529102
+ stdout2,
529103
+ /* hasComputeCap */
529104
+ false
529105
+ ));
529106
+ });
529023
529107
  return;
529024
529108
  }
529025
529109
  _nvidiaSmiAvailable = true;
529026
- const gpus = [];
529027
- for (const line of stdout.split("\n")) {
529028
- const parts = line.split(",").map((s2) => s2.trim());
529029
- if (parts.length < 6)
529030
- continue;
529031
- const idx = Number(parts[0]);
529032
- if (!Number.isFinite(idx))
529033
- continue;
529034
- gpus.push({
529035
- index: idx,
529036
- uuid: parts[1] ?? "",
529037
- name: parts[2] ?? "",
529038
- vramTotalMB: Number(parts[3]) || 0,
529039
- vramFreeMB: Number(parts[4]) || 0,
529040
- utilization: Number(parts[5]) || 0
529041
- });
529042
- }
529043
- resolve56(gpus);
529110
+ resolve56(parseGpuQueryOutput(
529111
+ stdout,
529112
+ /* hasComputeCap */
529113
+ true
529114
+ ));
529044
529115
  });
529045
529116
  });
529046
529117
  }
529118
/**
 * Parses the CSV produced by
 * `nvidia-smi --query-gpu=... --format=csv,noheader,nounits`.
 *
 * Expected column order: index, uuid, name, memory.total, memory.free,
 * utilization.gpu and, when `hasComputeCap` is true, compute_cap.
 *
 * Rows with too few columns, or with a blank / non-numeric index, are
 * skipped. A blank or non-numeric compute_cap leaves `computeCapability`
 * unset ("unknown") instead of recording 0: `Number("")` is 0, which would
 * otherwise invent GPU 0 from a blank row and mark a GPU with an empty
 * compute_cap column as capability 0.
 *
 * @param {string} stdout Raw nvidia-smi output.
 * @param {boolean} hasComputeCap Whether the 7th column (compute_cap) was queried.
 * @returns {Array<{index: number, uuid: string, name: string, vramTotalMB: number,
 *   vramFreeMB: number, utilization: number, computeCapability?: number}>}
 */
function parseGpuQueryOutput(stdout, hasComputeCap) {
  const minFields = hasComputeCap ? 7 : 6;
  const gpus = [];
  for (const line of stdout.split("\n")) {
    const parts = line.split(",").map((field) => field.trim());
    if (parts.length < minFields)
      continue;
    const [rawIndex, uuid, name, rawTotal, rawFree, rawUtil, rawCap] = parts;
    // Guard the empty string explicitly: Number("") === 0 would pass isFinite.
    if (rawIndex === "")
      continue;
    const index = Number(rawIndex);
    if (!Number.isFinite(index))
      continue;
    const info = {
      index,
      uuid,
      name,
      vramTotalMB: Number(rawTotal) || 0,
      vramFreeMB: Number(rawFree) || 0,
      utilization: Number(rawUtil) || 0
    };
    if (hasComputeCap && rawCap !== "") {
      const cap = Number(rawCap);
      if (Number.isFinite(cap))
        info.computeCapability = cap;
    }
    gpus.push(info);
  }
  return gpus;
}
529145
/**
 * Minimum total VRAM (MiB) a GPU must have to be considered for LLM serving.
 *
 * @returns {number} `OMNIUS_OLLAMA_MIN_GPU_MB` when it parses to a finite,
 *   positive number; otherwise `DEFAULT_MIN_GPU_VRAM_MB`.
 */
function resolveMinGpuVramMB() {
  const override = Number(process.env["OMNIUS_OLLAMA_MIN_GPU_MB"]);
  if (Number.isFinite(override) && override > 0) {
    return override;
  }
  return DEFAULT_MIN_GPU_VRAM_MB;
}
529149
/**
 * Minimum CUDA compute capability a GPU must report to be considered for
 * LLM serving.
 *
 * @returns {number} `OMNIUS_OLLAMA_MIN_COMPUTE_CAP` when it parses to a
 *   finite, positive number; otherwise `DEFAULT_MIN_COMPUTE_CAPABILITY`.
 */
function resolveMinComputeCapability() {
  const override = Number(process.env["OMNIUS_OLLAMA_MIN_COMPUTE_CAP"]);
  if (Number.isFinite(override) && override > 0) {
    return override;
  }
  return DEFAULT_MIN_COMPUTE_CAPABILITY;
}
529153
/**
 * Decides whether a detected GPU clears the VRAM and compute-capability
 * floors.
 *
 * @param {{vramTotalMB: number, computeCapability?: number}} gpu GPU record.
 * @param {{minVramMB?: number, minComputeCap?: number}} [thresholds] Explicit
 *   floors; any missing one is resolved via `resolveMinGpuVramMB()` /
 *   `resolveMinComputeCapability()`.
 * @returns {boolean} false when total VRAM is below the floor, or when a
 *   compute capability is known and below the floor. An unknown
 *   (`undefined`) compute capability does not disqualify the GPU.
 */
function isCapableForLLM(gpu, thresholds = {}) {
  const vramFloorMB = thresholds.minVramMB ?? resolveMinGpuVramMB();
  const computeCapFloor = thresholds.minComputeCap ?? resolveMinComputeCapability();
  const rejected =
    gpu.vramTotalMB < vramFloorMB ||
    (gpu.computeCapability !== void 0 && gpu.computeCapability < computeCapFloor);
  return !rejected;
}
529162
/**
 * Keeps only the GPUs that `isCapableForLLM` accepts.
 *
 * @param {Array<object>} gpus Detected GPU records.
 * @param {{minVramMB?: number, minComputeCap?: number}} [thresholds] Passed
 *   through unchanged to `isCapableForLLM` for every GPU.
 * @returns {Array<object>} New array with the capable GPUs, in input order.
 */
function filterCapableGpus(gpus, thresholds) {
  const isCapable = (gpu) => isCapableForLLM(gpu, thresholds);
  return gpus.filter(isCapable);
}
529165
/**
 * Maps the smallest free-VRAM figure (MiB) to a recommended number of
 * parallel requests per instance.
 *
 * Ladder: >= 60 GiB -> 8, >= 40 GiB -> 4, >= 24 GiB -> 2, anything else
 * (including NaN) -> 1.
 *
 * @param {number} minFreeMB Free VRAM in MiB.
 * @returns {number} Recommended parallelism (1, 2, 4 or 8).
 */
function recommendMaxParallelFromVram(minFreeMB) {
  // [floor in GiB, parallel slots], highest tier first.
  const ladder = [
    [60, 8],
    [40, 4],
    [24, 2]
  ];
  for (const [floorGiB, parallel] of ladder) {
    if (minFreeMB >= floorGiB * 1024)
      return parallel;
  }
  return 1;
}
529047
529174
  async function getHardwareSnapshot() {
529048
529175
  const { totalmem: totalmem8, freemem: freemem7, cpus: cpus5 } = await import("node:os");
529049
529176
  const gpus = await detectGpus();
@@ -529127,10 +529254,15 @@ async function findFreePort(start2) {
529127
529254
  }
529128
529255
  function resolveDefaultPoolConfig() {
529129
529256
  const baseInstanceUrl = process.env["OMNIUS_OLLAMA_BASE_URL"] || process.env["OLLAMA_HOST"]?.replace(/^([^:/]+:[0-9]+)$/, "http://$1") || "http://127.0.0.1:11434";
529130
- const maxParallelPerInstance = Number(process.env["OMNIUS_OLLAMA_MAX_PARALLEL"]) || 1;
529131
- const maxSpawnedInstances = Number(process.env["OMNIUS_OLLAMA_MAX_INSTANCES"]) || 0;
529257
+ const maxParallelExplicit = process.env["OMNIUS_OLLAMA_MAX_PARALLEL"] !== void 0;
529258
+ const maxParallelPerInstance = maxParallelExplicit ? Number(process.env["OMNIUS_OLLAMA_MAX_PARALLEL"]) || 1 : 1;
529259
+ const autoTuneMaxParallel = !maxParallelExplicit;
529260
+ const gpuPlacementExplicit = process.env["OMNIUS_OLLAMA_GPU_PLACEMENT"] !== void 0;
529261
+ const maxInstancesExplicit = process.env["OMNIUS_OLLAMA_MAX_INSTANCES"] !== void 0;
529262
+ const peerPoolActive = !gpuPlacementExplicit && !maxInstancesExplicit && detectPeerOmniusOllamaPool();
529263
+ const maxSpawnedInstances = maxInstancesExplicit ? Number(process.env["OMNIUS_OLLAMA_MAX_INSTANCES"]) || 0 : peerPoolActive ? 1 : 0;
529132
529264
  const targetGpuInstances = Number(process.env["OMNIUS_OLLAMA_TARGET_GPU_INSTANCES"]) || 0;
529133
- const gpuPlacementRaw = (process.env["OMNIUS_OLLAMA_GPU_PLACEMENT"] ?? "auto").toLowerCase();
529265
+ const gpuPlacementRaw = (process.env["OMNIUS_OLLAMA_GPU_PLACEMENT"] ?? (peerPoolActive ? "elastic" : "auto")).toLowerCase();
529134
529266
  const gpuPlacement = gpuPlacementRaw === "dedicated" || gpuPlacementRaw === "elastic" || gpuPlacementRaw === "auto" ? gpuPlacementRaw : "auto";
529135
529267
  const idleMs = Number(process.env["OMNIUS_OLLAMA_IDLE_MS"]) || 3 * 60 * 60 * 1e3;
529136
529268
  const reaperIntervalMs = Number(process.env["OMNIUS_OLLAMA_REAPER_MS"]) || 3e4;
@@ -529155,7 +529287,8 @@ function resolveDefaultPoolConfig() {
529155
529287
  ollamaBinary,
529156
529288
  spawnReadyTimeoutSec,
529157
529289
  networkRxBudgetBytesPerSec,
529158
- networkTxBudgetBytesPerSec
529290
+ networkTxBudgetBytesPerSec,
529291
+ autoTuneMaxParallel
529159
529292
  };
529160
529293
  }
529161
529294
  function parseNullableNumber(value2) {
@@ -529223,11 +529356,13 @@ function setOllamaPool(pool3) {
529223
529356
  _poolByBaseUrl.set(pool3.statusConfig().baseInstanceUrl, pool3);
529224
529357
  }
529225
529358
  }
529226
- var _nvidiaSmiAvailable, _lastNetworkSnapshot, OllamaInstance, realInstanceSpawner, _gpuCursor, OllamaPool, _poolSingleton, _poolByBaseUrl;
529359
+ var _nvidiaSmiAvailable, DEFAULT_MIN_GPU_VRAM_MB, DEFAULT_MIN_COMPUTE_CAPABILITY, _lastNetworkSnapshot, OllamaInstance, realInstanceSpawner, _gpuCursor, OllamaPool, _poolSingleton, _poolByBaseUrl;
529227
529360
  var init_ollama_pool = __esm({
529228
529361
  "packages/orchestrator/dist/ollama-pool.js"() {
529229
529362
  "use strict";
529230
529363
  _nvidiaSmiAvailable = null;
529364
+ DEFAULT_MIN_GPU_VRAM_MB = 16 * 1024;
529365
+ DEFAULT_MIN_COMPUTE_CAPABILITY = 7;
529231
529366
  _lastNetworkSnapshot = null;
529232
529367
  OllamaInstance = class {
529233
529368
  state;
@@ -529281,6 +529416,7 @@ var init_ollama_pool = __esm({
529281
529416
  env2["OLLAMA_MODELS"] = config.sharedModelStore;
529282
529417
  }
529283
529418
  env2["OLLAMA_NUM_PARALLEL"] = String(config.maxParallelPerInstance);
529419
+ env2["OLLAMA_KEEP_ALIVE"] = process.env["OMNIUS_OLLAMA_SPAWN_KEEP_ALIVE"] ?? "-1";
529284
529420
  if (gpuUuid) {
529285
529421
  env2["CUDA_VISIBLE_DEVICES"] = gpuUuid;
529286
529422
  env2["GPU_DEVICE_ORDINAL"] = gpuIndex === null ? "" : String(gpuIndex);
@@ -529334,6 +529470,20 @@ var init_ollama_pool = __esm({
529334
529470
  dedicatedGpuPoolActive = false;
529335
529471
  activePlacementMode = "constrained";
529336
529472
  gpuCache = null;
529473
+ /**
529474
+ * Set once after the first capability-filtered GPU detection. Prevents the
529475
+ * auto-tune from oscillating maxParallelPerInstance as free VRAM fluctuates
529476
+ * during normal inference.
529477
+ */
529478
+ _autoTuned = false;
529479
+ /** UUIDs we've already emitted gpu-excluded for. Prevents log spam. */
529480
+ _excludedGpusReported = /* @__PURE__ */ new Set();
529481
+ /**
529482
+ * Cached model footprint in MiB (model name → estimated VRAM required).
529483
+ * Populated lazily via /api/show on the base instance. null sentinel means
529484
+ * "we tried but failed" so we don't re-probe in a tight loop.
529485
+ */
529486
+ _modelVramEstimateMB = /* @__PURE__ */ new Map();
529337
529487
  slotWaiters = [];
529338
529488
  /**
529339
529489
  * Agent → preferred instance id. Set whenever an acquire resolves an
@@ -529533,7 +529683,18 @@ var init_ollama_pool = __esm({
529533
529683
  const freedPick = this.pickInstance({ model });
529534
529684
  if (freedPick)
529535
529685
  return freedPick;
529536
- const gpu = this.pickGpuForSpawn(gpus);
529686
+ const vramNeededMB = await this.estimateModelVramMB(model);
529687
+ const capable = this.gpusWithCapacityForModel(gpus, vramNeededMB);
529688
+ if (capable.length === 0 && vramNeededMB !== null) {
529689
+ this.emit("spawn-skipped", {
529690
+ reason: "insufficient-vram",
529691
+ model,
529692
+ vramNeededMB,
529693
+ gpuFreeMBs: gpus.map((g) => g.vramFreeMB)
529694
+ });
529695
+ return null;
529696
+ }
529697
+ const gpu = this.pickGpuForSpawn(capable.length > 0 ? capable : gpus);
529537
529698
  return this.spawnInstance(model, gpu);
529538
529699
  });
529539
529700
  }
@@ -529544,8 +529705,20 @@ var init_ollama_pool = __esm({
529544
529705
  if (!this.canSpawnWithSharedModelStore(model))
529545
529706
  return;
529546
529707
  const target = this.dedicatedTargetCount(gpus);
529708
+ const vramNeededMB = await this.estimateModelVramMB(model);
529547
529709
  while (this.instances.filter((i2) => i2.state.poolOwned).length < target) {
529548
- const gpu = this.pickGpuForSpawn(gpus);
529710
+ const candidates = this.gpusWithCapacityForModel(gpus, vramNeededMB);
529711
+ const pool3 = candidates.length > 0 ? candidates : vramNeededMB === null ? gpus : [];
529712
+ if (pool3.length === 0) {
529713
+ this.emit("spawn-skipped", {
529714
+ reason: "insufficient-vram",
529715
+ model,
529716
+ vramNeededMB,
529717
+ gpuFreeMBs: gpus.map((g) => g.vramFreeMB)
529718
+ });
529719
+ break;
529720
+ }
529721
+ const gpu = this.pickGpuForSpawn(pool3);
529549
529722
  if (!gpu)
529550
529723
  break;
529551
529724
  const inst = await this.spawnInstance(model, gpu);
@@ -529603,9 +529776,59 @@ var init_ollama_pool = __esm({
529603
529776
  if (this.gpuCache && now - this.gpuCache.takenAtMs <= maxAgeMs) {
529604
529777
  return this.gpuCache.gpus;
529605
529778
  }
529606
- const gpus = await this.gpuDetector();
529607
- this.gpuCache = { gpus, takenAtMs: now };
529608
- return gpus;
529779
+ const rawGpus = await this.gpuDetector();
529780
+ const filtered = filterCapableGpus(rawGpus);
529781
+ const filteredUuids = new Set(filtered.map((g) => g.uuid));
529782
+ for (const g of rawGpus) {
529783
+ if (filteredUuids.has(g.uuid))
529784
+ continue;
529785
+ if (this._excludedGpusReported.has(g.uuid))
529786
+ continue;
529787
+ this._excludedGpusReported.add(g.uuid);
529788
+ const reason = g.vramTotalMB < resolveMinGpuVramMB() ? "insufficient-vram" : "insufficient-compute-capability";
529789
+ this.emit("gpu-excluded", {
529790
+ uuid: g.uuid,
529791
+ index: g.index,
529792
+ name: g.name,
529793
+ vramTotalMB: g.vramTotalMB,
529794
+ computeCapability: g.computeCapability,
529795
+ reason
529796
+ });
529797
+ }
529798
+ this.gpuCache = { gpus: filtered, takenAtMs: now };
529799
+ this.maybeAutoTuneMaxParallel(filtered);
529800
+ return filtered;
529801
+ }
529802
+ /**
529803
+ * One-shot: bump `maxParallelPerInstance` from the worst-case free VRAM
529804
+ * across capable GPUs the first time we see them. We never tune down (a
529805
+ * subsequent low-VRAM read shouldn't strip concurrency from in-flight
529806
+ * requests), and we never tune again once successful — the recommendation
529807
+ * ladder is stable enough that a single read at startup is correct.
529808
+ */
529809
+ maybeAutoTuneMaxParallel(filtered) {
529810
+ if (!this.config.autoTuneMaxParallel)
529811
+ return;
529812
+ if (this._autoTuned)
529813
+ return;
529814
+ if (filtered.length === 0)
529815
+ return;
529816
+ const minFreeMB = filtered.reduce((m2, g) => Math.min(m2, g.vramFreeMB), Number.POSITIVE_INFINITY);
529817
+ const recommended = recommendMaxParallelFromVram(minFreeMB);
529818
+ if (recommended > this.config.maxParallelPerInstance) {
529819
+ const previous = this.config.maxParallelPerInstance;
529820
+ this.config.maxParallelPerInstance = recommended;
529821
+ for (const inst of this.instances) {
529822
+ inst.state.maxParallel = recommended;
529823
+ }
529824
+ this.emit("max-parallel-tuned", {
529825
+ previous,
529826
+ recommended,
529827
+ minFreeMB,
529828
+ capableGpuCount: filtered.length
529829
+ });
529830
+ }
529831
+ this._autoTuned = true;
529609
529832
  }
529610
529833
  async spawnInstance(model, gpu) {
529611
529834
  let port;
@@ -529668,6 +529891,56 @@ var init_ollama_pool = __esm({
529668
529891
  });
529669
529892
  return inst;
529670
529893
  }
529894
+ /**
529895
+ * Best-effort: estimate the VRAM (in MiB) a model needs to be served
529896
+ * without CPU spill. Hits the base instance's `/api/show` once per model
529897
+ * and caches the result. Returns null when the probe fails (the caller
529898
+ * then falls back to "no estimate" semantics — capacity check is skipped).
529899
+ *
529900
+ * The number returned is `disk_size * 1.15 + maxParallel * 1024` (1 GiB of
529901
+ * KV cache per parallel slot — conservative for 30B-class models). Newer
529902
+ * model families may exceed this margin slightly; bumps are safe via
529903
+ * OMNIUS_OLLAMA_VRAM_SAFETY_MARGIN.
529904
+ */
529905
+ async estimateModelVramMB(model) {
529906
+ if (this._modelVramEstimateMB.has(model)) {
529907
+ return this._modelVramEstimateMB.get(model) ?? null;
529908
+ }
529909
+ let bytesOnDisk = null;
529910
+ try {
529911
+ const url = `${this.config.baseInstanceUrl.replace(/\/+$/, "")}/api/show`;
529912
+ const resp = await fetch(url, {
529913
+ method: "POST",
529914
+ headers: { "Content-Type": "application/json" },
529915
+ body: JSON.stringify({ name: model }),
529916
+ signal: AbortSignal.timeout(2e3)
529917
+ });
529918
+ if (resp.ok) {
529919
+ const data = await resp.json();
529920
+ if (typeof data.size === "number" && data.size > 0)
529921
+ bytesOnDisk = data.size;
529922
+ }
529923
+ } catch {
529924
+ }
529925
+ if (bytesOnDisk === null) {
529926
+ this._modelVramEstimateMB.set(model, null);
529927
+ return null;
529928
+ }
529929
+ const safetyMargin = Number(process.env["OMNIUS_OLLAMA_VRAM_SAFETY_MARGIN"]) || 1.15;
529930
+ const kvCacheMB = this.config.maxParallelPerInstance * 1024;
529931
+ const estimateMB = Math.ceil(bytesOnDisk / (1024 * 1024) * safetyMargin + kvCacheMB);
529932
+ this._modelVramEstimateMB.set(model, estimateMB);
529933
+ return estimateMB;
529934
+ }
529935
+ /**
529936
+ * Filter GPUs to those with enough free VRAM for the model. Caller decides
529937
+ * how to react to an empty list (skip spawn vs degrade to constrained).
529938
+ */
529939
+ gpusWithCapacityForModel(gpus, vramNeededMB) {
529940
+ if (vramNeededMB === null)
529941
+ return gpus;
529942
+ return gpus.filter((g) => g.vramFreeMB >= vramNeededMB);
529943
+ }
529671
529944
  /**
529672
529945
  * Pick a GPU for a freshly-spawned instance. Prefers GPUs that no
529673
529946
  * pool-owned instance is already pinned to, then most free VRAM. Returns
@@ -529704,6 +529977,28 @@ var init_ollama_pool = __esm({
529704
529977
  survivors.push(inst);
529705
529978
  continue;
529706
529979
  }
529980
+ const PROBE_GRACE_MS = 3e4;
529981
+ if (inst.state.inflight === 0 && Date.now() - inst.state.spawnedAtMs > PROBE_GRACE_MS && await this.isPartialVramSpilled(inst)) {
529982
+ const reapedAtMs = Date.now();
529983
+ await inst.terminate();
529984
+ this.dropAffinityFor(inst.state.id);
529985
+ this.emit("instance-reaped", {
529986
+ id: inst.state.id,
529987
+ pid: inst.state.pid,
529988
+ reason: "partial-vram",
529989
+ totalRequests: inst.state.totalRequests,
529990
+ peakInflight: inst.state.peakInflight,
529991
+ ageMs: reapedAtMs - inst.state.spawnedAtMs,
529992
+ idleMs: reapedAtMs - inst.state.lastUsedMs,
529993
+ provenance: {
529994
+ entity: `urn:omnius:ollama-instance:${inst.state.id}`,
529995
+ activity: "ollama-instance-reap-partial-vram",
529996
+ agent: "orchestrator.ollama-pool",
529997
+ timestampMs: reapedAtMs
529998
+ }
529999
+ });
530000
+ continue;
530001
+ }
529707
530002
  if (inst.isIdleLongerThan(this.config.idleMs)) {
529708
530003
  const reapedAtMs = Date.now();
529709
530004
  await inst.terminate();
@@ -529711,6 +530006,7 @@ var init_ollama_pool = __esm({
529711
530006
  this.emit("instance-reaped", {
529712
530007
  id: inst.state.id,
529713
530008
  pid: inst.state.pid,
530009
+ reason: "idle",
529714
530010
  totalRequests: inst.state.totalRequests,
529715
530011
  peakInflight: inst.state.peakInflight,
529716
530012
  ageMs: reapedAtMs - inst.state.spawnedAtMs,
@@ -529728,6 +530024,33 @@ var init_ollama_pool = __esm({
529728
530024
  }
529729
530025
  this.instances = survivors;
529730
530026
  }
530027
+ /**
530028
+ * Probe `/api/ps` on the instance and return true if any resident model has
530029
+ * less than 95% of its weights in VRAM — the unmistakable CPU-offload
530030
+ * signature that produces 50× slowdowns. Defensive: any HTTP failure
530031
+ * returns false so a transient network blip never triggers a reap.
530032
+ */
530033
+ async isPartialVramSpilled(inst) {
530034
+ const PARTIAL_VRAM_THRESHOLD = 0.95;
530035
+ try {
530036
+ const url = `${inst.state.baseUrl.replace(/\/+$/, "")}/api/ps`;
530037
+ const resp = await fetch(url, { signal: AbortSignal.timeout(2e3) });
530038
+ if (!resp.ok)
530039
+ return false;
530040
+ const data = await resp.json();
530041
+ if (!data.models || data.models.length === 0)
530042
+ return false;
530043
+ return data.models.some((m2) => {
530044
+ const total = m2.size ?? 0;
530045
+ const vram = m2.size_vram ?? 0;
530046
+ if (total <= 0)
530047
+ return false;
530048
+ return vram / total < PARTIAL_VRAM_THRESHOLD;
530049
+ });
530050
+ } catch {
530051
+ return false;
530052
+ }
530053
+ }
529731
530054
  /** Stop the reaper and terminate every spawned instance. Call on process exit. */
529732
530055
  async shutdown() {
529733
530056
  if (this.reaperHandle) {
@@ -571197,6 +571520,47 @@ var init_voice_soul = __esm({
571197
571520
  }
571198
571521
  });
571199
571522
 
571523
+ // packages/cli/src/tui/usage-bars.ts
571524
/**
 * Formats a count compactly: plain integer below 1,000, then "K" and "M"
 * suffixes with one decimal for single-digit magnitudes ("1.5K", "12K",
 * "1.5M", "25M").
 *
 * Non-finite input is treated as 0; negatives clamp to 0; fractions are
 * floored. Values that would round up to "1000K" (999,500-999,999) are
 * promoted to the "M" tier so the output reads "1.0M".
 *
 * @param {number} value2 Count to format.
 * @returns {string} Compact representation.
 */
function formatCompactCount(value2) {
  const n2 = Math.max(0, Math.floor(Number.isFinite(value2) ? value2 : 0));
  if (n2 < 1e3) return String(n2);
  if (n2 < 1e6) {
    const thousands = (n2 / 1e3).toFixed(n2 < 1e4 ? 1 : 0);
    // toFixed rounds, so 999.5K and above would print as "1000K"; fall through
    // to the M tier in that case.
    if (Number(thousands) < 1e3) return `${thousands}K`;
  }
  return `${(n2 / 1e6).toFixed(n2 < 1e7 ? 1 : 0)}M`;
}
571530
/**
 * Describes how long until `resetAt` as a leading-space suffix such as
 * " reset 45m", " reset 2h" or " reset 2h 15m". Minutes are rounded up.
 *
 * @param {number} resetAt Reset time in epoch milliseconds.
 * @param {number} [now=Date.now()] Reference time in epoch milliseconds.
 * @returns {string} The suffix, or "" when `resetAt` is not finite or is not
 *   after `now`.
 */
function formatResetDelta(resetAt, now = Date.now()) {
  if (!Number.isFinite(resetAt)) return "";
  if (resetAt <= now) return "";

  const totalMinutes = Math.ceil((resetAt - now) / 6e4);
  if (totalMinutes < 60) {
    return ` reset ${totalMinutes}m`;
  }

  const pieces = [`${Math.floor(totalMinutes / 60)}h`];
  const leftoverMinutes = totalMinutes % 60;
  if (leftoverMinutes > 0) {
    pieces.push(`${leftoverMinutes}m`);
  }
  return ` reset ${pieces.join(" ")}`;
}
571538
/**
 * Renders a single usage line: padded label, filled/empty bar, percentage,
 * "used/total" in compact form and, when `options2.resetAt` is truthy, the
 * time-to-reset suffix. Segments are joined with single spaces and trailing
 * whitespace is trimmed.
 *
 * `total` and `used` are sanitised to non-negative integers (non-finite -> 0).
 * The percentage and bar fill are computed from `used` capped at `total`,
 * while the "used/total" text shows the uncapped figure. Colour is red from
 * 90%, yellow from 70%, green otherwise.
 *
 * @param {{label: string, used: number, total: number, width?: number,
 *   labelWidth?: number, resetAt?: number}} options2 Bar description;
 *   `width` defaults to 18 (minimum 4), `labelWidth` to 16 (never narrower
 *   than the label).
 * @returns {string} The formatted, colourised line.
 */
function formatUsageBar(options2) {
  const toCount = (raw) => Math.max(0, Math.floor(Number.isFinite(raw) ? raw : 0));

  const total = toCount(options2.total);
  const rawUsed = toCount(options2.used);
  const hasBudget = total > 0;
  const used = hasBudget ? Math.min(total, rawUsed) : 0;

  const width = Math.max(4, options2.width ?? 18);
  const labelWidth = Math.max(options2.label.length, options2.labelWidth ?? 16);

  const pct = hasBudget ? Math.round(used / total * 100) : 0;
  const filled = hasBudget ? Math.min(width, Math.round(pct / 100 * width)) : 0;

  let color = c3.green;
  if (pct >= 90) {
    color = c3.red;
  } else if (pct >= 70) {
    color = c3.yellow;
  }

  const bar = color("█".repeat(filled)) + c3.dim("░".repeat(width - filled));
  const reset = options2.resetAt ? c3.dim(formatResetDelta(options2.resetAt)) : "";

  const segments = [
    c3.cyan(options2.label.padEnd(labelWidth)),
    bar,
    color(`${pct}%`.padStart(4)),
    c3.dim(`${formatCompactCount(rawUsed)}/${formatCompactCount(total)}`),
    reset
  ];
  return segments.join(" ").trimEnd();
}
571557
+ var init_usage_bars = __esm({
571558
+ "packages/cli/src/tui/usage-bars.ts"() {
571559
+ "use strict";
571560
+ init_render();
571561
+ }
571562
+ });
571563
+
571200
571564
  // packages/cli/src/tui/expose.ts
571201
571565
  import { createServer as createServer5, request as httpRequest } from "node:http";
571202
571566
  import { request as httpsRequest } from "node:https";
@@ -571243,6 +571607,38 @@ function fmtTokens(n2) {
571243
571607
  if (n2 < 1e6) return `${(n2 / 1e3).toFixed(1)}K`;
571244
571608
  return `${(n2 / 1e6).toFixed(1)}M`;
571245
571609
  }
571610
/**
 * Coerces an arbitrary value to a non-negative integer.
 *
 * @param {*} value2 Value to coerce with `Number()`.
 * @returns {number} The floored number when it is finite and positive;
 *   otherwise 0 (covers NaN, infinities, zero, negatives and non-numeric
 *   input).
 */
function safeNonNegativeInt(value2) {
  const numeric = Number(value2);
  if (!Number.isFinite(numeric) || numeric <= 0) {
    return 0;
  }
  return Math.floor(numeric);
}
571614
/**
 * Computes when the sponsor daily-token window that starts at `now` ends.
 * The window is rolling: it ends `SPONSOR_DAILY_WINDOW_MS` after `now`
 * rather than at a fixed time of day.
 *
 * @param {number} [now=Date.now()] Window start in epoch milliseconds.
 * @returns {number} Reset time in epoch milliseconds.
 */
function nextSponsorDailyReset(now = Date.now()) {
  const windowEndsAt = now + SPONSOR_DAILY_WINDOW_MS;
  return windowEndsAt;
}
571617
/**
 * Loads the persisted sponsor usage counters from
 * `<stateDir>/sponsor/<SPONSOR_USAGE_FILE_NAME>`.
 *
 * @param {string} stateDir Directory that holds the `sponsor` sub-directory.
 * @returns {{dailyTokensUsed: number, dailyTokensResetAt: number,
 *   updatedAt: string}|null} The sanitised state, or null when the file is
 *   missing, unreadable, not valid JSON, or has no positive
 *   `dailyTokensResetAt`. A non-string `updatedAt` is replaced with the
 *   current time in ISO format.
 */
function readSponsorUsageState(stateDir) {
  try {
    const usageFile = join105(stateDir, "sponsor", SPONSOR_USAGE_FILE_NAME);
    if (!existsSync90(usageFile)) {
      return null;
    }
    const raw = JSON.parse(readFileSync71(usageFile, "utf8"));
    const dailyTokensUsed = safeNonNegativeInt(raw.dailyTokensUsed);
    const dailyTokensResetAt = safeNonNegativeInt(raw.dailyTokensResetAt);
    // A zero reset time means the record carries no usable window.
    if (dailyTokensResetAt === 0) {
      return null;
    }
    const updatedAt = typeof raw.updatedAt === "string" ? raw.updatedAt : new Date().toISOString();
    return { dailyTokensUsed, dailyTokensResetAt, updatedAt };
  } catch {
    return null;
  }
}
571634
/**
 * Persists sponsor usage counters as pretty-printed JSON to
 * `<stateDir>/sponsor/<SPONSOR_USAGE_FILE_NAME>`, creating the directory
 * if needed. Best-effort: any filesystem or serialisation error is ignored.
 *
 * @param {string} stateDir Directory that holds the `sponsor` sub-directory.
 * @param {object} state Usage state to serialise.
 */
function writeSponsorUsageState(stateDir, state) {
  try {
    const sponsorDir = join105(stateDir, "sponsor");
    mkdirSync50(sponsorDir, { recursive: true });
    const usageFile = join105(sponsorDir, SPONSOR_USAGE_FILE_NAME);
    const payload = JSON.stringify(state, null, 2);
    writeFileSync45(usageFile, payload);
  } catch {
    // Persistence is optional; in-memory counters stay authoritative.
  }
}
571246
571642
  function readExposeState(stateDir) {
571247
571643
  try {
571248
571644
  const path12 = join105(stateDir, STATE_FILE_NAME);
@@ -571406,11 +571802,12 @@ function removeP2PExposeState(stateDir) {
571406
571802
  } catch {
571407
571803
  }
571408
571804
  }
571409
- var HOP_BY_HOP_HEADERS, CF_HEADERS_PREFIX, DEFAULT_EXPOSE_MAX_BODY_BYTES, INTERNAL_CAPABILITIES, DEFAULT_TARGETS, STATE_FILE_NAME, ExposeGateway, P2P_STATE_FILE_NAME, ExposeP2PGateway;
571805
+ var HOP_BY_HOP_HEADERS, CF_HEADERS_PREFIX, DEFAULT_EXPOSE_MAX_BODY_BYTES, INTERNAL_CAPABILITIES, DEFAULT_TARGETS, STATE_FILE_NAME, SPONSOR_USAGE_FILE_NAME, SPONSOR_DAILY_WINDOW_MS, SPONSOR_REQUEST_WINDOW_MS, ExposeGateway, P2P_STATE_FILE_NAME, ExposeP2PGateway;
571410
571806
  var init_expose = __esm({
571411
571807
  "packages/cli/src/tui/expose.ts"() {
571412
571808
  "use strict";
571413
571809
  init_render();
571810
+ init_usage_bars();
571414
571811
  init_typed_node_events();
571415
571812
  HOP_BY_HOP_HEADERS = /* @__PURE__ */ new Set([
571416
571813
  "connection",
@@ -571432,6 +571829,9 @@ var init_expose = __esm({
571432
571829
  custom: "http://127.0.0.1:11434"
571433
571830
  };
571434
571831
  STATE_FILE_NAME = "expose-state.json";
571832
+ SPONSOR_USAGE_FILE_NAME = "usage.json";
571833
+ SPONSOR_DAILY_WINDOW_MS = 864e5;
571834
+ SPONSOR_REQUEST_WINDOW_MS = 6e4;
571435
571835
  ExposeGateway = class _ExposeGateway extends EventEmitter8 {
571436
571836
  constructor(options2) {
571437
571837
  super();
@@ -571449,6 +571849,8 @@ var init_expose = __esm({
571449
571849
  } else {
571450
571850
  this._authKey = options2.authKey;
571451
571851
  }
571852
+ this.loadSponsorUsage();
571853
+ this.refreshSponsorUsageStats();
571452
571854
  }
571453
571855
  options;
571454
571856
  server = null;
@@ -571471,6 +571873,7 @@ var init_expose = __esm({
571471
571873
  _dailyTokensResetAt = 0;
571472
571874
  /** Sponsor rate limits (set via setSponsorLimits) */
571473
571875
  _sponsorLimits = null;
571876
+ _sponsorBlockedRequests = 0;
571474
571877
  _authKey;
571475
571878
  _targetUrl;
571476
571879
  _kind;
@@ -571489,7 +571892,8 @@ var init_expose = __esm({
571489
571892
  users: /* @__PURE__ */ new Map(),
571490
571893
  budgetTokensRemaining: 0,
571491
571894
  budgetTokensTotal: 0,
571492
- budgetResetAt: 0
571895
+ budgetResetAt: 0,
571896
+ sponsorUsage: null
571493
571897
  };
571494
571898
  get tunnelUrl() {
571495
571899
  return this._tunnelUrl;
@@ -571509,42 +571913,140 @@ var init_expose = __esm({
571509
571913
  /** Set sponsor rate limits — enables rate limiting middleware in the proxy */
571510
571914
  setSponsorLimits(limits) {
571511
571915
  this._sponsorLimits = limits;
571916
+ this.ensureSponsorDailyWindow();
571917
+ this.refreshSponsorUsageStats();
571918
+ this.emitStats();
571919
+ }
571920
+ getSponsorUsageSnapshot() {
571921
+ this.refreshSponsorUsageStats();
571922
+ return this._stats.sponsorUsage ? { ...this._stats.sponsorUsage } : null;
571923
+ }
571924
+ loadSponsorUsage() {
571925
+ if (!this._stateDir) {
571926
+ this._dailyTokensResetAt = nextSponsorDailyReset();
571927
+ return;
571928
+ }
571929
+ const saved = readSponsorUsageState(this._stateDir);
571930
+ if (!saved) {
571931
+ this._dailyTokensUsed = 0;
571932
+ this._dailyTokensResetAt = nextSponsorDailyReset();
571933
+ return;
571934
+ }
571935
+ const now = Date.now();
571936
+ if (saved.dailyTokensResetAt <= now) {
571937
+ this._dailyTokensUsed = 0;
571938
+ this._dailyTokensResetAt = nextSponsorDailyReset(now);
571939
+ this.saveSponsorUsage();
571940
+ } else {
571941
+ this._dailyTokensUsed = saved.dailyTokensUsed;
571942
+ this._dailyTokensResetAt = saved.dailyTokensResetAt;
571943
+ }
571944
+ }
571945
+ saveSponsorUsage() {
571946
+ if (!this._stateDir) return;
571947
+ writeSponsorUsageState(this._stateDir, {
571948
+ dailyTokensUsed: this._dailyTokensUsed,
571949
+ dailyTokensResetAt: this._dailyTokensResetAt,
571950
+ updatedAt: (/* @__PURE__ */ new Date()).toISOString()
571951
+ });
571952
+ }
571953
+ ensureSponsorDailyWindow(now = Date.now()) {
571954
+ if (!this._dailyTokensResetAt || this._dailyTokensResetAt <= now) {
571955
+ this._dailyTokensUsed = 0;
571956
+ this._dailyTokensResetAt = nextSponsorDailyReset(now);
571957
+ this.saveSponsorUsage();
571958
+ }
571959
+ }
571960
+ pruneSponsorRequestWindows(now = Date.now()) {
571961
+ for (const [ip, window2] of this._rateLimitWindows.entries()) {
571962
+ while (window2.length > 0 && window2[0] < now - SPONSOR_REQUEST_WINDOW_MS) window2.shift();
571963
+ if (window2.length === 0) this._rateLimitWindows.delete(ip);
571964
+ }
571965
+ }
571966
+ sponsorRequestWindowUsage(now = Date.now()) {
571967
+ this.pruneSponsorRequestWindows(now);
571968
+ let count = 0;
571969
+ let oldest = Number.POSITIVE_INFINITY;
571970
+ for (const window2 of this._rateLimitWindows.values()) {
571971
+ count += window2.length;
571972
+ if (window2.length > 0) oldest = Math.min(oldest, window2[0]);
571973
+ }
571974
+ return {
571975
+ count,
571976
+ resetAt: Number.isFinite(oldest) ? oldest + SPONSOR_REQUEST_WINDOW_MS : now + SPONSOR_REQUEST_WINDOW_MS
571977
+ };
571978
+ }
571979
+ refreshSponsorUsageStats(now = Date.now()) {
571980
+ if (!this._sponsorLimits) {
571981
+ this._stats.sponsorUsage = null;
571982
+ return;
571983
+ }
571984
+ this.ensureSponsorDailyWindow(now);
571985
+ const req2 = this.sponsorRequestWindowUsage(now);
571986
+ this._stats.sponsorUsage = {
571987
+ enabled: true,
571988
+ transport: "tunnel",
571989
+ dailyTokensUsed: this._dailyTokensUsed,
571990
+ dailyTokensLimit: this._sponsorLimits.maxTokensPerDay,
571991
+ dailyResetAt: this._dailyTokensResetAt,
571992
+ requestsInWindow: req2.count,
571993
+ requestsPerMinuteLimit: this._sponsorLimits.maxRequestsPerMinute,
571994
+ requestWindowResetAt: req2.resetAt,
571995
+ activeConnections: this._stats.activeConnections,
571996
+ maxConcurrent: this._sponsorLimits.maxConcurrent,
571997
+ blockedRequests: this._sponsorBlockedRequests,
571998
+ allowedModels: this._sponsorLimits.allowedModels === "all" ? "all" : [...this._sponsorLimits.allowedModels]
571999
+ };
572000
+ }
572001
+ markSponsorBlocked() {
572002
+ this._sponsorBlockedRequests++;
572003
+ this.refreshSponsorUsageStats();
571512
572004
  }
571513
572005
  /** Check rate limits for a request. Returns null if OK, or error message string if blocked. */
571514
- checkRateLimit(userIp, model) {
572006
+ checkRateLimit(userIp, model, options2 = {}) {
571515
572007
  if (!this._sponsorLimits) return null;
571516
572008
  const lim = this._sponsorLimits;
572009
+ const now = Date.now();
572010
+ if (lim.maxRequestsPerMinute <= 0 || lim.maxTokensPerDay <= 0 || lim.maxConcurrent <= 0) {
572011
+ this.markSponsorBlocked();
572012
+ return "Sponsored endpoint is paused or has no quota configured.";
572013
+ }
571517
572014
  if (lim.allowedModels !== "all" && model && !lim.allowedModels.includes(model)) {
572015
+ this.markSponsorBlocked();
571518
572016
  return `Model '${model}' is not available on this sponsored endpoint. Available: ${lim.allowedModels.join(", ")}`;
571519
572017
  }
571520
- if (this._stats.activeConnections >= lim.maxConcurrent) {
572018
+ if (this._stats.activeConnections > lim.maxConcurrent) {
572019
+ this.markSponsorBlocked();
571521
572020
  return `Too many concurrent requests (${this._stats.activeConnections}/${lim.maxConcurrent}). Try again shortly.`;
571522
572021
  }
571523
- const now = Date.now();
571524
- const windowMs = 6e4;
571525
572022
  let window2 = this._rateLimitWindows.get(userIp);
571526
572023
  if (!window2) {
571527
572024
  window2 = [];
571528
572025
  this._rateLimitWindows.set(userIp, window2);
571529
572026
  }
571530
- while (window2.length > 0 && window2[0] < now - windowMs) window2.shift();
572027
+ while (window2.length > 0 && window2[0] < now - SPONSOR_REQUEST_WINDOW_MS) window2.shift();
571531
572028
  if (window2.length >= lim.maxRequestsPerMinute) {
571532
- const retryAfterMs = window2[0] + windowMs - now;
572029
+ this.markSponsorBlocked();
572030
+ const retryAfterMs = window2[0] + SPONSOR_REQUEST_WINDOW_MS - now;
571533
572031
  return `Rate limited (${lim.maxRequestsPerMinute} req/min). Retry in ${Math.ceil(retryAfterMs / 1e3)}s.`;
571534
572032
  }
571535
- window2.push(now);
571536
- if (this._dailyTokensResetAt < now) {
571537
- this._dailyTokensUsed = 0;
571538
- this._dailyTokensResetAt = now + 864e5;
571539
- }
572033
+ if (options2.commitRequest) window2.push(now);
572034
+ this.ensureSponsorDailyWindow(now);
571540
572035
  if (this._dailyTokensUsed >= lim.maxTokensPerDay) {
572036
+ this.markSponsorBlocked();
571541
572037
  return `Daily token budget exhausted (${fmtTokens(lim.maxTokensPerDay)}). Resets in ${Math.ceil((this._dailyTokensResetAt - now) / 36e5)}h.`;
571542
572038
  }
572039
+ this.refreshSponsorUsageStats(now);
571543
572040
  return null;
571544
572041
  }
571545
572042
  /** Track token usage from a completed response */
571546
572043
  trackTokenUsage(tokensIn, tokensOut) {
571547
- this._dailyTokensUsed += tokensIn + tokensOut;
572044
+ const total = safeNonNegativeInt(tokensIn) + safeNonNegativeInt(tokensOut);
572045
+ if (total <= 0) return;
572046
+ this.ensureSponsorDailyWindow();
572047
+ this._dailyTokensUsed += total;
572048
+ this.saveSponsorUsage();
572049
+ this.refreshSponsorUsageStats();
571548
572050
  }
571549
572051
  // ── Lifecycle ───────────────────────────────────────────────────────────
571550
572052
  async start() {
@@ -571753,7 +572255,7 @@ var init_expose = __esm({
571753
572255
  user.activeRequests++;
571754
572256
  user.lastSeen = Date.now();
571755
572257
  this.emitStats();
571756
- const preRateLimitCheck = this.checkRateLimit(userIp, "");
572258
+ const preRateLimitCheck = this.checkRateLimit(userIp, "", { commitRequest: false });
571757
572259
  if (preRateLimitCheck) {
571758
572260
  this._stats.activeConnections--;
571759
572261
  user.activeRequests--;
@@ -571842,8 +572344,8 @@ var init_expose = __esm({
571842
572344
  } catch {
571843
572345
  }
571844
572346
  }
571845
- if (requestModel && this._sponsorLimits) {
571846
- const modelCheck = this.checkRateLimit(userIp, requestModel);
572347
+ if (this._sponsorLimits) {
572348
+ const modelCheck = this.checkRateLimit(userIp, requestModel, { commitRequest: true });
571847
572349
  if (modelCheck) {
571848
572350
  this._stats.activeConnections--;
571849
572351
  user.activeRequests--;
@@ -572229,10 +572731,12 @@ ${this.formatConnectionInfo()}`);
572229
572731
  });
572230
572732
  }
572231
572733
  emitStats() {
572734
+ this.refreshSponsorUsageStats();
572232
572735
  this.emit("stats", {
572233
572736
  ...this._stats,
572234
572737
  modelUsage: new Map(this._stats.modelUsage),
572235
- users: new Map(this._stats.users)
572738
+ users: new Map(this._stats.users),
572739
+ sponsorUsage: this._stats.sponsorUsage ? { ...this._stats.sponsorUsage } : null
572236
572740
  });
572237
572741
  }
572238
572742
  /** Format connection info for display */
@@ -572274,6 +572778,28 @@ ${this.formatConnectionInfo()}`);
572274
572778
  const budgetColor = pct > 50 ? c3.green : pct > 20 ? c3.yellow : c3.red;
572275
572779
  lines.push(` ${c3.cyan("Budget".padEnd(18))} ${budgetColor(fmtTokens(s2.budgetTokensRemaining))}${c3.dim("/")}${fmtTokens(s2.budgetTokensTotal)} ${c3.dim(`(${pct}% left)`)}`);
572276
572780
  }
572781
+ if (s2.sponsorUsage) {
572782
+ lines.push("");
572783
+ lines.push(` ${c3.bold("Sponsor Quota")}`);
572784
+ lines.push(` ${formatUsageBar({
572785
+ label: "Daily tokens",
572786
+ used: s2.sponsorUsage.dailyTokensUsed,
572787
+ total: s2.sponsorUsage.dailyTokensLimit,
572788
+ resetAt: s2.sponsorUsage.dailyResetAt
572789
+ })}`);
572790
+ lines.push(` ${formatUsageBar({
572791
+ label: "Requests/min",
572792
+ used: s2.sponsorUsage.requestsInWindow,
572793
+ total: s2.sponsorUsage.requestsPerMinuteLimit,
572794
+ resetAt: s2.sponsorUsage.requestWindowResetAt
572795
+ })}`);
572796
+ lines.push(` ${formatUsageBar({
572797
+ label: "Concurrency",
572798
+ used: s2.sponsorUsage.activeConnections,
572799
+ total: s2.sponsorUsage.maxConcurrent
572800
+ })}`);
572801
+ lines.push(` ${c3.cyan("Blocked".padEnd(18))} ${s2.sponsorUsage.blockedRequests}`);
572802
+ }
572277
572803
  const visibleModels = Array.from(s2.modelUsage.entries()).filter(([model]) => !INTERNAL_CAPABILITIES.has(model));
572278
572804
  if (visibleModels.length > 0) {
572279
572805
  lines.push("");
@@ -572335,6 +572861,11 @@ ${this.formatConnectionInfo()}`);
572335
572861
  _passthrough = false;
572336
572862
  _loadbalance = false;
572337
572863
  _endpointAuth;
572864
+ _sponsorLimits = null;
572865
+ _sponsorBlockedRequests = 0;
572866
+ _sponsorRequestWindow = [];
572867
+ _dailyTokensUsed = 0;
572868
+ _dailyTokensResetAt = 0;
572338
572869
  _pollTimer = null;
572339
572870
  _activityPollTimer = null;
572340
572871
  /** Fast token flash timer — pulses LED at 200ms while inference is active */
@@ -572353,7 +572884,8 @@ ${this.formatConnectionInfo()}`);
572353
572884
  users: /* @__PURE__ */ new Map(),
572354
572885
  budgetTokensRemaining: 0,
572355
572886
  budgetTokensTotal: 0,
572356
- budgetResetAt: 0
572887
+ budgetResetAt: 0,
572888
+ sponsorUsage: null
572357
572889
  };
572358
572890
  get peerId() {
572359
572891
  return this._peerId;
@@ -572395,6 +572927,93 @@ ${this.formatConnectionInfo()}`);
572395
572927
  } else {
572396
572928
  this._authKey = options2.authKey;
572397
572929
  }
572930
+ this.loadSponsorUsage();
572931
+ this.refreshSponsorUsageStats();
572932
+ }
572933
+ setSponsorLimits(limits) {
572934
+ this._sponsorLimits = limits;
572935
+ this.ensureSponsorDailyWindow();
572936
+ this.refreshSponsorUsageStats();
572937
+ this.emitStats();
572938
+ }
572939
+ getSponsorUsageSnapshot() {
572940
+ this.refreshSponsorUsageStats();
572941
+ return this._stats.sponsorUsage ? { ...this._stats.sponsorUsage } : null;
572942
+ }
572943
+ loadSponsorUsage() {
572944
+ if (!this._stateDir) {
572945
+ this._dailyTokensResetAt = nextSponsorDailyReset();
572946
+ return;
572947
+ }
572948
+ const saved = readSponsorUsageState(this._stateDir);
572949
+ if (!saved) {
572950
+ this._dailyTokensUsed = 0;
572951
+ this._dailyTokensResetAt = nextSponsorDailyReset();
572952
+ return;
572953
+ }
572954
+ const now = Date.now();
572955
+ if (saved.dailyTokensResetAt <= now) {
572956
+ this._dailyTokensUsed = 0;
572957
+ this._dailyTokensResetAt = nextSponsorDailyReset(now);
572958
+ this.saveSponsorUsage();
572959
+ } else {
572960
+ this._dailyTokensUsed = saved.dailyTokensUsed;
572961
+ this._dailyTokensResetAt = saved.dailyTokensResetAt;
572962
+ }
572963
+ }
572964
+ saveSponsorUsage() {
572965
+ if (!this._stateDir) return;
572966
+ writeSponsorUsageState(this._stateDir, {
572967
+ dailyTokensUsed: this._dailyTokensUsed,
572968
+ dailyTokensResetAt: this._dailyTokensResetAt,
572969
+ updatedAt: (/* @__PURE__ */ new Date()).toISOString()
572970
+ });
572971
+ }
572972
+ ensureSponsorDailyWindow(now = Date.now()) {
572973
+ if (!this._dailyTokensResetAt || this._dailyTokensResetAt <= now) {
572974
+ this._dailyTokensUsed = 0;
572975
+ this._dailyTokensResetAt = nextSponsorDailyReset(now);
572976
+ this.saveSponsorUsage();
572977
+ }
572978
+ }
572979
+ recordSponsorRequest(now = Date.now()) {
572980
+ this._sponsorRequestWindow.push(now);
572981
+ this.pruneSponsorRequestWindow(now);
572982
+ }
572983
+ pruneSponsorRequestWindow(now = Date.now()) {
572984
+ while (this._sponsorRequestWindow.length > 0 && this._sponsorRequestWindow[0] < now - SPONSOR_REQUEST_WINDOW_MS) {
572985
+ this._sponsorRequestWindow.shift();
572986
+ }
572987
+ }
572988
+ trackTokenUsage(tokensIn, tokensOut) {
572989
+ const total = safeNonNegativeInt(tokensIn) + safeNonNegativeInt(tokensOut);
572990
+ if (total <= 0) return;
572991
+ this.ensureSponsorDailyWindow();
572992
+ this._dailyTokensUsed += total;
572993
+ this.saveSponsorUsage();
572994
+ this.refreshSponsorUsageStats();
572995
+ }
572996
+ refreshSponsorUsageStats(now = Date.now()) {
572997
+ if (!this._sponsorLimits) {
572998
+ this._stats.sponsorUsage = null;
572999
+ return;
573000
+ }
573001
+ this.ensureSponsorDailyWindow(now);
573002
+ this.pruneSponsorRequestWindow(now);
573003
+ this._stats.sponsorUsage = {
573004
+ enabled: true,
573005
+ transport: "libp2p",
573006
+ dailyTokensUsed: this._dailyTokensUsed,
573007
+ dailyTokensLimit: this._sponsorLimits.maxTokensPerDay,
573008
+ dailyResetAt: this._dailyTokensResetAt,
573009
+ requestsInWindow: this._sponsorRequestWindow.length,
573010
+ requestsPerMinuteLimit: this._sponsorLimits.maxRequestsPerMinute,
573011
+ requestWindowResetAt: this._sponsorRequestWindow[0] ? this._sponsorRequestWindow[0] + SPONSOR_REQUEST_WINDOW_MS : now + SPONSOR_REQUEST_WINDOW_MS,
573012
+ activeConnections: this._stats.activeConnections,
573013
+ maxConcurrent: this._sponsorLimits.maxConcurrent,
573014
+ blockedRequests: this._sponsorBlockedRequests,
573015
+ allowedModels: this._sponsorLimits.allowedModels === "all" ? "all" : [...this._sponsorLimits.allowedModels]
573016
+ };
572398
573017
  }
572399
573018
  async start() {
572400
573019
  this._onInfo?.("Connecting to nexus P2P network...");
@@ -572653,6 +573272,8 @@ ${this.formatConnectionInfo()}`);
572653
573272
  }
572654
573273
  this._stats.totalTokensIn += tokIn;
572655
573274
  this._stats.totalTokensOut += tokOut;
573275
+ this.recordSponsorRequest();
573276
+ this.trackTokenUsage(tokIn, tokOut);
572656
573277
  const peerId = record.from || record.peerId || "unknown";
572657
573278
  const shortPeer = peerId.length > 16 ? peerId.slice(0, 16) + "..." : peerId;
572658
573279
  let user = this._stats.users.get(shortPeer);
@@ -572716,10 +573337,12 @@ ${this.formatConnectionInfo()}`);
572716
573337
  }
572717
573338
  }
572718
573339
  emitStats() {
573340
+ this.refreshSponsorUsageStats();
572719
573341
  this.emit("stats", {
572720
573342
  ...this._stats,
572721
573343
  modelUsage: new Map(this._stats.modelUsage),
572722
- users: new Map(this._stats.users)
573344
+ users: new Map(this._stats.users),
573345
+ sponsorUsage: this._stats.sponsorUsage ? { ...this._stats.sponsorUsage } : null
572723
573346
  });
572724
573347
  }
572725
573348
  /** Format connection info for display */
@@ -572767,6 +573390,28 @@ ${this.formatConnectionInfo()}`);
572767
573390
  const budgetColor = pct > 50 ? c3.green : pct > 20 ? c3.yellow : c3.red;
572768
573391
  lines.push(` ${c3.cyan("Budget".padEnd(18))} ${budgetColor(fmtTokens(s2.budgetTokensRemaining))}${c3.dim("/")}${fmtTokens(s2.budgetTokensTotal)} ${c3.dim(`(${pct}% left)`)}`);
572769
573392
  }
573393
+ if (s2.sponsorUsage) {
573394
+ lines.push("");
573395
+ lines.push(` ${c3.bold("Sponsor Quota")}`);
573396
+ lines.push(` ${formatUsageBar({
573397
+ label: "Daily tokens",
573398
+ used: s2.sponsorUsage.dailyTokensUsed,
573399
+ total: s2.sponsorUsage.dailyTokensLimit,
573400
+ resetAt: s2.sponsorUsage.dailyResetAt
573401
+ })}`);
573402
+ lines.push(` ${formatUsageBar({
573403
+ label: "Requests/min",
573404
+ used: s2.sponsorUsage.requestsInWindow,
573405
+ total: s2.sponsorUsage.requestsPerMinuteLimit,
573406
+ resetAt: s2.sponsorUsage.requestWindowResetAt
573407
+ })}`);
573408
+ lines.push(` ${formatUsageBar({
573409
+ label: "Concurrency",
573410
+ used: s2.sponsorUsage.activeConnections,
573411
+ total: s2.sponsorUsage.maxConcurrent
573412
+ })}`);
573413
+ lines.push(` ${c3.cyan("Blocked".padEnd(18))} ${s2.sponsorUsage.blockedRequests}`);
573414
+ }
572770
573415
  const visibleModels = Array.from(s2.modelUsage.entries()).filter(([model]) => !INTERNAL_CAPABILITIES.has(model));
572771
573416
  if (visibleModels.length > 0) {
572772
573417
  lines.push("");
@@ -590439,15 +591084,52 @@ async function stepReview(config, rl, availableRows) {
590439
591084
  if (!result.confirmed || result.key === "cancel") return false;
590440
591085
  return result.key === "go_live";
590441
591086
  }
590442
- async function showSponsorDashboard(config, projectDir2, rl, availableRows) {
591087
+ async function showSponsorDashboard(config, projectDir2, rl, availableRows, sponsorUsage) {
590443
591088
  const isPaused = config.status === "paused";
590444
591089
  const enabledEps = config.endpoints.filter((e2) => e2.enabled);
591090
+ const dailyTokensLimit = sponsorUsage?.dailyTokensLimit || config.rateLimits.maxTokensPerDay;
591091
+ const requestsPerMinuteLimit = sponsorUsage?.requestsPerMinuteLimit || config.rateLimits.maxRequestsPerMinute;
591092
+ const maxConcurrent = sponsorUsage?.maxConcurrent || config.rateLimits.maxConcurrent;
591093
+ const usageItems = [
591094
+ {
591095
+ key: "info_usage_daily",
591096
+ label: ` ${formatUsageBar({
591097
+ label: "Daily tokens",
591098
+ used: sponsorUsage?.dailyTokensUsed ?? 0,
591099
+ total: dailyTokensLimit,
591100
+ resetAt: sponsorUsage?.dailyResetAt
591101
+ })}`
591102
+ },
591103
+ {
591104
+ key: "info_usage_rpm",
591105
+ label: ` ${formatUsageBar({
591106
+ label: "Requests/min",
591107
+ used: sponsorUsage?.requestsInWindow ?? 0,
591108
+ total: requestsPerMinuteLimit,
591109
+ resetAt: sponsorUsage?.requestWindowResetAt
591110
+ })}`
591111
+ },
591112
+ {
591113
+ key: "info_usage_concurrent",
591114
+ label: ` ${formatUsageBar({
591115
+ label: "Concurrency",
591116
+ used: sponsorUsage?.activeConnections ?? 0,
591117
+ total: maxConcurrent
591118
+ })}`
591119
+ },
591120
+ {
591121
+ key: "info_usage_blocked",
591122
+ label: ` Blocked: ${sponsorUsage?.blockedRequests ?? 0}`
591123
+ }
591124
+ ];
590445
591125
  const items = [
590446
591126
  { key: "hdr", label: "Sponsor Dashboard" },
590447
591127
  { key: "info_status", label: ` Status: ${isPaused ? "● PAUSED" : "● ACTIVE"}` },
590448
591128
  { key: "info_ep", label: ` Endpoints: ${enabledEps.map((e2) => e2.label).join(", ")}` },
590449
591129
  { key: "info_transport", label: ` Transport: ${[config.transport.cloudflared ? "Cloudflared" : "", config.transport.libp2p ? "libp2p" : ""].filter(Boolean).join(" + ")}` },
590450
591130
  { key: "info_limits", label: ` Limits: ${config.rateLimits.maxRequestsPerMinute} req/min, ${config.rateLimits.maxTokensPerDay.toLocaleString()} tokens/day` },
591131
+ { key: "info_usage_hdr", label: " Usage" },
591132
+ ...usageItems,
590451
591133
  { key: "sep", label: "" },
590452
591134
  { key: "modify", label: " [Modify Settings]" },
590453
591135
  { key: isPaused ? "resume" : "pause", label: isPaused ? " [Resume Sponsorship]" : " [Pause Sponsorship]" },
@@ -590457,7 +591139,7 @@ async function showSponsorDashboard(config, projectDir2, rl, availableRows) {
590457
591139
  items,
590458
591140
  title: "Sponsor Dashboard",
590459
591141
  rl,
590460
- skipKeys: ["hdr", "sep", "info_status", "info_ep", "info_transport", "info_limits"],
591142
+ skipKeys: ["hdr", "sep", "info_status", "info_ep", "info_transport", "info_limits", "info_usage_hdr", "info_usage_daily", "info_usage_rpm", "info_usage_concurrent", "info_usage_blocked"],
590461
591143
  availableRows
590462
591144
  });
590463
591145
  if (!result.confirmed) return "close";
@@ -590522,6 +591204,7 @@ var init_sponsor_wizard = __esm({
590522
591204
  init_dist();
590523
591205
  init_tui_select();
590524
591206
  init_render();
591207
+ init_usage_bars();
590525
591208
  }
590526
591209
  });
590527
591210
 
@@ -594808,6 +595491,17 @@ function stopSponsorHeartbeat() {
594808
595491
  }
594809
595492
  _lastRegisteredSponsorPayload = null;
594810
595493
  }
595494
+ function sponsorUsageFromGateway(gateway) {
595495
+ if (!gateway) return null;
595496
+ try {
595497
+ if (typeof gateway.getSponsorUsageSnapshot === "function") {
595498
+ return gateway.getSponsorUsageSnapshot();
595499
+ }
595500
+ return gateway.stats?.sponsorUsage ?? null;
595501
+ } catch {
595502
+ return null;
595503
+ }
595504
+ }
594811
595505
  function registerCommandHelp2(items) {
594812
595506
  registerCommandHelp(items);
594813
595507
  }
@@ -598287,6 +598981,10 @@ The session corrections MUST become hard rules in the SKILL.md Rules section.`;
598287
598981
  return "handled";
598288
598982
  }
598289
598983
  case "cohere": {
598984
+ if (arg === "status" || arg === "stats") {
598985
+ await showCohereStatus(ctx3);
598986
+ return "handled";
598987
+ }
598290
598988
  await showCohereDashboard(ctx3);
598291
598989
  return "handled";
598292
598990
  }
@@ -599030,11 +599728,16 @@ sleep 1
599030
599728
  renderInfo("No active sponsorship. Run /sponsor to start.");
599031
599729
  return "handled";
599032
599730
  }
599731
+ const dashboardGw = ctx3.getExposeGateway?.();
599732
+ if (existingConfig.status === "active" && dashboardGw && "setSponsorLimits" in dashboardGw) {
599733
+ dashboardGw.setSponsorLimits(existingConfig.rateLimits);
599734
+ }
599033
599735
  const action = await showSponsorDashboard2(
599034
599736
  existingConfig,
599035
599737
  projectDir2,
599036
599738
  sponsorRl,
599037
- ctx3.availableContentRows?.()
599739
+ ctx3.availableContentRows?.(),
599740
+ sponsorUsageFromGateway(dashboardGw)
599038
599741
  );
599039
599742
  switch (action) {
599040
599743
  case "modify":
@@ -599061,6 +599764,9 @@ sleep 1
599061
599764
  existingConfig.status = "active";
599062
599765
  saveSponsorConfig2(projectDir2, existingConfig);
599063
599766
  const resumeGw = ctx3.getExposeGateway?.();
599767
+ if (resumeGw && "setSponsorLimits" in resumeGw) {
599768
+ resumeGw.setSponsorLimits(existingConfig.rateLimits);
599769
+ }
599064
599770
  if (resumeGw?.tunnelUrl) {
599065
599771
  const resumePayload = {
599066
599772
  name: existingConfig.header?.message || "Omnius Sponsor",
@@ -602969,15 +603675,65 @@ async function showHelpMenu(ctx3) {
602969
603675
  }
602970
603676
  }
602971
603677
  }
602972
- async function showCohereDashboard(ctx3) {
602973
- const isActive = ctx3.isCohere?.() ?? false;
602974
- let stats = {
603678
+ function emptyCohereStats(isActive = false) {
603679
+ return {
603680
+ status: isActive ? "active" : "inactive",
603681
+ active: isActive,
603682
+ daemonPid: 0,
603683
+ uptimeSec: 0,
603684
+ lastQueryAt: 0,
603685
+ queriesReceived: 0,
602975
603686
  queriesAnswered: 0,
603687
+ queriesErrors: 0,
602976
603688
  queriesSent: 0,
602977
- insightsShared: 0,
602978
- peersConnected: 0
603689
+ avgLatencyMs: 0,
603690
+ bytesIn: 0,
603691
+ bytesOut: 0,
603692
+ modelsUsed: {},
603693
+ peersServed: {},
603694
+ allowedModels: null
602979
603695
  };
602980
- let modelList = [];
603696
+ }
603697
+ function numberField(value2) {
603698
+ const n2 = Number(value2);
603699
+ return Number.isFinite(n2) && n2 > 0 ? Math.floor(n2) : 0;
603700
+ }
603701
+ function mapNumberRecord(value2) {
603702
+ if (!value2 || typeof value2 !== "object" || Array.isArray(value2)) return {};
603703
+ const out = {};
603704
+ for (const [key, raw] of Object.entries(value2)) {
603705
+ out[key] = numberField(raw);
603706
+ }
603707
+ return out;
603708
+ }
603709
+ function parseCohereStatsOutput(output, isActive = false) {
603710
+ try {
603711
+ const parsed = JSON.parse(output);
603712
+ const active = typeof parsed.active === "boolean" ? parsed.active : String(parsed.status ?? "").toLowerCase() === "active";
603713
+ return {
603714
+ status: active ? "active" : "inactive",
603715
+ active,
603716
+ daemonPid: numberField(parsed.daemonPid),
603717
+ uptimeSec: numberField(parsed.uptimeSec),
603718
+ lastQueryAt: numberField(parsed.lastQueryAt),
603719
+ queriesReceived: numberField(parsed.queriesReceived),
603720
+ queriesAnswered: numberField(parsed.queriesAnswered),
603721
+ queriesErrors: numberField(parsed.queriesErrors),
603722
+ queriesSent: numberField(parsed.queriesSent),
603723
+ avgLatencyMs: numberField(parsed.avgLatencyMs),
603724
+ bytesIn: numberField(parsed.bytesIn),
603725
+ bytesOut: numberField(parsed.bytesOut),
603726
+ modelsUsed: mapNumberRecord(parsed.modelsUsed),
603727
+ peersServed: mapNumberRecord(parsed.peersServed),
603728
+ allowedModels: Array.isArray(parsed.allowedModels) ? parsed.allowedModels.map(String) : null
603729
+ };
603730
+ } catch {
603731
+ return emptyCohereStats(isActive);
603732
+ }
603733
+ }
603734
+ async function fetchCohereDashboardState(ctx3) {
603735
+ const isActive = ctx3.isCohere?.() ?? false;
603736
+ const state = { stats: emptyCohereStats(isActive), modelList: [] };
602981
603737
  try {
602982
603738
  const nexus = new NexusTool(ctx3.repoRoot);
602983
603739
  try {
@@ -602989,29 +603745,52 @@ async function showCohereDashboard(ctx3) {
602989
603745
  } catch {
602990
603746
  }
602991
603747
  try {
602992
- const r2 = await nexus.execute({ action: "cohere_stats" });
602993
- if (r2.success) {
602994
- try {
602995
- const d2 = JSON.parse(r2.output);
602996
- Object.assign(stats, d2);
602997
- } catch {
602998
- }
602999
- }
603748
+ const r2 = await nexus.execute({ action: "cohere_stats", format: "json" });
603749
+ if (r2.success) state.stats = parseCohereStatsOutput(r2.output, isActive);
603000
603750
  } catch {
603001
603751
  }
603002
603752
  try {
603003
603753
  const r2 = await nexus.execute({ action: "cohere_list_models" });
603004
603754
  if (r2.success) {
603005
603755
  try {
603006
- modelList = JSON.parse(r2.output).models || [];
603756
+ state.modelList = JSON.parse(r2.output).models || [];
603007
603757
  } catch {
603008
- modelList = r2.output.split("\n").filter((l2) => l2.trim());
603758
+ state.modelList = r2.output.split("\n").map((l2) => l2.trim()).filter(Boolean);
603009
603759
  }
603010
603760
  }
603011
603761
  } catch {
603012
603762
  }
603013
603763
  } catch {
603014
603764
  }
603765
+ return state;
603766
+ }
603767
+ function cohereStatusLines(stats, modelList) {
603768
+ const modelEntries = Object.entries(stats.modelsUsed).sort((a2, b) => b[1] - a2[1]);
603769
+ const peerEntries = Object.entries(stats.peersServed).sort((a2, b) => b[1] - a2[1]);
603770
+ const uptime2 = stats.uptimeSec < 60 ? `${stats.uptimeSec}s` : stats.uptimeSec < 3600 ? `${Math.floor(stats.uptimeSec / 60)}m ${stats.uptimeSec % 60}s` : `${Math.floor(stats.uptimeSec / 3600)}h ${Math.floor(stats.uptimeSec % 3600 / 60)}m`;
603771
+ return [
603772
+ c3.bold("COHERE Status"),
603773
+ `Status: ${stats.active ? c3.green("ACTIVE") : c3.dim("inactive")}`,
603774
+ `Daemon: ${stats.daemonPid ? `pid ${stats.daemonPid}` : "not connected"} · uptime ${uptime2}`,
603775
+ `Last query: ${stats.lastQueryAt ? new Date(stats.lastQueryAt).toISOString() : "never"}`,
603776
+ "",
603777
+ formatUsageBar({ label: "Answered", used: stats.queriesAnswered, total: Math.max(1, stats.queriesReceived), width: 18 }),
603778
+ formatUsageBar({ label: "Errors", used: stats.queriesErrors, total: Math.max(1, stats.queriesReceived), width: 18 }),
603779
+ `Sent out: ${stats.queriesSent} · avg latency ${stats.avgLatencyMs}ms`,
603780
+ `Data: in ${formatFileSize(stats.bytesIn)} · out ${formatFileSize(stats.bytesOut)}`,
603781
+ "",
603782
+ `Models exposed: ${modelList.length}`,
603783
+ `Allowlist: ${stats.allowedModels ? stats.allowedModels.join(", ") || "(empty)" : "all downloaded models"}`,
603784
+ `Top models: ${modelEntries.length ? modelEntries.slice(0, 5).map(([m2, n2]) => `${m2} (${n2})`).join(", ") : "none yet"}`,
603785
+ `Peers served: ${peerEntries.length ? peerEntries.slice(0, 5).map(([p2, n2]) => `${p2.slice(0, 20)} (${n2})`).join(", ") : "none yet"}`
603786
+ ];
603787
+ }
603788
+ async function showCohereStatus(ctx3) {
603789
+ const { stats, modelList } = await fetchCohereDashboardState(ctx3);
603790
+ safeLog(cohereStatusLines(stats, modelList).join("\n"));
603791
+ }
603792
+ async function showCohereDashboard(ctx3) {
603793
+ let { stats, modelList } = await fetchCohereDashboardState(ctx3);
603015
603794
  while (true) {
603016
603795
  const currentActive = ctx3.isCohere?.() ?? false;
603017
603796
  const toggleLabel = currentActive ? "Disable COHERE" : "Enable COHERE";
@@ -603028,7 +603807,7 @@ async function showCohereDashboard(ctx3) {
603028
603807
  {
603029
603808
  key: "stats",
603030
603809
  label: "Network Stats",
603031
- detail: `${stats.queriesAnswered} answered · ${stats.queriesSent} sent · ${stats.insightsShared} shared`
603810
+ detail: `${stats.queriesAnswered} answered · ${stats.queriesSent} sent · ${stats.queriesErrors} errors`
603032
603811
  },
603033
603812
  {
603034
603813
  key: "identity",
@@ -603081,11 +603860,11 @@ async function showCohereDashboard(ctx3) {
603081
603860
  },
603082
603861
  {
603083
603862
  key: "insights",
603084
- label: `Insights shared: ${c3.bold(String(stats.insightsShared || 0))}`
603863
+ label: `Avg latency: ${c3.bold(String(stats.avgLatencyMs || 0))}ms`
603085
603864
  },
603086
603865
  {
603087
603866
  key: "peers",
603088
- label: `Peers connected: ${c3.bold(String(stats.peersConnected || 0))}`
603867
+ label: `Peers served: ${c3.bold(String(Object.keys(stats.peersServed || {}).length))}`
603089
603868
  },
603090
603869
  { key: "hdr2", label: selectColors.dim("─── Actions ───") },
603091
603870
  {
@@ -603103,17 +603882,9 @@ async function showCohereDashboard(ctx3) {
603103
603882
  availableRows: ctx3.availableContentRows?.()
603104
603883
  });
603105
603884
  if (statResult.key === "refresh") {
603106
- try {
603107
- const nexus = new NexusTool(ctx3.repoRoot);
603108
- const r2 = await nexus.execute({ action: "cohere_stats" });
603109
- if (r2.success) {
603110
- try {
603111
- Object.assign(stats, JSON.parse(r2.output));
603112
- } catch {
603113
- }
603114
- }
603115
- } catch {
603116
- }
603885
+ const refreshed = await fetchCohereDashboardState(ctx3);
603886
+ stats = refreshed.stats;
603887
+ modelList = refreshed.modelList;
603117
603888
  }
603118
603889
  continue;
603119
603890
  }
@@ -607479,6 +608250,7 @@ var init_commands = __esm({
607479
608250
  init_listen();
607480
608251
  init_dist();
607481
608252
  init_tui_select();
608253
+ init_usage_bars();
607482
608254
  init_overlay_lock();
607483
608255
  init_drop_panel();
607484
608256
  init_memory_menu();
@@ -617189,6 +617961,13 @@ function senderKey2(entry) {
617189
617961
  if (entry.role === "assistant") return entry.username || entry.speaker || "assistant";
617190
617962
  return String(entry.fromUserId || entry.username || entry.firstName || senderLabel(entry));
617191
617963
  }
617964
+ function speakerRole(entry) {
617965
+ if (entry.role === "assistant") return "agent_self";
617966
+ return entry.isBot ? "participant_bot" : "participant_human";
617967
+ }
617968
+ function identityBoundary(entry) {
617969
+ return speakerRole(entry) === "agent_self" ? "this message is authored by the Telegram agent itself" : "this message is authored by another Telegram participant; first-person claims belong to that participant, not the agent";
617970
+ }
617192
617971
  function scopeFor(entry, options2) {
617193
617972
  const chatType = entry.chatType || options2.chatType || "unknown";
617194
617973
  return {
@@ -617202,7 +617981,7 @@ function senderFor(entry) {
617202
617981
  id: senderKey2(entry),
617203
617982
  username: entry.username,
617204
617983
  displayName: senderLabel(entry),
617205
- isBot: entry.role === "assistant"
617984
+ isBot: entry.role === "assistant" || entry.isBot === true
617206
617985
  };
617207
617986
  }
617208
617987
  function messageIdFor(entry, sessionKey) {
@@ -617241,7 +618020,11 @@ function contentFor(entry, sessionKey, options2) {
617241
618020
  `message_id: ${messageIdFor(entry, sessionKey)}`,
617242
618021
  entry.messageThreadId != null ? `thread_id: ${entry.messageThreadId}` : "",
617243
618022
  entry.replyToMessageId != null ? `reply_to_message_id: ${entry.replyToMessageId}` : "",
618023
+ `actor_key: ${senderKey2(entry)}`,
617244
618024
  `speaker: ${senderLabel(entry)}`,
618025
+ `speaker_role: ${speakerRole(entry)}`,
618026
+ `identity_boundary: ${identityBoundary(entry)}`,
618027
+ entry.replyContext?.sender ? `reply_sender: ${entry.replyContext.sender.username || entry.replyContext.sender.firstName || entry.replyContext.sender.id || "unknown"} [${entry.replyContext.sender.isBot ? "participant_bot" : "participant_human"}]` : "",
617245
618028
  entry.mode ? `mode: ${entry.mode}` : "",
617246
618029
  entry.mediaSummary ? `media: ${compact(entry.mediaSummary, 260)}` : "",
617247
618030
  "",
@@ -617265,7 +618048,11 @@ function metadataFor(entry, sessionKey, options2) {
617265
618048
  username: entry.username,
617266
618049
  firstName: entry.firstName,
617267
618050
  fromUserId: entry.fromUserId,
618051
+ isBot: entry.isBot,
617268
618052
  speaker: senderLabel(entry),
618053
+ actorKey: senderKey2(entry),
618054
+ speakerRole: speakerRole(entry),
618055
+ identityBoundary: identityBoundary(entry),
617269
618056
  mediaSummary: entry.mediaSummary
617270
618057
  }
617271
618058
  };
@@ -617516,12 +618303,14 @@ function episodeLine(episode) {
617516
618303
  const meta = episode.metadata;
617517
618304
  const telegram = meta?.telegram;
617518
618305
  const speaker = clean4(telegram?.speaker || telegram?.username || "unknown", 80);
618306
+ const role = clean4(telegram?.speakerRole || "participant_human", 40);
617519
618307
  const messageId = telegram?.messageId == null ? "unknown" : String(telegram.messageId);
617520
618308
  const replyTo = telegram?.replyToMessageId == null ? "" : ` reply_to=${telegram.replyToMessageId}`;
617521
618309
  return [
617522
618310
  `episode_id=${episode.id}`,
617523
618311
  `message_id=${messageId}${replyTo}`,
617524
618312
  `speaker=${speaker}`,
618313
+ `speaker_role=${role}`,
617525
618314
  `modality=${episode.modality}`,
617526
618315
  `content=${clean4(episode.content, 700)}`
617527
618316
  ].join(" | ");
@@ -617542,6 +618331,9 @@ function buildTelegramReflectionExtractionPrompt(options2) {
617542
618331
  "- Use only the scoped Telegram corpus, graph nodes, graph edges, and source anchors below.",
617543
618332
  "- Preserve message_id and episode_id anchors on every item when possible.",
617544
618333
  "- Do not infer identity from a face, voice, or name unless the corpus explicitly says it.",
618334
+ "- speaker_role=agent_self is the Telegram agent; speaker_role=participant_human or participant_bot is another chat participant.",
618335
+ "- Do not assign participant first-person claims, preferences, names, or self-descriptions to the agent/self unless the source episode has speaker_role=agent_self.",
618336
+ "- Replies between non-agent participants are social context and relationship evidence, not direct agent self-reflection.",
617545
618337
  "- Private DM followups may be proposed but must not be framed as already sent.",
617546
618338
  "- same_group followups must be concise, low-intrusion, and anchored to a source message id.",
617547
618339
  "- If a category has no evidence, return an empty array for that category.",
@@ -617959,6 +618751,8 @@ function formatTelegramSocialStateContext(state, input) {
617959
618751
  const replyKey = input.replySender ? telegramSocialActorKey(input.replySender) : void 0;
617960
618752
  const thread = state.threads[telegramSocialThreadKey(input)];
617961
618753
  const participant = state.participants[senderKey3];
618754
+ const senderIdentity = selfKey && senderKey3 === selfKey ? "agent_self" : "participant";
618755
+ const replyIdentity = replyKey ? selfKey && replyKey === selfKey ? "agent_self" : "participant" : "none";
617962
618756
  const relevantKeys = new Set([senderKey3, selfKey, replyKey].filter(Boolean));
617963
618757
  const edges = state.relationships.filter((edge) => relevantKeys.has(edge.fromKey) || relevantKeys.has(edge.toKey)).sort((a2, b) => b.lastSeenAt - a2.lastSeenAt).slice(0, limit);
617964
618758
  const outcomes = state.outcomes.filter((outcome) => outcome.senderKey === senderKey3 || outcome.chatId === String(input.chatId)).sort((a2, b) => b.ts - a2.ts).slice(0, limit);
@@ -617967,6 +618761,8 @@ function formatTelegramSocialStateContext(state, input) {
617967
618761
  const preferences = preferenceLines(state.preferences[senderKey3]);
617968
618762
  return [
617969
618763
  "### Telegram Structured Social State",
618764
+ selfKey ? `Agent self node: ${selfKey}` : "Agent self node: unknown",
618765
+ `Identity boundary: the agent is the self node only. Current actor ${senderKey3} is ${senderIdentity}; reply target ${replyKey ?? "none"} is ${replyIdentity}. Participant first-person claims belong to their actor node, not the agent, unless that actor is the self node.`,
617970
618766
  `Current actor node: ${senderKey3} [${participant?.actorKind || telegramSocialActorKind(input)}] messages=${participant?.messageCount ?? 0}${participant?.lastText ? ` last=${jsonLine(participant.lastText, 140)}` : ""}`,
617971
618767
  thread ? `Active channel/thread: ${thread.key}; messages=${thread.messageCount}; participants=${thread.participantKeys.slice(-8).join(", ") || "none"}; last_outcomes=${thread.lastOutcomeIds.slice(-5).join(", ") || "none"}` : "",
617972
618768
  preferences.length ? `Relevant preference vector for ${senderKey3}:
@@ -624624,6 +625420,7 @@ ${lines.join("\n")}`);
624624
625420
  "Classify the live scenario by inference from the full context. Do not use a fixed taxonomy, keyword list, or preset scenario enum.",
624625
625421
  "Create a situation-specific scenario_id and scenario_label, then summarize the active state loop that should govern the later attention decision.",
624626
625422
  "Use the persona docs below as binding behavioral guidance.",
625423
+ "Maintain the Telegram identity boundary: the agent is only the bot/self actor. Other users and peer bots replying to each other are participants; their first-person claims are not the agent's identity or self-reflection.",
624627
625424
  "Return JSON only. No markdown. No <think> tags.",
624628
625425
  "",
624629
625426
  'Schema: {"silent_disposition":"what happens silently with this message","mental_note":"concise observation of the turn","memory_note":"what scoped memory should retain or connect","relationship_note":"relationship/thread implication","procedure_note":"active tree/branch/abort implication","voice_note":"final voice implication if a reply happens","scenario_note":"identified scenario and transition state","scenario_id":"dynamic inferred scenario id","scenario_label":"human readable dynamic scenario label","scenario_confidence":0.0-1.0,"scenario_objective":"current scenario objective","scenario_state_loop":"state loop to maintain until transition"}',