omnius 1.0.146 → 1.0.148
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +883 -86
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -13138,6 +13138,27 @@ async function handleCmd(cmd) {
|
|
|
13138
13138
|
var _csAvgLatency = _cohereStats.queriesAnswered > 0 ? Math.round(_cohereStats.totalLatencyMs / _cohereStats.queriesAnswered) : 0;
|
|
13139
13139
|
var _csModels = Object.entries(_cohereStats.modelsUsed).sort(function(a, b) { return b[1] - a[1]; });
|
|
13140
13140
|
var _csPeers = Object.entries(_cohereStats.peersServed).sort(function(a, b) { return b[1] - a[1]; });
|
|
13141
|
+
var _csSnapshot = {
|
|
13142
|
+
status: cohereActive ? 'active' : 'inactive',
|
|
13143
|
+
active: cohereActive,
|
|
13144
|
+
daemonPid: process.pid,
|
|
13145
|
+
uptimeSec: _csUptime,
|
|
13146
|
+
lastQueryAt: _cohereStats.lastQueryAt || 0,
|
|
13147
|
+
queriesReceived: _cohereStats.queriesReceived,
|
|
13148
|
+
queriesAnswered: _cohereStats.queriesAnswered,
|
|
13149
|
+
queriesErrors: _cohereStats.queriesErrors,
|
|
13150
|
+
queriesSent: _cohereStats.queriesSent,
|
|
13151
|
+
avgLatencyMs: _csAvgLatency,
|
|
13152
|
+
bytesIn: _cohereStats.bytesIn,
|
|
13153
|
+
bytesOut: _cohereStats.bytesOut,
|
|
13154
|
+
modelsUsed: _cohereStats.modelsUsed,
|
|
13155
|
+
peersServed: _cohereStats.peersServed,
|
|
13156
|
+
allowedModels: _cohereAllowedModels ? [..._cohereAllowedModels] : null
|
|
13157
|
+
};
|
|
13158
|
+
if (args.format === 'json' || args.json === true || args.json === 'true' || args.json === '1') {
|
|
13159
|
+
writeResp(id, { ok: true, output: JSON.stringify(_csSnapshot) });
|
|
13160
|
+
break;
|
|
13161
|
+
}
|
|
13141
13162
|
var _csLines = [
|
|
13142
13163
|
'═══ COHERE Network Stats ═══',
|
|
13143
13164
|
'',
|
|
@@ -16549,6 +16570,14 @@ process.on('SIGINT', () => process.emit('SIGTERM'));
|
|
|
16549
16570
|
max_tokens: {
|
|
16550
16571
|
type: "string",
|
|
16551
16572
|
description: "For remote_infer: maximum tokens to generate (e.g. '4096'). Default: 4096"
|
|
16573
|
+
},
|
|
16574
|
+
format: {
|
|
16575
|
+
type: "string",
|
|
16576
|
+
description: "For cohere_stats: set to 'json' for structured stats"
|
|
16577
|
+
},
|
|
16578
|
+
json: {
|
|
16579
|
+
type: "string",
|
|
16580
|
+
description: "For cohere_stats: set to '1' for structured stats"
|
|
16552
16581
|
}
|
|
16553
16582
|
},
|
|
16554
16583
|
required: ["action"],
|
|
@@ -16686,7 +16715,7 @@ process.on('SIGINT', () => process.emit('SIGTERM'));
|
|
|
16686
16715
|
result = await this.sendDaemonCmd("cohere_disable", {});
|
|
16687
16716
|
break;
|
|
16688
16717
|
case "cohere_stats":
|
|
16689
|
-
result = await this.sendDaemonCmd("cohere_stats", {});
|
|
16718
|
+
result = await this.sendDaemonCmd("cohere_stats", { format: String(args.format ?? ""), json: String(args.json ?? "") });
|
|
16690
16719
|
break;
|
|
16691
16720
|
case "cohere_allow_model":
|
|
16692
16721
|
result = await this.sendDaemonCmd("cohere_allow_model", { model: String(args.model ?? "") });
|
|
@@ -529012,38 +529041,136 @@ function inferHomeFromProcUid(pid) {
|
|
|
529012
529041
|
}
|
|
529013
529042
|
return null;
|
|
529014
529043
|
}
|
|
529044
|
+
/**
 * Heuristic check for another omnius process that already runs its own
 * `ollama serve` child on this machine.
 *
 * Works by scanning numeric entries under `/proc`: first it collects the PIDs
 * of processes whose command line mentions "node" and an omnius path (this
 * process and its parent are never counted), then it looks for a process whose
 * command line mentions "ollama" with a "serve" argument and whose `PPid` is
 * one of those collected PIDs.
 *
 * Any read failure for an individual process is ignored (processes may exit
 * mid-scan); failure to list `/proc` at all yields `false`.
 *
 * @returns {boolean} true when such a peer-owned `ollama serve` is found.
 */
function detectPeerOmniusOllamaPool() {
  if (!isDirectory("/proc")) {
    return false;
  }
  const ownPids = [String(process.pid), String(process.ppid ?? "")];
  let pidNames;
  try {
    pidNames = [];
    for (const dirent of readdirSync21("/proc", { withFileTypes: true })) {
      if (dirent.isDirectory() && /^\d+$/.test(dirent.name)) {
        pidNames.push(dirent.name);
      }
    }
  } catch {
    return false;
  }
  const readCmdline = (pid) => readFileSync50(`/proc/${pid}/cmdline`, "utf8");
  const omniusPathPattern = /[/\\]omnius[/\\]dist[/\\]index\.js|[/\\]omnius[/\\]/i;

  // Pass 1: PIDs of other node processes that look like omnius.
  const peerNodePids = new Set();
  for (const pid of pidNames) {
    if (ownPids.includes(pid)) {
      continue;
    }
    try {
      const cmdline = readCmdline(pid);
      if (cmdline.includes("node") && omniusPathPattern.test(cmdline)) {
        peerNodePids.add(pid);
      }
    } catch {
      // Process vanished or is unreadable; skip it.
    }
  }
  if (peerNodePids.size === 0) {
    return false;
  }

  // Pass 2: an `ollama ... serve` process parented by one of those peers.
  for (const pid of pidNames) {
    try {
      const cmdline = readCmdline(pid);
      const isOllamaServe = cmdline.includes("ollama") && cmdline.split("\0").includes("serve");
      if (!isOllamaServe) {
        continue;
      }
      const status = readFileSync50(`/proc/${pid}/status`, "utf8");
      const parentPid = status.match(/^PPid:\s+(\d+)/m)?.[1];
      if (parentPid && peerNodePids.has(parentPid)) {
        return true;
      }
    } catch {
      // Process vanished or is unreadable; skip it.
    }
  }
  return false;
}
|
|
529015
529087
|
/**
 * Query `nvidia-smi` for the GPUs on this machine.
 *
 * The first query asks for `compute_cap` as well; if that invocation fails for
 * any reason, a second query without `compute_cap` is attempted. Only when the
 * fallback also fails is `_nvidiaSmiAvailable` set to false, after which later
 * calls return an empty list without spawning anything.
 *
 * @returns {Promise<Array<object>>} GPU records as produced by
 *   `parseGpuQueryOutput`; empty when nvidia-smi is unavailable.
 */
async function detectGpus() {
  if (_nvidiaSmiAvailable === false) {
    return [];
  }
  const runQuery = (command, onDone) => exec2(command, { encoding: "utf8", timeout: 3e3 }, onDone);
  return new Promise((resolve) => {
    // Shared success path: remember that nvidia-smi works, then parse.
    const succeed = (output, hasComputeCap) => {
      _nvidiaSmiAvailable = true;
      resolve(parseGpuQueryOutput(output, hasComputeCap));
    };
    const queryFields = "index,uuid,name,memory.total,memory.free,utilization.gpu,compute_cap";
    runQuery(`nvidia-smi --query-gpu=${queryFields} --format=csv,noheader,nounits 2>/dev/null`, (err, stdout) => {
      if (!err) {
        succeed(stdout, true);
        return;
      }
      // Retry without compute_cap before giving up on nvidia-smi.
      runQuery("nvidia-smi --query-gpu=index,uuid,name,memory.total,memory.free,utilization.gpu --format=csv,noheader,nounits 2>/dev/null", (err2, stdout2) => {
        if (err2) {
          _nvidiaSmiAvailable = false;
          resolve([]);
          return;
        }
        succeed(stdout2, false);
      });
    });
  });
}
|
|
529118
|
+
/**
 * Parse CSV output of `nvidia-smi --query-gpu=... --format=csv,noheader,nounits`.
 *
 * Expected column order: index, uuid, name, memory.total, memory.free,
 * utilization.gpu and, only when `hasComputeCap` is true, compute_cap.
 * Lines with too few columns or without a usable numeric index are skipped.
 * Non-numeric memory/utilization values fall back to 0.
 *
 * @param {string} stdout Raw command output, one GPU per line.
 * @param {boolean} hasComputeCap Whether a 7th `compute_cap` column was queried.
 * @returns {Array<{index: number, uuid: string, name: string, vramTotalMB: number,
 *   vramFreeMB: number, utilization: number, computeCapability?: number}>}
 *   `computeCapability` is present only when the column held a finite number.
 */
function parseGpuQueryOutput(stdout, hasComputeCap) {
  const gpus = [];
  const minFields = hasComputeCap ? 7 : 6;
  for (const line of stdout.split("\n")) {
    const parts = line.split(",").map((field) => field.trim());
    if (parts.length < minFields) {
      continue;
    }
    // Number("") is 0, so a blank index field must be rejected explicitly or
    // the line would be reported as GPU 0.
    if (parts[0] === "") {
      continue;
    }
    const idx = Number(parts[0]);
    if (!Number.isFinite(idx)) {
      continue;
    }
    const info = {
      index: idx,
      uuid: parts[1] ?? "",
      name: parts[2] ?? "",
      vramTotalMB: Number(parts[3]) || 0,
      vramFreeMB: Number(parts[4]) || 0,
      utilization: Number(parts[5]) || 0
    };
    // A blank compute_cap means "unknown", not capability 0; leaving the
    // property unset keeps capability filters from rejecting the GPU.
    if (hasComputeCap && parts[6] !== "") {
      const cap = Number(parts[6]);
      if (Number.isFinite(cap)) {
        info.computeCapability = cap;
      }
    }
    gpus.push(info);
  }
  return gpus;
}
|
|
529145
|
+
/**
 * Minimum total VRAM a GPU must report to be considered usable.
 * `OMNIUS_OLLAMA_MIN_GPU_MB` overrides the default when it parses to a finite
 * number greater than zero; otherwise `DEFAULT_MIN_GPU_VRAM_MB` applies.
 *
 * @returns {number} Threshold in the same unit as `vramTotalMB`.
 */
function resolveMinGpuVramMB() {
  const override = Number(process.env["OMNIUS_OLLAMA_MIN_GPU_MB"]);
  if (!Number.isFinite(override) || override <= 0) {
    return DEFAULT_MIN_GPU_VRAM_MB;
  }
  return override;
}
|
|
529149
|
+
/**
 * Minimum CUDA compute capability a GPU must report to be considered usable.
 * `OMNIUS_OLLAMA_MIN_COMPUTE_CAP` overrides the default when it parses to a
 * finite number greater than zero; otherwise `DEFAULT_MIN_COMPUTE_CAPABILITY`
 * applies.
 *
 * @returns {number} The compute-capability threshold.
 */
function resolveMinComputeCapability() {
  const override = Number(process.env["OMNIUS_OLLAMA_MIN_COMPUTE_CAP"]);
  if (!Number.isFinite(override) || override <= 0) {
    return DEFAULT_MIN_COMPUTE_CAPABILITY;
  }
  return override;
}
|
|
529153
|
+
/**
 * Decide whether a GPU record passes the VRAM and compute-capability floors.
 *
 * A GPU whose `computeCapability` is undefined is judged on VRAM alone.
 *
 * @param {{vramTotalMB: number, computeCapability?: number}} gpu GPU record.
 * @param {{minVramMB?: number, minComputeCap?: number}} [thresholds] Explicit
 *   floors; any missing one is resolved from the environment/defaults.
 * @returns {boolean} true when the GPU meets both floors.
 */
function isCapableForLLM(gpu, thresholds = {}) {
  const vramFloorMB = thresholds.minVramMB ?? resolveMinGpuVramMB();
  const computeCapFloor = thresholds.minComputeCap ?? resolveMinComputeCapability();
  // Written as negated "<" so non-numeric values compare the same way as before.
  const hasEnoughVram = !(gpu.vramTotalMB < vramFloorMB);
  const computeCapTooLow = gpu.computeCapability !== void 0 && gpu.computeCapability < computeCapFloor;
  return hasEnoughVram && !computeCapTooLow;
}
|
|
529162
|
+
/**
 * Keep only the GPUs that `isCapableForLLM` accepts.
 *
 * @param {Array<object>} gpus GPU records to screen.
 * @param {object} [thresholds] Forwarded unchanged to `isCapableForLLM`.
 * @returns {Array<object>} New array holding the accepted records, in order.
 */
function filterCapableGpus(gpus, thresholds) {
  const accept = (gpu) => isCapableForLLM(gpu, thresholds);
  return gpus.filter(accept);
}
|
|
529165
|
+
/**
 * Map a free-VRAM figure to a recommended per-instance parallelism.
 *
 * Ladder (thresholds are multiples of 1024): >= 60 → 8, >= 40 → 4,
 * >= 24 → 2, anything else (including NaN) → 1.
 *
 * @param {number} minFreeMB Free VRAM, in the same unit as `vramFreeMB`.
 * @returns {number} Recommended parallel slot count.
 */
function recommendMaxParallelFromVram(minFreeMB) {
  const ladder = [
    [60, 8],
    [40, 4],
    [24, 2]
  ];
  for (const [floor, slots] of ladder) {
    if (minFreeMB >= floor * 1024) {
      return slots;
    }
  }
  return 1;
}
|
|
529047
529174
|
async function getHardwareSnapshot() {
|
|
529048
529175
|
const { totalmem: totalmem8, freemem: freemem7, cpus: cpus5 } = await import("node:os");
|
|
529049
529176
|
const gpus = await detectGpus();
|
|
@@ -529127,10 +529254,15 @@ async function findFreePort(start2) {
|
|
|
529127
529254
|
}
|
|
529128
529255
|
function resolveDefaultPoolConfig() {
|
|
529129
529256
|
const baseInstanceUrl = process.env["OMNIUS_OLLAMA_BASE_URL"] || process.env["OLLAMA_HOST"]?.replace(/^([^:/]+:[0-9]+)$/, "http://$1") || "http://127.0.0.1:11434";
|
|
529130
|
-
const
|
|
529131
|
-
const
|
|
529257
|
+
const maxParallelExplicit = process.env["OMNIUS_OLLAMA_MAX_PARALLEL"] !== void 0;
|
|
529258
|
+
const maxParallelPerInstance = maxParallelExplicit ? Number(process.env["OMNIUS_OLLAMA_MAX_PARALLEL"]) || 1 : 1;
|
|
529259
|
+
const autoTuneMaxParallel = !maxParallelExplicit;
|
|
529260
|
+
const gpuPlacementExplicit = process.env["OMNIUS_OLLAMA_GPU_PLACEMENT"] !== void 0;
|
|
529261
|
+
const maxInstancesExplicit = process.env["OMNIUS_OLLAMA_MAX_INSTANCES"] !== void 0;
|
|
529262
|
+
const peerPoolActive = !gpuPlacementExplicit && !maxInstancesExplicit && detectPeerOmniusOllamaPool();
|
|
529263
|
+
const maxSpawnedInstances = maxInstancesExplicit ? Number(process.env["OMNIUS_OLLAMA_MAX_INSTANCES"]) || 0 : peerPoolActive ? 1 : 0;
|
|
529132
529264
|
const targetGpuInstances = Number(process.env["OMNIUS_OLLAMA_TARGET_GPU_INSTANCES"]) || 0;
|
|
529133
|
-
const gpuPlacementRaw = (process.env["OMNIUS_OLLAMA_GPU_PLACEMENT"] ?? "auto").toLowerCase();
|
|
529265
|
+
const gpuPlacementRaw = (process.env["OMNIUS_OLLAMA_GPU_PLACEMENT"] ?? (peerPoolActive ? "elastic" : "auto")).toLowerCase();
|
|
529134
529266
|
const gpuPlacement = gpuPlacementRaw === "dedicated" || gpuPlacementRaw === "elastic" || gpuPlacementRaw === "auto" ? gpuPlacementRaw : "auto";
|
|
529135
529267
|
const idleMs = Number(process.env["OMNIUS_OLLAMA_IDLE_MS"]) || 3 * 60 * 60 * 1e3;
|
|
529136
529268
|
const reaperIntervalMs = Number(process.env["OMNIUS_OLLAMA_REAPER_MS"]) || 3e4;
|
|
@@ -529155,7 +529287,8 @@ function resolveDefaultPoolConfig() {
|
|
|
529155
529287
|
ollamaBinary,
|
|
529156
529288
|
spawnReadyTimeoutSec,
|
|
529157
529289
|
networkRxBudgetBytesPerSec,
|
|
529158
|
-
networkTxBudgetBytesPerSec
|
|
529290
|
+
networkTxBudgetBytesPerSec,
|
|
529291
|
+
autoTuneMaxParallel
|
|
529159
529292
|
};
|
|
529160
529293
|
}
|
|
529161
529294
|
function parseNullableNumber(value2) {
|
|
@@ -529223,11 +529356,13 @@ function setOllamaPool(pool3) {
|
|
|
529223
529356
|
_poolByBaseUrl.set(pool3.statusConfig().baseInstanceUrl, pool3);
|
|
529224
529357
|
}
|
|
529225
529358
|
}
|
|
529226
|
-
var _nvidiaSmiAvailable, _lastNetworkSnapshot, OllamaInstance, realInstanceSpawner, _gpuCursor, OllamaPool, _poolSingleton, _poolByBaseUrl;
|
|
529359
|
+
var _nvidiaSmiAvailable, DEFAULT_MIN_GPU_VRAM_MB, DEFAULT_MIN_COMPUTE_CAPABILITY, _lastNetworkSnapshot, OllamaInstance, realInstanceSpawner, _gpuCursor, OllamaPool, _poolSingleton, _poolByBaseUrl;
|
|
529227
529360
|
var init_ollama_pool = __esm({
|
|
529228
529361
|
"packages/orchestrator/dist/ollama-pool.js"() {
|
|
529229
529362
|
"use strict";
|
|
529230
529363
|
_nvidiaSmiAvailable = null;
|
|
529364
|
+
DEFAULT_MIN_GPU_VRAM_MB = 16 * 1024;
|
|
529365
|
+
DEFAULT_MIN_COMPUTE_CAPABILITY = 7;
|
|
529231
529366
|
_lastNetworkSnapshot = null;
|
|
529232
529367
|
OllamaInstance = class {
|
|
529233
529368
|
state;
|
|
@@ -529281,6 +529416,7 @@ var init_ollama_pool = __esm({
|
|
|
529281
529416
|
env2["OLLAMA_MODELS"] = config.sharedModelStore;
|
|
529282
529417
|
}
|
|
529283
529418
|
env2["OLLAMA_NUM_PARALLEL"] = String(config.maxParallelPerInstance);
|
|
529419
|
+
env2["OLLAMA_KEEP_ALIVE"] = process.env["OMNIUS_OLLAMA_SPAWN_KEEP_ALIVE"] ?? "-1";
|
|
529284
529420
|
if (gpuUuid) {
|
|
529285
529421
|
env2["CUDA_VISIBLE_DEVICES"] = gpuUuid;
|
|
529286
529422
|
env2["GPU_DEVICE_ORDINAL"] = gpuIndex === null ? "" : String(gpuIndex);
|
|
@@ -529334,6 +529470,20 @@ var init_ollama_pool = __esm({
|
|
|
529334
529470
|
dedicatedGpuPoolActive = false;
|
|
529335
529471
|
activePlacementMode = "constrained";
|
|
529336
529472
|
gpuCache = null;
|
|
529473
|
+
/**
|
|
529474
|
+
* Set once after the first capability-filtered GPU detection. Prevents the
|
|
529475
|
+
* auto-tune from oscillating maxParallelPerInstance as free VRAM fluctuates
|
|
529476
|
+
* during normal inference.
|
|
529477
|
+
*/
|
|
529478
|
+
_autoTuned = false;
|
|
529479
|
+
/** UUIDs we've already emitted gpu-excluded for. Prevents log spam. */
|
|
529480
|
+
_excludedGpusReported = /* @__PURE__ */ new Set();
|
|
529481
|
+
/**
|
|
529482
|
+
* Cached model footprint in MiB (model name → estimated VRAM required).
|
|
529483
|
+
* Populated lazily via /api/show on the base instance. null sentinel means
|
|
529484
|
+
* "we tried but failed" so we don't re-probe in a tight loop.
|
|
529485
|
+
*/
|
|
529486
|
+
_modelVramEstimateMB = /* @__PURE__ */ new Map();
|
|
529337
529487
|
slotWaiters = [];
|
|
529338
529488
|
/**
|
|
529339
529489
|
* Agent → preferred instance id. Set whenever an acquire resolves an
|
|
@@ -529533,7 +529683,18 @@ var init_ollama_pool = __esm({
|
|
|
529533
529683
|
const freedPick = this.pickInstance({ model });
|
|
529534
529684
|
if (freedPick)
|
|
529535
529685
|
return freedPick;
|
|
529536
|
-
const
|
|
529686
|
+
const vramNeededMB = await this.estimateModelVramMB(model);
|
|
529687
|
+
const capable = this.gpusWithCapacityForModel(gpus, vramNeededMB);
|
|
529688
|
+
if (capable.length === 0 && vramNeededMB !== null) {
|
|
529689
|
+
this.emit("spawn-skipped", {
|
|
529690
|
+
reason: "insufficient-vram",
|
|
529691
|
+
model,
|
|
529692
|
+
vramNeededMB,
|
|
529693
|
+
gpuFreeMBs: gpus.map((g) => g.vramFreeMB)
|
|
529694
|
+
});
|
|
529695
|
+
return null;
|
|
529696
|
+
}
|
|
529697
|
+
const gpu = this.pickGpuForSpawn(capable.length > 0 ? capable : gpus);
|
|
529537
529698
|
return this.spawnInstance(model, gpu);
|
|
529538
529699
|
});
|
|
529539
529700
|
}
|
|
@@ -529544,8 +529705,20 @@ var init_ollama_pool = __esm({
|
|
|
529544
529705
|
if (!this.canSpawnWithSharedModelStore(model))
|
|
529545
529706
|
return;
|
|
529546
529707
|
const target = this.dedicatedTargetCount(gpus);
|
|
529708
|
+
const vramNeededMB = await this.estimateModelVramMB(model);
|
|
529547
529709
|
while (this.instances.filter((i2) => i2.state.poolOwned).length < target) {
|
|
529548
|
-
const
|
|
529710
|
+
const candidates = this.gpusWithCapacityForModel(gpus, vramNeededMB);
|
|
529711
|
+
const pool3 = candidates.length > 0 ? candidates : vramNeededMB === null ? gpus : [];
|
|
529712
|
+
if (pool3.length === 0) {
|
|
529713
|
+
this.emit("spawn-skipped", {
|
|
529714
|
+
reason: "insufficient-vram",
|
|
529715
|
+
model,
|
|
529716
|
+
vramNeededMB,
|
|
529717
|
+
gpuFreeMBs: gpus.map((g) => g.vramFreeMB)
|
|
529718
|
+
});
|
|
529719
|
+
break;
|
|
529720
|
+
}
|
|
529721
|
+
const gpu = this.pickGpuForSpawn(pool3);
|
|
529549
529722
|
if (!gpu)
|
|
529550
529723
|
break;
|
|
529551
529724
|
const inst = await this.spawnInstance(model, gpu);
|
|
@@ -529603,9 +529776,59 @@ var init_ollama_pool = __esm({
|
|
|
529603
529776
|
if (this.gpuCache && now - this.gpuCache.takenAtMs <= maxAgeMs) {
|
|
529604
529777
|
return this.gpuCache.gpus;
|
|
529605
529778
|
}
|
|
529606
|
-
const
|
|
529607
|
-
|
|
529608
|
-
|
|
529779
|
+
const rawGpus = await this.gpuDetector();
|
|
529780
|
+
const filtered = filterCapableGpus(rawGpus);
|
|
529781
|
+
const filteredUuids = new Set(filtered.map((g) => g.uuid));
|
|
529782
|
+
for (const g of rawGpus) {
|
|
529783
|
+
if (filteredUuids.has(g.uuid))
|
|
529784
|
+
continue;
|
|
529785
|
+
if (this._excludedGpusReported.has(g.uuid))
|
|
529786
|
+
continue;
|
|
529787
|
+
this._excludedGpusReported.add(g.uuid);
|
|
529788
|
+
const reason = g.vramTotalMB < resolveMinGpuVramMB() ? "insufficient-vram" : "insufficient-compute-capability";
|
|
529789
|
+
this.emit("gpu-excluded", {
|
|
529790
|
+
uuid: g.uuid,
|
|
529791
|
+
index: g.index,
|
|
529792
|
+
name: g.name,
|
|
529793
|
+
vramTotalMB: g.vramTotalMB,
|
|
529794
|
+
computeCapability: g.computeCapability,
|
|
529795
|
+
reason
|
|
529796
|
+
});
|
|
529797
|
+
}
|
|
529798
|
+
this.gpuCache = { gpus: filtered, takenAtMs: now };
|
|
529799
|
+
this.maybeAutoTuneMaxParallel(filtered);
|
|
529800
|
+
return filtered;
|
|
529801
|
+
}
|
|
529802
|
+
/**
|
|
529803
|
+
* One-shot: bump `maxParallelPerInstance` from the worst-case free VRAM
|
|
529804
|
+
* across capable GPUs the first time we see them. We never tune down (a
|
|
529805
|
+
* subsequent low-VRAM read shouldn't strip concurrency from in-flight
|
|
529806
|
+
* requests), and we never tune again once successful — the recommendation
|
|
529807
|
+
* ladder is stable enough that a single read at startup is correct.
|
|
529808
|
+
*/
|
|
529809
|
+
maybeAutoTuneMaxParallel(filtered) {
|
|
529810
|
+
if (!this.config.autoTuneMaxParallel)
|
|
529811
|
+
return;
|
|
529812
|
+
if (this._autoTuned)
|
|
529813
|
+
return;
|
|
529814
|
+
if (filtered.length === 0)
|
|
529815
|
+
return;
|
|
529816
|
+
const minFreeMB = filtered.reduce((m2, g) => Math.min(m2, g.vramFreeMB), Number.POSITIVE_INFINITY);
|
|
529817
|
+
const recommended = recommendMaxParallelFromVram(minFreeMB);
|
|
529818
|
+
if (recommended > this.config.maxParallelPerInstance) {
|
|
529819
|
+
const previous = this.config.maxParallelPerInstance;
|
|
529820
|
+
this.config.maxParallelPerInstance = recommended;
|
|
529821
|
+
for (const inst of this.instances) {
|
|
529822
|
+
inst.state.maxParallel = recommended;
|
|
529823
|
+
}
|
|
529824
|
+
this.emit("max-parallel-tuned", {
|
|
529825
|
+
previous,
|
|
529826
|
+
recommended,
|
|
529827
|
+
minFreeMB,
|
|
529828
|
+
capableGpuCount: filtered.length
|
|
529829
|
+
});
|
|
529830
|
+
}
|
|
529831
|
+
this._autoTuned = true;
|
|
529609
529832
|
}
|
|
529610
529833
|
async spawnInstance(model, gpu) {
|
|
529611
529834
|
let port;
|
|
@@ -529668,6 +529891,56 @@ var init_ollama_pool = __esm({
|
|
|
529668
529891
|
});
|
|
529669
529892
|
return inst;
|
|
529670
529893
|
}
|
|
529894
|
+
/**
|
|
529895
|
+
* Best-effort: estimate the VRAM (in MiB) a model needs to be served
|
|
529896
|
+
* without CPU spill. Hits the base instance's `/api/show` once per model
|
|
529897
|
+
* and caches the result. Returns null when the probe fails (the caller
|
|
529898
|
+
* then falls back to "no estimate" semantics — capacity check is skipped).
|
|
529899
|
+
*
|
|
529900
|
+
* The number returned is `disk_size * 1.15 + maxParallel * 1024` (1 GiB of
|
|
529901
|
+
* KV cache per parallel slot — conservative for 30B-class models). Newer
|
|
529902
|
+
* model families may exceed this margin slightly; bumps are safe via
|
|
529903
|
+
* OMNIUS_OLLAMA_VRAM_SAFETY_MARGIN.
|
|
529904
|
+
*/
|
|
529905
|
+
async estimateModelVramMB(model) {
|
|
529906
|
+
if (this._modelVramEstimateMB.has(model)) {
|
|
529907
|
+
return this._modelVramEstimateMB.get(model) ?? null;
|
|
529908
|
+
}
|
|
529909
|
+
let bytesOnDisk = null;
|
|
529910
|
+
try {
|
|
529911
|
+
const url = `${this.config.baseInstanceUrl.replace(/\/+$/, "")}/api/show`;
|
|
529912
|
+
const resp = await fetch(url, {
|
|
529913
|
+
method: "POST",
|
|
529914
|
+
headers: { "Content-Type": "application/json" },
|
|
529915
|
+
body: JSON.stringify({ name: model }),
|
|
529916
|
+
signal: AbortSignal.timeout(2e3)
|
|
529917
|
+
});
|
|
529918
|
+
if (resp.ok) {
|
|
529919
|
+
const data = await resp.json();
|
|
529920
|
+
if (typeof data.size === "number" && data.size > 0)
|
|
529921
|
+
bytesOnDisk = data.size;
|
|
529922
|
+
}
|
|
529923
|
+
} catch {
|
|
529924
|
+
}
|
|
529925
|
+
if (bytesOnDisk === null) {
|
|
529926
|
+
this._modelVramEstimateMB.set(model, null);
|
|
529927
|
+
return null;
|
|
529928
|
+
}
|
|
529929
|
+
const safetyMargin = Number(process.env["OMNIUS_OLLAMA_VRAM_SAFETY_MARGIN"]) || 1.15;
|
|
529930
|
+
const kvCacheMB = this.config.maxParallelPerInstance * 1024;
|
|
529931
|
+
const estimateMB = Math.ceil(bytesOnDisk / (1024 * 1024) * safetyMargin + kvCacheMB);
|
|
529932
|
+
this._modelVramEstimateMB.set(model, estimateMB);
|
|
529933
|
+
return estimateMB;
|
|
529934
|
+
}
|
|
529935
|
+
/**
|
|
529936
|
+
* Filter GPUs to those with enough free VRAM for the model. Caller decides
|
|
529937
|
+
* how to react to an empty list (skip spawn vs degrade to constrained).
|
|
529938
|
+
*/
|
|
529939
|
+
gpusWithCapacityForModel(gpus, vramNeededMB) {
|
|
529940
|
+
if (vramNeededMB === null)
|
|
529941
|
+
return gpus;
|
|
529942
|
+
return gpus.filter((g) => g.vramFreeMB >= vramNeededMB);
|
|
529943
|
+
}
|
|
529671
529944
|
/**
|
|
529672
529945
|
* Pick a GPU for a freshly-spawned instance. Prefers GPUs that no
|
|
529673
529946
|
* pool-owned instance is already pinned to, then most free VRAM. Returns
|
|
@@ -529704,6 +529977,28 @@ var init_ollama_pool = __esm({
|
|
|
529704
529977
|
survivors.push(inst);
|
|
529705
529978
|
continue;
|
|
529706
529979
|
}
|
|
529980
|
+
const PROBE_GRACE_MS = 3e4;
|
|
529981
|
+
if (inst.state.inflight === 0 && Date.now() - inst.state.spawnedAtMs > PROBE_GRACE_MS && await this.isPartialVramSpilled(inst)) {
|
|
529982
|
+
const reapedAtMs = Date.now();
|
|
529983
|
+
await inst.terminate();
|
|
529984
|
+
this.dropAffinityFor(inst.state.id);
|
|
529985
|
+
this.emit("instance-reaped", {
|
|
529986
|
+
id: inst.state.id,
|
|
529987
|
+
pid: inst.state.pid,
|
|
529988
|
+
reason: "partial-vram",
|
|
529989
|
+
totalRequests: inst.state.totalRequests,
|
|
529990
|
+
peakInflight: inst.state.peakInflight,
|
|
529991
|
+
ageMs: reapedAtMs - inst.state.spawnedAtMs,
|
|
529992
|
+
idleMs: reapedAtMs - inst.state.lastUsedMs,
|
|
529993
|
+
provenance: {
|
|
529994
|
+
entity: `urn:omnius:ollama-instance:${inst.state.id}`,
|
|
529995
|
+
activity: "ollama-instance-reap-partial-vram",
|
|
529996
|
+
agent: "orchestrator.ollama-pool",
|
|
529997
|
+
timestampMs: reapedAtMs
|
|
529998
|
+
}
|
|
529999
|
+
});
|
|
530000
|
+
continue;
|
|
530001
|
+
}
|
|
529707
530002
|
if (inst.isIdleLongerThan(this.config.idleMs)) {
|
|
529708
530003
|
const reapedAtMs = Date.now();
|
|
529709
530004
|
await inst.terminate();
|
|
@@ -529711,6 +530006,7 @@ var init_ollama_pool = __esm({
|
|
|
529711
530006
|
this.emit("instance-reaped", {
|
|
529712
530007
|
id: inst.state.id,
|
|
529713
530008
|
pid: inst.state.pid,
|
|
530009
|
+
reason: "idle",
|
|
529714
530010
|
totalRequests: inst.state.totalRequests,
|
|
529715
530011
|
peakInflight: inst.state.peakInflight,
|
|
529716
530012
|
ageMs: reapedAtMs - inst.state.spawnedAtMs,
|
|
@@ -529728,6 +530024,33 @@ var init_ollama_pool = __esm({
|
|
|
529728
530024
|
}
|
|
529729
530025
|
this.instances = survivors;
|
|
529730
530026
|
}
|
|
530027
|
+
/**
|
|
530028
|
+
* Probe `/api/ps` on the instance and return true if any resident model has
|
|
530029
|
+
* less than 95% of its weights in VRAM — the unmistakable CPU-offload
|
|
530030
|
+
* signature that produces 50× slowdowns. Defensive: any HTTP failure
|
|
530031
|
+
* returns false so a transient network blip never triggers a reap.
|
|
530032
|
+
*/
|
|
530033
|
+
async isPartialVramSpilled(inst) {
|
|
530034
|
+
const PARTIAL_VRAM_THRESHOLD = 0.95;
|
|
530035
|
+
try {
|
|
530036
|
+
const url = `${inst.state.baseUrl.replace(/\/+$/, "")}/api/ps`;
|
|
530037
|
+
const resp = await fetch(url, { signal: AbortSignal.timeout(2e3) });
|
|
530038
|
+
if (!resp.ok)
|
|
530039
|
+
return false;
|
|
530040
|
+
const data = await resp.json();
|
|
530041
|
+
if (!data.models || data.models.length === 0)
|
|
530042
|
+
return false;
|
|
530043
|
+
return data.models.some((m2) => {
|
|
530044
|
+
const total = m2.size ?? 0;
|
|
530045
|
+
const vram = m2.size_vram ?? 0;
|
|
530046
|
+
if (total <= 0)
|
|
530047
|
+
return false;
|
|
530048
|
+
return vram / total < PARTIAL_VRAM_THRESHOLD;
|
|
530049
|
+
});
|
|
530050
|
+
} catch {
|
|
530051
|
+
return false;
|
|
530052
|
+
}
|
|
530053
|
+
}
|
|
529731
530054
|
/** Stop the reaper and terminate every spawned instance. Call on process exit. */
|
|
529732
530055
|
async shutdown() {
|
|
529733
530056
|
if (this.reaperHandle) {
|
|
@@ -571197,6 +571520,47 @@ var init_voice_soul = __esm({
|
|
|
571197
571520
|
}
|
|
571198
571521
|
});
|
|
571199
571522
|
|
|
571523
|
+
// packages/cli/src/tui/usage-bars.ts
|
|
571524
|
+
/**
 * Format a count compactly: plain integer below 1000, otherwise scaled to
 * "K" or "M" with one decimal below 10 units and none from 10 units up
 * (e.g. 1500 → "1.5K", 123456 → "123K", 2500000 → "2.5M").
 *
 * Non-finite or negative input is treated as 0; fractions are floored first.
 * Precision is chosen from the rounded value, so boundary counts roll over
 * cleanly: 9999 → "10K" (not "10.0K") and 999999 → "1.0M" (not "1000K").
 *
 * @param {number} value2 The count to format.
 * @returns {string} Compact representation.
 */
function formatCompactCount(value2) {
  const count = Math.max(0, Math.floor(Number.isFinite(value2) ? value2 : 0));
  if (count < 1e3) return String(count);
  // Scale by `divisor`, choosing the precision from the rounded value so a
  // figure that rounds up to 10 is printed as "10", never "10.0".
  const scale = (divisor) => {
    const scaled = count / divisor;
    const oneDecimal = Number(scaled.toFixed(1));
    return oneDecimal < 10 ? oneDecimal.toFixed(1) : String(Math.round(scaled));
  };
  if (count < 1e6) {
    const thousands = scale(1e3);
    // 999500..999999 round up to 1000 thousands; promote those to "M".
    if (Number(thousands) < 1e3) return `${thousands}K`;
  }
  return `${scale(1e6)}M`;
}
|
|
571530
|
+
/**
 * Describe how long remains until `resetAt` as a suffix such as " reset 45m",
 * " reset 2h" or " reset 2h 5m" (note the leading space).
 *
 * The remaining time is rounded up to whole minutes.
 *
 * @param {number} resetAt Reset time as epoch milliseconds.
 * @param {number} [now] Reference time as epoch milliseconds; defaults to now.
 * @returns {string} The suffix, or "" when `resetAt` is not finite or not in
 *   the future.
 */
function formatResetDelta(resetAt, now = Date.now()) {
  if (!Number.isFinite(resetAt) || resetAt <= now) {
    return "";
  }
  const totalMinutes = Math.ceil((resetAt - now) / 6e4);
  if (totalMinutes < 60) {
    return ` reset ${totalMinutes}m`;
  }
  const hours = Math.floor(totalMinutes / 60);
  const minutes = totalMinutes % 60;
  const pieces = [` reset ${hours}h`];
  if (minutes > 0) {
    pieces.push(`${minutes}m`);
  }
  return pieces.join(" ");
}
|
|
571538
|
+
/**
 * Render one usage line: padded label, filled/empty bar, percentage,
 * "used/total" in compact form and, when `resetAt` is set, a reset suffix.
 *
 * The bar turns yellow from 70% and red from 90%. `used` is clamped to
 * `total` for the bar and percentage, but the unclamped figure is shown in
 * the "used/total" text. A non-positive or non-finite total yields an empty
 * bar at 0%.
 *
 * @param {object} options2
 * @param {string} options2.label Text shown before the bar.
 * @param {number} options2.used Amount consumed.
 * @param {number} options2.total Budget size.
 * @param {number} [options2.width] Bar width in cells (minimum 4, default 18).
 * @param {number} [options2.labelWidth] Minimum label column width (default 16).
 * @param {number} [options2.resetAt] Epoch ms at which the budget resets.
 * @returns {string} The assembled line with trailing whitespace removed.
 */
function formatUsageBar(options2) {
  const toCount = (raw) => Math.max(0, Math.floor(Number.isFinite(raw) ? raw : 0));
  const total = toCount(options2.total);
  const rawUsed = toCount(options2.used);
  const hasBudget = total > 0;
  const used = hasBudget ? Math.min(total, rawUsed) : 0;
  const width = Math.max(4, options2.width ?? 18);
  const labelWidth = Math.max(options2.label.length, options2.labelWidth ?? 16);
  const pct = hasBudget ? Math.round(used / total * 100) : 0;
  const filled = hasBudget ? Math.min(width, Math.round(pct / 100 * width)) : 0;
  let color;
  if (pct >= 90) {
    color = c3.red;
  } else if (pct >= 70) {
    color = c3.yellow;
  } else {
    color = c3.green;
  }
  const bar = color("█".repeat(filled)) + c3.dim("░".repeat(width - filled));
  const reset = options2.resetAt ? c3.dim(formatResetDelta(options2.resetAt)) : "";
  const segments = [
    c3.cyan(options2.label.padEnd(labelWidth)),
    bar,
    color(`${pct}%`.padStart(4)),
    c3.dim(`${formatCompactCount(rawUsed)}/${formatCompactCount(total)}`),
    reset
  ];
  return segments.join(" ").trimEnd();
}
|
|
571557
|
+
var init_usage_bars = __esm({
|
|
571558
|
+
"packages/cli/src/tui/usage-bars.ts"() {
|
|
571559
|
+
"use strict";
|
|
571560
|
+
init_render();
|
|
571561
|
+
}
|
|
571562
|
+
});
|
|
571563
|
+
|
|
571200
571564
|
// packages/cli/src/tui/expose.ts
|
|
571201
571565
|
import { createServer as createServer5, request as httpRequest } from "node:http";
|
|
571202
571566
|
import { request as httpsRequest } from "node:https";
|
|
@@ -571243,6 +571607,38 @@ function fmtTokens(n2) {
|
|
|
571243
571607
|
if (n2 < 1e6) return `${(n2 / 1e3).toFixed(1)}K`;
|
|
571244
571608
|
return `${(n2 / 1e6).toFixed(1)}M`;
|
|
571245
571609
|
}
|
|
571610
|
+
/**
 * Coerce an arbitrary value to a non-negative integer.
 *
 * @param {*} value2 Value to coerce with `Number()`.
 * @returns {number} The floored number when it is finite and positive,
 *   otherwise 0.
 */
function safeNonNegativeInt(value2) {
  const numeric = Number(value2);
  if (!Number.isFinite(numeric) || numeric <= 0) {
    return 0;
  }
  return Math.floor(numeric);
}
|
|
571614
|
+
/**
 * Compute when a sponsor daily window that starts at `now` ends.
 *
 * @param {number} [now] Window start as epoch milliseconds; defaults to now.
 * @returns {number} `now` plus `SPONSOR_DAILY_WINDOW_MS`.
 */
function nextSponsorDailyReset(now = Date.now()) {
  const windowEnd = now + SPONSOR_DAILY_WINDOW_MS;
  return windowEnd;
}
|
|
571617
|
+
/**
 * Load persisted sponsor usage from `<stateDir>/sponsor/<SPONSOR_USAGE_FILE_NAME>`.
 *
 * @param {string} stateDir Directory that holds the `sponsor` subdirectory.
 * @returns {{dailyTokensUsed: number, dailyTokensResetAt: number, updatedAt: string} | null}
 *   The sanitized state, or null when the file is missing, unreadable, not
 *   valid JSON, or has no positive `dailyTokensResetAt`. A missing or
 *   non-string `updatedAt` is replaced by the current time in ISO format.
 */
function readSponsorUsageState(stateDir) {
  try {
    const usageFile = join105(stateDir, "sponsor", SPONSOR_USAGE_FILE_NAME);
    if (!existsSync90(usageFile)) {
      return null;
    }
    const raw = JSON.parse(readFileSync71(usageFile, "utf8"));
    const dailyTokensUsed = safeNonNegativeInt(raw.dailyTokensUsed);
    const dailyTokensResetAt = safeNonNegativeInt(raw.dailyTokensResetAt);
    // Without a reset timestamp the saved counter cannot be placed in a window.
    if (!dailyTokensResetAt) {
      return null;
    }
    const hasTimestamp = typeof raw.updatedAt === "string";
    const updatedAt = hasTimestamp ? raw.updatedAt : new Date().toISOString();
    return { dailyTokensUsed, dailyTokensResetAt, updatedAt };
  } catch {
    return null;
  }
}
|
|
571634
|
+
/**
 * Persist sponsor usage to `<stateDir>/sponsor/<SPONSOR_USAGE_FILE_NAME>` as
 * pretty-printed JSON, creating the directory if needed.
 *
 * Best-effort: any error while creating the directory, serializing or writing
 * is swallowed.
 *
 * @param {string} stateDir Directory that holds the `sponsor` subdirectory.
 * @param {object} state Usage state to serialize.
 * @returns {void}
 */
function writeSponsorUsageState(stateDir, state) {
  try {
    const sponsorDir = join105(stateDir, "sponsor");
    mkdirSync50(sponsorDir, { recursive: true });
    const target = join105(sponsorDir, SPONSOR_USAGE_FILE_NAME);
    const serialized = JSON.stringify(state, null, 2);
    writeFileSync45(target, serialized);
  } catch {
    // Deliberately ignored: failing to persist usage must not break the caller.
  }
}
|
|
571246
571642
|
function readExposeState(stateDir) {
|
|
571247
571643
|
try {
|
|
571248
571644
|
const path12 = join105(stateDir, STATE_FILE_NAME);
|
|
@@ -571406,11 +571802,12 @@ function removeP2PExposeState(stateDir) {
|
|
|
571406
571802
|
} catch {
|
|
571407
571803
|
}
|
|
571408
571804
|
}
|
|
571409
|
-
var HOP_BY_HOP_HEADERS, CF_HEADERS_PREFIX, DEFAULT_EXPOSE_MAX_BODY_BYTES, INTERNAL_CAPABILITIES, DEFAULT_TARGETS, STATE_FILE_NAME, ExposeGateway, P2P_STATE_FILE_NAME, ExposeP2PGateway;
|
|
571805
|
+
var HOP_BY_HOP_HEADERS, CF_HEADERS_PREFIX, DEFAULT_EXPOSE_MAX_BODY_BYTES, INTERNAL_CAPABILITIES, DEFAULT_TARGETS, STATE_FILE_NAME, SPONSOR_USAGE_FILE_NAME, SPONSOR_DAILY_WINDOW_MS, SPONSOR_REQUEST_WINDOW_MS, ExposeGateway, P2P_STATE_FILE_NAME, ExposeP2PGateway;
|
|
571410
571806
|
var init_expose = __esm({
|
|
571411
571807
|
"packages/cli/src/tui/expose.ts"() {
|
|
571412
571808
|
"use strict";
|
|
571413
571809
|
init_render();
|
|
571810
|
+
init_usage_bars();
|
|
571414
571811
|
init_typed_node_events();
|
|
571415
571812
|
HOP_BY_HOP_HEADERS = /* @__PURE__ */ new Set([
|
|
571416
571813
|
"connection",
|
|
@@ -571432,6 +571829,9 @@ var init_expose = __esm({
|
|
|
571432
571829
|
custom: "http://127.0.0.1:11434"
|
|
571433
571830
|
};
|
|
571434
571831
|
STATE_FILE_NAME = "expose-state.json";
|
|
571832
|
+
SPONSOR_USAGE_FILE_NAME = "usage.json";
|
|
571833
|
+
SPONSOR_DAILY_WINDOW_MS = 864e5;
|
|
571834
|
+
SPONSOR_REQUEST_WINDOW_MS = 6e4;
|
|
571435
571835
|
ExposeGateway = class _ExposeGateway extends EventEmitter8 {
|
|
571436
571836
|
constructor(options2) {
|
|
571437
571837
|
super();
|
|
@@ -571449,6 +571849,8 @@ var init_expose = __esm({
|
|
|
571449
571849
|
} else {
|
|
571450
571850
|
this._authKey = options2.authKey;
|
|
571451
571851
|
}
|
|
571852
|
+
this.loadSponsorUsage();
|
|
571853
|
+
this.refreshSponsorUsageStats();
|
|
571452
571854
|
}
|
|
571453
571855
|
options;
|
|
571454
571856
|
server = null;
|
|
@@ -571471,6 +571873,7 @@ var init_expose = __esm({
|
|
|
571471
571873
|
_dailyTokensResetAt = 0;
|
|
571472
571874
|
/** Sponsor rate limits (set via setSponsorLimits) */
|
|
571473
571875
|
_sponsorLimits = null;
|
|
571876
|
+
_sponsorBlockedRequests = 0;
|
|
571474
571877
|
_authKey;
|
|
571475
571878
|
_targetUrl;
|
|
571476
571879
|
_kind;
|
|
@@ -571489,7 +571892,8 @@ var init_expose = __esm({
|
|
|
571489
571892
|
users: /* @__PURE__ */ new Map(),
|
|
571490
571893
|
budgetTokensRemaining: 0,
|
|
571491
571894
|
budgetTokensTotal: 0,
|
|
571492
|
-
budgetResetAt: 0
|
|
571895
|
+
budgetResetAt: 0,
|
|
571896
|
+
sponsorUsage: null
|
|
571493
571897
|
};
|
|
571494
571898
|
get tunnelUrl() {
|
|
571495
571899
|
return this._tunnelUrl;
|
|
@@ -571509,42 +571913,140 @@ var init_expose = __esm({
|
|
|
571509
571913
|
/** Set sponsor rate limits — enables rate limiting middleware in the proxy */
|
|
571510
571914
|
setSponsorLimits(limits) {
|
|
571511
571915
|
this._sponsorLimits = limits;
|
|
571916
|
+
this.ensureSponsorDailyWindow();
|
|
571917
|
+
this.refreshSponsorUsageStats();
|
|
571918
|
+
this.emitStats();
|
|
571919
|
+
}
|
|
571920
|
+
getSponsorUsageSnapshot() {
|
|
571921
|
+
this.refreshSponsorUsageStats();
|
|
571922
|
+
return this._stats.sponsorUsage ? { ...this._stats.sponsorUsage } : null;
|
|
571923
|
+
}
|
|
571924
|
+
loadSponsorUsage() {
|
|
571925
|
+
if (!this._stateDir) {
|
|
571926
|
+
this._dailyTokensResetAt = nextSponsorDailyReset();
|
|
571927
|
+
return;
|
|
571928
|
+
}
|
|
571929
|
+
const saved = readSponsorUsageState(this._stateDir);
|
|
571930
|
+
if (!saved) {
|
|
571931
|
+
this._dailyTokensUsed = 0;
|
|
571932
|
+
this._dailyTokensResetAt = nextSponsorDailyReset();
|
|
571933
|
+
return;
|
|
571934
|
+
}
|
|
571935
|
+
const now = Date.now();
|
|
571936
|
+
if (saved.dailyTokensResetAt <= now) {
|
|
571937
|
+
this._dailyTokensUsed = 0;
|
|
571938
|
+
this._dailyTokensResetAt = nextSponsorDailyReset(now);
|
|
571939
|
+
this.saveSponsorUsage();
|
|
571940
|
+
} else {
|
|
571941
|
+
this._dailyTokensUsed = saved.dailyTokensUsed;
|
|
571942
|
+
this._dailyTokensResetAt = saved.dailyTokensResetAt;
|
|
571943
|
+
}
|
|
571944
|
+
}
|
|
571945
|
+
saveSponsorUsage() {
|
|
571946
|
+
if (!this._stateDir) return;
|
|
571947
|
+
writeSponsorUsageState(this._stateDir, {
|
|
571948
|
+
dailyTokensUsed: this._dailyTokensUsed,
|
|
571949
|
+
dailyTokensResetAt: this._dailyTokensResetAt,
|
|
571950
|
+
updatedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
571951
|
+
});
|
|
571952
|
+
}
|
|
571953
|
+
ensureSponsorDailyWindow(now = Date.now()) {
|
|
571954
|
+
if (!this._dailyTokensResetAt || this._dailyTokensResetAt <= now) {
|
|
571955
|
+
this._dailyTokensUsed = 0;
|
|
571956
|
+
this._dailyTokensResetAt = nextSponsorDailyReset(now);
|
|
571957
|
+
this.saveSponsorUsage();
|
|
571958
|
+
}
|
|
571959
|
+
}
|
|
571960
|
+
pruneSponsorRequestWindows(now = Date.now()) {
|
|
571961
|
+
for (const [ip, window2] of this._rateLimitWindows.entries()) {
|
|
571962
|
+
while (window2.length > 0 && window2[0] < now - SPONSOR_REQUEST_WINDOW_MS) window2.shift();
|
|
571963
|
+
if (window2.length === 0) this._rateLimitWindows.delete(ip);
|
|
571964
|
+
}
|
|
571965
|
+
}
|
|
571966
|
+
sponsorRequestWindowUsage(now = Date.now()) {
|
|
571967
|
+
this.pruneSponsorRequestWindows(now);
|
|
571968
|
+
let count = 0;
|
|
571969
|
+
let oldest = Number.POSITIVE_INFINITY;
|
|
571970
|
+
for (const window2 of this._rateLimitWindows.values()) {
|
|
571971
|
+
count += window2.length;
|
|
571972
|
+
if (window2.length > 0) oldest = Math.min(oldest, window2[0]);
|
|
571973
|
+
}
|
|
571974
|
+
return {
|
|
571975
|
+
count,
|
|
571976
|
+
resetAt: Number.isFinite(oldest) ? oldest + SPONSOR_REQUEST_WINDOW_MS : now + SPONSOR_REQUEST_WINDOW_MS
|
|
571977
|
+
};
|
|
571978
|
+
}
|
|
571979
|
+
refreshSponsorUsageStats(now = Date.now()) {
|
|
571980
|
+
if (!this._sponsorLimits) {
|
|
571981
|
+
this._stats.sponsorUsage = null;
|
|
571982
|
+
return;
|
|
571983
|
+
}
|
|
571984
|
+
this.ensureSponsorDailyWindow(now);
|
|
571985
|
+
const req2 = this.sponsorRequestWindowUsage(now);
|
|
571986
|
+
this._stats.sponsorUsage = {
|
|
571987
|
+
enabled: true,
|
|
571988
|
+
transport: "tunnel",
|
|
571989
|
+
dailyTokensUsed: this._dailyTokensUsed,
|
|
571990
|
+
dailyTokensLimit: this._sponsorLimits.maxTokensPerDay,
|
|
571991
|
+
dailyResetAt: this._dailyTokensResetAt,
|
|
571992
|
+
requestsInWindow: req2.count,
|
|
571993
|
+
requestsPerMinuteLimit: this._sponsorLimits.maxRequestsPerMinute,
|
|
571994
|
+
requestWindowResetAt: req2.resetAt,
|
|
571995
|
+
activeConnections: this._stats.activeConnections,
|
|
571996
|
+
maxConcurrent: this._sponsorLimits.maxConcurrent,
|
|
571997
|
+
blockedRequests: this._sponsorBlockedRequests,
|
|
571998
|
+
allowedModels: this._sponsorLimits.allowedModels === "all" ? "all" : [...this._sponsorLimits.allowedModels]
|
|
571999
|
+
};
|
|
572000
|
+
}
|
|
572001
|
+
markSponsorBlocked() {
|
|
572002
|
+
this._sponsorBlockedRequests++;
|
|
572003
|
+
this.refreshSponsorUsageStats();
|
|
571512
572004
|
}
|
|
571513
572005
|
/** Check rate limits for a request. Returns null if OK, or error message string if blocked. */
|
|
571514
|
-
checkRateLimit(userIp, model) {
|
|
572006
|
+
checkRateLimit(userIp, model, options2 = {}) {
|
|
571515
572007
|
if (!this._sponsorLimits) return null;
|
|
571516
572008
|
const lim = this._sponsorLimits;
|
|
572009
|
+
const now = Date.now();
|
|
572010
|
+
if (lim.maxRequestsPerMinute <= 0 || lim.maxTokensPerDay <= 0 || lim.maxConcurrent <= 0) {
|
|
572011
|
+
this.markSponsorBlocked();
|
|
572012
|
+
return "Sponsored endpoint is paused or has no quota configured.";
|
|
572013
|
+
}
|
|
571517
572014
|
if (lim.allowedModels !== "all" && model && !lim.allowedModels.includes(model)) {
|
|
572015
|
+
this.markSponsorBlocked();
|
|
571518
572016
|
return `Model '${model}' is not available on this sponsored endpoint. Available: ${lim.allowedModels.join(", ")}`;
|
|
571519
572017
|
}
|
|
571520
|
-
if (this._stats.activeConnections
|
|
572018
|
+
if (this._stats.activeConnections > lim.maxConcurrent) {
|
|
572019
|
+
this.markSponsorBlocked();
|
|
571521
572020
|
return `Too many concurrent requests (${this._stats.activeConnections}/${lim.maxConcurrent}). Try again shortly.`;
|
|
571522
572021
|
}
|
|
571523
|
-
const now = Date.now();
|
|
571524
|
-
const windowMs = 6e4;
|
|
571525
572022
|
let window2 = this._rateLimitWindows.get(userIp);
|
|
571526
572023
|
if (!window2) {
|
|
571527
572024
|
window2 = [];
|
|
571528
572025
|
this._rateLimitWindows.set(userIp, window2);
|
|
571529
572026
|
}
|
|
571530
|
-
while (window2.length > 0 && window2[0] < now -
|
|
572027
|
+
while (window2.length > 0 && window2[0] < now - SPONSOR_REQUEST_WINDOW_MS) window2.shift();
|
|
571531
572028
|
if (window2.length >= lim.maxRequestsPerMinute) {
|
|
571532
|
-
|
|
572029
|
+
this.markSponsorBlocked();
|
|
572030
|
+
const retryAfterMs = window2[0] + SPONSOR_REQUEST_WINDOW_MS - now;
|
|
571533
572031
|
return `Rate limited (${lim.maxRequestsPerMinute} req/min). Retry in ${Math.ceil(retryAfterMs / 1e3)}s.`;
|
|
571534
572032
|
}
|
|
571535
|
-
window2.push(now);
|
|
571536
|
-
|
|
571537
|
-
this._dailyTokensUsed = 0;
|
|
571538
|
-
this._dailyTokensResetAt = now + 864e5;
|
|
571539
|
-
}
|
|
572033
|
+
if (options2.commitRequest) window2.push(now);
|
|
572034
|
+
this.ensureSponsorDailyWindow(now);
|
|
571540
572035
|
if (this._dailyTokensUsed >= lim.maxTokensPerDay) {
|
|
572036
|
+
this.markSponsorBlocked();
|
|
571541
572037
|
return `Daily token budget exhausted (${fmtTokens(lim.maxTokensPerDay)}). Resets in ${Math.ceil((this._dailyTokensResetAt - now) / 36e5)}h.`;
|
|
571542
572038
|
}
|
|
572039
|
+
this.refreshSponsorUsageStats(now);
|
|
571543
572040
|
return null;
|
|
571544
572041
|
}
|
|
571545
572042
|
/** Track token usage from a completed response */
|
|
571546
572043
|
trackTokenUsage(tokensIn, tokensOut) {
|
|
571547
|
-
|
|
572044
|
+
const total = safeNonNegativeInt(tokensIn) + safeNonNegativeInt(tokensOut);
|
|
572045
|
+
if (total <= 0) return;
|
|
572046
|
+
this.ensureSponsorDailyWindow();
|
|
572047
|
+
this._dailyTokensUsed += total;
|
|
572048
|
+
this.saveSponsorUsage();
|
|
572049
|
+
this.refreshSponsorUsageStats();
|
|
571548
572050
|
}
|
|
571549
572051
|
// ── Lifecycle ───────────────────────────────────────────────────────────
|
|
571550
572052
|
async start() {
|
|
@@ -571753,7 +572255,7 @@ var init_expose = __esm({
|
|
|
571753
572255
|
user.activeRequests++;
|
|
571754
572256
|
user.lastSeen = Date.now();
|
|
571755
572257
|
this.emitStats();
|
|
571756
|
-
const preRateLimitCheck = this.checkRateLimit(userIp, "");
|
|
572258
|
+
const preRateLimitCheck = this.checkRateLimit(userIp, "", { commitRequest: false });
|
|
571757
572259
|
if (preRateLimitCheck) {
|
|
571758
572260
|
this._stats.activeConnections--;
|
|
571759
572261
|
user.activeRequests--;
|
|
@@ -571842,8 +572344,8 @@ var init_expose = __esm({
|
|
|
571842
572344
|
} catch {
|
|
571843
572345
|
}
|
|
571844
572346
|
}
|
|
571845
|
-
if (
|
|
571846
|
-
const modelCheck = this.checkRateLimit(userIp, requestModel);
|
|
572347
|
+
if (this._sponsorLimits) {
|
|
572348
|
+
const modelCheck = this.checkRateLimit(userIp, requestModel, { commitRequest: true });
|
|
571847
572349
|
if (modelCheck) {
|
|
571848
572350
|
this._stats.activeConnections--;
|
|
571849
572351
|
user.activeRequests--;
|
|
@@ -572229,10 +572731,12 @@ ${this.formatConnectionInfo()}`);
|
|
|
572229
572731
|
});
|
|
572230
572732
|
}
|
|
572231
572733
|
emitStats() {
|
|
572734
|
+
this.refreshSponsorUsageStats();
|
|
572232
572735
|
this.emit("stats", {
|
|
572233
572736
|
...this._stats,
|
|
572234
572737
|
modelUsage: new Map(this._stats.modelUsage),
|
|
572235
|
-
users: new Map(this._stats.users)
|
|
572738
|
+
users: new Map(this._stats.users),
|
|
572739
|
+
sponsorUsage: this._stats.sponsorUsage ? { ...this._stats.sponsorUsage } : null
|
|
572236
572740
|
});
|
|
572237
572741
|
}
|
|
572238
572742
|
/** Format connection info for display */
|
|
@@ -572274,6 +572778,28 @@ ${this.formatConnectionInfo()}`);
|
|
|
572274
572778
|
const budgetColor = pct > 50 ? c3.green : pct > 20 ? c3.yellow : c3.red;
|
|
572275
572779
|
lines.push(` ${c3.cyan("Budget".padEnd(18))} ${budgetColor(fmtTokens(s2.budgetTokensRemaining))}${c3.dim("/")}${fmtTokens(s2.budgetTokensTotal)} ${c3.dim(`(${pct}% left)`)}`);
|
|
572276
572780
|
}
|
|
572781
|
+
if (s2.sponsorUsage) {
|
|
572782
|
+
lines.push("");
|
|
572783
|
+
lines.push(` ${c3.bold("Sponsor Quota")}`);
|
|
572784
|
+
lines.push(` ${formatUsageBar({
|
|
572785
|
+
label: "Daily tokens",
|
|
572786
|
+
used: s2.sponsorUsage.dailyTokensUsed,
|
|
572787
|
+
total: s2.sponsorUsage.dailyTokensLimit,
|
|
572788
|
+
resetAt: s2.sponsorUsage.dailyResetAt
|
|
572789
|
+
})}`);
|
|
572790
|
+
lines.push(` ${formatUsageBar({
|
|
572791
|
+
label: "Requests/min",
|
|
572792
|
+
used: s2.sponsorUsage.requestsInWindow,
|
|
572793
|
+
total: s2.sponsorUsage.requestsPerMinuteLimit,
|
|
572794
|
+
resetAt: s2.sponsorUsage.requestWindowResetAt
|
|
572795
|
+
})}`);
|
|
572796
|
+
lines.push(` ${formatUsageBar({
|
|
572797
|
+
label: "Concurrency",
|
|
572798
|
+
used: s2.sponsorUsage.activeConnections,
|
|
572799
|
+
total: s2.sponsorUsage.maxConcurrent
|
|
572800
|
+
})}`);
|
|
572801
|
+
lines.push(` ${c3.cyan("Blocked".padEnd(18))} ${s2.sponsorUsage.blockedRequests}`);
|
|
572802
|
+
}
|
|
572277
572803
|
const visibleModels = Array.from(s2.modelUsage.entries()).filter(([model]) => !INTERNAL_CAPABILITIES.has(model));
|
|
572278
572804
|
if (visibleModels.length > 0) {
|
|
572279
572805
|
lines.push("");
|
|
@@ -572335,6 +572861,11 @@ ${this.formatConnectionInfo()}`);
|
|
|
572335
572861
|
_passthrough = false;
|
|
572336
572862
|
_loadbalance = false;
|
|
572337
572863
|
_endpointAuth;
|
|
572864
|
+
_sponsorLimits = null;
|
|
572865
|
+
_sponsorBlockedRequests = 0;
|
|
572866
|
+
_sponsorRequestWindow = [];
|
|
572867
|
+
_dailyTokensUsed = 0;
|
|
572868
|
+
_dailyTokensResetAt = 0;
|
|
572338
572869
|
_pollTimer = null;
|
|
572339
572870
|
_activityPollTimer = null;
|
|
572340
572871
|
/** Fast token flash timer — pulses LED at 200ms while inference is active */
|
|
@@ -572353,7 +572884,8 @@ ${this.formatConnectionInfo()}`);
|
|
|
572353
572884
|
users: /* @__PURE__ */ new Map(),
|
|
572354
572885
|
budgetTokensRemaining: 0,
|
|
572355
572886
|
budgetTokensTotal: 0,
|
|
572356
|
-
budgetResetAt: 0
|
|
572887
|
+
budgetResetAt: 0,
|
|
572888
|
+
sponsorUsage: null
|
|
572357
572889
|
};
|
|
572358
572890
|
get peerId() {
|
|
572359
572891
|
return this._peerId;
|
|
@@ -572395,6 +572927,93 @@ ${this.formatConnectionInfo()}`);
|
|
|
572395
572927
|
} else {
|
|
572396
572928
|
this._authKey = options2.authKey;
|
|
572397
572929
|
}
|
|
572930
|
+
this.loadSponsorUsage();
|
|
572931
|
+
this.refreshSponsorUsageStats();
|
|
572932
|
+
}
|
|
572933
|
+
setSponsorLimits(limits) {
|
|
572934
|
+
this._sponsorLimits = limits;
|
|
572935
|
+
this.ensureSponsorDailyWindow();
|
|
572936
|
+
this.refreshSponsorUsageStats();
|
|
572937
|
+
this.emitStats();
|
|
572938
|
+
}
|
|
572939
|
+
getSponsorUsageSnapshot() {
|
|
572940
|
+
this.refreshSponsorUsageStats();
|
|
572941
|
+
return this._stats.sponsorUsage ? { ...this._stats.sponsorUsage } : null;
|
|
572942
|
+
}
|
|
572943
|
+
loadSponsorUsage() {
|
|
572944
|
+
if (!this._stateDir) {
|
|
572945
|
+
this._dailyTokensResetAt = nextSponsorDailyReset();
|
|
572946
|
+
return;
|
|
572947
|
+
}
|
|
572948
|
+
const saved = readSponsorUsageState(this._stateDir);
|
|
572949
|
+
if (!saved) {
|
|
572950
|
+
this._dailyTokensUsed = 0;
|
|
572951
|
+
this._dailyTokensResetAt = nextSponsorDailyReset();
|
|
572952
|
+
return;
|
|
572953
|
+
}
|
|
572954
|
+
const now = Date.now();
|
|
572955
|
+
if (saved.dailyTokensResetAt <= now) {
|
|
572956
|
+
this._dailyTokensUsed = 0;
|
|
572957
|
+
this._dailyTokensResetAt = nextSponsorDailyReset(now);
|
|
572958
|
+
this.saveSponsorUsage();
|
|
572959
|
+
} else {
|
|
572960
|
+
this._dailyTokensUsed = saved.dailyTokensUsed;
|
|
572961
|
+
this._dailyTokensResetAt = saved.dailyTokensResetAt;
|
|
572962
|
+
}
|
|
572963
|
+
}
|
|
572964
|
+
saveSponsorUsage() {
|
|
572965
|
+
if (!this._stateDir) return;
|
|
572966
|
+
writeSponsorUsageState(this._stateDir, {
|
|
572967
|
+
dailyTokensUsed: this._dailyTokensUsed,
|
|
572968
|
+
dailyTokensResetAt: this._dailyTokensResetAt,
|
|
572969
|
+
updatedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
572970
|
+
});
|
|
572971
|
+
}
|
|
572972
|
+
ensureSponsorDailyWindow(now = Date.now()) {
|
|
572973
|
+
if (!this._dailyTokensResetAt || this._dailyTokensResetAt <= now) {
|
|
572974
|
+
this._dailyTokensUsed = 0;
|
|
572975
|
+
this._dailyTokensResetAt = nextSponsorDailyReset(now);
|
|
572976
|
+
this.saveSponsorUsage();
|
|
572977
|
+
}
|
|
572978
|
+
}
|
|
572979
|
+
recordSponsorRequest(now = Date.now()) {
|
|
572980
|
+
this._sponsorRequestWindow.push(now);
|
|
572981
|
+
this.pruneSponsorRequestWindow(now);
|
|
572982
|
+
}
|
|
572983
|
+
pruneSponsorRequestWindow(now = Date.now()) {
|
|
572984
|
+
while (this._sponsorRequestWindow.length > 0 && this._sponsorRequestWindow[0] < now - SPONSOR_REQUEST_WINDOW_MS) {
|
|
572985
|
+
this._sponsorRequestWindow.shift();
|
|
572986
|
+
}
|
|
572987
|
+
}
|
|
572988
|
+
trackTokenUsage(tokensIn, tokensOut) {
|
|
572989
|
+
const total = safeNonNegativeInt(tokensIn) + safeNonNegativeInt(tokensOut);
|
|
572990
|
+
if (total <= 0) return;
|
|
572991
|
+
this.ensureSponsorDailyWindow();
|
|
572992
|
+
this._dailyTokensUsed += total;
|
|
572993
|
+
this.saveSponsorUsage();
|
|
572994
|
+
this.refreshSponsorUsageStats();
|
|
572995
|
+
}
|
|
572996
|
+
refreshSponsorUsageStats(now = Date.now()) {
|
|
572997
|
+
if (!this._sponsorLimits) {
|
|
572998
|
+
this._stats.sponsorUsage = null;
|
|
572999
|
+
return;
|
|
573000
|
+
}
|
|
573001
|
+
this.ensureSponsorDailyWindow(now);
|
|
573002
|
+
this.pruneSponsorRequestWindow(now);
|
|
573003
|
+
this._stats.sponsorUsage = {
|
|
573004
|
+
enabled: true,
|
|
573005
|
+
transport: "libp2p",
|
|
573006
|
+
dailyTokensUsed: this._dailyTokensUsed,
|
|
573007
|
+
dailyTokensLimit: this._sponsorLimits.maxTokensPerDay,
|
|
573008
|
+
dailyResetAt: this._dailyTokensResetAt,
|
|
573009
|
+
requestsInWindow: this._sponsorRequestWindow.length,
|
|
573010
|
+
requestsPerMinuteLimit: this._sponsorLimits.maxRequestsPerMinute,
|
|
573011
|
+
requestWindowResetAt: this._sponsorRequestWindow[0] ? this._sponsorRequestWindow[0] + SPONSOR_REQUEST_WINDOW_MS : now + SPONSOR_REQUEST_WINDOW_MS,
|
|
573012
|
+
activeConnections: this._stats.activeConnections,
|
|
573013
|
+
maxConcurrent: this._sponsorLimits.maxConcurrent,
|
|
573014
|
+
blockedRequests: this._sponsorBlockedRequests,
|
|
573015
|
+
allowedModels: this._sponsorLimits.allowedModels === "all" ? "all" : [...this._sponsorLimits.allowedModels]
|
|
573016
|
+
};
|
|
572398
573017
|
}
|
|
572399
573018
|
async start() {
|
|
572400
573019
|
this._onInfo?.("Connecting to nexus P2P network...");
|
|
@@ -572653,6 +573272,8 @@ ${this.formatConnectionInfo()}`);
|
|
|
572653
573272
|
}
|
|
572654
573273
|
this._stats.totalTokensIn += tokIn;
|
|
572655
573274
|
this._stats.totalTokensOut += tokOut;
|
|
573275
|
+
this.recordSponsorRequest();
|
|
573276
|
+
this.trackTokenUsage(tokIn, tokOut);
|
|
572656
573277
|
const peerId = record.from || record.peerId || "unknown";
|
|
572657
573278
|
const shortPeer = peerId.length > 16 ? peerId.slice(0, 16) + "..." : peerId;
|
|
572658
573279
|
let user = this._stats.users.get(shortPeer);
|
|
@@ -572716,10 +573337,12 @@ ${this.formatConnectionInfo()}`);
|
|
|
572716
573337
|
}
|
|
572717
573338
|
}
|
|
572718
573339
|
emitStats() {
|
|
573340
|
+
this.refreshSponsorUsageStats();
|
|
572719
573341
|
this.emit("stats", {
|
|
572720
573342
|
...this._stats,
|
|
572721
573343
|
modelUsage: new Map(this._stats.modelUsage),
|
|
572722
|
-
users: new Map(this._stats.users)
|
|
573344
|
+
users: new Map(this._stats.users),
|
|
573345
|
+
sponsorUsage: this._stats.sponsorUsage ? { ...this._stats.sponsorUsage } : null
|
|
572723
573346
|
});
|
|
572724
573347
|
}
|
|
572725
573348
|
/** Format connection info for display */
|
|
@@ -572767,6 +573390,28 @@ ${this.formatConnectionInfo()}`);
|
|
|
572767
573390
|
const budgetColor = pct > 50 ? c3.green : pct > 20 ? c3.yellow : c3.red;
|
|
572768
573391
|
lines.push(` ${c3.cyan("Budget".padEnd(18))} ${budgetColor(fmtTokens(s2.budgetTokensRemaining))}${c3.dim("/")}${fmtTokens(s2.budgetTokensTotal)} ${c3.dim(`(${pct}% left)`)}`);
|
|
572769
573392
|
}
|
|
573393
|
+
if (s2.sponsorUsage) {
|
|
573394
|
+
lines.push("");
|
|
573395
|
+
lines.push(` ${c3.bold("Sponsor Quota")}`);
|
|
573396
|
+
lines.push(` ${formatUsageBar({
|
|
573397
|
+
label: "Daily tokens",
|
|
573398
|
+
used: s2.sponsorUsage.dailyTokensUsed,
|
|
573399
|
+
total: s2.sponsorUsage.dailyTokensLimit,
|
|
573400
|
+
resetAt: s2.sponsorUsage.dailyResetAt
|
|
573401
|
+
})}`);
|
|
573402
|
+
lines.push(` ${formatUsageBar({
|
|
573403
|
+
label: "Requests/min",
|
|
573404
|
+
used: s2.sponsorUsage.requestsInWindow,
|
|
573405
|
+
total: s2.sponsorUsage.requestsPerMinuteLimit,
|
|
573406
|
+
resetAt: s2.sponsorUsage.requestWindowResetAt
|
|
573407
|
+
})}`);
|
|
573408
|
+
lines.push(` ${formatUsageBar({
|
|
573409
|
+
label: "Concurrency",
|
|
573410
|
+
used: s2.sponsorUsage.activeConnections,
|
|
573411
|
+
total: s2.sponsorUsage.maxConcurrent
|
|
573412
|
+
})}`);
|
|
573413
|
+
lines.push(` ${c3.cyan("Blocked".padEnd(18))} ${s2.sponsorUsage.blockedRequests}`);
|
|
573414
|
+
}
|
|
572770
573415
|
const visibleModels = Array.from(s2.modelUsage.entries()).filter(([model]) => !INTERNAL_CAPABILITIES.has(model));
|
|
572771
573416
|
if (visibleModels.length > 0) {
|
|
572772
573417
|
lines.push("");
|
|
@@ -590439,15 +591084,52 @@ async function stepReview(config, rl, availableRows) {
|
|
|
590439
591084
|
if (!result.confirmed || result.key === "cancel") return false;
|
|
590440
591085
|
return result.key === "go_live";
|
|
590441
591086
|
}
|
|
590442
|
-
async function showSponsorDashboard(config, projectDir2, rl, availableRows) {
|
|
591087
|
+
async function showSponsorDashboard(config, projectDir2, rl, availableRows, sponsorUsage) {
|
|
590443
591088
|
const isPaused = config.status === "paused";
|
|
590444
591089
|
const enabledEps = config.endpoints.filter((e2) => e2.enabled);
|
|
591090
|
+
const dailyTokensLimit = sponsorUsage?.dailyTokensLimit || config.rateLimits.maxTokensPerDay;
|
|
591091
|
+
const requestsPerMinuteLimit = sponsorUsage?.requestsPerMinuteLimit || config.rateLimits.maxRequestsPerMinute;
|
|
591092
|
+
const maxConcurrent = sponsorUsage?.maxConcurrent || config.rateLimits.maxConcurrent;
|
|
591093
|
+
const usageItems = [
|
|
591094
|
+
{
|
|
591095
|
+
key: "info_usage_daily",
|
|
591096
|
+
label: ` ${formatUsageBar({
|
|
591097
|
+
label: "Daily tokens",
|
|
591098
|
+
used: sponsorUsage?.dailyTokensUsed ?? 0,
|
|
591099
|
+
total: dailyTokensLimit,
|
|
591100
|
+
resetAt: sponsorUsage?.dailyResetAt
|
|
591101
|
+
})}`
|
|
591102
|
+
},
|
|
591103
|
+
{
|
|
591104
|
+
key: "info_usage_rpm",
|
|
591105
|
+
label: ` ${formatUsageBar({
|
|
591106
|
+
label: "Requests/min",
|
|
591107
|
+
used: sponsorUsage?.requestsInWindow ?? 0,
|
|
591108
|
+
total: requestsPerMinuteLimit,
|
|
591109
|
+
resetAt: sponsorUsage?.requestWindowResetAt
|
|
591110
|
+
})}`
|
|
591111
|
+
},
|
|
591112
|
+
{
|
|
591113
|
+
key: "info_usage_concurrent",
|
|
591114
|
+
label: ` ${formatUsageBar({
|
|
591115
|
+
label: "Concurrency",
|
|
591116
|
+
used: sponsorUsage?.activeConnections ?? 0,
|
|
591117
|
+
total: maxConcurrent
|
|
591118
|
+
})}`
|
|
591119
|
+
},
|
|
591120
|
+
{
|
|
591121
|
+
key: "info_usage_blocked",
|
|
591122
|
+
label: ` Blocked: ${sponsorUsage?.blockedRequests ?? 0}`
|
|
591123
|
+
}
|
|
591124
|
+
];
|
|
590445
591125
|
const items = [
|
|
590446
591126
|
{ key: "hdr", label: "Sponsor Dashboard" },
|
|
590447
591127
|
{ key: "info_status", label: ` Status: ${isPaused ? "● PAUSED" : "● ACTIVE"}` },
|
|
590448
591128
|
{ key: "info_ep", label: ` Endpoints: ${enabledEps.map((e2) => e2.label).join(", ")}` },
|
|
590449
591129
|
{ key: "info_transport", label: ` Transport: ${[config.transport.cloudflared ? "Cloudflared" : "", config.transport.libp2p ? "libp2p" : ""].filter(Boolean).join(" + ")}` },
|
|
590450
591130
|
{ key: "info_limits", label: ` Limits: ${config.rateLimits.maxRequestsPerMinute} req/min, ${config.rateLimits.maxTokensPerDay.toLocaleString()} tokens/day` },
|
|
591131
|
+
{ key: "info_usage_hdr", label: " Usage" },
|
|
591132
|
+
...usageItems,
|
|
590451
591133
|
{ key: "sep", label: "" },
|
|
590452
591134
|
{ key: "modify", label: " [Modify Settings]" },
|
|
590453
591135
|
{ key: isPaused ? "resume" : "pause", label: isPaused ? " [Resume Sponsorship]" : " [Pause Sponsorship]" },
|
|
@@ -590457,7 +591139,7 @@ async function showSponsorDashboard(config, projectDir2, rl, availableRows) {
|
|
|
590457
591139
|
items,
|
|
590458
591140
|
title: "Sponsor Dashboard",
|
|
590459
591141
|
rl,
|
|
590460
|
-
skipKeys: ["hdr", "sep", "info_status", "info_ep", "info_transport", "info_limits"],
|
|
591142
|
+
skipKeys: ["hdr", "sep", "info_status", "info_ep", "info_transport", "info_limits", "info_usage_hdr", "info_usage_daily", "info_usage_rpm", "info_usage_concurrent", "info_usage_blocked"],
|
|
590461
591143
|
availableRows
|
|
590462
591144
|
});
|
|
590463
591145
|
if (!result.confirmed) return "close";
|
|
@@ -590522,6 +591204,7 @@ var init_sponsor_wizard = __esm({
|
|
|
590522
591204
|
init_dist();
|
|
590523
591205
|
init_tui_select();
|
|
590524
591206
|
init_render();
|
|
591207
|
+
init_usage_bars();
|
|
590525
591208
|
}
|
|
590526
591209
|
});
|
|
590527
591210
|
|
|
@@ -594808,6 +595491,17 @@ function stopSponsorHeartbeat() {
|
|
|
594808
595491
|
}
|
|
594809
595492
|
_lastRegisteredSponsorPayload = null;
|
|
594810
595493
|
}
|
|
595494
|
+
function sponsorUsageFromGateway(gateway) {
|
|
595495
|
+
if (!gateway) return null;
|
|
595496
|
+
try {
|
|
595497
|
+
if (typeof gateway.getSponsorUsageSnapshot === "function") {
|
|
595498
|
+
return gateway.getSponsorUsageSnapshot();
|
|
595499
|
+
}
|
|
595500
|
+
return gateway.stats?.sponsorUsage ?? null;
|
|
595501
|
+
} catch {
|
|
595502
|
+
return null;
|
|
595503
|
+
}
|
|
595504
|
+
}
|
|
594811
595505
|
function registerCommandHelp2(items) {
|
|
594812
595506
|
registerCommandHelp(items);
|
|
594813
595507
|
}
|
|
@@ -598287,6 +598981,10 @@ The session corrections MUST become hard rules in the SKILL.md Rules section.`;
|
|
|
598287
598981
|
return "handled";
|
|
598288
598982
|
}
|
|
598289
598983
|
case "cohere": {
|
|
598984
|
+
if (arg === "status" || arg === "stats") {
|
|
598985
|
+
await showCohereStatus(ctx3);
|
|
598986
|
+
return "handled";
|
|
598987
|
+
}
|
|
598290
598988
|
await showCohereDashboard(ctx3);
|
|
598291
598989
|
return "handled";
|
|
598292
598990
|
}
|
|
@@ -599030,11 +599728,16 @@ sleep 1
|
|
|
599030
599728
|
renderInfo("No active sponsorship. Run /sponsor to start.");
|
|
599031
599729
|
return "handled";
|
|
599032
599730
|
}
|
|
599731
|
+
const dashboardGw = ctx3.getExposeGateway?.();
|
|
599732
|
+
if (existingConfig.status === "active" && dashboardGw && "setSponsorLimits" in dashboardGw) {
|
|
599733
|
+
dashboardGw.setSponsorLimits(existingConfig.rateLimits);
|
|
599734
|
+
}
|
|
599033
599735
|
const action = await showSponsorDashboard2(
|
|
599034
599736
|
existingConfig,
|
|
599035
599737
|
projectDir2,
|
|
599036
599738
|
sponsorRl,
|
|
599037
|
-
ctx3.availableContentRows?.()
|
|
599739
|
+
ctx3.availableContentRows?.(),
|
|
599740
|
+
sponsorUsageFromGateway(dashboardGw)
|
|
599038
599741
|
);
|
|
599039
599742
|
switch (action) {
|
|
599040
599743
|
case "modify":
|
|
@@ -599061,6 +599764,9 @@ sleep 1
|
|
|
599061
599764
|
existingConfig.status = "active";
|
|
599062
599765
|
saveSponsorConfig2(projectDir2, existingConfig);
|
|
599063
599766
|
const resumeGw = ctx3.getExposeGateway?.();
|
|
599767
|
+
if (resumeGw && "setSponsorLimits" in resumeGw) {
|
|
599768
|
+
resumeGw.setSponsorLimits(existingConfig.rateLimits);
|
|
599769
|
+
}
|
|
599064
599770
|
if (resumeGw?.tunnelUrl) {
|
|
599065
599771
|
const resumePayload = {
|
|
599066
599772
|
name: existingConfig.header?.message || "Omnius Sponsor",
|
|
@@ -602969,15 +603675,65 @@ async function showHelpMenu(ctx3) {
|
|
|
602969
603675
|
}
|
|
602970
603676
|
}
|
|
602971
603677
|
}
|
|
602972
|
-
|
|
602973
|
-
|
|
602974
|
-
|
|
603678
|
+
function emptyCohereStats(isActive = false) {
|
|
603679
|
+
return {
|
|
603680
|
+
status: isActive ? "active" : "inactive",
|
|
603681
|
+
active: isActive,
|
|
603682
|
+
daemonPid: 0,
|
|
603683
|
+
uptimeSec: 0,
|
|
603684
|
+
lastQueryAt: 0,
|
|
603685
|
+
queriesReceived: 0,
|
|
602975
603686
|
queriesAnswered: 0,
|
|
603687
|
+
queriesErrors: 0,
|
|
602976
603688
|
queriesSent: 0,
|
|
602977
|
-
|
|
602978
|
-
|
|
603689
|
+
avgLatencyMs: 0,
|
|
603690
|
+
bytesIn: 0,
|
|
603691
|
+
bytesOut: 0,
|
|
603692
|
+
modelsUsed: {},
|
|
603693
|
+
peersServed: {},
|
|
603694
|
+
allowedModels: null
|
|
602979
603695
|
};
|
|
602980
|
-
|
|
603696
|
+
}
|
|
603697
|
+
function numberField(value2) {
|
|
603698
|
+
const n2 = Number(value2);
|
|
603699
|
+
return Number.isFinite(n2) && n2 > 0 ? Math.floor(n2) : 0;
|
|
603700
|
+
}
|
|
603701
|
+
function mapNumberRecord(value2) {
|
|
603702
|
+
if (!value2 || typeof value2 !== "object" || Array.isArray(value2)) return {};
|
|
603703
|
+
const out = {};
|
|
603704
|
+
for (const [key, raw] of Object.entries(value2)) {
|
|
603705
|
+
out[key] = numberField(raw);
|
|
603706
|
+
}
|
|
603707
|
+
return out;
|
|
603708
|
+
}
|
|
603709
|
+
function parseCohereStatsOutput(output, isActive = false) {
|
|
603710
|
+
try {
|
|
603711
|
+
const parsed = JSON.parse(output);
|
|
603712
|
+
const active = typeof parsed.active === "boolean" ? parsed.active : String(parsed.status ?? "").toLowerCase() === "active";
|
|
603713
|
+
return {
|
|
603714
|
+
status: active ? "active" : "inactive",
|
|
603715
|
+
active,
|
|
603716
|
+
daemonPid: numberField(parsed.daemonPid),
|
|
603717
|
+
uptimeSec: numberField(parsed.uptimeSec),
|
|
603718
|
+
lastQueryAt: numberField(parsed.lastQueryAt),
|
|
603719
|
+
queriesReceived: numberField(parsed.queriesReceived),
|
|
603720
|
+
queriesAnswered: numberField(parsed.queriesAnswered),
|
|
603721
|
+
queriesErrors: numberField(parsed.queriesErrors),
|
|
603722
|
+
queriesSent: numberField(parsed.queriesSent),
|
|
603723
|
+
avgLatencyMs: numberField(parsed.avgLatencyMs),
|
|
603724
|
+
bytesIn: numberField(parsed.bytesIn),
|
|
603725
|
+
bytesOut: numberField(parsed.bytesOut),
|
|
603726
|
+
modelsUsed: mapNumberRecord(parsed.modelsUsed),
|
|
603727
|
+
peersServed: mapNumberRecord(parsed.peersServed),
|
|
603728
|
+
allowedModels: Array.isArray(parsed.allowedModels) ? parsed.allowedModels.map(String) : null
|
|
603729
|
+
};
|
|
603730
|
+
} catch {
|
|
603731
|
+
return emptyCohereStats(isActive);
|
|
603732
|
+
}
|
|
603733
|
+
}
|
|
603734
|
+
async function fetchCohereDashboardState(ctx3) {
|
|
603735
|
+
const isActive = ctx3.isCohere?.() ?? false;
|
|
603736
|
+
const state = { stats: emptyCohereStats(isActive), modelList: [] };
|
|
602981
603737
|
try {
|
|
602982
603738
|
const nexus = new NexusTool(ctx3.repoRoot);
|
|
602983
603739
|
try {
|
|
@@ -602989,29 +603745,52 @@ async function showCohereDashboard(ctx3) {
|
|
|
602989
603745
|
} catch {
|
|
602990
603746
|
}
|
|
602991
603747
|
try {
|
|
602992
|
-
const r2 = await nexus.execute({ action: "cohere_stats" });
|
|
602993
|
-
if (r2.success)
|
|
602994
|
-
try {
|
|
602995
|
-
const d2 = JSON.parse(r2.output);
|
|
602996
|
-
Object.assign(stats, d2);
|
|
602997
|
-
} catch {
|
|
602998
|
-
}
|
|
602999
|
-
}
|
|
603748
|
+
const r2 = await nexus.execute({ action: "cohere_stats", format: "json" });
|
|
603749
|
+
if (r2.success) state.stats = parseCohereStatsOutput(r2.output, isActive);
|
|
603000
603750
|
} catch {
|
|
603001
603751
|
}
|
|
603002
603752
|
try {
|
|
603003
603753
|
const r2 = await nexus.execute({ action: "cohere_list_models" });
|
|
603004
603754
|
if (r2.success) {
|
|
603005
603755
|
try {
|
|
603006
|
-
modelList = JSON.parse(r2.output).models || [];
|
|
603756
|
+
state.modelList = JSON.parse(r2.output).models || [];
|
|
603007
603757
|
} catch {
|
|
603008
|
-
modelList = r2.output.split("\n").
|
|
603758
|
+
state.modelList = r2.output.split("\n").map((l2) => l2.trim()).filter(Boolean);
|
|
603009
603759
|
}
|
|
603010
603760
|
}
|
|
603011
603761
|
} catch {
|
|
603012
603762
|
}
|
|
603013
603763
|
} catch {
|
|
603014
603764
|
}
|
|
603765
|
+
return state;
|
|
603766
|
+
}
|
|
603767
|
+
function cohereStatusLines(stats, modelList) {
|
|
603768
|
+
const modelEntries = Object.entries(stats.modelsUsed).sort((a2, b) => b[1] - a2[1]);
|
|
603769
|
+
const peerEntries = Object.entries(stats.peersServed).sort((a2, b) => b[1] - a2[1]);
|
|
603770
|
+
const uptime2 = stats.uptimeSec < 60 ? `${stats.uptimeSec}s` : stats.uptimeSec < 3600 ? `${Math.floor(stats.uptimeSec / 60)}m ${stats.uptimeSec % 60}s` : `${Math.floor(stats.uptimeSec / 3600)}h ${Math.floor(stats.uptimeSec % 3600 / 60)}m`;
|
|
603771
|
+
return [
|
|
603772
|
+
c3.bold("COHERE Status"),
|
|
603773
|
+
`Status: ${stats.active ? c3.green("ACTIVE") : c3.dim("inactive")}`,
|
|
603774
|
+
`Daemon: ${stats.daemonPid ? `pid ${stats.daemonPid}` : "not connected"} · uptime ${uptime2}`,
|
|
603775
|
+
`Last query: ${stats.lastQueryAt ? new Date(stats.lastQueryAt).toISOString() : "never"}`,
|
|
603776
|
+
"",
|
|
603777
|
+
formatUsageBar({ label: "Answered", used: stats.queriesAnswered, total: Math.max(1, stats.queriesReceived), width: 18 }),
|
|
603778
|
+
formatUsageBar({ label: "Errors", used: stats.queriesErrors, total: Math.max(1, stats.queriesReceived), width: 18 }),
|
|
603779
|
+
`Sent out: ${stats.queriesSent} · avg latency ${stats.avgLatencyMs}ms`,
|
|
603780
|
+
`Data: in ${formatFileSize(stats.bytesIn)} · out ${formatFileSize(stats.bytesOut)}`,
|
|
603781
|
+
"",
|
|
603782
|
+
`Models exposed: ${modelList.length}`,
|
|
603783
|
+
`Allowlist: ${stats.allowedModels ? stats.allowedModels.join(", ") || "(empty)" : "all downloaded models"}`,
|
|
603784
|
+
`Top models: ${modelEntries.length ? modelEntries.slice(0, 5).map(([m2, n2]) => `${m2} (${n2})`).join(", ") : "none yet"}`,
|
|
603785
|
+
`Peers served: ${peerEntries.length ? peerEntries.slice(0, 5).map(([p2, n2]) => `${p2.slice(0, 20)} (${n2})`).join(", ") : "none yet"}`
|
|
603786
|
+
];
|
|
603787
|
+
}
|
|
603788
|
+
async function showCohereStatus(ctx3) {
|
|
603789
|
+
const { stats, modelList } = await fetchCohereDashboardState(ctx3);
|
|
603790
|
+
safeLog(cohereStatusLines(stats, modelList).join("\n"));
|
|
603791
|
+
}
|
|
603792
|
+
async function showCohereDashboard(ctx3) {
|
|
603793
|
+
let { stats, modelList } = await fetchCohereDashboardState(ctx3);
|
|
603015
603794
|
while (true) {
|
|
603016
603795
|
const currentActive = ctx3.isCohere?.() ?? false;
|
|
603017
603796
|
const toggleLabel = currentActive ? "Disable COHERE" : "Enable COHERE";
|
|
@@ -603028,7 +603807,7 @@ async function showCohereDashboard(ctx3) {
|
|
|
603028
603807
|
{
|
|
603029
603808
|
key: "stats",
|
|
603030
603809
|
label: "Network Stats",
|
|
603031
|
-
detail: `${stats.queriesAnswered} answered · ${stats.queriesSent} sent · ${stats.
|
|
603810
|
+
detail: `${stats.queriesAnswered} answered · ${stats.queriesSent} sent · ${stats.queriesErrors} errors`
|
|
603032
603811
|
},
|
|
603033
603812
|
{
|
|
603034
603813
|
key: "identity",
|
|
@@ -603081,11 +603860,11 @@ async function showCohereDashboard(ctx3) {
|
|
|
603081
603860
|
},
|
|
603082
603861
|
{
|
|
603083
603862
|
key: "insights",
|
|
603084
|
-
label: `
|
|
603863
|
+
label: `Avg latency: ${c3.bold(String(stats.avgLatencyMs || 0))}ms`
|
|
603085
603864
|
},
|
|
603086
603865
|
{
|
|
603087
603866
|
key: "peers",
|
|
603088
|
-
label: `Peers
|
|
603867
|
+
label: `Peers served: ${c3.bold(String(Object.keys(stats.peersServed || {}).length))}`
|
|
603089
603868
|
},
|
|
603090
603869
|
{ key: "hdr2", label: selectColors.dim("─── Actions ───") },
|
|
603091
603870
|
{
|
|
@@ -603103,17 +603882,9 @@ async function showCohereDashboard(ctx3) {
|
|
|
603103
603882
|
availableRows: ctx3.availableContentRows?.()
|
|
603104
603883
|
});
|
|
603105
603884
|
if (statResult.key === "refresh") {
|
|
603106
|
-
|
|
603107
|
-
|
|
603108
|
-
|
|
603109
|
-
if (r2.success) {
|
|
603110
|
-
try {
|
|
603111
|
-
Object.assign(stats, JSON.parse(r2.output));
|
|
603112
|
-
} catch {
|
|
603113
|
-
}
|
|
603114
|
-
}
|
|
603115
|
-
} catch {
|
|
603116
|
-
}
|
|
603885
|
+
const refreshed = await fetchCohereDashboardState(ctx3);
|
|
603886
|
+
stats = refreshed.stats;
|
|
603887
|
+
modelList = refreshed.modelList;
|
|
603117
603888
|
}
|
|
603118
603889
|
continue;
|
|
603119
603890
|
}
|
|
@@ -607479,6 +608250,7 @@ var init_commands = __esm({
|
|
|
607479
608250
|
init_listen();
|
|
607480
608251
|
init_dist();
|
|
607481
608252
|
init_tui_select();
|
|
608253
|
+
init_usage_bars();
|
|
607482
608254
|
init_overlay_lock();
|
|
607483
608255
|
init_drop_panel();
|
|
607484
608256
|
init_memory_menu();
|
|
@@ -617189,6 +617961,13 @@ function senderKey2(entry) {
|
|
|
617189
617961
|
if (entry.role === "assistant") return entry.username || entry.speaker || "assistant";
|
|
617190
617962
|
return String(entry.fromUserId || entry.username || entry.firstName || senderLabel(entry));
|
|
617191
617963
|
}
|
|
617964
|
+
function speakerRole(entry) {
|
|
617965
|
+
if (entry.role === "assistant") return "agent_self";
|
|
617966
|
+
return entry.isBot ? "participant_bot" : "participant_human";
|
|
617967
|
+
}
|
|
617968
|
+
function identityBoundary(entry) {
|
|
617969
|
+
return speakerRole(entry) === "agent_self" ? "this message is authored by the Telegram agent itself" : "this message is authored by another Telegram participant; first-person claims belong to that participant, not the agent";
|
|
617970
|
+
}
|
|
617192
617971
|
function scopeFor(entry, options2) {
|
|
617193
617972
|
const chatType = entry.chatType || options2.chatType || "unknown";
|
|
617194
617973
|
return {
|
|
@@ -617202,7 +617981,7 @@ function senderFor(entry) {
|
|
|
617202
617981
|
id: senderKey2(entry),
|
|
617203
617982
|
username: entry.username,
|
|
617204
617983
|
displayName: senderLabel(entry),
|
|
617205
|
-
isBot: entry.role === "assistant"
|
|
617984
|
+
isBot: entry.role === "assistant" || entry.isBot === true
|
|
617206
617985
|
};
|
|
617207
617986
|
}
|
|
617208
617987
|
function messageIdFor(entry, sessionKey) {
|
|
@@ -617241,7 +618020,11 @@ function contentFor(entry, sessionKey, options2) {
|
|
|
617241
618020
|
`message_id: ${messageIdFor(entry, sessionKey)}`,
|
|
617242
618021
|
entry.messageThreadId != null ? `thread_id: ${entry.messageThreadId}` : "",
|
|
617243
618022
|
entry.replyToMessageId != null ? `reply_to_message_id: ${entry.replyToMessageId}` : "",
|
|
618023
|
+
`actor_key: ${senderKey2(entry)}`,
|
|
617244
618024
|
`speaker: ${senderLabel(entry)}`,
|
|
618025
|
+
`speaker_role: ${speakerRole(entry)}`,
|
|
618026
|
+
`identity_boundary: ${identityBoundary(entry)}`,
|
|
618027
|
+
entry.replyContext?.sender ? `reply_sender: ${entry.replyContext.sender.username || entry.replyContext.sender.firstName || entry.replyContext.sender.id || "unknown"} [${entry.replyContext.sender.isBot ? "participant_bot" : "participant_human"}]` : "",
|
|
617245
618028
|
entry.mode ? `mode: ${entry.mode}` : "",
|
|
617246
618029
|
entry.mediaSummary ? `media: ${compact(entry.mediaSummary, 260)}` : "",
|
|
617247
618030
|
"",
|
|
@@ -617265,7 +618048,11 @@ function metadataFor(entry, sessionKey, options2) {
|
|
|
617265
618048
|
username: entry.username,
|
|
617266
618049
|
firstName: entry.firstName,
|
|
617267
618050
|
fromUserId: entry.fromUserId,
|
|
618051
|
+
isBot: entry.isBot,
|
|
617268
618052
|
speaker: senderLabel(entry),
|
|
618053
|
+
actorKey: senderKey2(entry),
|
|
618054
|
+
speakerRole: speakerRole(entry),
|
|
618055
|
+
identityBoundary: identityBoundary(entry),
|
|
617269
618056
|
mediaSummary: entry.mediaSummary
|
|
617270
618057
|
}
|
|
617271
618058
|
};
|
|
@@ -617516,12 +618303,14 @@ function episodeLine(episode) {
|
|
|
617516
618303
|
const meta = episode.metadata;
|
|
617517
618304
|
const telegram = meta?.telegram;
|
|
617518
618305
|
const speaker = clean4(telegram?.speaker || telegram?.username || "unknown", 80);
|
|
618306
|
+
const role = clean4(telegram?.speakerRole || "participant_human", 40);
|
|
617519
618307
|
const messageId = telegram?.messageId == null ? "unknown" : String(telegram.messageId);
|
|
617520
618308
|
const replyTo = telegram?.replyToMessageId == null ? "" : ` reply_to=${telegram.replyToMessageId}`;
|
|
617521
618309
|
return [
|
|
617522
618310
|
`episode_id=${episode.id}`,
|
|
617523
618311
|
`message_id=${messageId}${replyTo}`,
|
|
617524
618312
|
`speaker=${speaker}`,
|
|
618313
|
+
`speaker_role=${role}`,
|
|
617525
618314
|
`modality=${episode.modality}`,
|
|
617526
618315
|
`content=${clean4(episode.content, 700)}`
|
|
617527
618316
|
].join(" | ");
|
|
@@ -617542,6 +618331,9 @@ function buildTelegramReflectionExtractionPrompt(options2) {
|
|
|
617542
618331
|
"- Use only the scoped Telegram corpus, graph nodes, graph edges, and source anchors below.",
|
|
617543
618332
|
"- Preserve message_id and episode_id anchors on every item when possible.",
|
|
617544
618333
|
"- Do not infer identity from a face, voice, or name unless the corpus explicitly says it.",
|
|
618334
|
+
"- speaker_role=agent_self is the Telegram agent; speaker_role=participant_human or participant_bot is another chat participant.",
|
|
618335
|
+
"- Do not assign participant first-person claims, preferences, names, or self-descriptions to the agent/self unless the source episode has speaker_role=agent_self.",
|
|
618336
|
+
"- Replies between non-agent participants are social context and relationship evidence, not direct agent self-reflection.",
|
|
617545
618337
|
"- Private DM followups may be proposed but must not be framed as already sent.",
|
|
617546
618338
|
"- same_group followups must be concise, low-intrusion, and anchored to a source message id.",
|
|
617547
618339
|
"- If a category has no evidence, return an empty array for that category.",
|
|
@@ -617959,6 +618751,8 @@ function formatTelegramSocialStateContext(state, input) {
|
|
|
617959
618751
|
const replyKey = input.replySender ? telegramSocialActorKey(input.replySender) : void 0;
|
|
617960
618752
|
const thread = state.threads[telegramSocialThreadKey(input)];
|
|
617961
618753
|
const participant = state.participants[senderKey3];
|
|
618754
|
+
const senderIdentity = selfKey && senderKey3 === selfKey ? "agent_self" : "participant";
|
|
618755
|
+
const replyIdentity = replyKey ? selfKey && replyKey === selfKey ? "agent_self" : "participant" : "none";
|
|
617962
618756
|
const relevantKeys = new Set([senderKey3, selfKey, replyKey].filter(Boolean));
|
|
617963
618757
|
const edges = state.relationships.filter((edge) => relevantKeys.has(edge.fromKey) || relevantKeys.has(edge.toKey)).sort((a2, b) => b.lastSeenAt - a2.lastSeenAt).slice(0, limit);
|
|
617964
618758
|
const outcomes = state.outcomes.filter((outcome) => outcome.senderKey === senderKey3 || outcome.chatId === String(input.chatId)).sort((a2, b) => b.ts - a2.ts).slice(0, limit);
|
|
@@ -617967,6 +618761,8 @@ function formatTelegramSocialStateContext(state, input) {
|
|
|
617967
618761
|
const preferences = preferenceLines(state.preferences[senderKey3]);
|
|
617968
618762
|
return [
|
|
617969
618763
|
"### Telegram Structured Social State",
|
|
618764
|
+
selfKey ? `Agent self node: ${selfKey}` : "Agent self node: unknown",
|
|
618765
|
+
`Identity boundary: the agent is the self node only. Current actor ${senderKey3} is ${senderIdentity}; reply target ${replyKey ?? "none"} is ${replyIdentity}. Participant first-person claims belong to their actor node, not the agent, unless that actor is the self node.`,
|
|
617970
618766
|
`Current actor node: ${senderKey3} [${participant?.actorKind || telegramSocialActorKind(input)}] messages=${participant?.messageCount ?? 0}${participant?.lastText ? ` last=${jsonLine(participant.lastText, 140)}` : ""}`,
|
|
617971
618767
|
thread ? `Active channel/thread: ${thread.key}; messages=${thread.messageCount}; participants=${thread.participantKeys.slice(-8).join(", ") || "none"}; last_outcomes=${thread.lastOutcomeIds.slice(-5).join(", ") || "none"}` : "",
|
|
617972
618768
|
preferences.length ? `Relevant preference vector for ${senderKey3}:
|
|
@@ -624624,6 +625420,7 @@ ${lines.join("\n")}`);
|
|
|
624624
625420
|
"Classify the live scenario by inference from the full context. Do not use a fixed taxonomy, keyword list, or preset scenario enum.",
|
|
624625
625421
|
"Create a situation-specific scenario_id and scenario_label, then summarize the active state loop that should govern the later attention decision.",
|
|
624626
625422
|
"Use the persona docs below as binding behavioral guidance.",
|
|
625423
|
+
"Maintain the Telegram identity boundary: the agent is only the bot/self actor. Other users and peer bots replying to each other are participants; their first-person claims are not the agent's identity or self-reflection.",
|
|
624627
625424
|
"Return JSON only. No markdown. No <think> tags.",
|
|
624628
625425
|
"",
|
|
624629
625426
|
'Schema: {"silent_disposition":"what happens silently with this message","mental_note":"concise observation of the turn","memory_note":"what scoped memory should retain or connect","relationship_note":"relationship/thread implication","procedure_note":"active tree/branch/abort implication","voice_note":"final voice implication if a reply happens","scenario_note":"identified scenario and transition state","scenario_id":"dynamic inferred scenario id","scenario_label":"human readable dynamic scenario label","scenario_confidence":0.0-1.0,"scenario_objective":"current scenario objective","scenario_state_loop":"state loop to maintain until transition"}',
|