omnius 1.0.133 → 1.0.135
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +1956 -212
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -1328,6 +1328,893 @@ var init_tool_executor = __esm({
|
|
|
1328
1328
|
}
|
|
1329
1329
|
});
|
|
1330
1330
|
|
|
1331
|
+
// packages/execution/dist/model-broker.js
|
|
1332
|
+
import { EventEmitter } from "node:events";
|
|
1333
|
+
import { totalmem, freemem } from "node:os";
|
|
1334
|
+
import { exec } from "node:child_process";
|
|
1335
|
+
/**
 * Sample system memory from the OS and report it in whole mebibytes.
 *
 * @returns {{ total: number, free: number, used: number }} Rounded total and
 *   free memory; `used` is `total - free`.
 */
function ramSnapshotMB() {
  const BYTES_PER_MB = 1024 * 1024;
  const toMB = (bytes) => Math.round(bytes / BYTES_PER_MB);
  const total = toMB(totalmem());
  const free = toMB(freemem());
  const used = total - free;
  return { total, free, used };
}
|
|
1340
|
+
/**
 * Query `nvidia-smi` for GPU memory and sum the figures across every GPU line
 * it prints.
 *
 * Uses the module-level `_nvSmiAvailable` flag as a latch: once a probe has
 * failed without ever having succeeded, later calls return `null` immediately
 * instead of spawning a process.
 *
 * @returns {Promise<{ total: number, used: number, free: number } | null>}
 *   Summed values in the unit nvidia-smi prints with `nounits`, or `null` when
 *   the tool is unavailable, the call fails, or no memory is reported.
 */
async function vramSnapshotMB() {
  if (_nvSmiAvailable === false)
    return null;
  try {
    const out = await new Promise((resolve, reject) => {
      // No shell redirection: exec() already buffers stderr, and `2>/dev/null`
      // makes the whole command fail under cmd.exe on Windows.
      exec(
        "nvidia-smi --query-gpu=memory.total,memory.used,memory.free --format=csv,noheader,nounits",
        { encoding: "utf8", timeout: 3e3 },
        (err, stdout) => err ? reject(err) : resolve(stdout)
      );
    });
    _nvSmiAvailable = true;
    let total = 0;
    let used = 0;
    let free = 0;
    for (const line of out.trim().split("\n")) {
      const parts = line.split(",").map((field) => field.trim());
      if (parts.length < 3)
        continue;
      // Non-numeric fields parse to NaN and are counted as 0.
      total += parseInt(parts[0] ?? "0", 10) || 0;
      used += parseInt(parts[1] ?? "0", 10) || 0;
      free += parseInt(parts[2] ?? "0", 10) || 0;
    }
    if (total <= 0)
      return null;
    return { total, used, free };
  } catch {
    // Latch "unavailable" only if the tool has never worked. A transient
    // failure (e.g. the 3 s timeout) after a successful probe must not switch
    // VRAM checks off for the rest of the process.
    if (_nvSmiAvailable !== true)
      _nvSmiAvailable = false;
    return null;
  }
}
|
|
1365
|
+
/**
 * Convenience accessor for the shared ModelBroker instance.
 *
 * @returns The object returned by `ModelBroker.getInstance()`.
 */
function getModelBroker() {
  const broker = ModelBroker.getInstance();
  return broker;
}
|
|
1368
|
+
var DEFAULT_RAM_HEADROOM_MB, DEFAULT_VRAM_HEADROOM_MB, DEFAULT_IDLE_EVICT_MS, DEFAULT_POLL_MS, DEFAULT_INFLIGHT_WAIT_MS, DEFAULT_SLOT_CAPACITY, DEFAULT_QUEUE_CAPACITY, THROUGHPUT_EMA_ALPHA, THROUGHPUT_INITIAL_TPS, STUCK_INFLIGHT_DIAGNOSTIC_MS, ModelBroker, _nvSmiAvailable;
|
|
1369
|
+
var init_model_broker = __esm({
|
|
1370
|
+
"packages/execution/dist/model-broker.js"() {
|
|
1371
|
+
"use strict";
|
|
1372
|
+
DEFAULT_RAM_HEADROOM_MB = 2048;
|
|
1373
|
+
DEFAULT_VRAM_HEADROOM_MB = 1024;
|
|
1374
|
+
DEFAULT_IDLE_EVICT_MS = 5 * 60 * 1e3;
|
|
1375
|
+
DEFAULT_POLL_MS = 4e3;
|
|
1376
|
+
DEFAULT_INFLIGHT_WAIT_MS = 6e4;
|
|
1377
|
+
DEFAULT_SLOT_CAPACITY = 4;
|
|
1378
|
+
DEFAULT_QUEUE_CAPACITY = 50;
|
|
1379
|
+
THROUGHPUT_EMA_ALPHA = 0.2;
|
|
1380
|
+
THROUGHPUT_INITIAL_TPS = 25;
|
|
1381
|
+
STUCK_INFLIGHT_DIAGNOSTIC_MS = 5 * 60 * 1e3;
|
|
1382
|
+
ModelBroker = class _ModelBroker {
|
|
1383
|
+
static _instance = null;
|
|
1384
|
+
/** Loaded model registry keyed by `${host}:${name}`. */
|
|
1385
|
+
_loaded = /* @__PURE__ */ new Map();
|
|
1386
|
+
/** In-flight load promises keyed by `${host}:${name}`. */
|
|
1387
|
+
_inflight = /* @__PURE__ */ new Map();
|
|
1388
|
+
/** Fallback chains keyed by domain. */
|
|
1389
|
+
_fallbacks = /* @__PURE__ */ new Map();
|
|
1390
|
+
/** Cached n_ctx_train per Ollama model. */
|
|
1391
|
+
_ctxTrainCache = /* @__PURE__ */ new Map();
|
|
1392
|
+
/** Cached "model exists in Ollama" probes (true / false). */
|
|
1393
|
+
_ollamaModelExists = /* @__PURE__ */ new Map();
|
|
1394
|
+
/** Event emitter — typed via `on<K>(event: K, listener: BrokerEvents[K])`. */
|
|
1395
|
+
_events = new EventEmitter();
|
|
1396
|
+
/** Poll timer. */
|
|
1397
|
+
_pollTimer = null;
|
|
1398
|
+
/** Last full snapshot. */
|
|
1399
|
+
_lastSnapshot = null;
|
|
1400
|
+
/** Configured Ollama base URL. */
|
|
1401
|
+
_ollamaBaseUrl = process.env["OLLAMA_HOST"] || "http://127.0.0.1:11434";
|
|
1402
|
+
/** Configured thresholds (mutable for /broker tuning). */
|
|
1403
|
+
ramHeadroomMB = DEFAULT_RAM_HEADROOM_MB;
|
|
1404
|
+
vramHeadroomMB = DEFAULT_VRAM_HEADROOM_MB;
|
|
1405
|
+
idleEvictMs = DEFAULT_IDLE_EVICT_MS;
|
|
1406
|
+
/** Inference slot capacity (auto-tunes from Ollama pool size when known). */
|
|
1407
|
+
slotCapacity = DEFAULT_SLOT_CAPACITY;
|
|
1408
|
+
/** Maximum queue depth before queue pressure is emitted. */
|
|
1409
|
+
queueCapacity = DEFAULT_QUEUE_CAPACITY;
|
|
1410
|
+
// ── Inference slot tracking ─────────────────────────────────────────
|
|
1411
|
+
/** Active slots keyed by slot id. */
|
|
1412
|
+
_activeSlots = /* @__PURE__ */ new Map();
|
|
1413
|
+
/** Reserved slots per sessionKey (1 reserved slot per active chat). */
|
|
1414
|
+
_reservedBySession = /* @__PURE__ */ new Map();
|
|
1415
|
+
// sessionKey -> slot id
|
|
1416
|
+
/** Shared (non-reserved) queue. FIFO with priority insertion. */
|
|
1417
|
+
_slotQueue = [];
|
|
1418
|
+
/** Per-model throughput tracking. */
|
|
1419
|
+
_throughput = /* @__PURE__ */ new Map();
|
|
1420
|
+
/** Monotonic counter for slot ids. */
|
|
1421
|
+
_slotIdSeq = 0;
|
|
1422
|
+
static getInstance() {
|
|
1423
|
+
if (!_ModelBroker._instance)
|
|
1424
|
+
_ModelBroker._instance = new _ModelBroker();
|
|
1425
|
+
return _ModelBroker._instance;
|
|
1426
|
+
}
|
|
1427
|
+
/** Reset (test-only). */
|
|
1428
|
+
static resetInstance() {
|
|
1429
|
+
if (_ModelBroker._instance?._pollTimer)
|
|
1430
|
+
clearInterval(_ModelBroker._instance._pollTimer);
|
|
1431
|
+
_ModelBroker._instance = null;
|
|
1432
|
+
}
|
|
1433
|
+
constructor() {
|
|
1434
|
+
this.registerDefaultFallbacks();
|
|
1435
|
+
}
|
|
1436
|
+
// ------------------------------------------------------------------
|
|
1437
|
+
// Public API — events
|
|
1438
|
+
// ------------------------------------------------------------------
|
|
1439
|
+
on(event, listener) {
|
|
1440
|
+
this._events.on(event, listener);
|
|
1441
|
+
return this;
|
|
1442
|
+
}
|
|
1443
|
+
off(event, listener) {
|
|
1444
|
+
this._events.off(event, listener);
|
|
1445
|
+
return this;
|
|
1446
|
+
}
|
|
1447
|
+
emit(event, ...args) {
|
|
1448
|
+
this._events.emit(event, ...args);
|
|
1449
|
+
}
|
|
1450
|
+
// ------------------------------------------------------------------
|
|
1451
|
+
// Public API — polling
|
|
1452
|
+
// ------------------------------------------------------------------
|
|
1453
|
+
/** Start background polling of Ollama /api/ps and nvidia-smi. */
|
|
1454
|
+
startPolling(intervalMs = DEFAULT_POLL_MS) {
|
|
1455
|
+
if (this._pollTimer)
|
|
1456
|
+
return;
|
|
1457
|
+
this._pollTimer = setInterval(() => {
|
|
1458
|
+
this.pollOnce().catch(() => {
|
|
1459
|
+
});
|
|
1460
|
+
}, intervalMs);
|
|
1461
|
+
this.pollOnce().catch(() => {
|
|
1462
|
+
});
|
|
1463
|
+
}
|
|
1464
|
+
stopPolling() {
|
|
1465
|
+
if (this._pollTimer) {
|
|
1466
|
+
clearInterval(this._pollTimer);
|
|
1467
|
+
this._pollTimer = null;
|
|
1468
|
+
}
|
|
1469
|
+
}
|
|
1470
|
+
/** Configure Ollama base URL (called from cli config wiring). */
|
|
1471
|
+
setOllamaBaseUrl(url) {
|
|
1472
|
+
this._ollamaBaseUrl = url;
|
|
1473
|
+
}
|
|
1474
|
+
/** One poll cycle — refreshes /api/ps and emits snapshot. */
|
|
1475
|
+
async pollOnce() {
|
|
1476
|
+
await Promise.all([
|
|
1477
|
+
this.refreshOllamaPs().catch(() => {
|
|
1478
|
+
})
|
|
1479
|
+
// VRAM total/free comes from system-metrics; broker computes its own snapshot
|
|
1480
|
+
]);
|
|
1481
|
+
const snapshot = this.buildSnapshot();
|
|
1482
|
+
this._lastSnapshot = snapshot;
|
|
1483
|
+
this.emit("snapshot", snapshot);
|
|
1484
|
+
this.checkPressure(snapshot);
|
|
1485
|
+
return snapshot;
|
|
1486
|
+
}
|
|
1487
|
+
/** Best-known current snapshot. */
|
|
1488
|
+
snapshot() {
|
|
1489
|
+
return this._lastSnapshot ?? this.buildSnapshot();
|
|
1490
|
+
}
|
|
1491
|
+
// ------------------------------------------------------------------
|
|
1492
|
+
// Public API — fallback registry
|
|
1493
|
+
// ------------------------------------------------------------------
|
|
1494
|
+
/** Register a fallback chain for a domain. Later entries are tried later. */
|
|
1495
|
+
setFallbackChain(domain, chain) {
|
|
1496
|
+
this._fallbacks.set(domain, [...chain]);
|
|
1497
|
+
}
|
|
1498
|
+
getFallbackChain(domain) {
|
|
1499
|
+
return this._fallbacks.get(domain) ?? [];
|
|
1500
|
+
}
|
|
1501
|
+
// ------------------------------------------------------------------
|
|
1502
|
+
// Public API — load decisioning
|
|
1503
|
+
// ------------------------------------------------------------------
|
|
1504
|
+
/**
|
|
1505
|
+
* Pre-flight a model-load request. Always call this before allocating a model.
|
|
1506
|
+
*
|
|
1507
|
+
* Decisions:
|
|
1508
|
+
* - ok: proceed; use `effectiveNumCtx` if returned
|
|
1509
|
+
* - wait-for-inflight: another caller is loading the same model; await `promise`
|
|
1510
|
+
* - evict: caller should free `evictTargets` (broker calls evict
|
|
1511
|
+
* hooks itself when possible) then retry
|
|
1512
|
+
* - degrade: caller should reissue with `fallback`
|
|
1513
|
+
* - reject: nothing viable — caller should error out
|
|
1514
|
+
*/
|
|
1515
|
+
async ensureModelLoadable(spec) {
|
|
1516
|
+
const key = this.keyOf(spec);
|
|
1517
|
+
const inflight = this._inflight.get(key);
|
|
1518
|
+
if (inflight) {
|
|
1519
|
+
return { kind: "wait-for-inflight", promise: inflight.promise };
|
|
1520
|
+
}
|
|
1521
|
+
const existing = this._loaded.get(key);
|
|
1522
|
+
if (existing) {
|
|
1523
|
+
existing.lastUsedAt = Date.now();
|
|
1524
|
+
return { kind: "ok", effectiveNumCtx: existing.numCtx, note: "already-loaded" };
|
|
1525
|
+
}
|
|
1526
|
+
let effectiveNumCtx;
|
|
1527
|
+
if (spec.host === "ollama" && spec.requestedNumCtx) {
|
|
1528
|
+
const trainCtx = await this.getNctxTrain(spec.name);
|
|
1529
|
+
if (trainCtx && spec.requestedNumCtx > trainCtx) {
|
|
1530
|
+
effectiveNumCtx = trainCtx;
|
|
1531
|
+
} else {
|
|
1532
|
+
effectiveNumCtx = spec.requestedNumCtx;
|
|
1533
|
+
}
|
|
1534
|
+
} else if (spec.host === "ollama") {
|
|
1535
|
+
const trainCtx = await this.getNctxTrain(spec.name);
|
|
1536
|
+
if (trainCtx)
|
|
1537
|
+
effectiveNumCtx = trainCtx;
|
|
1538
|
+
}
|
|
1539
|
+
const estVram = spec.estimatedVramMB ?? this.estimateFootprintVramMB(spec);
|
|
1540
|
+
const estRam = spec.estimatedRamMB ?? this.estimateFootprintRamMB(spec);
|
|
1541
|
+
const ram = ramSnapshotMB();
|
|
1542
|
+
const vram = await vramSnapshotMB();
|
|
1543
|
+
const ramFitsAfter = ram.free - estRam >= this.ramHeadroomMB;
|
|
1544
|
+
const vramFitsAfter = vram ? vram.free - estVram >= this.vramHeadroomMB : true;
|
|
1545
|
+
if (ramFitsAfter && vramFitsAfter) {
|
|
1546
|
+
const promise = Promise.resolve({ kind: "ok", effectiveNumCtx });
|
|
1547
|
+
this._inflight.set(key, { startedMs: Date.now(), owner: spec.owner, promise });
|
|
1548
|
+
setTimeout(() => this._inflight.delete(key), spec.loadTimeoutMs ?? DEFAULT_INFLIGHT_WAIT_MS).unref?.();
|
|
1549
|
+
return { kind: "ok", effectiveNumCtx };
|
|
1550
|
+
}
|
|
1551
|
+
const evictTargets = this.pickEvictionCandidates({
|
|
1552
|
+
needVramMB: vramFitsAfter ? 0 : estVram + this.vramHeadroomMB - (vram?.free ?? 0),
|
|
1553
|
+
needRamMB: ramFitsAfter ? 0 : estRam + this.ramHeadroomMB - ram.free,
|
|
1554
|
+
requestingPriority: spec.priority ?? 0,
|
|
1555
|
+
requestingDomain: spec.domain
|
|
1556
|
+
});
|
|
1557
|
+
if (evictTargets.length > 0) {
|
|
1558
|
+
return { kind: "evict", evictTargets, effectiveNumCtx };
|
|
1559
|
+
}
|
|
1560
|
+
const fallback = await this.findRunnableFallback(spec);
|
|
1561
|
+
if (fallback) {
|
|
1562
|
+
this.emit("degraded", spec, fallback, "insufficient-memory-no-evictable");
|
|
1563
|
+
return { kind: "degrade", fallback, reason: "insufficient-memory-no-evictable" };
|
|
1564
|
+
}
|
|
1565
|
+
const reason = `insufficient resources (need ~${estRam}MB RAM, ~${estVram}MB VRAM; free ${ram.free}MB RAM, ${vram ? vram.free : "?"}MB VRAM) and no evictable / fallback models`;
|
|
1566
|
+
this.emit("rejected", spec, reason);
|
|
1567
|
+
return { kind: "reject", reason };
|
|
1568
|
+
}
|
|
1569
|
+
/**
|
|
1570
|
+
* Register a model that has been successfully loaded.
|
|
1571
|
+
* Callers MUST call this after a successful load so the broker can track LRU.
|
|
1572
|
+
*/
|
|
1573
|
+
registerLoaded(model) {
|
|
1574
|
+
const now = Date.now();
|
|
1575
|
+
const m2 = {
|
|
1576
|
+
...model,
|
|
1577
|
+
loadedAt: model.loadedAt ?? now,
|
|
1578
|
+
lastUsedAt: model.lastUsedAt ?? now
|
|
1579
|
+
};
|
|
1580
|
+
this._loaded.set(m2.key, m2);
|
|
1581
|
+
this._inflight.delete(m2.key);
|
|
1582
|
+
this.emit("loaded", m2);
|
|
1583
|
+
return m2;
|
|
1584
|
+
}
|
|
1585
|
+
/** Update last-used timestamp on every successful inference. */
|
|
1586
|
+
touch(host, name10) {
|
|
1587
|
+
const m2 = this._loaded.get(`${host}:${name10}`);
|
|
1588
|
+
if (m2)
|
|
1589
|
+
m2.lastUsedAt = Date.now();
|
|
1590
|
+
}
|
|
1591
|
+
/** Unregister a model (called when caller knows it has unloaded). */
|
|
1592
|
+
unregisterLoaded(host, name10, reason = "caller-unloaded") {
|
|
1593
|
+
const key = `${host}:${name10}`;
|
|
1594
|
+
const m2 = this._loaded.get(key);
|
|
1595
|
+
if (m2) {
|
|
1596
|
+
this._loaded.delete(key);
|
|
1597
|
+
this.emit("evicted", m2, reason);
|
|
1598
|
+
}
|
|
1599
|
+
}
|
|
1600
|
+
/** Clear an inflight marker without registering a load (failed/aborted). */
|
|
1601
|
+
clearInflight(host, name10) {
|
|
1602
|
+
this._inflight.delete(`${host}:${name10}`);
|
|
1603
|
+
}
|
|
1604
|
+
/**
|
|
1605
|
+
* Best-effort eviction of a tracked model. Returns true if the broker was
|
|
1606
|
+
* able to actively unload (e.g. Ollama keep_alive=0); false if it just
|
|
1607
|
+
* unregistered (caller must clean up its own subprocess).
|
|
1608
|
+
*/
|
|
1609
|
+
async evict(host, name10, reason = "broker-evict") {
|
|
1610
|
+
const key = `${host}:${name10}`;
|
|
1611
|
+
const m2 = this._loaded.get(key);
|
|
1612
|
+
if (!m2)
|
|
1613
|
+
return false;
|
|
1614
|
+
let actively = false;
|
|
1615
|
+
if (host === "ollama") {
|
|
1616
|
+
actively = await this.ollamaUnload(name10).catch(() => false);
|
|
1617
|
+
}
|
|
1618
|
+
this._loaded.delete(key);
|
|
1619
|
+
this.emit("evicted", m2, reason);
|
|
1620
|
+
return actively;
|
|
1621
|
+
}
|
|
1622
|
+
// ------------------------------------------------------------------
|
|
1623
|
+
// Internal — Ollama
|
|
1624
|
+
// ------------------------------------------------------------------
|
|
1625
|
+
/** Fetch Ollama's runtime model list and reconcile against our registry. */
|
|
1626
|
+
async refreshOllamaPs() {
|
|
1627
|
+
try {
|
|
1628
|
+
const res = await fetch(`${this._ollamaBaseUrl}/api/ps`, {
|
|
1629
|
+
signal: AbortSignal.timeout(3e3)
|
|
1630
|
+
});
|
|
1631
|
+
if (!res.ok)
|
|
1632
|
+
return;
|
|
1633
|
+
const data = await res.json();
|
|
1634
|
+
const seen = /* @__PURE__ */ new Set();
|
|
1635
|
+
const now = Date.now();
|
|
1636
|
+
for (const m2 of data.models ?? []) {
|
|
1637
|
+
const key = `ollama:${m2.name}`;
|
|
1638
|
+
seen.add(key);
|
|
1639
|
+
const vramMB = Math.round((m2.size_vram ?? 0) / (1024 * 1024));
|
|
1640
|
+
const ramMB = Math.round(((m2.size ?? 0) - (m2.size_vram ?? 0)) / (1024 * 1024));
|
|
1641
|
+
const existing = this._loaded.get(key);
|
|
1642
|
+
if (existing) {
|
|
1643
|
+
existing.vramMB = vramMB || existing.vramMB;
|
|
1644
|
+
existing.ramMB = ramMB || existing.ramMB;
|
|
1645
|
+
} else {
|
|
1646
|
+
const tracked = this.registerLoaded({
|
|
1647
|
+
key,
|
|
1648
|
+
name: m2.name,
|
|
1649
|
+
domain: this.guessOllamaDomain(m2.name),
|
|
1650
|
+
host: "ollama",
|
|
1651
|
+
owner: "external-ollama",
|
|
1652
|
+
vramMB,
|
|
1653
|
+
ramMB,
|
|
1654
|
+
priority: 0,
|
|
1655
|
+
loadedAt: now,
|
|
1656
|
+
lastUsedAt: now
|
|
1657
|
+
});
|
|
1658
|
+
void tracked;
|
|
1659
|
+
}
|
|
1660
|
+
}
|
|
1661
|
+
for (const [key, m2] of this._loaded) {
|
|
1662
|
+
if (m2.host === "ollama" && !seen.has(key)) {
|
|
1663
|
+
this._loaded.delete(key);
|
|
1664
|
+
this.emit("evicted", m2, "ollama-unloaded");
|
|
1665
|
+
}
|
|
1666
|
+
}
|
|
1667
|
+
} catch {
|
|
1668
|
+
}
|
|
1669
|
+
}
|
|
1670
|
+
/** Force Ollama to unload a model by calling /api/generate keep_alive=0. */
|
|
1671
|
+
async ollamaUnload(modelName) {
|
|
1672
|
+
try {
|
|
1673
|
+
const res = await fetch(`${this._ollamaBaseUrl}/api/generate`, {
|
|
1674
|
+
method: "POST",
|
|
1675
|
+
headers: { "Content-Type": "application/json" },
|
|
1676
|
+
body: JSON.stringify({ model: modelName, keep_alive: 0 }),
|
|
1677
|
+
signal: AbortSignal.timeout(5e3)
|
|
1678
|
+
});
|
|
1679
|
+
return res.ok;
|
|
1680
|
+
} catch {
|
|
1681
|
+
return false;
|
|
1682
|
+
}
|
|
1683
|
+
}
|
|
1684
|
+
/** Pull n_ctx_train for an Ollama model via /api/show; cached. */
|
|
1685
|
+
async getNctxTrain(modelName) {
|
|
1686
|
+
if (this._ctxTrainCache.has(modelName))
|
|
1687
|
+
return this._ctxTrainCache.get(modelName);
|
|
1688
|
+
try {
|
|
1689
|
+
const res = await fetch(`${this._ollamaBaseUrl}/api/show`, {
|
|
1690
|
+
method: "POST",
|
|
1691
|
+
headers: { "Content-Type": "application/json" },
|
|
1692
|
+
body: JSON.stringify({ name: modelName }),
|
|
1693
|
+
signal: AbortSignal.timeout(5e3)
|
|
1694
|
+
});
|
|
1695
|
+
if (!res.ok)
|
|
1696
|
+
return null;
|
|
1697
|
+
const data = await res.json();
|
|
1698
|
+
const info = data.model_info ?? {};
|
|
1699
|
+
const arch3 = info["general.architecture"];
|
|
1700
|
+
let trainCtx;
|
|
1701
|
+
if (arch3 && typeof info[`${arch3}.context_length`] === "number") {
|
|
1702
|
+
trainCtx = info[`${arch3}.context_length`];
|
|
1703
|
+
} else {
|
|
1704
|
+
for (const [k, v] of Object.entries(info)) {
|
|
1705
|
+
if (k.endsWith(".context_length") && typeof v === "number") {
|
|
1706
|
+
trainCtx = v;
|
|
1707
|
+
break;
|
|
1708
|
+
}
|
|
1709
|
+
}
|
|
1710
|
+
}
|
|
1711
|
+
if (trainCtx && Number.isFinite(trainCtx) && trainCtx > 0) {
|
|
1712
|
+
this._ctxTrainCache.set(modelName, trainCtx);
|
|
1713
|
+
return trainCtx;
|
|
1714
|
+
}
|
|
1715
|
+
return null;
|
|
1716
|
+
} catch {
|
|
1717
|
+
return null;
|
|
1718
|
+
}
|
|
1719
|
+
}
|
|
1720
|
+
/** Probe whether a model exists in Ollama (cached). */
|
|
1721
|
+
async ollamaModelExists(modelName) {
|
|
1722
|
+
if (this._ollamaModelExists.has(modelName))
|
|
1723
|
+
return this._ollamaModelExists.get(modelName);
|
|
1724
|
+
try {
|
|
1725
|
+
const res = await fetch(`${this._ollamaBaseUrl}/api/show`, {
|
|
1726
|
+
method: "POST",
|
|
1727
|
+
headers: { "Content-Type": "application/json" },
|
|
1728
|
+
body: JSON.stringify({ name: modelName }),
|
|
1729
|
+
signal: AbortSignal.timeout(3e3)
|
|
1730
|
+
});
|
|
1731
|
+
const exists2 = res.ok;
|
|
1732
|
+
this._ollamaModelExists.set(modelName, exists2);
|
|
1733
|
+
return exists2;
|
|
1734
|
+
} catch {
|
|
1735
|
+
this._ollamaModelExists.set(modelName, false);
|
|
1736
|
+
return false;
|
|
1737
|
+
}
|
|
1738
|
+
}
|
|
1739
|
+
// ------------------------------------------------------------------
|
|
1740
|
+
// Internal — LRU eviction selection
|
|
1741
|
+
// ------------------------------------------------------------------
|
|
1742
|
+
pickEvictionCandidates(req2) {
|
|
1743
|
+
const now = Date.now();
|
|
1744
|
+
const sameDomainOk = (m2) => (
|
|
1745
|
+
// never evict the requesting domain's only loaded model unless multiple exist
|
|
1746
|
+
m2.domain !== req2.requestingDomain || this.countByDomain(req2.requestingDomain) > 1
|
|
1747
|
+
);
|
|
1748
|
+
const idle = (m2) => now - m2.lastUsedAt > this.idleEvictMs;
|
|
1749
|
+
const evictable = [...this._loaded.values()].filter((m2) => m2.priority <= req2.requestingPriority).filter(sameDomainOk).sort((a2, b) => {
|
|
1750
|
+
const aIdle = idle(a2) ? 0 : 1;
|
|
1751
|
+
const bIdle = idle(b) ? 0 : 1;
|
|
1752
|
+
if (aIdle !== bIdle)
|
|
1753
|
+
return aIdle - bIdle;
|
|
1754
|
+
return a2.lastUsedAt - b.lastUsedAt;
|
|
1755
|
+
});
|
|
1756
|
+
const targets = [];
|
|
1757
|
+
let vramFreed = 0;
|
|
1758
|
+
let ramFreed = 0;
|
|
1759
|
+
for (const m2 of evictable) {
|
|
1760
|
+
if (vramFreed >= req2.needVramMB && ramFreed >= req2.needRamMB)
|
|
1761
|
+
break;
|
|
1762
|
+
targets.push(m2);
|
|
1763
|
+
vramFreed += m2.vramMB;
|
|
1764
|
+
ramFreed += m2.ramMB;
|
|
1765
|
+
}
|
|
1766
|
+
if (vramFreed >= req2.needVramMB && ramFreed >= req2.needRamMB)
|
|
1767
|
+
return targets;
|
|
1768
|
+
return [];
|
|
1769
|
+
}
|
|
1770
|
+
countByDomain(domain) {
|
|
1771
|
+
let n2 = 0;
|
|
1772
|
+
for (const m2 of this._loaded.values())
|
|
1773
|
+
if (m2.domain === domain)
|
|
1774
|
+
n2++;
|
|
1775
|
+
return n2;
|
|
1776
|
+
}
|
|
1777
|
+
// ------------------------------------------------------------------
|
|
1778
|
+
// Internal — fallback resolution
|
|
1779
|
+
// ------------------------------------------------------------------
|
|
1780
|
+
async findRunnableFallback(original) {
|
|
1781
|
+
const chain = this._fallbacks.get(original.domain) ?? [];
|
|
1782
|
+
for (const entry of chain) {
|
|
1783
|
+
if (entry.spec.host === original.host && entry.spec.name === original.name)
|
|
1784
|
+
continue;
|
|
1785
|
+
const ok3 = entry.available ? await Promise.resolve(entry.available()).catch(() => false) : true;
|
|
1786
|
+
if (!ok3)
|
|
1787
|
+
continue;
|
|
1788
|
+
return { ...entry.spec, owner: original.owner };
|
|
1789
|
+
}
|
|
1790
|
+
return null;
|
|
1791
|
+
}
|
|
1792
|
+
registerDefaultFallbacks() {
|
|
1793
|
+
this.setFallbackChain("vision", [
|
|
1794
|
+
{ spec: { name: "moondream2", domain: "vision", host: "moondream-station" }, note: "local Moondream Station REST" },
|
|
1795
|
+
{ spec: { name: "moondream", domain: "vision", host: "ollama", estimatedVramMB: 1800 }, note: "ollama moondream (small VRAM)" },
|
|
1796
|
+
{ spec: { name: "tesseract-ocr-fallback", domain: "ocr", host: "subprocess", estimatedVramMB: 0, estimatedRamMB: 100 }, note: "OCR-only — no visual reasoning" }
|
|
1797
|
+
]);
|
|
1798
|
+
this.setFallbackChain("image-gen", [
|
|
1799
|
+
{ spec: { name: "flux1-schnell", domain: "image-gen", host: "diffusers-py", estimatedVramMB: 12e3 } },
|
|
1800
|
+
{ spec: { name: "sdxl-turbo", domain: "image-gen", host: "diffusers-py", estimatedVramMB: 8e3 } },
|
|
1801
|
+
{ spec: { name: "sd-turbo", domain: "image-gen", host: "diffusers-py", estimatedVramMB: 4e3 } }
|
|
1802
|
+
]);
|
|
1803
|
+
this.setFallbackChain("music", [
|
|
1804
|
+
{ spec: { name: "facebook/musicgen-medium", domain: "music", host: "audiocraft", estimatedVramMB: 6e3 } },
|
|
1805
|
+
{ spec: { name: "facebook/musicgen-small", domain: "music", host: "audiocraft", estimatedVramMB: 3e3 } }
|
|
1806
|
+
]);
|
|
1807
|
+
this.setFallbackChain("sound", [
|
|
1808
|
+
{ spec: { name: "cvssp/audioldm-s-full-v2", domain: "sound", host: "diffusers-py", estimatedVramMB: 4e3 } },
|
|
1809
|
+
{ spec: { name: "facebook/audiogen-medium", domain: "sound", host: "audiocraft", estimatedVramMB: 3e3 } }
|
|
1810
|
+
]);
|
|
1811
|
+
this.setFallbackChain("asr", [
|
|
1812
|
+
{ spec: { name: "base", domain: "asr", host: "whisper-cli", estimatedRamMB: 800 } },
|
|
1813
|
+
{ spec: { name: "tiny", domain: "asr", host: "whisper-cli", estimatedRamMB: 300 } }
|
|
1814
|
+
]);
|
|
1815
|
+
this.setFallbackChain("tts", [
|
|
1816
|
+
{ spec: { name: "piper-default", domain: "tts", host: "piper", estimatedRamMB: 200 } }
|
|
1817
|
+
]);
|
|
1818
|
+
}
|
|
1819
|
+
// ------------------------------------------------------------------
|
|
1820
|
+
// Internal — footprint estimation
|
|
1821
|
+
// ------------------------------------------------------------------
|
|
1822
|
+
estimateFootprintVramMB(spec) {
|
|
1823
|
+
if (spec.estimatedVramMB !== void 0)
|
|
1824
|
+
return spec.estimatedVramMB;
|
|
1825
|
+
switch (spec.domain) {
|
|
1826
|
+
case "embedding":
|
|
1827
|
+
return 300;
|
|
1828
|
+
case "vision":
|
|
1829
|
+
return spec.host === "moondream-station" ? 2e3 : 2500;
|
|
1830
|
+
case "image-gen":
|
|
1831
|
+
return 8e3;
|
|
1832
|
+
case "video-gen":
|
|
1833
|
+
return 12e3;
|
|
1834
|
+
case "music":
|
|
1835
|
+
return 4e3;
|
|
1836
|
+
case "sound":
|
|
1837
|
+
return 3e3;
|
|
1838
|
+
case "asr":
|
|
1839
|
+
return 1e3;
|
|
1840
|
+
case "tts":
|
|
1841
|
+
return 300;
|
|
1842
|
+
case "subagent":
|
|
1843
|
+
return 4e3;
|
|
1844
|
+
case "ocr":
|
|
1845
|
+
return 0;
|
|
1846
|
+
case "chat":
|
|
1847
|
+
default:
|
|
1848
|
+
return 5e3;
|
|
1849
|
+
}
|
|
1850
|
+
}
|
|
1851
|
+
estimateFootprintRamMB(spec) {
|
|
1852
|
+
if (spec.estimatedRamMB !== void 0)
|
|
1853
|
+
return spec.estimatedRamMB;
|
|
1854
|
+
switch (spec.domain) {
|
|
1855
|
+
case "ocr":
|
|
1856
|
+
return 100;
|
|
1857
|
+
case "tts":
|
|
1858
|
+
return 200;
|
|
1859
|
+
case "embedding":
|
|
1860
|
+
return 500;
|
|
1861
|
+
case "asr":
|
|
1862
|
+
return 800;
|
|
1863
|
+
case "music":
|
|
1864
|
+
case "sound":
|
|
1865
|
+
return 2e3;
|
|
1866
|
+
case "vision":
|
|
1867
|
+
return 1500;
|
|
1868
|
+
case "image-gen":
|
|
1869
|
+
return 4e3;
|
|
1870
|
+
case "video-gen":
|
|
1871
|
+
return 6e3;
|
|
1872
|
+
case "subagent":
|
|
1873
|
+
return 1500;
|
|
1874
|
+
case "chat":
|
|
1875
|
+
default:
|
|
1876
|
+
return 2e3;
|
|
1877
|
+
}
|
|
1878
|
+
}
|
|
1879
|
+
guessOllamaDomain(name10) {
|
|
1880
|
+
const n2 = name10.toLowerCase();
|
|
1881
|
+
if (/embed|nomic|bge|e5/.test(n2))
|
|
1882
|
+
return "embedding";
|
|
1883
|
+
if (/moondream|llava|vlm|vision|qwen.*vl|minicpm-v|gemma3|pixtral|cogvlm|internvl/.test(n2))
|
|
1884
|
+
return "vision";
|
|
1885
|
+
if (/flux|stable.diffusion|sdxl|z-image/.test(n2))
|
|
1886
|
+
return "image-gen";
|
|
1887
|
+
return "chat";
|
|
1888
|
+
}
|
|
1889
|
+
// ------------------------------------------------------------------
|
|
1890
|
+
// Internal — snapshot + pressure
|
|
1891
|
+
// ------------------------------------------------------------------
|
|
1892
|
+
buildSnapshot() {
|
|
1893
|
+
const ram = ramSnapshotMB();
|
|
1894
|
+
const vram = this._lastSnapshot?.vramMB ?? null;
|
|
1895
|
+
return {
|
|
1896
|
+
loaded: [...this._loaded.values()],
|
|
1897
|
+
inflight: [...this._inflight.entries()].map(([key, v]) => ({ key, owner: v.owner, startedMs: v.startedMs })),
|
|
1898
|
+
ramMB: ram,
|
|
1899
|
+
vramMB: vram,
|
|
1900
|
+
lastPollAt: Date.now(),
|
|
1901
|
+
slots: this.buildSlotsSnapshot()
|
|
1902
|
+
};
|
|
1903
|
+
}
|
|
1904
|
+
buildSlotsSnapshot() {
|
|
1905
|
+
const byModel = {};
|
|
1906
|
+
for (const slot of this._activeSlots.values()) {
|
|
1907
|
+
const k = slot.model;
|
|
1908
|
+
if (!byModel[k])
|
|
1909
|
+
byModel[k] = { inUse: 0, tokensPerSec: 0, samples: 0 };
|
|
1910
|
+
byModel[k].inUse += 1;
|
|
1911
|
+
}
|
|
1912
|
+
for (const [model, tp] of this._throughput) {
|
|
1913
|
+
if (!byModel[model])
|
|
1914
|
+
byModel[model] = { inUse: 0, tokensPerSec: 0, samples: 0 };
|
|
1915
|
+
byModel[model].tokensPerSec = tp.tokensPerSec;
|
|
1916
|
+
byModel[model].samples = tp.samples;
|
|
1917
|
+
}
|
|
1918
|
+
return {
|
|
1919
|
+
inUse: this._activeSlots.size,
|
|
1920
|
+
capacity: this.slotCapacity,
|
|
1921
|
+
queueDepth: this._slotQueue.length,
|
|
1922
|
+
queueCapacity: this.queueCapacity,
|
|
1923
|
+
byModel
|
|
1924
|
+
};
|
|
1925
|
+
}
|
|
1926
|
+
async checkPressure(snap) {
|
|
1927
|
+
if (snap.ramMB.free < this.ramHeadroomMB) {
|
|
1928
|
+
this.emit("pressure", "ram", snap.ramMB.free, this.ramHeadroomMB);
|
|
1929
|
+
}
|
|
1930
|
+
const v = await vramSnapshotMB();
|
|
1931
|
+
if (v) {
|
|
1932
|
+
snap.vramMB = v;
|
|
1933
|
+
if (v.free < this.vramHeadroomMB) {
|
|
1934
|
+
this.emit("pressure", "vram", v.free, this.vramHeadroomMB);
|
|
1935
|
+
}
|
|
1936
|
+
}
|
|
1937
|
+
const queueThreshold = Math.floor(this.queueCapacity * 0.8);
|
|
1938
|
+
if (this._slotQueue.length >= queueThreshold) {
|
|
1939
|
+
this.emit("pressure", "queue", this._slotQueue.length, queueThreshold);
|
|
1940
|
+
}
|
|
1941
|
+
const now = Date.now();
|
|
1942
|
+
for (const slot of this._activeSlots.values()) {
|
|
1943
|
+
if (now - slot.acquiredAt > STUCK_INFLIGHT_DIAGNOSTIC_MS) {
|
|
1944
|
+
}
|
|
1945
|
+
}
|
|
1946
|
+
}
|
|
1947
|
+
// ------------------------------------------------------------------
|
|
1948
|
+
// Inference slot admission control (replaces timeouts)
|
|
1949
|
+
// ------------------------------------------------------------------
|
|
1950
|
+
/**
|
|
1951
|
+
* Acquire an inference slot. Blocks (queues with backpressure) until a slot
|
|
1952
|
+
* is available. Never times out — work either completes or is cancelled
|
|
1953
|
+
* via the caller-provided AbortSignal before admission.
|
|
1954
|
+
*
|
|
1955
|
+
* Two-tier admission:
|
|
1956
|
+
* 1. Reserved: 1 slot per sessionKey kept warm even when shared pool full
|
|
1957
|
+
* 2. Shared: queue with FIFO+priority ordering; size-bounded by queueCapacity
|
|
1958
|
+
*
|
|
1959
|
+
* Backpressure: when queue exceeds 80% capacity, emit `pressure: "queue"` —
|
|
1960
|
+
* upstream callers (e.g. Telegram poll loop) should slow ingress.
|
|
1961
|
+
*/
|
|
1962
|
+
acquireInferenceSlot(spec) {
|
|
1963
|
+
if (this._activeSlots.size < this.slotCapacity) {
|
|
1964
|
+
return Promise.resolve(this.admitSlot(
|
|
1965
|
+
spec,
|
|
1966
|
+
/*reserved*/
|
|
1967
|
+
false
|
|
1968
|
+
));
|
|
1969
|
+
}
|
|
1970
|
+
if (spec.sessionKey && !this._reservedBySession.has(spec.sessionKey) && this._activeSlots.size < this.slotCapacity + 1) {
|
|
1971
|
+
const slot = this.admitSlot(
|
|
1972
|
+
spec,
|
|
1973
|
+
/*reserved*/
|
|
1974
|
+
true
|
|
1975
|
+
);
|
|
1976
|
+
this._reservedBySession.set(spec.sessionKey, slot.info.id);
|
|
1977
|
+
return Promise.resolve(slot);
|
|
1978
|
+
}
|
|
1979
|
+
return new Promise((resolve55, reject) => {
|
|
1980
|
+
const entry = { spec, resolve: resolve55, reject, enqueuedAt: Date.now() };
|
|
1981
|
+
if (spec.signal) {
|
|
1982
|
+
const onAbort = () => {
|
|
1983
|
+
const idx = this._slotQueue.indexOf(entry);
|
|
1984
|
+
if (idx >= 0)
|
|
1985
|
+
this._slotQueue.splice(idx, 1);
|
|
1986
|
+
reject(new Error("inference slot acquisition aborted by caller signal"));
|
|
1987
|
+
};
|
|
1988
|
+
if (spec.signal.aborted) {
|
|
1989
|
+
onAbort();
|
|
1990
|
+
return;
|
|
1991
|
+
}
|
|
1992
|
+
spec.signal.addEventListener("abort", onAbort, { once: true });
|
|
1993
|
+
entry.onSignalAbort = onAbort;
|
|
1994
|
+
}
|
|
1995
|
+
const prio = spec.priority ?? 0;
|
|
1996
|
+
let insertAt = this._slotQueue.length;
|
|
1997
|
+
for (let i2 = this._slotQueue.length - 1; i2 >= 0; i2--) {
|
|
1998
|
+
const p2 = this._slotQueue[i2].spec.priority ?? 0;
|
|
1999
|
+
if (p2 >= prio) {
|
|
2000
|
+
insertAt = i2 + 1;
|
|
2001
|
+
break;
|
|
2002
|
+
}
|
|
2003
|
+
if (i2 === 0)
|
|
2004
|
+
insertAt = 0;
|
|
2005
|
+
}
|
|
2006
|
+
this._slotQueue.splice(insertAt, 0, entry);
|
|
2007
|
+
const threshold = Math.floor(this.queueCapacity * 0.8);
|
|
2008
|
+
if (this._slotQueue.length === threshold) {
|
|
2009
|
+
this.emit("pressure", "queue", this._slotQueue.length, threshold);
|
|
2010
|
+
}
|
|
2011
|
+
});
|
|
2012
|
+
}
|
|
2013
|
+
/** Admit a slot — internal, called from acquire fast path and from drainQueue. */
|
|
2014
|
+
admitSlot(spec, reserved) {
|
|
2015
|
+
const id = `slot-${++this._slotIdSeq}-${Date.now().toString(36)}`;
|
|
2016
|
+
const info = {
|
|
2017
|
+
id,
|
|
2018
|
+
model: spec.model,
|
|
2019
|
+
domain: spec.domain,
|
|
2020
|
+
owner: spec.owner,
|
|
2021
|
+
sessionKey: spec.sessionKey,
|
|
2022
|
+
acquiredAt: Date.now(),
|
|
2023
|
+
promptTokens: spec.promptTokens ?? 0,
|
|
2024
|
+
reserved
|
|
2025
|
+
};
|
|
2026
|
+
this._activeSlots.set(id, info);
|
|
2027
|
+
this.emit("slotAcquired", info);
|
|
2028
|
+
let released = false;
|
|
2029
|
+
const broker = this;
|
|
2030
|
+
return {
|
|
2031
|
+
info,
|
|
2032
|
+
release(outcome) {
|
|
2033
|
+
if (released)
|
|
2034
|
+
return;
|
|
2035
|
+
released = true;
|
|
2036
|
+
broker.releaseSlot(info, outcome);
|
|
2037
|
+
}
|
|
2038
|
+
};
|
|
2039
|
+
}
|
|
2040
|
+
releaseSlot(info, outcome) {
|
|
2041
|
+
this._activeSlots.delete(info.id);
|
|
2042
|
+
if (info.sessionKey && this._reservedBySession.get(info.sessionKey) === info.id) {
|
|
2043
|
+
this._reservedBySession.delete(info.sessionKey);
|
|
2044
|
+
}
|
|
2045
|
+
if (outcome.ok && (outcome.completionTokens ?? 0) > 0) {
|
|
2046
|
+
const wallMs = Date.now() - info.acquiredAt;
|
|
2047
|
+
if (wallMs > 100) {
|
|
2048
|
+
const tps = outcome.completionTokens * 1e3 / wallMs;
|
|
2049
|
+
const cur = this._throughput.get(info.model) ?? {
|
|
2050
|
+
tokensPerSec: THROUGHPUT_INITIAL_TPS,
|
|
2051
|
+
samples: 0,
|
|
2052
|
+
lastReleaseAt: 0
|
|
2053
|
+
};
|
|
2054
|
+
cur.tokensPerSec = cur.samples === 0 ? tps : cur.tokensPerSec * (1 - THROUGHPUT_EMA_ALPHA) + tps * THROUGHPUT_EMA_ALPHA;
|
|
2055
|
+
cur.samples += 1;
|
|
2056
|
+
cur.lastReleaseAt = Date.now();
|
|
2057
|
+
this._throughput.set(info.model, cur);
|
|
2058
|
+
this.emit("throughputUpdated", info.model, cur.tokensPerSec);
|
|
2059
|
+
}
|
|
2060
|
+
}
|
|
2061
|
+
this.emit("slotReleased", info, outcome);
|
|
2062
|
+
this.drainSlotQueue();
|
|
2063
|
+
}
|
|
2064
|
+
drainSlotQueue() {
|
|
2065
|
+
while (this._slotQueue.length > 0 && this._activeSlots.size < this.slotCapacity) {
|
|
2066
|
+
const entry = this._slotQueue.shift();
|
|
2067
|
+
if (entry.onSignalAbort && entry.spec.signal) {
|
|
2068
|
+
entry.spec.signal.removeEventListener("abort", entry.onSignalAbort);
|
|
2069
|
+
}
|
|
2070
|
+
if (entry.spec.signal?.aborted) {
|
|
2071
|
+
try {
|
|
2072
|
+
entry.reject(new Error("aborted before admission"));
|
|
2073
|
+
} catch {
|
|
2074
|
+
}
|
|
2075
|
+
continue;
|
|
2076
|
+
}
|
|
2077
|
+
const slot = this.admitSlot(
|
|
2078
|
+
entry.spec,
|
|
2079
|
+
/*reserved*/
|
|
2080
|
+
false
|
|
2081
|
+
);
|
|
2082
|
+
try {
|
|
2083
|
+
entry.resolve(slot);
|
|
2084
|
+
} catch {
|
|
2085
|
+
}
|
|
2086
|
+
}
|
|
2087
|
+
}
|
|
2088
|
+
/** Snapshot of throughput EMAs (for /broker and debugging). */
|
|
2089
|
+
throughputByModel() {
|
|
2090
|
+
const out = {};
|
|
2091
|
+
for (const [model, tp] of this._throughput) {
|
|
2092
|
+
out[model] = { tokensPerSec: tp.tokensPerSec, samples: tp.samples };
|
|
2093
|
+
}
|
|
2094
|
+
return out;
|
|
2095
|
+
}
|
|
2096
|
+
/** Tune the shared slot capacity at runtime (e.g. when Ollama pool resizes). */
|
|
2097
|
+
setSlotCapacity(n2) {
|
|
2098
|
+
this.slotCapacity = Math.max(1, Math.floor(n2));
|
|
2099
|
+
this.drainSlotQueue();
|
|
2100
|
+
}
|
|
2101
|
+
keyOf(spec) {
|
|
2102
|
+
return `${spec.host}:${spec.name}`;
|
|
2103
|
+
}
|
|
2104
|
+
};
|
|
2105
|
+
_nvSmiAvailable = null;
|
|
2106
|
+
}
|
|
2107
|
+
});
|
|
2108
|
+
|
|
2109
|
+
// packages/execution/dist/broker-mediated-backend.js
|
|
2110
|
+
/**
 * Wrap an inference backend so that every completion call first acquires an
 * inference slot from the shared model broker and releases it afterwards.
 *
 * The returned object inherits from `backend` (via `Object.create`) and
 * overrides `chatCompletion` and, when the backend has one,
 * `chatCompletionStream`.
 *
 * @param {object} backend - Backend exposing `chatCompletion(request)` and
 *   optionally an async-iterable `chatCompletionStream(request)`.
 * @param {object} [options2] - `domain`, `owner`, `sessionKey` and `priority`
 *   are forwarded to the broker; `clampNumCtx: false` disables the numCtx clamp.
 * @returns {object} The broker-mediated backend.
 */
function wrapWithBroker(backend, options2 = {}) {
  const broker = getModelBroker();
  const clamp7 = options2.clampNumCtx !== false;

  const modelFor = (request) => backend.model || request.model || "unknown";

  // Returns `request` itself, or a copy whose numCtx is capped at the model's
  // training context. Without an explicit numCtx the estimated need is used.
  const clampRequest = async (request, model) => {
    if (!clamp7) {
      return request;
    }
    const trainCtx = await broker.getNctxTrain(model).catch(() => null);
    if (!(trainCtx && trainCtx > 0)) {
      return request;
    }
    const requestedNumCtx = request.numCtx;
    const target = requestedNumCtx
      ? Math.min(requestedNumCtx, trainCtx)
      : Math.min(trainCtx, estimateContextNeed(request));
    return target > 0 ? { ...request, numCtx: target } : request;
  };

  const acquireSlot = (request, model) => broker.acquireInferenceSlot({
    model,
    domain: options2.domain,
    owner: options2.owner,
    sessionKey: options2.sessionKey,
    promptTokens: estimatePromptTokens(request),
    priority: options2.priority ?? 0
  });

  const describeError = (err) => (err instanceof Error ? err.message : String(err));

  const wrapped = Object.create(backend);

  wrapped.chatCompletion = async (request) => {
    const model = modelFor(request);
    const effectiveRequest = await clampRequest(request, model);
    const slot = await acquireSlot(request, model);
    try {
      const result = await backend.chatCompletion(effectiveRequest);
      slot.release({ ok: true, completionTokens: result.usage?.completion_tokens ?? 0 });
      return result;
    } catch (err) {
      slot.release({ ok: false, error: describeError(err) });
      throw err;
    }
  };

  if (typeof backend.chatCompletionStream === "function") {
    const streamFn = backend.chatCompletionStream.bind(backend);
    wrapped.chatCompletionStream = async function* (request) {
      const model = modelFor(request);
      const effectiveRequest = await clampRequest(request, model);
      const slot = await acquireSlot(request, model);

      let released = false;
      const releaseOnce = (outcome) => {
        if (released) {
          return;
        }
        released = true;
        slot.release(outcome);
      };

      let completionTokens = 0;
      try {
        for await (const chunk of streamFn(effectiveRequest)) {
          const usage = chunk.usage;
          // Keep the most recent non-zero completion_tokens seen on a chunk.
          if (usage?.completion_tokens) {
            completionTokens = usage.completion_tokens;
          }
          yield chunk;
        }
        releaseOnce({ ok: true, completionTokens });
      } catch (err) {
        releaseOnce({ ok: false, error: describeError(err) });
        throw err;
      } finally {
        // If the consumer stops iterating early (break / return()), neither
        // branch above runs. Without this the slot would never be released.
        releaseOnce({ ok: false, error: "stream closed by consumer before completion" });
      }
    };
  }

  return wrapped;
}
|
|
2186
|
+
/**
 * Rough prompt-size estimate for a chat request, in tokens.
 *
 * Counts the characters of every string `content` and of every `text` part of
 * array-valued `content`, adds a flat 8 characters per message and 600
 * characters per tool, then divides by 4 and rounds up.
 * NOTE(review): the 8 / 600 / 4 constants are heuristics whose derivation is
 * not visible here.
 *
 * Null or undefined entries in `messages` are tolerated; they contribute only
 * the flat per-message allowance.
 *
 * @param {{ messages?: Array<object>, tools?: Array<object> }} [request]
 * @returns {number} Estimated prompt tokens (0 for an empty or missing request).
 */
function estimatePromptTokens(request) {
  let chars = 0;
  const messages = Array.isArray(request?.messages) ? request.messages : [];
  for (const message of messages) {
    const content = message?.content;
    if (typeof content === "string") {
      chars += content.length;
    } else if (Array.isArray(content)) {
      for (const part of content) {
        if (typeof part?.text === "string") {
          chars += part.text.length;
        }
      }
    }
    chars += 8;
  }
  const toolCount = Array.isArray(request?.tools) ? request.tools.length : 0;
  chars += toolCount * 600;
  return Math.ceil(chars / 4);
}
|
|
2206
|
+
/**
 * Estimate the context window a request needs: estimated prompt tokens plus
 * the reply budget (`request.maxTokens`, 1024 when absent) plus 512, never
 * less than 2048.
 *
 * @param {{ maxTokens?: number }} [request]
 * @returns {number} Estimated context size in tokens.
 */
function estimateContextNeed(request) {
  const promptEstimate = estimatePromptTokens(request);
  const replyBudget = request?.maxTokens ?? 1024;
  const needed = promptEstimate + replyBudget + 512;
  return Math.max(2048, needed);
}
|
|
2211
|
+
// Bundler-generated module initializer for broker-mediated-backend.js: when
// invoked it runs the model-broker initializer this module depends on.
// NOTE(review): `__esm` is defined outside this view; presumably it makes the
// body run at most once — confirm against its definition.
var init_broker_mediated_backend = __esm({
  "packages/execution/dist/broker-mediated-backend.js"() {
    "use strict";
    init_model_broker();
  }
});
|
|
2217
|
+
|
|
1331
2218
|
// packages/execution/dist/tools/security-classifier.js
|
|
1332
2219
|
function classifyTool(name10) {
|
|
1333
2220
|
for (const rule of RULES) {
|
|
@@ -19513,6 +20400,20 @@ import { existsSync as existsSync25, mkdirSync as mkdirSync10, writeFileSync as
|
|
|
19513
20400
|
import { join as join28, basename as basename5, extname as extname3, resolve as resolve16 } from "node:path";
|
|
19514
20401
|
import { homedir as homedir9 } from "node:os";
|
|
19515
20402
|
import { execFileSync as execFileSync3, execSync as execSync15 } from "node:child_process";
|
|
20403
|
+
/**
 * Estimate the RAM (in MB) a Whisper model needs, from its name.
 *
 * The name is matched case-insensitively against size keywords in the order
 * large, medium, small, base, tiny; the first keyword found wins. Names with
 * no keyword fall back to 800.
 *
 * @param {string} model - Whisper model name, e.g. "large-v3" or "base.en".
 * @returns {number} Estimated RAM in megabytes.
 */
function whisperRamEstimate(model) {
  const lowered = model.toLowerCase();
  const sizeTiers = [
    ["large", 3200],
    ["medium", 1700],
    ["small", 800],
    ["base", 400],
    ["tiny", 200]
  ];
  const match = sizeTiers.find(([keyword]) => lowered.includes(keyword));
  return match ? match[1] : 800;
}
|
|
19516
20417
|
function isTranscribable(path12) {
|
|
19517
20418
|
const ext = extname3(path12).toLowerCase();
|
|
19518
20419
|
return AUDIO_EXTS.has(ext) || VIDEO_EXTS.has(ext);
|
|
@@ -19582,6 +20483,7 @@ var AUDIO_EXTS, VIDEO_EXTS, MAX_TRANSCRIBE_URL_BYTES, _tcModule, _tcChecked, Tra
|
|
|
19582
20483
|
var init_transcribe_tool = __esm({
|
|
19583
20484
|
"packages/execution/dist/tools/transcribe-tool.js"() {
|
|
19584
20485
|
"use strict";
|
|
20486
|
+
init_model_broker();
|
|
19585
20487
|
init_network_egress_policy();
|
|
19586
20488
|
AUDIO_EXTS = /* @__PURE__ */ new Set([
|
|
19587
20489
|
".mp3",
|
|
@@ -19636,7 +20538,7 @@ var init_transcribe_tool = __esm({
|
|
|
19636
20538
|
async execute(args) {
|
|
19637
20539
|
const start2 = performance.now();
|
|
19638
20540
|
const filePath = resolve16(this.workingDir, String(args["path"] ?? ""));
|
|
19639
|
-
|
|
20541
|
+
let model = String(args["model"] ?? "base");
|
|
19640
20542
|
const diarize = Boolean(args["diarize"] ?? false);
|
|
19641
20543
|
if (!existsSync25(filePath)) {
|
|
19642
20544
|
return {
|
|
@@ -19654,6 +20556,32 @@ var init_transcribe_tool = __esm({
|
|
|
19654
20556
|
durationMs: performance.now() - start2
|
|
19655
20557
|
};
|
|
19656
20558
|
}
|
|
20559
|
+
const broker = getModelBroker();
|
|
20560
|
+
const askedModel = model;
|
|
20561
|
+
let effectiveModel = model;
|
|
20562
|
+
const whisperDecision = await broker.ensureModelLoadable({
|
|
20563
|
+
name: askedModel,
|
|
20564
|
+
domain: "asr",
|
|
20565
|
+
host: "whisper-cli",
|
|
20566
|
+
owner: "transcribe-file-tool",
|
|
20567
|
+
estimatedRamMB: whisperRamEstimate(askedModel)
|
|
20568
|
+
});
|
|
20569
|
+
if (whisperDecision.kind === "degrade") {
|
|
20570
|
+
effectiveModel = whisperDecision.fallback.name;
|
|
20571
|
+
} else if (whisperDecision.kind === "evict") {
|
|
20572
|
+
for (const target of whisperDecision.evictTargets) {
|
|
20573
|
+
await broker.evict(target.host, target.name, "asr-needs-room");
|
|
20574
|
+
}
|
|
20575
|
+
} else if (whisperDecision.kind === "reject") {
|
|
20576
|
+
return {
|
|
20577
|
+
success: false,
|
|
20578
|
+
output: "",
|
|
20579
|
+
error: `Transcription blocked by resource broker: ${whisperDecision.reason}`,
|
|
20580
|
+
durationMs: performance.now() - start2
|
|
20581
|
+
};
|
|
20582
|
+
}
|
|
20583
|
+
if (effectiveModel !== askedModel)
|
|
20584
|
+
model = effectiveModel;
|
|
19657
20585
|
const tc = await loadTranscribeCli();
|
|
19658
20586
|
if (!tc) {
|
|
19659
20587
|
return this.execViaCli(filePath, model, diarize, start2);
|
|
@@ -46186,11 +47114,11 @@ var require_eventemitter3 = __commonJS({
|
|
|
46186
47114
|
if (--emitter._eventsCount === 0) emitter._events = new Events();
|
|
46187
47115
|
else delete emitter._events[evt];
|
|
46188
47116
|
}
|
|
46189
|
-
function
|
|
47117
|
+
function EventEmitter15() {
|
|
46190
47118
|
this._events = new Events();
|
|
46191
47119
|
this._eventsCount = 0;
|
|
46192
47120
|
}
|
|
46193
|
-
|
|
47121
|
+
EventEmitter15.prototype.eventNames = function eventNames() {
|
|
46194
47122
|
var names = [], events, name10;
|
|
46195
47123
|
if (this._eventsCount === 0) return names;
|
|
46196
47124
|
for (name10 in events = this._events) {
|
|
@@ -46201,7 +47129,7 @@ var require_eventemitter3 = __commonJS({
|
|
|
46201
47129
|
}
|
|
46202
47130
|
return names;
|
|
46203
47131
|
};
|
|
46204
|
-
|
|
47132
|
+
EventEmitter15.prototype.listeners = function listeners(event) {
|
|
46205
47133
|
var evt = prefix ? prefix + event : event, handlers = this._events[evt];
|
|
46206
47134
|
if (!handlers) return [];
|
|
46207
47135
|
if (handlers.fn) return [handlers.fn];
|
|
@@ -46210,13 +47138,13 @@ var require_eventemitter3 = __commonJS({
|
|
|
46210
47138
|
}
|
|
46211
47139
|
return ee;
|
|
46212
47140
|
};
|
|
46213
|
-
|
|
47141
|
+
EventEmitter15.prototype.listenerCount = function listenerCount(event) {
|
|
46214
47142
|
var evt = prefix ? prefix + event : event, listeners = this._events[evt];
|
|
46215
47143
|
if (!listeners) return 0;
|
|
46216
47144
|
if (listeners.fn) return 1;
|
|
46217
47145
|
return listeners.length;
|
|
46218
47146
|
};
|
|
46219
|
-
|
|
47147
|
+
EventEmitter15.prototype.emit = function emit2(event, a1, a2, a3, a4, a5) {
|
|
46220
47148
|
var evt = prefix ? prefix + event : event;
|
|
46221
47149
|
if (!this._events[evt]) return false;
|
|
46222
47150
|
var listeners = this._events[evt], len = arguments.length, args, i2;
|
|
@@ -46267,13 +47195,13 @@ var require_eventemitter3 = __commonJS({
|
|
|
46267
47195
|
}
|
|
46268
47196
|
return true;
|
|
46269
47197
|
};
|
|
46270
|
-
|
|
47198
|
+
EventEmitter15.prototype.on = function on2(event, fn, context2) {
|
|
46271
47199
|
return addListener2(this, event, fn, context2, false);
|
|
46272
47200
|
};
|
|
46273
|
-
|
|
47201
|
+
EventEmitter15.prototype.once = function once(event, fn, context2) {
|
|
46274
47202
|
return addListener2(this, event, fn, context2, true);
|
|
46275
47203
|
};
|
|
46276
|
-
|
|
47204
|
+
EventEmitter15.prototype.removeListener = function removeListener2(event, fn, context2, once) {
|
|
46277
47205
|
var evt = prefix ? prefix + event : event;
|
|
46278
47206
|
if (!this._events[evt]) return this;
|
|
46279
47207
|
if (!fn) {
|
|
@@ -46296,7 +47224,7 @@ var require_eventemitter3 = __commonJS({
|
|
|
46296
47224
|
}
|
|
46297
47225
|
return this;
|
|
46298
47226
|
};
|
|
46299
|
-
|
|
47227
|
+
EventEmitter15.prototype.removeAllListeners = function removeAllListeners(event) {
|
|
46300
47228
|
var evt;
|
|
46301
47229
|
if (event) {
|
|
46302
47230
|
evt = prefix ? prefix + event : event;
|
|
@@ -46307,12 +47235,12 @@ var require_eventemitter3 = __commonJS({
|
|
|
46307
47235
|
}
|
|
46308
47236
|
return this;
|
|
46309
47237
|
};
|
|
46310
|
-
|
|
46311
|
-
|
|
46312
|
-
|
|
46313
|
-
|
|
47238
|
+
EventEmitter15.prototype.off = EventEmitter15.prototype.removeListener;
|
|
47239
|
+
EventEmitter15.prototype.addListener = EventEmitter15.prototype.on;
|
|
47240
|
+
EventEmitter15.prefixed = prefix;
|
|
47241
|
+
EventEmitter15.EventEmitter = EventEmitter15;
|
|
46314
47242
|
if ("undefined" !== typeof module) {
|
|
46315
|
-
module.exports =
|
|
47243
|
+
module.exports = EventEmitter15;
|
|
46316
47244
|
}
|
|
46317
47245
|
}
|
|
46318
47246
|
});
|
|
@@ -119084,10 +120012,10 @@ var require_wrap_handler = __commonJS({
|
|
|
119084
120012
|
var require_dispatcher = __commonJS({
|
|
119085
120013
|
"../node_modules/undici/lib/dispatcher/dispatcher.js"(exports, module) {
|
|
119086
120014
|
"use strict";
|
|
119087
|
-
var
|
|
120015
|
+
var EventEmitter15 = __require("node:events");
|
|
119088
120016
|
var WrapHandler = require_wrap_handler();
|
|
119089
120017
|
var wrapInterceptor = (dispatch) => (opts, handler) => dispatch(opts, WrapHandler.wrap(handler));
|
|
119090
|
-
var Dispatcher2 = class extends
|
|
120018
|
+
var Dispatcher2 = class extends EventEmitter15 {
|
|
119091
120019
|
dispatch() {
|
|
119092
120020
|
throw new Error("not implemented");
|
|
119093
120021
|
}
|
|
@@ -126370,7 +127298,7 @@ var require_socks5_utils = __commonJS({
|
|
|
126370
127298
|
var require_socks5_client = __commonJS({
|
|
126371
127299
|
"../node_modules/undici/lib/core/socks5-client.js"(exports, module) {
|
|
126372
127300
|
"use strict";
|
|
126373
|
-
var { EventEmitter:
|
|
127301
|
+
var { EventEmitter: EventEmitter15 } = __require("node:events");
|
|
126374
127302
|
var { Buffer: Buffer7 } = __require("node:buffer");
|
|
126375
127303
|
var { InvalidArgumentError, Socks5ProxyError } = require_errors2();
|
|
126376
127304
|
var { debuglog } = __require("node:util");
|
|
@@ -126413,7 +127341,7 @@ var require_socks5_client = __commonJS({
|
|
|
126413
127341
|
ERROR: "error",
|
|
126414
127342
|
CLOSED: "closed"
|
|
126415
127343
|
};
|
|
126416
|
-
var Socks5Client = class extends
|
|
127344
|
+
var Socks5Client = class extends EventEmitter15 {
|
|
126417
127345
|
constructor(socket, options2 = {}) {
|
|
126418
127346
|
super();
|
|
126419
127347
|
if (!socket) {
|
|
@@ -132798,9 +133726,9 @@ var require_memory_cache_store = __commonJS({
|
|
|
132798
133726
|
"../node_modules/undici/lib/cache/memory-cache-store.js"(exports, module) {
|
|
132799
133727
|
"use strict";
|
|
132800
133728
|
var { Writable } = __require("node:stream");
|
|
132801
|
-
var { EventEmitter:
|
|
133729
|
+
var { EventEmitter: EventEmitter15 } = __require("node:events");
|
|
132802
133730
|
var { assertCacheKey, assertCacheValue } = require_cache();
|
|
132803
|
-
var MemoryCacheStore = class extends
|
|
133731
|
+
var MemoryCacheStore = class extends EventEmitter15 {
|
|
132804
133732
|
#maxCount = 1024;
|
|
132805
133733
|
#maxSize = 104857600;
|
|
132806
133734
|
// 100MB
|
|
@@ -229661,7 +230589,7 @@ var init_send_ssdp_message = __esm({
|
|
|
229661
230589
|
});
|
|
229662
230590
|
|
|
229663
230591
|
// ../node_modules/@achingbrain/ssdp/dist/src/ssdp.js
|
|
229664
|
-
import { EventEmitter as
|
|
230592
|
+
import { EventEmitter as EventEmitter3, on } from "node:events";
|
|
229665
230593
|
import { createRequire } from "node:module";
|
|
229666
230594
|
var req, name8, version2, DEFAULT_SSDP_SIGNATURE, SSDP;
|
|
229667
230595
|
var init_ssdp = __esm({
|
|
@@ -229679,7 +230607,7 @@ var init_ssdp = __esm({
|
|
|
229679
230607
|
req = createRequire(import.meta.url);
|
|
229680
230608
|
({ name: name8, version: version2 } = req("../../package.json"));
|
|
229681
230609
|
DEFAULT_SSDP_SIGNATURE = `node.js/${process.version.substring(1)} UPnP/1.1 ${name8}/${version2}`;
|
|
229682
|
-
SSDP = class extends
|
|
230610
|
+
SSDP = class extends EventEmitter3 {
|
|
229683
230611
|
udn;
|
|
229684
230612
|
signature;
|
|
229685
230613
|
sockets;
|
|
@@ -236837,7 +237765,7 @@ var require_extension2 = __commonJS({
|
|
|
236837
237765
|
var require_websocket2 = __commonJS({
|
|
236838
237766
|
"../node_modules/ws/lib/websocket.js"(exports, module) {
|
|
236839
237767
|
"use strict";
|
|
236840
|
-
var
|
|
237768
|
+
var EventEmitter15 = __require("events");
|
|
236841
237769
|
var https4 = __require("https");
|
|
236842
237770
|
var http6 = __require("http");
|
|
236843
237771
|
var net5 = __require("net");
|
|
@@ -236869,7 +237797,7 @@ var require_websocket2 = __commonJS({
|
|
|
236869
237797
|
var protocolVersions = [8, 13];
|
|
236870
237798
|
var readyStates = ["CONNECTING", "OPEN", "CLOSING", "CLOSED"];
|
|
236871
237799
|
var subprotocolRegex = /^[!#$%&'*+\-.0-9A-Z^_`|a-z~]+$/;
|
|
236872
|
-
var WebSocket6 = class _WebSocket extends
|
|
237800
|
+
var WebSocket6 = class _WebSocket extends EventEmitter15 {
|
|
236873
237801
|
/**
|
|
236874
237802
|
* Create a new `WebSocket`.
|
|
236875
237803
|
*
|
|
@@ -237866,7 +238794,7 @@ var require_subprotocol = __commonJS({
|
|
|
237866
238794
|
var require_websocket_server = __commonJS({
|
|
237867
238795
|
"../node_modules/ws/lib/websocket-server.js"(exports, module) {
|
|
237868
238796
|
"use strict";
|
|
237869
|
-
var
|
|
238797
|
+
var EventEmitter15 = __require("events");
|
|
237870
238798
|
var http6 = __require("http");
|
|
237871
238799
|
var { Duplex: Duplex3 } = __require("stream");
|
|
237872
238800
|
var { createHash: createHash31 } = __require("crypto");
|
|
@@ -237879,7 +238807,7 @@ var require_websocket_server = __commonJS({
|
|
|
237879
238807
|
var RUNNING = 0;
|
|
237880
238808
|
var CLOSING = 1;
|
|
237881
238809
|
var CLOSED = 2;
|
|
237882
|
-
var WebSocketServer4 = class extends
|
|
238810
|
+
var WebSocketServer4 = class extends EventEmitter15 {
|
|
237883
238811
|
/**
|
|
237884
238812
|
* Create a `WebSocketServer` instance.
|
|
237885
238813
|
*
|
|
@@ -254016,6 +254944,7 @@ var DEFAULT_DIFFUSERS_IMAGE_MODEL, DEFAULT_OLLAMA_IMAGE_MODEL, LEGACY_SDXL_TURBO
|
|
|
254016
254944
|
var init_image_generate = __esm({
|
|
254017
254945
|
"packages/execution/dist/tools/image-generate.js"() {
|
|
254018
254946
|
"use strict";
|
|
254947
|
+
init_model_broker();
|
|
254019
254948
|
init_venv_paths();
|
|
254020
254949
|
init_model_store();
|
|
254021
254950
|
DEFAULT_DIFFUSERS_IMAGE_MODEL = "Efficient-Large-Model/SANA1.5_1.6B_1024px_diffusers";
|
|
@@ -254830,6 +255759,28 @@ if __name__ == "__main__":
|
|
|
254830
255759
|
const requestedBackend = args["backend"] ? String(args["backend"]) : this.defaultBackend;
|
|
254831
255760
|
const seed = optionalNumberArg(args["seed"]);
|
|
254832
255761
|
const candidates = imageGenerationFallbackCandidates(requestedModel, requestedBackend, generationFallbackEnabled(args));
|
|
255762
|
+
const broker = getModelBroker();
|
|
255763
|
+
const firstCandidate = candidates[0];
|
|
255764
|
+
if (firstCandidate) {
|
|
255765
|
+
const decision2 = await broker.ensureModelLoadable({
|
|
255766
|
+
name: firstCandidate.model,
|
|
255767
|
+
domain: "image-gen",
|
|
255768
|
+
host: firstCandidate.backend === "ollama" ? "ollama" : "diffusers-py",
|
|
255769
|
+
owner: "image-generate-tool"
|
|
255770
|
+
});
|
|
255771
|
+
if (decision2.kind === "evict") {
|
|
255772
|
+
for (const target of decision2.evictTargets) {
|
|
255773
|
+
await broker.evict(target.host, target.name, "image-gen-needs-room");
|
|
255774
|
+
}
|
|
255775
|
+
} else if (decision2.kind === "reject") {
|
|
255776
|
+
return {
|
|
255777
|
+
success: false,
|
|
255778
|
+
output: "",
|
|
255779
|
+
error: `Image generation blocked by resource broker: ${decision2.reason}`,
|
|
255780
|
+
durationMs: performance.now() - start2
|
|
255781
|
+
};
|
|
255782
|
+
}
|
|
255783
|
+
}
|
|
254833
255784
|
try {
|
|
254834
255785
|
return await this.generateCandidateLadder({ candidates, prompt, args, seed, start: start2 });
|
|
254835
255786
|
} catch (err) {
|
|
@@ -256270,6 +257221,7 @@ var DEFAULT_SOUND_MODEL, DEFAULT_MUSIC_MODEL, DIFFUSERS_AUDIO_PACKAGES, TRANSFOR
|
|
|
256270
257221
|
var init_audio_generate = __esm({
|
|
256271
257222
|
"packages/execution/dist/tools/audio-generate.js"() {
|
|
256272
257223
|
"use strict";
|
|
257224
|
+
init_model_broker();
|
|
256273
257225
|
init_venv_paths();
|
|
256274
257226
|
init_model_store();
|
|
256275
257227
|
DEFAULT_SOUND_MODEL = "cvssp/audioldm-s-full-v2";
|
|
@@ -257217,6 +258169,28 @@ if __name__ == "__main__":
|
|
|
257217
258169
|
const candidates = audioGenerationFallbackCandidates(kind, requestedModel, requestedBackend, generationFallbackEnabled2(args));
|
|
257218
258170
|
const seed = optionalNumberArg2(args["seed"]);
|
|
257219
258171
|
const playback = playbackRequested(args);
|
|
258172
|
+
const broker = getModelBroker();
|
|
258173
|
+
const firstCandidate = candidates[0];
|
|
258174
|
+
if (firstCandidate) {
|
|
258175
|
+
const decision2 = await broker.ensureModelLoadable({
|
|
258176
|
+
name: firstCandidate.model,
|
|
258177
|
+
domain: kind === "music" ? "music" : "sound",
|
|
258178
|
+
host: firstCandidate.backend === "audiocraft" ? "audiocraft" : firstCandidate.backend === "tangoflux" ? "tangoflux" : firstCandidate.backend === "transformers" ? "diffusers-py" : "diffusers-py",
|
|
258179
|
+
owner: `audio-generate-tool/${kind}`
|
|
258180
|
+
});
|
|
258181
|
+
if (decision2.kind === "evict") {
|
|
258182
|
+
for (const target of decision2.evictTargets) {
|
|
258183
|
+
await broker.evict(target.host, target.name, `${kind}-gen-needs-room`);
|
|
258184
|
+
}
|
|
258185
|
+
} else if (decision2.kind === "reject") {
|
|
258186
|
+
return {
|
|
258187
|
+
success: false,
|
|
258188
|
+
output: "",
|
|
258189
|
+
error: `${kind === "music" ? "Music" : "Sound"} generation blocked by resource broker: ${decision2.reason}`,
|
|
258190
|
+
durationMs: performance.now() - start2
|
|
258191
|
+
};
|
|
258192
|
+
}
|
|
258193
|
+
}
|
|
257220
258194
|
try {
|
|
257221
258195
|
return await this.generateCandidateLadder({ kind, candidates, prompt, args, seed, playback, start: start2 });
|
|
257222
258196
|
} catch (err) {
|
|
@@ -258160,6 +259134,7 @@ var DEFAULT_DIFFUSERS_VIDEO_MODEL, SANA_VIDEO_480P_MODEL, SANA_VIDEO_720P_MODEL,
|
|
|
258160
259134
|
var init_video_generate = __esm({
|
|
258161
259135
|
"packages/execution/dist/tools/video-generate.js"() {
|
|
258162
259136
|
"use strict";
|
|
259137
|
+
init_model_broker();
|
|
258163
259138
|
init_venv_paths();
|
|
258164
259139
|
init_model_store();
|
|
258165
259140
|
DEFAULT_DIFFUSERS_VIDEO_MODEL = "Efficient-Large-Model/SANA-Video_2B_480p";
|
|
@@ -259497,6 +260472,28 @@ if __name__ == "__main__":
|
|
|
259497
260472
|
const withAudio = booleanArg3(args["with_audio"], false);
|
|
259498
260473
|
const audioInput = typeof args["audio_input"] === "string" && args["audio_input"].trim() ? String(args["audio_input"]).trim() : void 0;
|
|
259499
260474
|
const candidates = videoGenerationFallbackCandidates(requestedModel, requestedBackend, inferredKind, generationFallbackEnabled3(args), { preferNativeAudioVideo: withAudio || Boolean(audioInput) });
|
|
260475
|
+
const broker = getModelBroker();
|
|
260476
|
+
const firstCandidate = candidates[0];
|
|
260477
|
+
if (firstCandidate) {
|
|
260478
|
+
const decision2 = await broker.ensureModelLoadable({
|
|
260479
|
+
name: firstCandidate.model,
|
|
260480
|
+
domain: "video-gen",
|
|
260481
|
+
host: firstCandidate.backend === "comfyui" ? "comfyui" : "diffusers-py",
|
|
260482
|
+
owner: "video-generate-tool"
|
|
260483
|
+
});
|
|
260484
|
+
if (decision2.kind === "evict") {
|
|
260485
|
+
for (const target of decision2.evictTargets) {
|
|
260486
|
+
await broker.evict(target.host, target.name, "video-gen-needs-room");
|
|
260487
|
+
}
|
|
260488
|
+
} else if (decision2.kind === "reject") {
|
|
260489
|
+
return {
|
|
260490
|
+
success: false,
|
|
260491
|
+
output: "",
|
|
260492
|
+
error: `Video generation blocked by resource broker: ${decision2.reason}`,
|
|
260493
|
+
durationMs: performance.now() - start2
|
|
260494
|
+
};
|
|
260495
|
+
}
|
|
260496
|
+
}
|
|
259500
260497
|
if (candidates.length === 0) {
|
|
259501
260498
|
return {
|
|
259502
260499
|
success: false,
|
|
@@ -261097,6 +262094,7 @@ var moondreamClient, moondreamError, stationProcess, hfPointUnavailable, IMAGE_E
|
|
|
261097
262094
|
var init_vision = __esm({
|
|
261098
262095
|
"packages/execution/dist/tools/vision.js"() {
|
|
261099
262096
|
"use strict";
|
|
262097
|
+
init_model_broker();
|
|
261100
262098
|
moondreamClient = null;
|
|
261101
262099
|
moondreamError = null;
|
|
261102
262100
|
stationProcess = null;
|
|
@@ -261195,14 +262193,43 @@ var init_vision = __esm({
|
|
|
261195
262193
|
};
|
|
261196
262194
|
}
|
|
261197
262195
|
}
|
|
262196
|
+
const broker = getModelBroker();
|
|
262197
|
+
const moondreamDecision = await broker.ensureModelLoadable({
|
|
262198
|
+
name: "moondream2",
|
|
262199
|
+
domain: "vision",
|
|
262200
|
+
host: "moondream-station",
|
|
262201
|
+
owner: "vision-tool"
|
|
262202
|
+
});
|
|
262203
|
+
const forceDegradeToOllama = moondreamDecision.kind === "degrade" && moondreamDecision.fallback.host === "ollama";
|
|
262204
|
+
const forceReject = moondreamDecision.kind === "reject" && this._activeModelHasVision !== true;
|
|
261198
262205
|
let client = null;
|
|
261199
|
-
|
|
261200
|
-
|
|
261201
|
-
|
|
262206
|
+
if (!forceDegradeToOllama) {
|
|
262207
|
+
try {
|
|
262208
|
+
client = await getMoondreamClient();
|
|
262209
|
+
} catch {
|
|
262210
|
+
}
|
|
261202
262211
|
}
|
|
261203
262212
|
if (client) {
|
|
262213
|
+
broker.registerLoaded({
|
|
262214
|
+
key: "moondream-station:moondream2",
|
|
262215
|
+
name: "moondream2",
|
|
262216
|
+
domain: "vision",
|
|
262217
|
+
host: "moondream-station",
|
|
262218
|
+
owner: "vision-tool",
|
|
262219
|
+
vramMB: 2e3,
|
|
262220
|
+
ramMB: 1500,
|
|
262221
|
+
priority: 1
|
|
262222
|
+
});
|
|
261204
262223
|
return await this.runMoondream(client, buffer2, filename, action, prompt, length4, start2);
|
|
261205
262224
|
}
|
|
262225
|
+
if (forceReject) {
|
|
262226
|
+
return {
|
|
262227
|
+
success: false,
|
|
262228
|
+
output: "",
|
|
262229
|
+
error: `Vision blocked by resource broker: ${moondreamDecision.kind === "reject" ? moondreamDecision.reason : "insufficient memory"}`,
|
|
262230
|
+
durationMs: performance.now() - start2
|
|
262231
|
+
};
|
|
262232
|
+
}
|
|
261206
262233
|
const ollamaResult = await this.tryOllamaVision(buffer2, filename, action, prompt, length4, start2);
|
|
261207
262234
|
if (ollamaResult)
|
|
261208
262235
|
return ollamaResult;
|
|
@@ -261300,7 +262327,41 @@ Coordinates are normalized (0-1). Multiply by image width/height for pixel value
|
|
|
261300
262327
|
async tryOllamaVision(buffer2, filename, action, prompt, length4, start2) {
|
|
261301
262328
|
const ollamaHost = process.env["OLLAMA_HOST"] || "http://127.0.0.1:11434";
|
|
261302
262329
|
const envModel = process.env["OLLAMA_VISION_MODEL"];
|
|
261303
|
-
|
|
262330
|
+
let model = envModel || (this._activeModelHasVision && this._activeModel ? this._activeModel : "moondream");
|
|
262331
|
+
const broker = getModelBroker();
|
|
262332
|
+
const decision2 = await broker.ensureModelLoadable({
|
|
262333
|
+
name: model,
|
|
262334
|
+
domain: "vision",
|
|
262335
|
+
host: "ollama",
|
|
262336
|
+
owner: "vision-tool/ollama",
|
|
262337
|
+
requestedNumCtx: 2048,
|
|
262338
|
+
estimatedVramMB: 2e3
|
|
262339
|
+
});
|
|
262340
|
+
let numCtx;
|
|
262341
|
+
if (decision2.kind === "reject")
|
|
262342
|
+
return null;
|
|
262343
|
+
if (decision2.kind === "degrade") {
|
|
262344
|
+
model = decision2.fallback.name;
|
|
262345
|
+
} else if (decision2.kind === "evict") {
|
|
262346
|
+
for (const target of decision2.evictTargets) {
|
|
262347
|
+
await broker.evict(target.host, target.name, "vision-needs-room");
|
|
262348
|
+
}
|
|
262349
|
+
numCtx = decision2.effectiveNumCtx;
|
|
262350
|
+
} else if (decision2.kind === "ok") {
|
|
262351
|
+
numCtx = decision2.effectiveNumCtx;
|
|
262352
|
+
} else if (decision2.kind === "wait-for-inflight") {
|
|
262353
|
+
const inner = await decision2.promise;
|
|
262354
|
+
if (inner.kind === "ok")
|
|
262355
|
+
numCtx = inner.effectiveNumCtx;
|
|
262356
|
+
else if (inner.kind === "degrade")
|
|
262357
|
+
model = inner.fallback.name;
|
|
262358
|
+
else if (inner.kind === "reject")
|
|
262359
|
+
return null;
|
|
262360
|
+
}
|
|
262361
|
+
if (numCtx === void 0) {
|
|
262362
|
+
const trainCtx = await broker.getNctxTrain(model);
|
|
262363
|
+
numCtx = trainCtx && trainCtx > 0 ? Math.min(trainCtx, 4096) : 2048;
|
|
262364
|
+
}
|
|
261304
262365
|
const imageBase64 = buffer2.toString("base64");
|
|
261305
262366
|
let ollamaPrompt;
|
|
261306
262367
|
switch (action) {
|
|
@@ -261323,7 +262384,7 @@ Coordinates are normalized (0-1). Multiply by image width/height for pixel value
|
|
|
261323
262384
|
let res = await fetch(`${ollamaHost}/api/generate`, {
|
|
261324
262385
|
method: "POST",
|
|
261325
262386
|
headers: { "Content-Type": "application/json" },
|
|
261326
|
-
body: JSON.stringify({ model, prompt: ollamaPrompt, images: [imageBase64], stream: false }),
|
|
262387
|
+
body: JSON.stringify({ model, prompt: ollamaPrompt, images: [imageBase64], stream: false, options: { num_ctx: numCtx } }),
|
|
261327
262388
|
signal: AbortSignal.timeout(6e4)
|
|
261328
262389
|
});
|
|
261329
262390
|
if (!res.ok && model === "moondream") {
|
|
@@ -261335,15 +262396,18 @@ Coordinates are normalized (0-1). Multiply by image width/height for pixel value
|
|
|
261335
262396
|
res = await fetch(`${ollamaHost}/api/generate`, {
|
|
261336
262397
|
method: "POST",
|
|
261337
262398
|
headers: { "Content-Type": "application/json" },
|
|
261338
|
-
body: JSON.stringify({ model, prompt: ollamaPrompt, images: [imageBase64], stream: false }),
|
|
262399
|
+
body: JSON.stringify({ model, prompt: ollamaPrompt, images: [imageBase64], stream: false, options: { num_ctx: numCtx } }),
|
|
261339
262400
|
signal: AbortSignal.timeout(6e4)
|
|
261340
262401
|
});
|
|
261341
262402
|
} catch {
|
|
261342
262403
|
}
|
|
261343
262404
|
}
|
|
261344
262405
|
}
|
|
261345
|
-
if (!res.ok)
|
|
262406
|
+
if (!res.ok) {
|
|
262407
|
+
broker.clearInflight("ollama", model);
|
|
261346
262408
|
return null;
|
|
262409
|
+
}
|
|
262410
|
+
broker.touch("ollama", model);
|
|
261347
262411
|
const data = await res.json();
|
|
261348
262412
|
const response = data.response || "";
|
|
261349
262413
|
if (!response)
|
|
@@ -513030,7 +514094,7 @@ var init_dist4 = __esm({
|
|
|
513030
514094
|
});
|
|
513031
514095
|
|
|
513032
514096
|
// packages/execution/dist/tools/code-graph-events.js
|
|
513033
|
-
import { EventEmitter as
|
|
514097
|
+
import { EventEmitter as EventEmitter4 } from "node:events";
|
|
513034
514098
|
function emitIndexed(payload) {
|
|
513035
514099
|
codeGraphEvents.publish({ type: "indexed", timestamp: Date.now(), ...payload });
|
|
513036
514100
|
}
|
|
@@ -513048,7 +514112,7 @@ var init_code_graph_events = __esm({
|
|
|
513048
514112
|
"packages/execution/dist/tools/code-graph-events.js"() {
|
|
513049
514113
|
"use strict";
|
|
513050
514114
|
MAX_RECENT = 256;
|
|
513051
|
-
CodeGraphEventBus = class extends
|
|
514115
|
+
CodeGraphEventBus = class extends EventEmitter4 {
|
|
513052
514116
|
ring = [];
|
|
513053
514117
|
constructor() {
|
|
513054
514118
|
super();
|
|
@@ -519570,6 +520634,7 @@ var init_full_sub_agent = __esm({
|
|
|
519570
520634
|
"packages/execution/dist/tools/full-sub-agent.js"() {
|
|
519571
520635
|
"use strict";
|
|
519572
520636
|
init_process_kill();
|
|
520637
|
+
init_model_broker();
|
|
519573
520638
|
_activeSubProcesses = /* @__PURE__ */ new Map();
|
|
519574
520639
|
FullSubAgentTool = class {
|
|
519575
520640
|
name = "full_sub_agent";
|
|
@@ -519631,10 +520696,45 @@ var init_full_sub_agent = __esm({
|
|
|
519631
520696
|
if (!task)
|
|
519632
520697
|
return { success: false, output: "", error: "task is required", durationMs: performance.now() - start2 };
|
|
519633
520698
|
const model = String(args["model"] ?? this.model);
|
|
520699
|
+
const broker = getModelBroker();
|
|
520700
|
+
const decision2 = await broker.ensureModelLoadable({
|
|
520701
|
+
name: model || "default",
|
|
520702
|
+
domain: "subagent",
|
|
520703
|
+
host: model ? "ollama" : "subprocess",
|
|
520704
|
+
owner: "full-sub-agent-tool",
|
|
520705
|
+
estimatedVramMB: 4e3,
|
|
520706
|
+
estimatedRamMB: 1500,
|
|
520707
|
+
priority: 1
|
|
520708
|
+
// sub-agents are higher priority than idle background models
|
|
520709
|
+
});
|
|
520710
|
+
if (decision2.kind === "evict") {
|
|
520711
|
+
for (const target of decision2.evictTargets) {
|
|
520712
|
+
await broker.evict(target.host, target.name, "sub-agent-spawn-needs-room");
|
|
520713
|
+
}
|
|
520714
|
+
} else if (decision2.kind === "reject") {
|
|
520715
|
+
return {
|
|
520716
|
+
success: false,
|
|
520717
|
+
output: "",
|
|
520718
|
+
error: `Sub-agent spawn blocked by resource broker: ${decision2.reason}`,
|
|
520719
|
+
durationMs: performance.now() - start2
|
|
520720
|
+
};
|
|
520721
|
+
}
|
|
519634
520722
|
const entry = spawnFullSubAgent(task, { model, backendUrl: this.backendUrl, workingDir: this.workingDir }, (text) => this.onViewWrite?.(entry.id, text), (id, exitCode, output) => {
|
|
519635
520723
|
this.onViewStatus?.(id, exitCode === 0 ? "completed" : "failed");
|
|
520724
|
+
broker.unregisterLoaded("subprocess", id, "sub-agent-exited");
|
|
519636
520725
|
this.onComplete?.(id, task, exitCode, output);
|
|
519637
520726
|
});
|
|
520727
|
+
broker.registerLoaded({
|
|
520728
|
+
key: `subprocess:${entry.id}`,
|
|
520729
|
+
name: entry.id,
|
|
520730
|
+
domain: "subagent",
|
|
520731
|
+
host: "subprocess",
|
|
520732
|
+
owner: "full-sub-agent-tool",
|
|
520733
|
+
vramMB: 4e3,
|
|
520734
|
+
ramMB: 1500,
|
|
520735
|
+
pid: entry.pid,
|
|
520736
|
+
priority: 1
|
|
520737
|
+
});
|
|
519638
520738
|
this.onViewRegister?.(entry.id, entry.id, "full");
|
|
519639
520739
|
return {
|
|
519640
520740
|
success: true,
|
|
@@ -519708,6 +520808,7 @@ var _idCounter, AgentTool;
|
|
|
519708
520808
|
var init_agent_tool = __esm({
|
|
519709
520809
|
"packages/execution/dist/tools/agent-tool.js"() {
|
|
519710
520810
|
"use strict";
|
|
520811
|
+
init_model_broker();
|
|
519711
520812
|
_idCounter = 0;
|
|
519712
520813
|
AgentTool = class {
|
|
519713
520814
|
name = "agent";
|
|
@@ -519798,6 +520899,31 @@ var init_agent_tool = __esm({
|
|
|
519798
520899
|
}
|
|
519799
520900
|
const model = modelOverride ?? this.config.model;
|
|
519800
520901
|
const agentId = generateAgentId(subagentType);
|
|
520902
|
+
{
|
|
520903
|
+
const broker = getModelBroker();
|
|
520904
|
+
const decision2 = await broker.ensureModelLoadable({
|
|
520905
|
+
name: model || "default",
|
|
520906
|
+
domain: "subagent",
|
|
520907
|
+
host: "ollama",
|
|
520908
|
+
owner: `agent-tool/${subagentType}`,
|
|
520909
|
+
estimatedVramMB: 2500,
|
|
520910
|
+
estimatedRamMB: 1e3,
|
|
520911
|
+
priority: 1
|
|
520912
|
+
});
|
|
520913
|
+
if (decision2.kind === "evict") {
|
|
520914
|
+
for (const target of decision2.evictTargets) {
|
|
520915
|
+
await broker.evict(target.host, target.name, "agent-tool-needs-room");
|
|
520916
|
+
}
|
|
520917
|
+
} else if (decision2.kind === "reject") {
|
|
520918
|
+
return {
|
|
520919
|
+
success: false,
|
|
520920
|
+
output: "",
|
|
520921
|
+
error: `Agent spawn blocked by resource broker: ${decision2.reason}`,
|
|
520922
|
+
durationMs: performance.now() - start2
|
|
520923
|
+
};
|
|
520924
|
+
}
|
|
520925
|
+
broker.touch("ollama", model);
|
|
520926
|
+
}
|
|
519801
520927
|
const label = description ?? `${subagentType}: ${prompt.slice(0, 40)}`;
|
|
519802
520928
|
const preloadedFiles = [];
|
|
519803
520929
|
if (relevantFilePaths.length > 0) {
|
|
@@ -522473,13 +523599,13 @@ var init_notebook_edit = __esm({
|
|
|
522473
523599
|
|
|
522474
523600
|
// packages/execution/dist/tools/environment-snapshot.js
|
|
522475
523601
|
import { execSync as execSync42 } from "node:child_process";
|
|
522476
|
-
import { cpus, totalmem, freemem, hostname as hostname2, platform as platform2, arch, uptime } from "node:os";
|
|
523602
|
+
import { cpus, totalmem as totalmem2, freemem as freemem2, hostname as hostname2, platform as platform2, arch, uptime } from "node:os";
|
|
522477
523603
|
import { statfsSync as statfsSync2 } from "node:fs";
|
|
522478
523604
|
function collectSnapshot(workingDir) {
|
|
522479
523605
|
const now = /* @__PURE__ */ new Date();
|
|
522480
523606
|
const cpuInfo = cpus();
|
|
522481
|
-
const totalRAM =
|
|
522482
|
-
const freeRAM =
|
|
523607
|
+
const totalRAM = totalmem2();
|
|
523608
|
+
const freeRAM = freemem2();
|
|
522483
523609
|
let load1 = 0, load5 = 0, load15 = 0;
|
|
522484
523610
|
try {
|
|
522485
523611
|
const loadavg4 = __require("node:os").loadavg();
|
|
@@ -522674,6 +523800,7 @@ var VideoUnderstandTool;
|
|
|
522674
523800
|
var init_video_understand = __esm({
|
|
522675
523801
|
"packages/execution/dist/tools/video-understand.js"() {
|
|
522676
523802
|
"use strict";
|
|
523803
|
+
init_model_broker();
|
|
522677
523804
|
VideoUnderstandTool = class {
|
|
522678
523805
|
name = "video_understand";
|
|
522679
523806
|
description = "Analyze a video from URL or local file. Produces timestamped transcript aligned with keyframe descriptions. Supports YouTube URLs and direct video files. Pipeline: download → transcribe (Whisper) → extract keyframes (scene detection) → describe frames → align timestamps → save structured output.";
|
|
@@ -522704,6 +523831,36 @@ var init_video_understand = __esm({
|
|
|
522704
523831
|
if (!url && !localPath) {
|
|
522705
523832
|
return { success: false, output: "", error: "url or path required", durationMs: performance.now() - start2 };
|
|
522706
523833
|
}
|
|
523834
|
+
const broker = getModelBroker();
|
|
523835
|
+
const asrDecision = await broker.ensureModelLoadable({
|
|
523836
|
+
name: whisperModel,
|
|
523837
|
+
domain: "asr",
|
|
523838
|
+
host: "whisper-cli",
|
|
523839
|
+
owner: "video-understand-tool"
|
|
523840
|
+
});
|
|
523841
|
+
if (asrDecision.kind === "evict") {
|
|
523842
|
+
for (const target of asrDecision.evictTargets) {
|
|
523843
|
+
await broker.evict(target.host, target.name, "video-understand-needs-asr-room");
|
|
523844
|
+
}
|
|
523845
|
+
} else if (asrDecision.kind === "reject") {
|
|
523846
|
+
return {
|
|
523847
|
+
success: false,
|
|
523848
|
+
output: "",
|
|
523849
|
+
error: `Video understanding blocked by resource broker (ASR): ${asrDecision.reason}`,
|
|
523850
|
+
durationMs: performance.now() - start2
|
|
523851
|
+
};
|
|
523852
|
+
}
|
|
523853
|
+
const visionDecision = await broker.ensureModelLoadable({
|
|
523854
|
+
name: "moondream2",
|
|
523855
|
+
domain: "vision",
|
|
523856
|
+
host: "moondream-station",
|
|
523857
|
+
owner: "video-understand-tool"
|
|
523858
|
+
});
|
|
523859
|
+
if (visionDecision.kind === "evict") {
|
|
523860
|
+
for (const target of visionDecision.evictTargets) {
|
|
523861
|
+
await broker.evict(target.host, target.name, "video-understand-needs-vision-room");
|
|
523862
|
+
}
|
|
523863
|
+
}
|
|
522707
523864
|
const outDir = join71(this.workingDir, ".omnius", "video-analysis");
|
|
522708
523865
|
mkdirSync28(outDir, { recursive: true });
|
|
522709
523866
|
const tmpDir = join71(outDir, `tmp-${Date.now()}`);
|
|
@@ -524314,6 +525471,7 @@ __export(dist_exports, {
|
|
|
524314
525471
|
MemorySearchTool: () => MemorySearchTool,
|
|
524315
525472
|
MemoryWriteTool: () => MemoryWriteTool,
|
|
524316
525473
|
MeshtasticTool: () => MeshtasticTool,
|
|
525474
|
+
ModelBroker: () => ModelBroker,
|
|
524317
525475
|
MultimodalMemoryTool: () => MultimodalMemoryTool,
|
|
524318
525476
|
NetworkEgressPolicyError: () => NetworkEgressPolicyError,
|
|
524319
525477
|
NexusTool: () => NexusTool,
|
|
@@ -524451,6 +525609,7 @@ __export(dist_exports, {
|
|
|
524451
525609
|
getFileNotes: () => getFileNotes,
|
|
524452
525610
|
getFullSubAgent: () => getFullSubAgent,
|
|
524453
525611
|
getImageGenerationPreset: () => getImageGenerationPreset,
|
|
525612
|
+
getModelBroker: () => getModelBroker,
|
|
524454
525613
|
getModelStoreDiskInfo: () => getModelStoreDiskInfo,
|
|
524455
525614
|
getRecentChangesSummary: () => getRecentChangesSummary,
|
|
524456
525615
|
getSecretsFilePath: () => getSecretsFilePath,
|
|
@@ -524573,6 +525732,7 @@ __export(dist_exports, {
|
|
|
524573
525732
|
videoGenerationQualityLadder: () => videoGenerationQualityLadder,
|
|
524574
525733
|
videoGenerationSetupPlan: () => videoGenerationSetupPlan,
|
|
524575
525734
|
worktreeHasChanges: () => worktreeHasChanges,
|
|
525735
|
+
wrapWithBroker: () => wrapWithBroker,
|
|
524576
525736
|
writeProvenanceFile: () => writeProvenanceFile,
|
|
524577
525737
|
writeTodos: () => writeTodos
|
|
524578
525738
|
});
|
|
@@ -524580,6 +525740,8 @@ var init_dist5 = __esm({
|
|
|
524580
525740
|
"packages/execution/dist/index.js"() {
|
|
524581
525741
|
"use strict";
|
|
524582
525742
|
init_tool_executor();
|
|
525743
|
+
init_model_broker();
|
|
525744
|
+
init_broker_mediated_backend();
|
|
524583
525745
|
init_security_classifier();
|
|
524584
525746
|
init_tool_manifest();
|
|
524585
525747
|
init_provenance();
|
|
@@ -527339,12 +528501,12 @@ var init_ollama_pool_cleanup = __esm({
|
|
|
527339
528501
|
});
|
|
527340
528502
|
|
|
527341
528503
|
// packages/orchestrator/dist/ollama-pool.js
|
|
527342
|
-
import { spawn as spawn23, exec } from "node:child_process";
|
|
528504
|
+
import { spawn as spawn23, exec as exec2 } from "node:child_process";
|
|
527343
528505
|
import { existsSync as existsSync63, readFileSync as readFileSync50, readdirSync as readdirSync20, statfsSync as statfsSync3, statSync as statSync25 } from "node:fs";
|
|
527344
528506
|
import { homedir as homedir28 } from "node:os";
|
|
527345
528507
|
import { join as join77 } from "node:path";
|
|
527346
528508
|
import { createServer as createServer3 } from "node:net";
|
|
527347
|
-
import { EventEmitter as
|
|
528509
|
+
import { EventEmitter as EventEmitter5 } from "node:events";
|
|
527348
528510
|
function discoverSystemOllamaModelStore() {
|
|
527349
528511
|
const fromEnv = process.env["OLLAMA_MODELS"]?.trim();
|
|
527350
528512
|
if (fromEnv && isDirectory(fromEnv))
|
|
@@ -527436,7 +528598,7 @@ async function detectGpus() {
|
|
|
527436
528598
|
if (_nvidiaSmiAvailable === false)
|
|
527437
528599
|
return [];
|
|
527438
528600
|
return new Promise((resolve55) => {
|
|
527439
|
-
|
|
528601
|
+
exec2("nvidia-smi --query-gpu=index,uuid,name,memory.total,memory.free,utilization.gpu --format=csv,noheader,nounits 2>/dev/null", { encoding: "utf8", timeout: 3e3 }, (err, stdout) => {
|
|
527440
528602
|
if (err) {
|
|
527441
528603
|
_nvidiaSmiAvailable = false;
|
|
527442
528604
|
resolve55([]);
|
|
@@ -527465,7 +528627,7 @@ async function detectGpus() {
|
|
|
527465
528627
|
});
|
|
527466
528628
|
}
|
|
527467
528629
|
async function getHardwareSnapshot() {
|
|
527468
|
-
const { totalmem:
|
|
528630
|
+
const { totalmem: totalmem8, freemem: freemem7, cpus: cpus5 } = await import("node:os");
|
|
527469
528631
|
const gpus = await detectGpus();
|
|
527470
528632
|
const diskPath = discoverSystemOllamaModelStore() ?? homedir28();
|
|
527471
528633
|
const disk = snapshotDisk(diskPath);
|
|
@@ -527473,8 +528635,8 @@ async function getHardwareSnapshot() {
|
|
|
527473
528635
|
return {
|
|
527474
528636
|
gpus,
|
|
527475
528637
|
cpuCores: cpus5().length,
|
|
527476
|
-
ramTotalMB: Math.round(
|
|
527477
|
-
ramFreeMB: Math.round(
|
|
528638
|
+
ramTotalMB: Math.round(totalmem8() / (1024 * 1024)),
|
|
528639
|
+
ramFreeMB: Math.round(freemem7() / (1024 * 1024)),
|
|
527478
528640
|
disk,
|
|
527479
528641
|
network,
|
|
527480
528642
|
takenAtMs: Date.now()
|
|
@@ -527741,7 +528903,7 @@ var init_ollama_pool = __esm({
|
|
|
527741
528903
|
return { proc, ready };
|
|
527742
528904
|
};
|
|
527743
528905
|
_gpuCursor = 0;
|
|
527744
|
-
OllamaPool = class extends
|
|
528906
|
+
OllamaPool = class extends EventEmitter5 {
|
|
527745
528907
|
config;
|
|
527746
528908
|
instances = [];
|
|
527747
528909
|
reaperHandle = null;
|
|
@@ -540331,8 +541493,8 @@ var init_streaming_executor = __esm({
|
|
|
540331
541493
|
startExecution(entry) {
|
|
540332
541494
|
entry.state = "executing";
|
|
540333
541495
|
entry.startedAt = Date.now();
|
|
540334
|
-
const
|
|
540335
|
-
entry.promise =
|
|
541496
|
+
const exec6 = this.executeFn;
|
|
541497
|
+
entry.promise = exec6(entry.name, entry.args).then((result) => {
|
|
540336
541498
|
entry.state = "completed";
|
|
540337
541499
|
entry.result = result;
|
|
540338
541500
|
entry.completedAt = Date.now();
|
|
@@ -540621,7 +541783,7 @@ __export(preflightSnapshot_exports, {
|
|
|
540621
541783
|
});
|
|
540622
541784
|
import { existsSync as existsSync79, readFileSync as readFileSync62, statSync as statSync31 } from "node:fs";
|
|
540623
541785
|
import { execSync as execSync46 } from "node:child_process";
|
|
540624
|
-
import { homedir as homedir29, platform as platform3, arch as arch2, totalmem as
|
|
541786
|
+
import { homedir as homedir29, platform as platform3, arch as arch2, totalmem as totalmem3, freemem as freemem3, hostname as hostname3 } from "node:os";
|
|
540625
541787
|
import { join as join92 } from "node:path";
|
|
540626
541788
|
import { createHash as createHash17 } from "node:crypto";
|
|
540627
541789
|
function capturePreflightSnapshot(workingDir) {
|
|
@@ -540668,8 +541830,8 @@ function capturePreflightSnapshot(workingDir) {
|
|
|
540668
541830
|
platform: platform3(),
|
|
540669
541831
|
arch: arch2(),
|
|
540670
541832
|
nodeVersion: process.version,
|
|
540671
|
-
totalMemBytes:
|
|
540672
|
-
freeMemBytes:
|
|
541833
|
+
totalMemBytes: totalmem3(),
|
|
541834
|
+
freeMemBytes: freemem3(),
|
|
540673
541835
|
hostname: hostname3()
|
|
540674
541836
|
},
|
|
540675
541837
|
toolchain: captureToolchainVersions(),
|
|
@@ -554431,6 +555593,13 @@ ${description}`
|
|
|
554431
555593
|
if (responseFormat !== void 0) {
|
|
554432
555594
|
body["response_format"] = responseFormat;
|
|
554433
555595
|
}
|
|
555596
|
+
const reqNumCtx = request.numCtx;
|
|
555597
|
+
if (Number.isFinite(reqNumCtx) && (reqNumCtx ?? 0) > 0) {
|
|
555598
|
+
const opts = body["options"] ?? {};
|
|
555599
|
+
opts["num_ctx"] = reqNumCtx;
|
|
555600
|
+
body["options"] = opts;
|
|
555601
|
+
body["num_ctx"] = reqNumCtx;
|
|
555602
|
+
}
|
|
554434
555603
|
let poolSlot = shouldUseOllamaPoolForBaseUrl(this.baseUrl) ? await getOllamaPool({ baseInstanceUrl: this.baseUrl }).acquire({
|
|
554435
555604
|
model: this.model
|
|
554436
555605
|
}) : null;
|
|
@@ -559212,7 +560381,7 @@ import { existsSync as existsSync85, mkdirSync as mkdirSync46, writeFileSync as
|
|
|
559212
560381
|
import { join as join100, dirname as dirname27 } from "node:path";
|
|
559213
560382
|
import { homedir as homedir32 } from "node:os";
|
|
559214
560383
|
import { fileURLToPath as fileURLToPath11 } from "node:url";
|
|
559215
|
-
import { EventEmitter as
|
|
560384
|
+
import { EventEmitter as EventEmitter6 } from "node:events";
|
|
559216
560385
|
import { createInterface as createInterface2 } from "node:readline";
|
|
559217
560386
|
function isAudioPath(path12) {
|
|
559218
560387
|
const ext = path12.toLowerCase().split(".").pop();
|
|
@@ -559451,9 +560620,9 @@ function ensureTranscribeCliBackground() {
|
|
|
559451
560620
|
} catch {
|
|
559452
560621
|
}
|
|
559453
560622
|
try {
|
|
559454
|
-
const { exec:
|
|
560623
|
+
const { exec: exec6 } = await import("node:child_process");
|
|
559455
560624
|
return new Promise((resolve55) => {
|
|
559456
|
-
|
|
560625
|
+
exec6("npm i -g transcribe-cli", { timeout: 18e4 }, (err) => {
|
|
559457
560626
|
resolve55(!err);
|
|
559458
560627
|
});
|
|
559459
560628
|
});
|
|
@@ -559501,7 +560670,7 @@ var init_listen = __esm({
|
|
|
559501
560670
|
".m4v",
|
|
559502
560671
|
".ts"
|
|
559503
560672
|
]);
|
|
559504
|
-
WhisperFallbackTranscriber = class extends
|
|
560673
|
+
WhisperFallbackTranscriber = class extends EventEmitter6 {
|
|
559505
560674
|
constructor(model, scriptPath2) {
|
|
559506
560675
|
super();
|
|
559507
560676
|
this.model = model;
|
|
@@ -559610,7 +560779,7 @@ var init_listen = __esm({
|
|
|
559610
560779
|
this._ready = false;
|
|
559611
560780
|
}
|
|
559612
560781
|
};
|
|
559613
|
-
ListenEngine = class extends
|
|
560782
|
+
ListenEngine = class extends EventEmitter6 {
|
|
559614
560783
|
config;
|
|
559615
560784
|
micProcess = null;
|
|
559616
560785
|
liveTranscriber = null;
|
|
@@ -562355,7 +563524,7 @@ var require_extension3 = __commonJS({
|
|
|
562355
563524
|
var require_websocket3 = __commonJS({
|
|
562356
563525
|
"node_modules/.pnpm/ws@8.20.1/node_modules/ws/lib/websocket.js"(exports, module) {
|
|
562357
563526
|
"use strict";
|
|
562358
|
-
var
|
|
563527
|
+
var EventEmitter15 = __require("events");
|
|
562359
563528
|
var https4 = __require("https");
|
|
562360
563529
|
var http6 = __require("http");
|
|
562361
563530
|
var net5 = __require("net");
|
|
@@ -562387,7 +563556,7 @@ var require_websocket3 = __commonJS({
|
|
|
562387
563556
|
var protocolVersions = [8, 13];
|
|
562388
563557
|
var readyStates = ["CONNECTING", "OPEN", "CLOSING", "CLOSED"];
|
|
562389
563558
|
var subprotocolRegex = /^[!#$%&'*+\-.0-9A-Z^_`|a-z~]+$/;
|
|
562390
|
-
var WebSocket6 = class _WebSocket extends
|
|
563559
|
+
var WebSocket6 = class _WebSocket extends EventEmitter15 {
|
|
562391
563560
|
/**
|
|
562392
563561
|
* Create a new `WebSocket`.
|
|
562393
563562
|
*
|
|
@@ -563384,7 +564553,7 @@ var require_subprotocol2 = __commonJS({
|
|
|
563384
564553
|
var require_websocket_server2 = __commonJS({
|
|
563385
564554
|
"node_modules/.pnpm/ws@8.20.1/node_modules/ws/lib/websocket-server.js"(exports, module) {
|
|
563386
564555
|
"use strict";
|
|
563387
|
-
var
|
|
564556
|
+
var EventEmitter15 = __require("events");
|
|
563388
564557
|
var http6 = __require("http");
|
|
563389
564558
|
var { Duplex: Duplex3 } = __require("stream");
|
|
563390
564559
|
var { createHash: createHash31 } = __require("crypto");
|
|
@@ -563397,7 +564566,7 @@ var require_websocket_server2 = __commonJS({
|
|
|
563397
564566
|
var RUNNING = 0;
|
|
563398
564567
|
var CLOSING = 1;
|
|
563399
564568
|
var CLOSED = 2;
|
|
563400
|
-
var WebSocketServer4 = class extends
|
|
564569
|
+
var WebSocketServer4 = class extends EventEmitter15 {
|
|
563401
564570
|
/**
|
|
563402
564571
|
* Create a `WebSocketServer` instance.
|
|
563403
564572
|
*
|
|
@@ -564285,6 +565454,9 @@ var init_command_registry = __esm({
|
|
|
564285
565454
|
["/wizard", "Alias for /setup"],
|
|
564286
565455
|
["/parallel", "Show current Ollama parallel inference slots"],
|
|
564287
565456
|
["/parallel <1-15>", "Set parallel slots (restarts Ollama, max 15)"],
|
|
565457
|
+
["/broker", "Show resource broker status — loaded models, in-flight loads, RAM/VRAM headroom"],
|
|
565458
|
+
["/broker evict <host>:<name>", "Force eviction of a tracked model (e.g. /broker evict ollama:moondream)"],
|
|
565459
|
+
["/broker threshold <ram|vram|idle> <v>", "Tune broker thresholds (RAM/VRAM in MB, idle in seconds)"],
|
|
564288
565460
|
["/ollama cleanup", "Dry-run stale Ollama pool process cleanup"],
|
|
564289
565461
|
["/ollama cleanup --execute", "Terminate guarded stale Ollama pool runners"],
|
|
564290
565462
|
["/mcp", "Show MCP server/tool status and controls"],
|
|
@@ -564554,6 +565726,7 @@ var init_command_registry = __esm({
|
|
|
564554
565726
|
endpoint: "network",
|
|
564555
565727
|
provider: "network",
|
|
564556
565728
|
parallel: "runtime",
|
|
565729
|
+
broker: "runtime",
|
|
564557
565730
|
setup: "runtime",
|
|
564558
565731
|
wizard: "runtime",
|
|
564559
565732
|
mcp: "tools",
|
|
@@ -564695,6 +565868,7 @@ var init_command_registry = __esm({
|
|
|
564695
565868
|
"model",
|
|
564696
565869
|
"endpoint",
|
|
564697
565870
|
"parallel",
|
|
565871
|
+
"broker",
|
|
564698
565872
|
"commands",
|
|
564699
565873
|
"cmds",
|
|
564700
565874
|
"selfmodify",
|
|
@@ -565588,7 +566762,7 @@ var init_task_complete_box = __esm({
|
|
|
565588
566762
|
});
|
|
565589
566763
|
|
|
565590
566764
|
// packages/cli/src/tui/model-picker.ts
|
|
565591
|
-
import { totalmem as
|
|
566765
|
+
import { totalmem as totalmem4 } from "node:os";
|
|
565592
566766
|
function isImageGenModel(name10, family) {
|
|
565593
566767
|
return IMAGE_GEN_PATTERNS.some((p2) => p2.test(name10) || family && p2.test(family));
|
|
565594
566768
|
}
|
|
@@ -565977,7 +567151,7 @@ async function queryModelContextSize(baseUrl, modelName) {
|
|
|
565977
567151
|
}
|
|
565978
567152
|
}
|
|
565979
567153
|
function estimateRealisticContext(kvBytesPerToken, archMax, modelSizeGB2) {
|
|
565980
|
-
const totalMemGB =
|
|
567154
|
+
const totalMemGB = totalmem4() / 1024 ** 3;
|
|
565981
567155
|
const usableBytes = totalMemGB * 0.7 * 1024 ** 3;
|
|
565982
567156
|
const maxTokens = Math.floor(usableBytes / kvBytesPerToken);
|
|
565983
567157
|
let numCtx = Math.max(2048, Math.floor(maxTokens / 1024) * 1024);
|
|
@@ -567316,7 +568490,7 @@ var init_render = __esm({
|
|
|
567316
568490
|
// packages/cli/src/tui/voice-session.ts
|
|
567317
568491
|
import { createServer as createServer4 } from "node:http";
|
|
567318
568492
|
import { spawn as spawn25, execSync as execSync49 } from "node:child_process";
|
|
567319
|
-
import { EventEmitter as
|
|
568493
|
+
import { EventEmitter as EventEmitter7 } from "node:events";
|
|
567320
568494
|
function generateFrontendHTML() {
|
|
567321
568495
|
return `<!DOCTYPE html>
|
|
567322
568496
|
<html lang="en">
|
|
@@ -568051,7 +569225,7 @@ var init_voice_session = __esm({
|
|
|
568051
569225
|
init_wrapper2();
|
|
568052
569226
|
init_render();
|
|
568053
569227
|
init_typed_node_events();
|
|
568054
|
-
VoiceSession = class extends
|
|
569228
|
+
VoiceSession = class extends EventEmitter7 {
|
|
568055
569229
|
state;
|
|
568056
569230
|
server = null;
|
|
568057
569231
|
wss = null;
|
|
@@ -569166,11 +570340,11 @@ var init_voice_soul = __esm({
|
|
|
569166
570340
|
|
|
569167
570341
|
// packages/cli/src/tui/expose.ts
|
|
569168
570342
|
import { createServer as createServer5, request as httpRequest } from "node:http";
|
|
569169
|
-
import { spawn as spawn26, exec as
|
|
569170
|
-
import { EventEmitter as
|
|
570343
|
+
import { spawn as spawn26, exec as exec3 } from "node:child_process";
|
|
570344
|
+
import { EventEmitter as EventEmitter8 } from "node:events";
|
|
569171
570345
|
import { randomBytes as randomBytes19, timingSafeEqual } from "node:crypto";
|
|
569172
570346
|
import { URL as URL2 } from "node:url";
|
|
569173
|
-
import { loadavg, cpus as cpus2, totalmem as
|
|
570347
|
+
import { loadavg, cpus as cpus2, totalmem as totalmem5, freemem as freemem4 } from "node:os";
|
|
569174
570348
|
import { existsSync as existsSync88, readFileSync as readFileSync70, writeFileSync as writeFileSync44, unlinkSync as unlinkSync14, mkdirSync as mkdirSync48, readdirSync as readdirSync29, statSync as statSync34, statfsSync as statfsSync4 } from "node:fs";
|
|
569175
570349
|
import { join as join103 } from "node:path";
|
|
569176
570350
|
function cleanForwardHeaders(raw, targetHost) {
|
|
@@ -569276,8 +570450,8 @@ function parseRateLimitHeaders(headers) {
|
|
|
569276
570450
|
async function collectSystemMetricsAsync() {
|
|
569277
570451
|
const [l1, l5, l15] = loadavg();
|
|
569278
570452
|
const cores = cpus2().length;
|
|
569279
|
-
const totalMem =
|
|
569280
|
-
const freeMem =
|
|
570453
|
+
const totalMem = totalmem5();
|
|
570454
|
+
const freeMem = freemem4();
|
|
569281
570455
|
const usedMem = totalMem - freeMem;
|
|
569282
570456
|
let disk = {
|
|
569283
570457
|
path: process.cwd(),
|
|
@@ -569310,7 +570484,7 @@ async function collectSystemMetricsAsync() {
|
|
|
569310
570484
|
};
|
|
569311
570485
|
try {
|
|
569312
570486
|
const smi = await new Promise((resolve55, reject) => {
|
|
569313
|
-
|
|
570487
|
+
exec3(
|
|
569314
570488
|
"nvidia-smi --query-gpu=utilization.gpu,memory.used,memory.total,name --format=csv,noheader,nounits 2>/dev/null",
|
|
569315
570489
|
{ encoding: "utf8", timeout: 3e3 },
|
|
569316
570490
|
(err, stdout) => err ? reject(err) : resolve55(stdout)
|
|
@@ -569398,7 +570572,7 @@ var init_expose = __esm({
|
|
|
569398
570572
|
custom: "http://127.0.0.1:11434"
|
|
569399
570573
|
};
|
|
569400
570574
|
STATE_FILE_NAME = "expose-state.json";
|
|
569401
|
-
ExposeGateway = class _ExposeGateway extends
|
|
570575
|
+
ExposeGateway = class _ExposeGateway extends EventEmitter8 {
|
|
569402
570576
|
constructor(options2) {
|
|
569403
570577
|
super();
|
|
569404
570578
|
this.options = options2;
|
|
@@ -570277,7 +571451,7 @@ ${this.formatConnectionInfo()}`);
|
|
|
570277
571451
|
}
|
|
570278
571452
|
};
|
|
570279
571453
|
P2P_STATE_FILE_NAME = "expose-p2p-state.json";
|
|
570280
|
-
ExposeP2PGateway = class _ExposeP2PGateway extends
|
|
571454
|
+
ExposeP2PGateway = class _ExposeP2PGateway extends EventEmitter8 {
|
|
570281
571455
|
_nexusTool;
|
|
570282
571456
|
// NexusTool instance
|
|
570283
571457
|
_kind;
|
|
@@ -571048,7 +572222,7 @@ var init_secret_vault = __esm({
|
|
|
571048
572222
|
});
|
|
571049
572223
|
|
|
571050
572224
|
// packages/cli/src/tui/p2p/peer-mesh.ts
|
|
571051
|
-
import { EventEmitter as
|
|
572225
|
+
import { EventEmitter as EventEmitter9 } from "node:events";
|
|
571052
572226
|
import { createServer as createServer6 } from "node:http";
|
|
571053
572227
|
import { randomBytes as randomBytes21, createHash as createHash21, generateKeyPairSync } from "node:crypto";
|
|
571054
572228
|
var PING_INTERVAL_MS, PEER_TIMEOUT_MS, GOSSIP_INTERVAL_MS, MAX_PEERS, PeerMesh;
|
|
@@ -571060,7 +572234,7 @@ var init_peer_mesh = __esm({
|
|
|
571060
572234
|
PEER_TIMEOUT_MS = 9e4;
|
|
571061
572235
|
GOSSIP_INTERVAL_MS = 6e4;
|
|
571062
572236
|
MAX_PEERS = 50;
|
|
571063
|
-
PeerMesh = class extends
|
|
572237
|
+
PeerMesh = class extends EventEmitter9 {
|
|
571064
572238
|
constructor(options2) {
|
|
571065
572239
|
super();
|
|
571066
572240
|
this.options = options2;
|
|
@@ -571506,7 +572680,7 @@ var init_peer_mesh = __esm({
|
|
|
571506
572680
|
});
|
|
571507
572681
|
|
|
571508
572682
|
// packages/cli/src/tui/p2p/inference-router.ts
|
|
571509
|
-
import { EventEmitter as
|
|
572683
|
+
import { EventEmitter as EventEmitter10 } from "node:events";
|
|
571510
572684
|
var TRUST_WEIGHTS, InferenceRouter;
|
|
571511
572685
|
var init_inference_router = __esm({
|
|
571512
572686
|
"packages/cli/src/tui/p2p/inference-router.ts"() {
|
|
@@ -571518,7 +572692,7 @@ var init_inference_router = __esm({
|
|
|
571518
572692
|
verified: 5,
|
|
571519
572693
|
public: 2
|
|
571520
572694
|
};
|
|
571521
|
-
InferenceRouter = class extends
|
|
572695
|
+
InferenceRouter = class extends EventEmitter10 {
|
|
571522
572696
|
mesh;
|
|
571523
572697
|
vault;
|
|
571524
572698
|
defaultTimeoutMs;
|
|
@@ -571716,7 +572890,7 @@ var init_p2p = __esm({
|
|
|
571716
572890
|
});
|
|
571717
572891
|
|
|
571718
572892
|
// packages/cli/src/tui/call-agent.ts
|
|
571719
|
-
import { EventEmitter as
|
|
572893
|
+
import { EventEmitter as EventEmitter11 } from "node:events";
|
|
571720
572894
|
import crypto13 from "node:crypto";
|
|
571721
572895
|
function adaptTool(tool) {
|
|
571722
572896
|
return {
|
|
@@ -571774,7 +572948,7 @@ var init_call_agent = __esm({
|
|
|
571774
572948
|
}
|
|
571775
572949
|
};
|
|
571776
572950
|
_globalFeed = null;
|
|
571777
|
-
CallSubAgent = class extends
|
|
572951
|
+
CallSubAgent = class extends EventEmitter11 {
|
|
571778
572952
|
tier;
|
|
571779
572953
|
clientId;
|
|
571780
572954
|
runner = null;
|
|
@@ -573876,8 +575050,8 @@ __export(system_metrics_exports, {
|
|
|
573876
575050
|
getInstantSnapshot: () => getInstantSnapshot,
|
|
573877
575051
|
instantaneousCpuPct: () => instantaneousCpuPct
|
|
573878
575052
|
});
|
|
573879
|
-
import { loadavg as loadavg2, cpus as cpus3, totalmem as
|
|
573880
|
-
import { exec as
|
|
575053
|
+
import { loadavg as loadavg2, cpus as cpus3, totalmem as totalmem6, freemem as freemem5, platform as platform4 } from "node:os";
|
|
575054
|
+
import { exec as exec4 } from "node:child_process";
|
|
573881
575055
|
import { readFile as readFile22 } from "node:fs/promises";
|
|
573882
575056
|
function formatRate(bytesPerSec) {
|
|
573883
575057
|
if (bytesPerSec < 1024) return `${Math.round(bytesPerSec)}B`;
|
|
@@ -573924,7 +575098,7 @@ async function collectNetworkMetrics() {
|
|
|
573924
575098
|
if (plat === "darwin") {
|
|
573925
575099
|
try {
|
|
573926
575100
|
const output = await new Promise((resolve55, reject) => {
|
|
573927
|
-
|
|
575101
|
+
exec4("netstat -ib 2>/dev/null | head -30", { encoding: "utf8", timeout: 3e3 }, (err, stdout) => err ? reject(err) : resolve55(stdout));
|
|
573928
575102
|
});
|
|
573929
575103
|
let rxBytes = 0, txBytes = 0;
|
|
573930
575104
|
for (const line of output.split("\n")) {
|
|
@@ -573967,7 +575141,7 @@ async function collectGpuMetrics() {
|
|
|
573967
575141
|
if (_nvidiaSmiAvailable2 === false) return noGpu;
|
|
573968
575142
|
try {
|
|
573969
575143
|
const smi = await new Promise((resolve55, reject) => {
|
|
573970
|
-
|
|
575144
|
+
exec4(
|
|
573971
575145
|
"nvidia-smi --query-gpu=index,uuid,utilization.gpu,memory.used,memory.total,name --format=csv,noheader,nounits 2>/dev/null",
|
|
573972
575146
|
{ encoding: "utf8", timeout: 3e3 },
|
|
573973
575147
|
(err, stdout) => err ? reject(err) : resolve55(stdout)
|
|
@@ -574066,8 +575240,8 @@ function instantaneousCpuPct() {
|
|
|
574066
575240
|
function collectCpuRam() {
|
|
574067
575241
|
const cores = cpus3().length;
|
|
574068
575242
|
const cpuModel = cpus3()[0]?.model ?? "";
|
|
574069
|
-
const totalMem =
|
|
574070
|
-
const usedMem = totalMem -
|
|
575243
|
+
const totalMem = totalmem6();
|
|
575244
|
+
const usedMem = totalMem - freemem5();
|
|
574071
575245
|
let cpuUtil = instantaneousCpuPct();
|
|
574072
575246
|
if (cpuUtil < 0) {
|
|
574073
575247
|
const [l1] = loadavg2();
|
|
@@ -574862,6 +576036,7 @@ var init_status_bar = __esm({
|
|
|
574862
576036
|
init_text_selection();
|
|
574863
576037
|
init_daemon_registry();
|
|
574864
576038
|
init_overlay_lock();
|
|
576039
|
+
init_dist5();
|
|
574865
576040
|
init_theme();
|
|
574866
576041
|
init_layout2();
|
|
574867
576042
|
EXPERT_TOOL_BASELINES = {
|
|
@@ -576073,6 +577248,10 @@ var init_status_bar = __esm({
|
|
|
576073
577248
|
this._unifiedMetrics = m2;
|
|
576074
577249
|
if (this.active) this.renderFooterPreserveCursor();
|
|
576075
577250
|
}, intervalMs);
|
|
577251
|
+
try {
|
|
577252
|
+
getModelBroker().startPolling(Math.max(2e3, intervalMs * 2));
|
|
577253
|
+
} catch {
|
|
577254
|
+
}
|
|
576076
577255
|
}
|
|
576077
577256
|
/** Stop all metrics collection (local and remote) */
|
|
576078
577257
|
stopAllMetrics() {
|
|
@@ -580498,7 +581677,7 @@ __export(setup_exports, {
|
|
|
580498
581677
|
updateOllama: () => updateOllama
|
|
580499
581678
|
});
|
|
580500
581679
|
import * as readline from "node:readline";
|
|
580501
|
-
import { execSync as execSync51, spawn as spawn28, exec as
|
|
581680
|
+
import { execSync as execSync51, spawn as spawn28, exec as exec5 } from "node:child_process";
|
|
580502
581681
|
import { promisify as promisify6 } from "node:util";
|
|
580503
581682
|
import { existsSync as existsSync95, writeFileSync as writeFileSync49, readFileSync as readFileSync78, appendFileSync as appendFileSync6, mkdirSync as mkdirSync53 } from "node:fs";
|
|
580504
581683
|
import { join as join111 } from "node:path";
|
|
@@ -583250,7 +584429,7 @@ var init_setup = __esm({
|
|
|
583250
584429
|
init_dist();
|
|
583251
584430
|
init_tui_select();
|
|
583252
584431
|
init_listen();
|
|
583253
|
-
execAsync2 = promisify6(
|
|
584432
|
+
execAsync2 = promisify6(exec5);
|
|
583254
584433
|
OMNIUS_FIRST_RUN_BANNER = [
|
|
583255
584434
|
" ░▒▓██████▓▒░░▒▓██████████████▓▒░░▒▓███████▓▒░░▒▓█▓▒░▒▓█▓▒░░▒▓█▓▒░░▒▓███████▓▒░ ",
|
|
583256
584435
|
"░▒▓█▓▒░░▒▓█▓▒░▒▓█▓▒░░▒▓█▓▒░░▒▓█▓▒░▒▓█▓▒░░▒▓█▓▒░▒▓█▓▒░▒▓█▓▒░░▒▓█▓▒░▒▓█▓▒░ ",
|
|
@@ -595704,6 +596883,9 @@ async function handleSlashCommand(input, ctx3) {
|
|
|
595704
596883
|
case "parallel":
|
|
595705
596884
|
await handleParallel(arg, ctx3);
|
|
595706
596885
|
return "handled";
|
|
596886
|
+
case "broker":
|
|
596887
|
+
await handleBroker(arg, ctx3);
|
|
596888
|
+
return "handled";
|
|
595707
596889
|
case "ollama":
|
|
595708
596890
|
await handleOllama(arg, ctx3);
|
|
595709
596891
|
return "handled";
|
|
@@ -603161,6 +604343,83 @@ async function handlePeerEndpoint(peerId, authKey, ctx3, local) {
|
|
|
603161
604343
|
);
|
|
603162
604344
|
}
|
|
603163
604345
|
}
|
|
604346
|
+
/**
 * Handle the `/broker` slash command.
 *
 * Subcommands:
 *   (none) | status | list | ls         print RAM/VRAM, thresholds, tracked models, in-flight loads
 *   evict <host>:<name>                 ask the broker to evict one model
 *   threshold <ram|vram|idle> <value>   ram/vram in MB, idle in seconds
 *
 * @param {string} arg  Raw argument text that followed `/broker`.
 * @param {object} _ctx Slash-command context (not used by this handler).
 * @returns {Promise<void>}
 */
async function handleBroker(arg, _ctx) {
  const broker = getModelBroker();
  const sub = (arg || "").trim().toLowerCase();
  // NOTE(review): only the status view reads `snap`; the poll is kept
  // unconditional in case pollOnce() refreshes state that evict relies on.
  const snap = await broker.pollOnce();
  if (!sub || sub === "status" || sub === "list" || sub === "ls") {
    safeLog("");
    safeLog(` ${c3.bold("Resource Broker")}`);
    safeLog("");
    safeLog(` ${c3.dim("RAM:")} ${snap.ramMB.used} / ${snap.ramMB.total} MB used (${snap.ramMB.free} MB free)`);
    if (snap.vramMB) {
      safeLog(` ${c3.dim("VRAM:")} ${snap.vramMB.used} / ${snap.vramMB.total} MB used (${snap.vramMB.free} MB free)`);
    } else {
      safeLog(` ${c3.dim("VRAM:")} ${c3.dim("(no GPU detected)")}`);
    }
    safeLog(` ${c3.dim("RAM headroom threshold:")} ${broker.ramHeadroomMB} MB`);
    safeLog(` ${c3.dim("VRAM headroom threshold:")} ${broker.vramHeadroomMB} MB`);
    safeLog(` ${c3.dim("Idle-evict threshold:")} ${Math.round(broker.idleEvictMs / 1e3)}s`);
    safeLog("");
    if (snap.loaded.length === 0) {
      safeLog(` ${c3.dim("No loaded models tracked.")}`);
    } else {
      safeLog(` ${c3.bold("Loaded models:")}`);
      const now = Date.now();
      for (const m2 of snap.loaded) {
        const idle = Math.round((now - m2.lastUsedAt) / 1e3);
        const owner = m2.owner ? c3.dim(` [owner=${m2.owner}]`) : "";
        const ctx3 = m2.numCtx ? c3.dim(` n_ctx=${m2.numCtx}`) : "";
        safeLog(` ${c3.cyan(m2.name)} (${m2.host}/${m2.domain}) vram=${m2.vramMB}MB ram=${m2.ramMB}MB idle=${idle}s${ctx3}${owner}`);
      }
    }
    if (snap.inflight.length > 0) {
      safeLog("");
      safeLog(` ${c3.bold("In-flight loads:")}`);
      for (const f2 of snap.inflight) {
        const age = Math.round((Date.now() - f2.startedMs) / 1e3);
        safeLog(` ${c3.yellow(f2.key)} owner=${f2.owner} ${age}s ago`);
      }
    }
    safeLog("");
    safeLog(` ${c3.dim("Subcommands: /broker [status|evict <key>|threshold ram|vram|idle <value>]")}`);
    safeLog("");
    return;
  }
  const parts = (arg || "").trim().split(/\s+/);
  const op = parts[0]?.toLowerCase();
  if (op === "evict") {
    const key = parts[1] ?? "";
    // Split on the FIRST colon only: model names may themselves contain ":"
    // (the usage example is host:name, and the remainder is kept intact).
    const sep = key.indexOf(":");
    const host = sep > 0 ? key.slice(0, sep) : "";
    const name10 = sep > 0 ? key.slice(sep + 1) : "";
    // Reject "host:" and ":name" as well as keys without a colon, so the
    // broker is never asked to evict an empty host or model name.
    if (!host || !name10) {
      renderWarning("Usage: /broker evict <host>:<name> e.g. /broker evict ollama:moondream");
      return;
    }
    const ok3 = await broker.evict(host, name10, "user-requested");
    if (ok3) renderInfo(`Evicted ${key} (actively unloaded)`);
    else renderInfo(`Unregistered ${key} (could not actively unload; subprocess may need manual cleanup)`);
    return;
  }
  if (op === "threshold") {
    const which3 = parts[1]?.toLowerCase();
    const value2 = parts[2] ? Number(parts[2]) : NaN;
    if (!which3 || !Number.isFinite(value2) || value2 < 0) {
      renderWarning("Usage: /broker threshold <ram|vram|idle> <value> (ram/vram in MB, idle in seconds)");
      return;
    }
    if (which3 === "ram") broker.ramHeadroomMB = value2;
    else if (which3 === "vram") broker.vramHeadroomMB = value2;
    // The idle threshold is entered in seconds but stored in milliseconds.
    else if (which3 === "idle") broker.idleEvictMs = value2 * 1e3;
    else {
      renderWarning("Unknown threshold; use ram|vram|idle");
      return;
    }
    renderInfo(`Updated broker threshold ${which3} = ${value2}`);
    return;
  }
  renderWarning("Unknown /broker subcommand. Try: status | evict <host>:<name> | threshold <ram|vram|idle> <value>");
}
|
|
603164
604423
|
async function handleParallel(arg, ctx3) {
|
|
603165
604424
|
const { execSync: execSync61 } = await import("node:child_process");
|
|
603166
604425
|
const baseUrl = ctx3.config.backendUrl || "http://localhost:11434";
|
|
@@ -604173,9 +605432,9 @@ async function handleUpdate(subcommand, ctx3) {
|
|
|
604173
605432
|
}
|
|
604174
605433
|
};
|
|
604175
605434
|
}
|
|
604176
|
-
const { exec:
|
|
605435
|
+
const { exec: exec6, spawn: spawn34, execSync: es2 } = await import("node:child_process");
|
|
604177
605436
|
const execA = (cmd, opts) => new Promise(
|
|
604178
|
-
(res, rej) =>
|
|
605437
|
+
(res, rej) => exec6(
|
|
604179
605438
|
cmd,
|
|
604180
605439
|
{
|
|
604181
605440
|
encoding: "utf8",
|
|
@@ -604869,7 +606128,7 @@ async function handleUpdate(subcommand, ctx3) {
|
|
|
604869
606128
|
installOverlay.setPhase("Native Modules");
|
|
604870
606129
|
installOverlay.setStatus("Rebuilding native modules...");
|
|
604871
606130
|
await new Promise((resolve55) => {
|
|
604872
|
-
const child =
|
|
606131
|
+
const child = exec6(
|
|
604873
606132
|
`${sudoPrefix}npm rebuild -g omnius 2>/dev/null || true`,
|
|
604874
606133
|
{ timeout: 12e4 },
|
|
604875
606134
|
() => resolve55(true)
|
|
@@ -604911,7 +606170,7 @@ async function handleUpdate(subcommand, ctx3) {
|
|
|
604911
606170
|
if (fsExists(venvPip2)) {
|
|
604912
606171
|
installOverlay.setStatus("Upgrading Python packages...");
|
|
604913
606172
|
await new Promise((resolve55) => {
|
|
604914
|
-
const child =
|
|
606173
|
+
const child = exec6(
|
|
604915
606174
|
`"${venvPip2}" install --upgrade moondream-station pytesseract Pillow opencv-python-headless numpy 2>/dev/null || true`,
|
|
604916
606175
|
{ timeout: 3e5 },
|
|
604917
606176
|
(err) => resolve55(!err)
|
|
@@ -610886,6 +612145,9 @@ var init_bless_engine = __esm({
|
|
|
610886
612145
|
async pingModel() {
|
|
610887
612146
|
try {
|
|
610888
612147
|
const url = `${this.config.backendUrl}/api/chat`;
|
|
612148
|
+
const broker = getModelBroker();
|
|
612149
|
+
const trainCtx = await broker.getNctxTrain(this.config.model);
|
|
612150
|
+
const numCtx = trainCtx && trainCtx > 0 ? Math.min(trainCtx, 8192) : void 0;
|
|
610889
612151
|
await fetch(url, {
|
|
610890
612152
|
method: "POST",
|
|
610891
612153
|
headers: { "Content-Type": "application/json" },
|
|
@@ -610893,11 +612155,12 @@ var init_bless_engine = __esm({
|
|
|
610893
612155
|
model: this.config.model,
|
|
610894
612156
|
messages: [{ role: "user", content: "." }],
|
|
610895
612157
|
stream: false,
|
|
610896
|
-
options: { num_predict: 1 },
|
|
612158
|
+
options: numCtx ? { num_predict: 1, num_ctx: numCtx } : { num_predict: 1 },
|
|
610897
612159
|
keep_alive: "30m"
|
|
610898
612160
|
}),
|
|
610899
612161
|
signal: AbortSignal.timeout(15e3)
|
|
610900
612162
|
});
|
|
612163
|
+
broker.touch("ollama", this.config.model);
|
|
610901
612164
|
this.state.keepAlivePings++;
|
|
610902
612165
|
} catch {
|
|
610903
612166
|
}
|
|
@@ -614240,6 +615503,300 @@ var init_stimulation = __esm({
|
|
|
614240
615503
|
}
|
|
614241
615504
|
});
|
|
614242
615505
|
|
|
615506
|
+
// packages/cli/src/tui/pid-controller.ts
|
|
615507
|
+
/**
 * Clamp a number into the closed interval [0, 1].
 * Non-finite input (NaN, ±Infinity) maps to 0.
 *
 * @param {number} x
 * @returns {number}
 */
function clamp018(x) {
  if (!Number.isFinite(x) || x < 0) return 0;
  return x > 1 ? 1 : x;
}
|
|
615513
|
+
/**
 * Return the shared PidRegistry, creating it on first use.
 *
 * @returns {PidRegistry}
 */
function getPidRegistry() {
  return _registry2 || (_registry2 = new PidRegistry());
}
|
|
615517
|
+
var DEFAULT_PID_CONFIG, PidRegistry, _registry2;
var init_pid_controller = __esm({
  "packages/cli/src/tui/pid-controller.ts"() {
    "use strict";
    // Defaults applied to every controller unless overridden on creation.
    DEFAULT_PID_CONFIG = {
      kp: 1e-4,
      ki: 1e-5,
      kd: 0,
      // Target latency in ms; error = setpointMs - smoothed latency.
      setpointMs: 8e3,
      initialOutput: 1,
      // Weight of the newest sample in the latency EMA.
      pvEmaAlpha: 0.3,
      integralClamp: 5e3
      // ms*s — bounds integral term contribution
      // NOTE(review): sample() accumulates the raw error without multiplying
      // by dt, so the stored integral is in ms, not ms*s — confirm intent.
    };
    /**
     * Keyed collection of latency controllers. Each controller keeps an
     * EMA-smoothed latency (`pv`) and nudges an output in [0, 1] toward the
     * configured latency setpoint.
     */
    PidRegistry = class {
      _controllers = /* @__PURE__ */ new Map();
      /**
       * Get or create a controller.
       * `configOverride` is only applied when the controller is created;
       * it is ignored for a key that already exists.
       */
      get(key, configOverride) {
        let st = this._controllers.get(key);
        if (!st) {
          const config = { ...DEFAULT_PID_CONFIG, ...configOverride ?? {} };
          st = {
            output: config.initialOutput,
            pv: config.setpointMs,
            // assume on-target at startup
            integral: 0,
            lastError: 0,
            lastSampleAt: 0,
            samples: 0,
            config
          };
          this._controllers.set(key, st);
        }
        return st;
      }
      /** Read-only current output (inclusion ratio in [0,1]). */
      output(key) {
        return this._controllers.get(key)?.output ?? DEFAULT_PID_CONFIG.initialOutput;
      }
      /**
       * Record a new latency sample (in ms) and update the controller.
       * Non-finite samples (NaN, ±Infinity) are ignored and leave the
       * controller untouched.
       * Returns the new (or, for an ignored sample, the current) output value.
       */
      sample(key, latencyMs, configOverride) {
        const st = this.get(key, configOverride);
        // A single NaN/Infinity would make the EMA non-finite forever, and
        // clamp018() maps a non-finite sum to 0 — permanently pinning the
        // output at 0. Drop such samples instead.
        if (!Number.isFinite(latencyMs)) return st.output;
        const now = Date.now();
        // The first real sample replaces the startup assumption outright.
        st.pv = st.samples === 0 ? latencyMs : st.pv * (1 - st.config.pvEmaAlpha) + latencyMs * st.config.pvEmaAlpha;
        const error = st.config.setpointMs - st.pv;
        st.integral += error;
        if (st.integral > st.config.integralClamp) st.integral = st.config.integralClamp;
        if (st.integral < -st.config.integralClamp) st.integral = -st.config.integralClamp;
        // With no previous sample, assume a 1s interval for the derivative.
        const dt = st.lastSampleAt > 0 ? now - st.lastSampleAt : 1e3;
        const derivative = dt > 0 ? (error - st.lastError) / dt : 0;
        const u = st.config.kp * error + st.config.ki * st.integral + st.config.kd * derivative;
        // The correction is added to the previous output, not used directly.
        st.output = clamp018(st.output + u);
        st.lastError = error;
        st.lastSampleAt = now;
        st.samples += 1;
        return st.output;
      }
      /** All controller snapshots — for /broker debug surface. */
      snapshot() {
        return [...this._controllers.entries()].map(([key, st]) => ({
          key,
          output: st.output,
          pv: st.pv,
          setpoint: st.config.setpointMs,
          samples: st.samples
        }));
      }
      /** Reset (test-only). */
      reset() {
        this._controllers.clear();
      }
    };
    _registry2 = null;
  }
});
|
|
615595
|
+
|
|
615596
|
+
// packages/cli/src/tui/component-benefit.ts
|
|
615597
|
+
/**
 * Return the shared ComponentBenefitRegistry, creating it on first use.
 *
 * @returns {ComponentBenefitRegistry}
 */
function getComponentBenefitRegistry() {
  return _registry3 || (_registry3 = new ComponentBenefitRegistry());
}
|
|
615601
|
+
var EMA_ALPHA, MIN_SAMPLES_TO_TRUST, ComponentBenefitRegistry, _registry3;
var init_component_benefit = __esm({
  "packages/cli/src/tui/component-benefit.ts"() {
    "use strict";
    // Weight of the newest outcome in each component's score EMA.
    EMA_ALPHA = 0.2;
    // Below this many recorded outcomes, score() reports the neutral 0.5.
    MIN_SAMPLES_TO_TRUST = 3;
    /**
     * Tracks, per chat and per component, how often a component's "needle"
     * text shows up in the decision text (an EMA of hit = 1 / miss = 0).
     */
    ComponentBenefitRegistry = class {
      /** Two-tier map: chatKey → componentKey → state. */
      _byChat = /* @__PURE__ */ new Map();
      /** Score for a component in a chat. Returns 0.5 (neutral) when not enough samples. */
      score(chatKey, componentKey) {
        const st = this._byChat.get(chatKey)?.get(componentKey);
        if (!st || st.samples < MIN_SAMPLES_TO_TRUST) return 0.5;
        return st.score;
      }
      /**
       * Record one batch — for each sampled component, did the decision text reference its needle?
       * Matching is case-insensitive; needles shorter than 3 characters never
       * count as a hit. A missing decision text or a missing/non-string needle
       * is treated as empty, i.e. recorded as a miss rather than throwing.
       */
      recordOutcome(chatKey, samples, decisionText) {
        const haystack = String(decisionText ?? "").toLowerCase();
        let map2 = this._byChat.get(chatKey);
        if (!map2) {
          map2 = /* @__PURE__ */ new Map();
          this._byChat.set(chatKey, map2);
        }
        const now = Date.now();
        for (const sample of samples) {
          const needle = typeof sample.needle === "string" ? sample.needle.toLowerCase() : "";
          const hit = needle.length >= 3 && haystack.includes(needle);
          let st = map2.get(sample.key);
          if (!st) {
            st = { score: 0.5, samples: 0, hits: 0, lastSeenAt: now };
            map2.set(sample.key, st);
          }
          const newScore = hit ? 1 : 0;
          // The first outcome replaces the neutral prior; later ones blend in.
          st.score = st.samples === 0 ? newScore : st.score * (1 - EMA_ALPHA) + newScore * EMA_ALPHA;
          st.samples += 1;
          st.hits += hit ? 1 : 0;
          st.lastSeenAt = now;
        }
      }
      /** Snapshot for /broker debug surface. With a chatKey, only that chat is listed. */
      snapshot(chatKey) {
        const out = [];
        const iterate = chatKey ? [[chatKey, this._byChat.get(chatKey)]].filter((e2) => !!e2[1]) : [...this._byChat.entries()];
        for (const [cKey, map2] of iterate) {
          for (const [comp, st] of map2) {
            out.push({ chatKey: cKey, componentKey: comp, score: st.score, samples: st.samples, hits: st.hits });
          }
        }
        return out;
      }
      /** Drop all recorded state. */
      reset() {
        this._byChat.clear();
      }
    };
    _registry3 = null;
  }
});
|
|
615658
|
+
|
|
615659
|
+
// packages/cli/src/tui/soul-observations.ts
|
|
615660
|
+
/**
 * Return the shared SoulObservationStream. The first call creates it and
 * hands it to subscribeBrokerEvents().
 *
 * @returns {SoulObservationStream}
 */
function getSoulObservationStream() {
  if (_stream) return _stream;
  _stream = new SoulObservationStream();
  subscribeBrokerEvents(_stream);
  return _stream;
}
|
|
615667
|
+
/**
 * Forward model-broker events into the observation stream. Subscribes at
 * most once per process; later calls are no-ops.
 *
 *   slotReleased → "inference.completed" (outcome.ok) or "inference.degraded"
 *   pressure     → "broker.pressure"
 *   evicted      → "model.evicted"
 *
 * @param {{ emit: (event: object) => void }} stream Target observation stream.
 */
function subscribeBrokerEvents(stream) {
  if (_brokerSubscribed) return;
  // Obtain the broker BEFORE marking the subscription done: if this throws,
  // a later call can retry instead of being silently disabled forever.
  const broker = getModelBroker();
  _brokerSubscribed = true;
  broker.on("slotReleased", (info, outcome) => {
    if (outcome.ok) {
      stream.emit({
        kind: "inference.completed",
        model: info.model,
        sessionKey: info.sessionKey,
        // Time the slot was held, measured from acquisition to release.
        latencyMs: Date.now() - info.acquiredAt,
        promptTokens: info.promptTokens,
        completionTokens: outcome.completionTokens ?? 0,
        ts: Date.now()
      });
    } else {
      stream.emit({
        kind: "inference.degraded",
        model: info.model,
        sessionKey: info.sessionKey,
        reason: outcome.error ?? "unknown",
        ts: Date.now()
      });
    }
  });
  broker.on("pressure", (kind, value2, threshold) => {
    stream.emit({ kind: "broker.pressure", pressure: kind, value: value2, threshold, ts: Date.now() });
  });
  broker.on("evicted", (m2, reason) => {
    stream.emit({ kind: "model.evicted", host: m2.host, name: m2.name, reason, ts: Date.now() });
  });
}
|
|
615699
|
+
/**
 * Build the "System Observations" text block from the broker snapshot, the
 * PID registry snapshot and (when a session key is given) that session's
 * recent observation events.
 *
 * @param {string} [sessionKey] Session whose recent send/reaction events are summarised.
 * @returns {string} Heading plus one line per observation, or "" when there is nothing to report.
 */
function formatSystemObservations(sessionKey) {
  const stream = getSoulObservationStream();
  const broker = getModelBroker();
  const snap = broker.snapshot();
  const pidSnap = getPidRegistry().snapshot();
  const lines = [];
  const slots = snap.slots;
  const utilPct = slots.capacity > 0 ? Math.round(slots.inUse / slots.capacity * 100) : 0;
  const tpsByModel = Object.entries(slots.byModel).filter(([, m2]) => m2.samples > 0).map(([model, m2]) => `${model}=${m2.tokensPerSec.toFixed(1)}t/s (${m2.samples}s)`).join(", ");
  if (slots.inUse > 0 || slots.queueDepth > 0 || tpsByModel) {
    lines.push(`Capacity: ${slots.inUse}/${slots.capacity} slots in use (${utilPct}%), queue=${slots.queueDepth}/${slots.queueCapacity}${tpsByModel ? `; throughput: ${tpsByModel}` : ""}.`);
  }
  if (snap.ramMB.free < (broker.ramHeadroomMB ?? 0)) {
    lines.push(`RAM pressure: ${snap.ramMB.free}MB free (below ${broker.ramHeadroomMB}MB headroom).`);
  }
  if (snap.vramMB && snap.vramMB.free < (broker.vramHeadroomMB ?? 0)) {
    lines.push(`VRAM pressure: ${snap.vramMB.free}MB free (below ${broker.vramHeadroomMB}MB headroom).`);
  }
  // Queue pressure: at least 80% of the queue is occupied. The floor() is 0
  // for a capacity of 0 or 1, which would flag an EMPTY queue as pressured,
  // so additionally require a real queue with at least one waiting entry.
  const queueThreshold = Math.floor(slots.queueCapacity * 0.8);
  if (slots.queueCapacity > 0 && slots.queueDepth > 0 && slots.queueDepth >= queueThreshold) {
    lines.push(`Queue pressure: ${slots.queueDepth}/${slots.queueCapacity} entries — prefer brief replies or single-emoji reactions to keep the queue draining.`);
  }
  // Only controllers with a few samples whose output has moved away from 1.
  const interesting = pidSnap.filter((p2) => p2.samples >= 3 && (p2.output < 0.95 || p2.output > 1.05));
  if (interesting.length > 0) {
    const pidLines = interesting.slice(0, 4).map((p2) => `${p2.key}: u=${p2.output.toFixed(2)} (pv=${Math.round(p2.pv)}ms, sp=${p2.setpoint}ms)`).join(", ");
    lines.push(`Context tier PID state: ${pidLines}.`);
  }
  if (sessionKey) {
    const recent = stream.recentForSession(sessionKey, 15);
    if (recent.length > 0) {
      const sends = recent.filter((e2) => e2.kind.startsWith("telegram.send."));
      const reactions = recent.filter((e2) => e2.kind.startsWith("emoji."));
      const forbidden = sends.filter((e2) => e2.kind === "telegram.send.forbidden").length;
      const rateLimited = sends.filter((e2) => e2.kind === "telegram.send.rate_limited").length;
      if (forbidden > 0) lines.push(`This chat has refused ${forbidden} recent send attempt(s) (e.g. no rights to post). Treat as a strong silence signal.`);
      if (rateLimited > 0) lines.push(`This chat rate-limited ${rateLimited} recent send(s). Slow cadence.`);
      if (reactions.length > 0) {
        const reactSummary = reactions.filter((e2) => e2.kind === "emoji.reaction.received").map((e2) => e2.emoji).join("");
        if (reactSummary) lines.push(`Recent inbound reactions in this chat: ${reactSummary}`);
      }
    }
  }
  if (lines.length === 0) return "";
  return ["## System Observations (broker, PID, capacity, send outcomes)", ...lines].join("\n");
}
|
|
615744
|
+
var PER_SESSION_BUFFER, GLOBAL_BUFFER, SoulObservationStream, _stream, _brokerSubscribed;
var init_soul_observations = __esm({
  "packages/cli/src/tui/soul-observations.ts"() {
    "use strict";
    init_dist5();
    init_pid_controller();
    // Maximum events retained per session / across all sessions.
    PER_SESSION_BUFFER = 60;
    GLOBAL_BUFFER = 200;
    /**
     * In-memory event log with bounded per-session and global buffers plus
     * live listeners. The oldest events are dropped once a buffer is full.
     */
    SoulObservationStream = class {
      _bySession = /* @__PURE__ */ new Map();
      _global = [];
      _listeners = /* @__PURE__ */ new Set();
      /**
       * Record an event. Events carrying a truthy `sessionKey` are stored in
       * that session's buffer as well as the global one.
       */
      emit(event) {
        if ("sessionKey" in event && event.sessionKey) {
          let buf = this._bySession.get(event.sessionKey);
          if (!buf) {
            buf = [];
            this._bySession.set(event.sessionKey, buf);
          }
          buf.push(event);
          if (buf.length > PER_SESSION_BUFFER) buf.shift();
        }
        this._global.push(event);
        if (this._global.length > GLOBAL_BUFFER) this._global.shift();
        for (const listener of this._listeners) {
          try {
            listener(event);
          } catch {
            // Best-effort fan-out: one failing listener must not block the rest.
          }
        }
      }
      /** Subscribe to all events (live tail). Returns an unsubscribe function. */
      subscribe(listener) {
        this._listeners.add(listener);
        return () => this._listeners.delete(listener);
      }
      /**
       * Read recent events for a session (most recent last).
       * A non-positive limit yields an empty list.
       */
      recentForSession(sessionKey, limit = 20) {
        const buf = this._bySession.get(sessionKey) ?? [];
        // slice(-0) is slice(0) and would return the WHOLE buffer.
        return limit > 0 ? buf.slice(-limit) : [];
      }
      /** Read recent global events. A non-positive limit yields an empty list. */
      recentGlobal(limit = 30) {
        return limit > 0 ? this._global.slice(-limit) : [];
      }
      /** Drop all buffered events. Registered listeners are kept. */
      reset() {
        this._bySession.clear();
        this._global.length = 0;
      }
    };
    _stream = null;
    _brokerSubscribed = false;
  }
});
|
|
615799
|
+
|
|
614243
615800
|
// packages/cli/src/tui/telegram-channel-dmn.ts
|
|
614244
615801
|
import { existsSync as existsSync115, mkdirSync as mkdirSync65, readdirSync as readdirSync40, readFileSync as readFileSync94, writeFileSync as writeFileSync59 } from "node:fs";
|
|
614245
615802
|
import { join as join129 } from "node:path";
|
|
@@ -614338,7 +615895,7 @@ function buildReplyOpportunities(input, openQuestions) {
|
|
|
614338
615895
|
function daydreamOpportunityId(input, trigger) {
|
|
614339
615896
|
return createHash23("sha1").update(`${input.sessionKey}:${input.generatedAtMs}:${trigger}`).digest("hex").slice(0, 16);
|
|
614340
615897
|
}
|
|
614341
|
-
function
|
|
615898
|
+
/**
 * Clamp a number into [0, 1]; non-finite input (NaN, ±Infinity) maps to 0.
 *
 * @param {number} value2
 * @returns {number}
 */
function clamp019(value2) {
  return Number.isFinite(value2) ? Math.max(0, Math.min(1, value2)) : 0;
}
|
|
@@ -614349,7 +615906,7 @@ function pushStimulationSignal(signals, signal, source, weight) {
|
|
|
614349
615906
|
const cleanSignal = compactLine2(signal, 120);
|
|
614350
615907
|
const cleanSource = compactLine2(source, 180);
|
|
614351
615908
|
if (!cleanSignal || signals.some((entry) => entry.signal === cleanSignal && entry.source === cleanSource)) return;
|
|
614352
|
-
signals.push({ signal: cleanSignal, source: cleanSource, weight:
|
|
615909
|
+
signals.push({ signal: cleanSignal, source: cleanSource, weight: clamp019(weight) });
|
|
614353
615910
|
}
|
|
614354
615911
|
function buildMetaAnalysisSignals(input) {
|
|
614355
615912
|
const chatLabel = input.chatTitle || input.chatId;
|
|
@@ -614424,7 +615981,7 @@ function buildCuriosityThreads(input, openQuestions, stimulationSignals) {
|
|
|
614424
615981
|
question: text.endsWith("?") || text.endsWith("?") ? text : `What should be learned or clarified from: ${text || entry.mediaSummary || "recent media"}?`,
|
|
614425
615982
|
rationale: "Human curiosity, uncertainty, or multimodal content makes this a useful idle exploration target.",
|
|
614426
615983
|
sourceMessages: messageId,
|
|
614427
|
-
intensity:
|
|
615984
|
+
intensity: clamp019(0.5 + replyBoost + mediaBoost + questionBoost)
|
|
614428
615985
|
});
|
|
614429
615986
|
}
|
|
614430
615987
|
for (const question of openQuestions.slice(-4)) {
|
|
@@ -614444,7 +616001,7 @@ function buildCuriosityThreads(input, openQuestions, stimulationSignals) {
|
|
|
614444
616001
|
question: `Is there a useful clarification or memory consolidation around ${strongest.source}?`,
|
|
614445
616002
|
rationale: "Strongest stimulation signal can seed a low-intrusion reflection target.",
|
|
614446
616003
|
sourceMessages: [],
|
|
614447
|
-
intensity:
|
|
616004
|
+
intensity: clamp019(strongest.weight * 0.72)
|
|
614448
616005
|
});
|
|
614449
616006
|
}
|
|
614450
616007
|
return threads.sort((a2, b) => b.intensity - a2.intensity).slice(0, 8);
|
|
@@ -614518,7 +616075,7 @@ function buildOutreachPlans(input, curiosityThreads) {
|
|
|
614518
616075
|
purpose: "Continue the public thread only when the live model judges that the group would benefit from a concise follow-up.",
|
|
614519
616076
|
draftIntent: "Ask one concrete clarification, offer one useful synthesis, or stay silent if the room has moved on.",
|
|
614520
616077
|
gate: "model_decision",
|
|
614521
|
-
confidence:
|
|
616078
|
+
confidence: clamp019(thread.intensity * 0.86)
|
|
614522
616079
|
});
|
|
614523
616080
|
const participant = participantForThread(input, thread);
|
|
614524
616081
|
if (!participant) continue;
|
|
@@ -614530,7 +616087,7 @@ function buildOutreachPlans(input, curiosityThreads) {
|
|
|
614530
616087
|
purpose: "Offer a one-to-one follow-up only if private contact is allowed and the issue is personal, unresolved, or better handled outside the group.",
|
|
614531
616088
|
draftIntent: "Reference the public thread briefly, ask permission to continue privately, and do not reveal hidden meta-analysis.",
|
|
614532
616089
|
gate: "admin_review",
|
|
614533
|
-
confidence:
|
|
616090
|
+
confidence: clamp019(thread.intensity * 0.58)
|
|
614534
616091
|
});
|
|
614535
616092
|
}
|
|
614536
616093
|
return plans.slice(0, 8);
|
|
@@ -615633,7 +617190,7 @@ function numberOr(value2, fallback) {
|
|
|
615633
617190
|
/**
 * True only for finite values of type number (rejects NaN, ±Infinity and
 * every non-number, including numeric strings).
 *
 * @param {unknown} value2
 * @returns {boolean}
 */
function isNumber(value2) {
  // Number.isFinite never coerces, so it already rejects non-number types.
  return Number.isFinite(value2);
}
|
|
615636
|
-
function
|
|
617193
|
+
/**
 * Clamp a number into [0, 1]; non-finite input (NaN, ±Infinity) is treated as 0.
 *
 * @param {number} value2
 * @returns {number}
 */
function clamp0110(value2) {
  const finite = Number.isFinite(value2) ? value2 : 0;
  const capped = Math.min(1, finite);
  return Math.max(0, capped);
}
|
|
615639
617196
|
function iso(ts) {
|
|
@@ -615780,8 +617337,8 @@ function normalizeRelationship(raw) {
|
|
|
615780
617337
|
kind: value2.kind,
|
|
615781
617338
|
fromKey: String(value2.fromKey),
|
|
615782
617339
|
toKey: String(value2.toKey),
|
|
615783
|
-
confidence:
|
|
615784
|
-
weight:
|
|
617340
|
+
confidence: clamp0110(numberOr(value2.confidence, 0)),
|
|
617341
|
+
weight: clamp0110(numberOr(value2.weight, 0)),
|
|
615785
617342
|
firstSeenAt: numberOr(value2.firstSeenAt, Date.now()),
|
|
615786
617343
|
lastSeenAt: numberOr(value2.lastSeenAt, Date.now()),
|
|
615787
617344
|
evidenceMessageIds: Array.isArray(value2.evidenceMessageIds) ? value2.evidenceMessageIds.filter(isNumber).slice(-40) : [],
|
|
@@ -615800,7 +617357,7 @@ function normalizePreferences(raw) {
|
|
|
615800
617357
|
if (!evidence || typeof evidence !== "object") continue;
|
|
615801
617358
|
out[actorKey][key] = {
|
|
615802
617359
|
value: Math.max(-1, Math.min(1, numberOr(evidence.value, 0))),
|
|
615803
|
-
confidence:
|
|
617360
|
+
confidence: clamp0110(numberOr(evidence.confidence, 0)),
|
|
615804
617361
|
updatedAt: numberOr(evidence.updatedAt, Date.now()),
|
|
615805
617362
|
evidenceMessageIds: Array.isArray(evidence.evidenceMessageIds) ? evidence.evidenceMessageIds.filter(isNumber).slice(-12) : [],
|
|
615806
617363
|
note: compactOptional(evidence.note, 220)
|
|
@@ -615858,7 +617415,7 @@ function normalizeOutcome(raw) {
|
|
|
615858
617415
|
replyToMessageId: typeof value2.replyToMessageId === "number" ? value2.replyToMessageId : void 0,
|
|
615859
617416
|
route: value2.route === "action" ? "action" : "chat",
|
|
615860
617417
|
shouldReply: value2.shouldReply === true,
|
|
615861
|
-
confidence:
|
|
617418
|
+
confidence: clamp0110(numberOr(value2.confidence, 0)),
|
|
615862
617419
|
reason: compact2(value2.reason || "", 280),
|
|
615863
617420
|
source: compact2(value2.source || "unknown", 80),
|
|
615864
617421
|
silentDisposition: compactOptional(value2.silentDisposition, 280),
|
|
@@ -615870,7 +617427,7 @@ function normalizeOutcome(raw) {
|
|
|
615870
617427
|
scenarioNote: compactOptional(value2.scenarioNote, 360),
|
|
615871
617428
|
scenarioId: compactOptional(value2.scenarioId, 160),
|
|
615872
617429
|
scenarioLabel: compactOptional(value2.scenarioLabel, 160),
|
|
615873
|
-
scenarioConfidence: typeof value2.scenarioConfidence === "number" && Number.isFinite(value2.scenarioConfidence) ?
|
|
617430
|
+
scenarioConfidence: typeof value2.scenarioConfidence === "number" && Number.isFinite(value2.scenarioConfidence) ? clamp0110(value2.scenarioConfidence) : void 0,
|
|
615874
617431
|
scenarioObjective: compactOptional(value2.scenarioObjective, 360),
|
|
615875
617432
|
scenarioStateLoop: compactOptional(value2.scenarioStateLoop, 360),
|
|
615876
617433
|
salienceSignals: Array.isArray(value2.salienceSignals) ? value2.salienceSignals.map(String).slice(0, 16) : [],
|
|
@@ -615888,7 +617445,7 @@ function normalizeDaydreamOpportunity(raw) {
|
|
|
615888
617445
|
artifactId: String(value2.artifactId || "unknown"),
|
|
615889
617446
|
generatedAt: String(value2.generatedAt || (/* @__PURE__ */ new Date()).toISOString()),
|
|
615890
617447
|
trigger: compact2(value2.trigger || "", 240),
|
|
615891
|
-
confidence:
|
|
617448
|
+
confidence: clamp0110(numberOr(value2.confidence, 0)),
|
|
615892
617449
|
lifecycle,
|
|
615893
617450
|
firstSeenAt: numberOr(value2.firstSeenAt, Date.now()),
|
|
615894
617451
|
updatedAt: numberOr(value2.updatedAt, Date.now()),
|
|
@@ -615945,7 +617502,7 @@ function commitTelegramSocialDecision(state, input) {
|
|
|
615945
617502
|
replyToMessageId: input.replyToMessageId,
|
|
615946
617503
|
route: input.route,
|
|
615947
617504
|
shouldReply: input.shouldReply,
|
|
615948
|
-
confidence:
|
|
617505
|
+
confidence: clamp0110(input.confidence),
|
|
615949
617506
|
reason: compact2(input.reason, 280),
|
|
615950
617507
|
source: compact2(input.source, 80),
|
|
615951
617508
|
silentDisposition: compactOptional(input.silentDisposition, 280),
|
|
@@ -615957,7 +617514,7 @@ function commitTelegramSocialDecision(state, input) {
|
|
|
615957
617514
|
scenarioNote: compactOptional(input.scenarioNote, 360),
|
|
615958
617515
|
scenarioId: compactOptional(input.scenarioId, 160),
|
|
615959
617516
|
scenarioLabel: compactOptional(input.scenarioLabel, 160),
|
|
615960
|
-
scenarioConfidence: input.scenarioConfidence === void 0 ? void 0 :
|
|
617517
|
+
scenarioConfidence: input.scenarioConfidence === void 0 ? void 0 : clamp0110(input.scenarioConfidence),
|
|
615961
617518
|
scenarioObjective: compactOptional(input.scenarioObjective, 360),
|
|
615962
617519
|
scenarioStateLoop: compactOptional(input.scenarioStateLoop, 360),
|
|
615963
617520
|
salienceSignals: [...new Set((input.salienceSignals ?? []).map(String))].slice(0, 16),
|
|
@@ -615981,7 +617538,7 @@ function registerDaydreamOpportunities(state, opportunities, now = Date.now()) {
|
|
|
615981
617538
|
artifactId: opportunity.artifactId || "unknown",
|
|
615982
617539
|
generatedAt: opportunity.generatedAt || new Date(now).toISOString(),
|
|
615983
617540
|
trigger: compact2(opportunity.trigger, 240),
|
|
615984
|
-
confidence:
|
|
617541
|
+
confidence: clamp0110(opportunity.confidence),
|
|
615985
617542
|
lifecycle: "proposed",
|
|
615986
617543
|
firstSeenAt: now,
|
|
615987
617544
|
updatedAt: now,
|
|
@@ -615991,7 +617548,7 @@ function registerDaydreamOpportunities(state, opportunities, now = Date.now()) {
|
|
|
615991
617548
|
};
|
|
615992
617549
|
if (existing) {
|
|
615993
617550
|
item.trigger = compact2(opportunity.trigger, 240) || item.trigger;
|
|
615994
|
-
item.confidence =
|
|
617551
|
+
item.confidence = clamp0110(opportunity.confidence);
|
|
615995
617552
|
item.updatedAt = now;
|
|
615996
617553
|
}
|
|
615997
617554
|
state.daydreamOpportunities[id] = item;
|
|
@@ -616131,8 +617688,8 @@ function upsertRelationship(state, kind, fromKey, toKey, messageId, confidence2,
|
|
|
616131
617688
|
evidenceMessageIds: [],
|
|
616132
617689
|
source
|
|
616133
617690
|
};
|
|
616134
|
-
edge.confidence = Math.max(edge.confidence,
|
|
616135
|
-
edge.weight = Math.min(1, edge.weight + 0.12 +
|
|
617691
|
+
edge.confidence = Math.max(edge.confidence, clamp0110(confidence2));
|
|
617692
|
+
edge.weight = Math.min(1, edge.weight + 0.12 + clamp0110(confidence2) * 0.2);
|
|
616136
617693
|
edge.lastSeenAt = now;
|
|
616137
617694
|
edge.evidenceMessageIds = appendUnique(edge.evidenceMessageIds, messageId, 40);
|
|
616138
617695
|
edge.note = compactOptional(note, 260) || edge.note;
|
|
@@ -616174,7 +617731,7 @@ function setPreference(vector, key, value2, confidence2, messageId, now, note) {
|
|
|
616174
617731
|
const existing = vector[key];
|
|
616175
617732
|
vector[key] = {
|
|
616176
617733
|
value: existing ? existing.value * 0.7 + value2 * 0.3 : value2,
|
|
616177
|
-
confidence: Math.max(existing?.confidence ?? 0,
|
|
617734
|
+
confidence: Math.max(existing?.confidence ?? 0, clamp0110(confidence2)),
|
|
616178
617735
|
updatedAt: now,
|
|
616179
617736
|
evidenceMessageIds: appendUnique(existing?.evidenceMessageIds ?? [], messageId, 12),
|
|
616180
617737
|
note
|
|
@@ -616279,19 +617836,51 @@ async function queryVisionModel(modelName, imagePath, prompt = "Describe what yo
|
|
|
616279
617836
|
if (!existsSync116(imagePath)) return "";
|
|
616280
617837
|
const imageBuffer = readFileSync95(imagePath);
|
|
616281
617838
|
const base64Image = imageBuffer.toString("base64");
|
|
617839
|
+
const broker = getModelBroker();
|
|
617840
|
+
const decision2 = await broker.ensureModelLoadable({
|
|
617841
|
+
name: modelName,
|
|
617842
|
+
domain: "vision",
|
|
617843
|
+
host: "ollama",
|
|
617844
|
+
owner: "vision-ingress",
|
|
617845
|
+
requestedNumCtx: 2048
|
|
617846
|
+
});
|
|
617847
|
+
let effectiveModel = modelName;
|
|
617848
|
+
let numCtx;
|
|
617849
|
+
if (decision2.kind === "reject") {
|
|
617850
|
+
return "";
|
|
617851
|
+
} else if (decision2.kind === "degrade") {
|
|
617852
|
+
effectiveModel = decision2.fallback.name;
|
|
617853
|
+
} else if (decision2.kind === "evict") {
|
|
617854
|
+
for (const target of decision2.evictTargets) {
|
|
617855
|
+
await broker.evict(target.host, target.name, "make-room-for-vision");
|
|
617856
|
+
}
|
|
617857
|
+
numCtx = decision2.effectiveNumCtx;
|
|
617858
|
+
} else if (decision2.kind === "ok") {
|
|
617859
|
+
numCtx = decision2.effectiveNumCtx;
|
|
617860
|
+
} else if (decision2.kind === "wait-for-inflight") {
|
|
617861
|
+
const inner = await decision2.promise;
|
|
617862
|
+
if (inner.kind === "ok") numCtx = inner.effectiveNumCtx;
|
|
617863
|
+
else if (inner.kind === "degrade") effectiveModel = inner.fallback.name;
|
|
617864
|
+
else if (inner.kind === "reject") return "";
|
|
617865
|
+
}
|
|
617866
|
+
if (numCtx === void 0) {
|
|
617867
|
+
const trainCtx = await broker.getNctxTrain(effectiveModel);
|
|
617868
|
+
numCtx = trainCtx && trainCtx > 0 ? Math.min(trainCtx, 4096) : 2048;
|
|
617869
|
+
}
|
|
616282
617870
|
try {
|
|
616283
617871
|
const response = await fetch("http://localhost:11434/api/generate", {
|
|
616284
617872
|
method: "POST",
|
|
616285
617873
|
headers: { "Content-Type": "application/json" },
|
|
616286
617874
|
body: JSON.stringify({
|
|
616287
|
-
model:
|
|
617875
|
+
model: effectiveModel,
|
|
616288
617876
|
prompt,
|
|
616289
617877
|
images: [base64Image],
|
|
616290
617878
|
stream: false,
|
|
616291
|
-
options: { temperature: 0.3, num_predict: 1024 }
|
|
617879
|
+
options: { temperature: 0.3, num_predict: 1024, num_ctx: numCtx }
|
|
616292
617880
|
})
|
|
616293
617881
|
});
|
|
616294
617882
|
if (!response.ok) return "";
|
|
617883
|
+
broker.touch("ollama", effectiveModel);
|
|
616295
617884
|
const data = await response.json();
|
|
616296
617885
|
return (data.response || "").trim();
|
|
616297
617886
|
} catch {
|
|
@@ -616344,6 +617933,7 @@ function formatImageContextPrefix(result) {
|
|
|
616344
617933
|
var init_vision_ingress = __esm({
|
|
616345
617934
|
"packages/cli/src/tui/vision-ingress.ts"() {
|
|
616346
617935
|
"use strict";
|
|
617936
|
+
init_dist5();
|
|
616347
617937
|
}
|
|
616348
617938
|
});
|
|
616349
617939
|
|
|
@@ -616532,9 +618122,31 @@ function parseTelegramSilentReflectionNotes(text) {
|
|
|
616532
618122
|
}
|
|
616533
618123
|
return null;
|
|
616534
618124
|
}
|
|
616535
|
-
function
|
|
616536
|
-
|
|
616537
|
-
|
|
618125
|
+
function estimatePromptTokensFromRequest(request) {
|
|
618126
|
+
let chars = 0;
|
|
618127
|
+
for (const m2 of request.messages ?? []) {
|
|
618128
|
+
if (typeof m2.content === "string") chars += m2.content.length;
|
|
618129
|
+
else if (Array.isArray(m2.content)) {
|
|
618130
|
+
for (const part of m2.content) {
|
|
618131
|
+
if (typeof part?.text === "string") chars += part.text.length;
|
|
618132
|
+
}
|
|
618133
|
+
}
|
|
618134
|
+
chars += 8;
|
|
618135
|
+
}
|
|
618136
|
+
if (Array.isArray(request.tools) && request.tools.length > 0) {
|
|
618137
|
+
chars += request.tools.length * 600;
|
|
618138
|
+
}
|
|
618139
|
+
return Math.ceil(chars / 4);
|
|
618140
|
+
}
|
|
618141
|
+
function telegramRouterTimeoutMs(configTimeoutMs, _minMs, _legacyMaxMs) {
|
|
618142
|
+
void _minMs;
|
|
618143
|
+
void _legacyMaxMs;
|
|
618144
|
+
const envRaw = Number.parseInt(process.env["OMNIUS_TG_INFERENCE_LIVENESS_MS"] ?? "", 10);
|
|
618145
|
+
const livenessMs = Number.isFinite(envRaw) && envRaw >= 1e4 ? envRaw : 6e5;
|
|
618146
|
+
if (Number.isFinite(configTimeoutMs) && (configTimeoutMs ?? 0) >= livenessMs) {
|
|
618147
|
+
return configTimeoutMs;
|
|
618148
|
+
}
|
|
618149
|
+
return livenessMs;
|
|
616538
618150
|
}
|
|
616539
618151
|
function telegramThinkSuppressedRequest(request) {
|
|
616540
618152
|
const messages2 = Array.isArray(request.messages) ? request.messages.slice() : [];
|
|
@@ -617989,6 +619601,9 @@ var init_telegram_bridge = __esm({
|
|
|
617989
619601
|
init_telegram_creative_tools();
|
|
617990
619602
|
init_omnius_directory();
|
|
617991
619603
|
init_stimulation();
|
|
619604
|
+
init_pid_controller();
|
|
619605
|
+
init_component_benefit();
|
|
619606
|
+
init_soul_observations();
|
|
617992
619607
|
init_identity_memory_tool();
|
|
617993
619608
|
init_visual_identity_association();
|
|
617994
619609
|
init_telegram_channel_dmn();
|
|
@@ -621954,6 +623569,12 @@ ${lines.join("\n")}`);
|
|
|
621954
623569
|
this.ensureTelegramConversationLoaded(sessionKey);
|
|
621955
623570
|
const history = this.chatHistory.get(sessionKey) ?? [];
|
|
621956
623571
|
const participants = [...this.chatParticipants.get(sessionKey)?.values() ?? []].sort((a2, b) => b.lastSeenTs - a2.lastSeenTs);
|
|
623572
|
+
const modelKey = this.agentConfig?.model ?? "?";
|
|
623573
|
+
const pidReg = getPidRegistry();
|
|
623574
|
+
const tier1Ratio = pidReg.output(`tier1.${modelKey}`);
|
|
623575
|
+
const tier2Ratio = pidReg.output(`tier2.${modelKey}`);
|
|
623576
|
+
const benefitReg = getComponentBenefitRegistry();
|
|
623577
|
+
const sampledComponents = [];
|
|
621957
623578
|
const isGroup = msg.chatType !== "private";
|
|
621958
623579
|
const retainedCount = history.length;
|
|
621959
623580
|
const olderCount = Math.max(0, retainedCount - maxRecent);
|
|
@@ -621987,14 +623608,28 @@ ${lines.join("\n")}`);
|
|
|
621987
623608
|
sections.push(socialStateContext);
|
|
621988
623609
|
}
|
|
621989
623610
|
if (participants.length > 0) {
|
|
621990
|
-
const
|
|
623611
|
+
const fullCount = Math.min(12, participants.length);
|
|
623612
|
+
const tier1Count = Math.max(1, Math.round(fullCount * tier1Ratio));
|
|
623613
|
+
const sortedByBenefit = participants.slice(0, fullCount).sort((a2, b) => {
|
|
623614
|
+
const scoreA = benefitReg.score(sessionKey, `tier1.participant.${a2.username ?? a2.fromUserId}`);
|
|
623615
|
+
const scoreB = benefitReg.score(sessionKey, `tier1.participant.${b.username ?? b.fromUserId}`);
|
|
623616
|
+
return scoreB - scoreA;
|
|
623617
|
+
});
|
|
623618
|
+
const selected = sortedByBenefit.slice(0, tier1Count);
|
|
623619
|
+
const participantLines = selected.map((profile) => {
|
|
621991
623620
|
const label = profile.username && profile.username !== "unknown" ? `@${profile.username}` : profile.firstName || `user:${profile.fromUserId}`;
|
|
621992
623621
|
const tones = [...profile.toneTags].slice(0, 5).join(", ") || "neutral";
|
|
621993
623622
|
const direct = profile.directAddressCount ? `, direct-addresses:${profile.directAddressCount}` : "";
|
|
621994
623623
|
const replies = profile.replyCount ? `, replies:${profile.replyCount}` : "";
|
|
623624
|
+
sampledComponents.push({
|
|
623625
|
+
key: `tier1.participant.${profile.username ?? profile.fromUserId}`,
|
|
623626
|
+
needle: profile.username ?? String(profile.fromUserId)
|
|
623627
|
+
});
|
|
621995
623628
|
return `- ${label} [${telegramActorKindLabel(profile)}]: messages:${profile.messageCount}${direct}${replies}; tone:${tones}; last=${telegramContextJsonString(profile.lastMessage, 180)}`;
|
|
621996
623629
|
});
|
|
621997
|
-
|
|
623630
|
+
const shed = fullCount - tier1Count;
|
|
623631
|
+
const tierNote = shed > 0 ? ` (tier1 u=${tier1Ratio.toFixed(2)}; ${shed} participants shed by benefit)` : "";
|
|
623632
|
+
sections.push(`### Participants And Relationship Signals${tierNote}
|
|
621998
623633
|
${participantLines.join("\n")}`);
|
|
621999
623634
|
}
|
|
622000
623635
|
const associativeContext = this.relevantTelegramAssociativeMemoryContext(
|
|
@@ -622026,16 +623661,32 @@ ${participantLines.join("\n")}`);
|
|
|
622026
623661
|
}
|
|
622027
623662
|
const memoryCards = this.relevantTelegramMemoryCards(sessionKey, msg, isGroup ? 10 : 6);
|
|
622028
623663
|
if (memoryCards.length > 0) {
|
|
622029
|
-
const
|
|
622030
|
-
|
|
622031
|
-
|
|
622032
|
-
const
|
|
622033
|
-
const
|
|
622034
|
-
return
|
|
623664
|
+
const fullMC = memoryCards.length;
|
|
623665
|
+
const tier2Count = Math.max(0, Math.round(fullMC * tier2Ratio));
|
|
623666
|
+
const sortedMC = [...memoryCards].sort((a2, b) => {
|
|
623667
|
+
const scoreA = benefitReg.score(sessionKey, `tier2.memory_card.${a2.card.id}`);
|
|
623668
|
+
const scoreB = benefitReg.score(sessionKey, `tier2.memory_card.${b.card.id}`);
|
|
623669
|
+
return scoreB - scoreA;
|
|
623670
|
+
});
|
|
623671
|
+
const selectedMC = sortedMC.slice(0, tier2Count);
|
|
623672
|
+
if (selectedMC.length > 0) {
|
|
623673
|
+
const cardLines = selectedMC.map(({ card, score }) => {
|
|
623674
|
+
const tags = card.tags.length ? ` tags:${card.tags.slice(0, 8).join(",")}` : "";
|
|
623675
|
+
const speakers = card.speakers.length ? ` speakers:${card.speakers.join(", ")}` : "";
|
|
623676
|
+
const relevance = score > 0 ? ` relevance:${score.toFixed(2)}` : " relevance:recent";
|
|
623677
|
+
const notes2 = card.notes.slice(-3).map((note) => ` - note=${telegramContextJsonString(note, 220)}`).join("\n");
|
|
623678
|
+
sampledComponents.push({
|
|
623679
|
+
key: `tier2.memory_card.${card.id}`,
|
|
623680
|
+
needle: card.id
|
|
623681
|
+
});
|
|
623682
|
+
return `- ${card.title} (${card.id};${relevance};${speakers}${tags})
|
|
622035
623683
|
${notes2}`;
|
|
622036
|
-
|
|
622037
|
-
|
|
623684
|
+
});
|
|
623685
|
+
const shed = fullMC - tier2Count;
|
|
623686
|
+
const tierNote = shed > 0 ? ` (tier2 u=${tier2Ratio.toFixed(2)}; ${shed} cards shed by benefit)` : "";
|
|
623687
|
+
sections.push(`### Zettelkasten Memory Recall (untrusted conversation notes)${tierNote}
|
|
622038
623688
|
${cardLines.join("\n")}`);
|
|
623689
|
+
}
|
|
622039
623690
|
}
|
|
622040
623691
|
const channelDaydream = this.formatLatestTelegramChannelDaydreamContext(sessionKey);
|
|
622041
623692
|
if (channelDaydream) {
|
|
@@ -622108,6 +623759,7 @@ ${lines.join("\n")}`);
|
|
|
622108
623759
|
`- If the current sender asks what you see or remember, answer from this stream instead of saying the history is gone.`
|
|
622109
623760
|
].join("\n")
|
|
622110
623761
|
);
|
|
623762
|
+
this.telegramStashContextSamples(sessionKey, sampledComponents);
|
|
622111
623763
|
return sections.join("\n\n");
|
|
622112
623764
|
}
|
|
622113
623765
|
maybeLogTelegramGroupSkip(msg, reason) {
|
|
@@ -622170,6 +623822,25 @@ ${lines.join("\n")}`);
|
|
|
622170
623822
|
nextAnalysisAfterMessages: decision2.nextCheckAfterMessages
|
|
622171
623823
|
});
|
|
622172
623824
|
}
|
|
623825
|
+
/**
|
|
623826
|
+
* Collect the per-component benefit samples that were tagged when assembling
|
|
623827
|
+
* the last context stream for this session. Returns the same shape the
|
|
623828
|
+
* component-benefit registry consumes; an empty list means no tier-1/tier-2
|
|
623829
|
+
* components were emitted (early return — benefit tracking skipped).
|
|
623830
|
+
*
|
|
623831
|
+
* Tags are stored on `_telegramLastContextSamples` (a per-session WeakMap-
|
|
623832
|
+
* style cache) so the post-call feedback knows what to score without
|
|
623833
|
+
* re-running the context assembly.
|
|
623834
|
+
*/
|
|
623835
|
+
telegramComponentSamplesForSession(sessionKey) {
|
|
623836
|
+
return this._telegramLastContextSamples.get(sessionKey) ?? [];
|
|
623837
|
+
}
|
|
623838
|
+
/** Per-session cache of last emitted context-component samples. */
|
|
623839
|
+
_telegramLastContextSamples = /* @__PURE__ */ new Map();
|
|
623840
|
+
/** Stash samples for the next post-call feedback cycle. */
|
|
623841
|
+
telegramStashContextSamples(sessionKey, samples) {
|
|
623842
|
+
this._telegramLastContextSamples.set(sessionKey, samples);
|
|
623843
|
+
}
|
|
622173
623844
|
buildTelegramRouterPersonaContext(sessionKey, msg, toolContext, selfIdentityContext) {
|
|
622174
623845
|
const baseContract = toolContext === "telegram-admin-dm" ? ADMIN_DM_PROMPT : toolContext === "telegram-admin-group" ? ADMIN_GROUP_PROMPT : TELEGRAM_SAFETY_PROMPT;
|
|
622175
623846
|
return buildSoulContext({
|
|
@@ -622364,30 +624035,55 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
|
|
|
622364
624035
|
* hard-deadline retire path becomes diagnosable instead of opaque
|
|
622365
624036
|
*/
|
|
622366
624037
|
async telegramObservableInference(backend, request, kind, sessionKey) {
|
|
624038
|
+
const model = this.agentConfig?.model ?? "?";
|
|
624039
|
+
const promptTokens = estimatePromptTokensFromRequest(request);
|
|
624040
|
+
const broker = getModelBroker();
|
|
624041
|
+
const trainCtx = await broker.getNctxTrain(model).catch(() => null);
|
|
624042
|
+
const targetCtx = trainCtx && trainCtx > 0 ? Math.min(trainCtx, Math.max(2048, promptTokens + 1024)) : Math.max(2048, promptTokens + 1024);
|
|
624043
|
+
const requestWithCtx = { ...request, numCtx: targetCtx };
|
|
624044
|
+
const slot = await broker.acquireInferenceSlot({
|
|
624045
|
+
model,
|
|
624046
|
+
domain: "chat",
|
|
624047
|
+
owner: `telegram-bridge/${kind}`,
|
|
624048
|
+
sessionKey,
|
|
624049
|
+
promptTokens,
|
|
624050
|
+
priority: kind === "router" || kind === "router-repair" || kind === "router-strict-retry" ? 1 : 0
|
|
624051
|
+
});
|
|
624052
|
+
this.tuiWrite(() => renderTelegramSubAgentEvent(
|
|
624053
|
+
sessionKey,
|
|
624054
|
+
`inference admitted [${kind}] model=${model} prompt~${promptTokens}t num_ctx=${targetCtx} slot=${slot.info.id}${slot.info.reserved ? " reserved" : ""}`
|
|
624055
|
+
));
|
|
622367
624056
|
const streamFn = backend.chatCompletionStream;
|
|
622368
|
-
const id = this.registerTelegramInference(kind, sessionKey,
|
|
624057
|
+
const id = this.registerTelegramInference(kind, sessionKey, model);
|
|
624058
|
+
let completionTokens = 0;
|
|
622369
624059
|
try {
|
|
624060
|
+
let result;
|
|
622370
624061
|
if (typeof streamFn !== "function") {
|
|
622371
|
-
|
|
622372
|
-
this.updateTelegramInferenceFinal(id,
|
|
622373
|
-
|
|
622374
|
-
|
|
622375
|
-
|
|
622376
|
-
|
|
622377
|
-
|
|
622378
|
-
|
|
622379
|
-
|
|
622380
|
-
)
|
|
622381
|
-
|
|
622382
|
-
|
|
622383
|
-
|
|
622384
|
-
|
|
622385
|
-
|
|
622386
|
-
|
|
622387
|
-
|
|
622388
|
-
));
|
|
622389
|
-
return r2;
|
|
624062
|
+
result = await backend.chatCompletion(requestWithCtx);
|
|
624063
|
+
this.updateTelegramInferenceFinal(id, result);
|
|
624064
|
+
} else {
|
|
624065
|
+
try {
|
|
624066
|
+
result = await this.streamTelegramInferenceToCompletion(
|
|
624067
|
+
streamFn.bind(backend),
|
|
624068
|
+
requestWithCtx,
|
|
624069
|
+
id
|
|
624070
|
+
);
|
|
624071
|
+
} catch (streamErr) {
|
|
624072
|
+
result = await backend.chatCompletion(requestWithCtx);
|
|
624073
|
+
this.updateTelegramInferenceFinal(id, result);
|
|
624074
|
+
this.tuiWrite(() => renderTelegramSubAgentEvent(
|
|
624075
|
+
sessionKey,
|
|
624076
|
+
`inference ${id}: stream errored (${streamErr instanceof Error ? streamErr.message : String(streamErr)}); fell back to non-stream`
|
|
624077
|
+
));
|
|
624078
|
+
}
|
|
622390
624079
|
}
|
|
624080
|
+
const usage = result.usage;
|
|
624081
|
+
completionTokens = usage?.completion_tokens ?? 0;
|
|
624082
|
+
slot.release({ ok: true, completionTokens });
|
|
624083
|
+
return result;
|
|
624084
|
+
} catch (err) {
|
|
624085
|
+
slot.release({ ok: false, error: err instanceof Error ? err.message : String(err) });
|
|
624086
|
+
throw err;
|
|
622391
624087
|
} finally {
|
|
622392
624088
|
this.deregisterTelegramInference(id);
|
|
622393
624089
|
}
|
|
@@ -622747,33 +624443,15 @@ ${retryText}`,
|
|
|
622747
624443
|
/**
|
|
622748
624444
|
* Internal: start an actual router inference for a sessionKey, store its
|
|
622749
624445
|
* in-flight promise, and on completion fire any queued trailing call.
|
|
624446
|
+
*
|
|
624447
|
+
* No watchdog timeout — the broker's admission control guarantees the
|
|
624448
|
+
* inference fits available compute. Inflight work always completes; new
|
|
624449
|
+
* work waits in the broker's bounded queue with backpressure to upstream.
|
|
624450
|
+
* Only the fetch-level I/O liveness AbortSignal can interrupt, and only
|
|
624451
|
+
* on TCP-dead.
|
|
622750
624452
|
*/
|
|
622751
624453
|
startCoalescedTelegramRouterCall(sessionKey, msg, toolContext) {
|
|
622752
|
-
const
|
|
622753
|
-
const inner = this.inferTelegramInteractionDecision(msg, toolContext);
|
|
622754
|
-
const promise = new Promise((resolve55, reject) => {
|
|
622755
|
-
let settled = false;
|
|
622756
|
-
const guard = setTimeout(() => {
|
|
622757
|
-
if (settled) return;
|
|
622758
|
-
settled = true;
|
|
622759
|
-
reject(new Error(`router-coalescer: hard deadline exceeded (${Math.round(HARD_DEADLINE_MS / 1e3)}s); inner inference did not settle`));
|
|
622760
|
-
}, HARD_DEADLINE_MS);
|
|
622761
|
-
if (typeof guard.unref === "function") guard.unref();
|
|
622762
|
-
inner.then(
|
|
622763
|
-
(v) => {
|
|
622764
|
-
if (settled) return;
|
|
622765
|
-
settled = true;
|
|
622766
|
-
clearTimeout(guard);
|
|
622767
|
-
resolve55(v);
|
|
622768
|
-
},
|
|
622769
|
-
(e2) => {
|
|
622770
|
-
if (settled) return;
|
|
622771
|
-
settled = true;
|
|
622772
|
-
clearTimeout(guard);
|
|
622773
|
-
reject(e2);
|
|
622774
|
-
}
|
|
622775
|
-
);
|
|
622776
|
-
});
|
|
624454
|
+
const promise = this.inferTelegramInteractionDecision(msg, toolContext);
|
|
622777
624455
|
this.telegramRouterSessionState.set(sessionKey, { inFlight: promise });
|
|
622778
624456
|
const onSettled = () => {
|
|
622779
624457
|
let state;
|
|
@@ -622794,11 +624472,6 @@ ${retryText}`,
|
|
|
622794
624472
|
promise.then(onSettled, onSettled);
|
|
622795
624473
|
return promise;
|
|
622796
624474
|
}
|
|
622797
|
-
telegramRouterHardDeadlineMs() {
|
|
622798
|
-
const raw = Number.parseInt(process.env["OMNIUS_TG_ROUTER_HARD_DEADLINE_MS"] ?? "", 10);
|
|
622799
|
-
if (Number.isFinite(raw) && raw >= 5e3 && raw <= 18e4) return raw;
|
|
622800
|
-
return 6e4;
|
|
622801
|
-
}
|
|
622802
624475
|
/**
|
|
622803
624476
|
* Forcibly cancel every in-flight + trailing router-coalescer entry.
|
|
622804
624477
|
* Used on bridge stop() and by the watchdog if it detects the coalescer
|
|
@@ -622994,30 +624667,52 @@ ${stimulationProbe.context}`,
|
|
|
622994
624667
|
"",
|
|
622995
624668
|
context2
|
|
622996
624669
|
].filter(Boolean).join("\n");
|
|
622997
|
-
const
|
|
622998
|
-
|
|
622999
|
-
|
|
623000
|
-
|
|
623001
|
-
|
|
623002
|
-
|
|
623003
|
-
|
|
623004
|
-
|
|
623005
|
-
|
|
623006
|
-
|
|
623007
|
-
|
|
623008
|
-
|
|
623009
|
-
|
|
623010
|
-
|
|
623011
|
-
|
|
623012
|
-
|
|
623013
|
-
|
|
623014
|
-
|
|
623015
|
-
|
|
623016
|
-
|
|
623017
|
-
|
|
623018
|
-
|
|
623019
|
-
|
|
623020
|
-
|
|
624670
|
+
const brokerSnap = getModelBroker().snapshot();
|
|
624671
|
+
const idleSlotRatio = brokerSnap.slots.capacity > 0 ? 1 - brokerSnap.slots.inUse / brokerSnap.slots.capacity : 1;
|
|
624672
|
+
const consolidatedMode = idleSlotRatio < 0.5 || process.env["OMNIUS_TG_FORCE_CONSOLIDATED"] === "1";
|
|
624673
|
+
let reflectionNotes;
|
|
624674
|
+
let reflectionContext;
|
|
624675
|
+
if (consolidatedMode) {
|
|
624676
|
+
reflectionNotes = this.fallbackTelegramSilentReflectionNotes(msg, "consolidated mode: reflection computed inline by router");
|
|
624677
|
+
reflectionContext = [
|
|
624678
|
+
"## Consolidated Reflection (you produce these fields as part of the same JSON)",
|
|
624679
|
+
"Before emitting your final decision, internally reflect on:",
|
|
624680
|
+
" silent_disposition: what happens silently with this message",
|
|
624681
|
+
" mental_note: concise observation of the turn",
|
|
624682
|
+
" memory_note: what scoped memory should retain or connect",
|
|
624683
|
+
" relationship_note: relationship/thread implication",
|
|
624684
|
+
" procedure_note: active tree/branch/abort implication",
|
|
624685
|
+
" voice_note: final voice implication if a reply happens",
|
|
624686
|
+
" scenario_note: identified scenario and transition state",
|
|
624687
|
+
" scenario_id / scenario_label / scenario_confidence / scenario_objective / scenario_state_loop",
|
|
624688
|
+
"Use these as your attention substrate, then decide route/should_reply/confidence. Return all fields in ONE JSON."
|
|
624689
|
+
].join("\n");
|
|
624690
|
+
} else {
|
|
624691
|
+
reflectionNotes = await this.inferTelegramSilentReflectionNotes(
|
|
624692
|
+
backend,
|
|
624693
|
+
sessionKey,
|
|
624694
|
+
msg,
|
|
624695
|
+
toolContext,
|
|
624696
|
+
personaContext,
|
|
624697
|
+
observationContext,
|
|
624698
|
+
config.timeoutMs
|
|
624699
|
+
);
|
|
624700
|
+
reflectionContext = [
|
|
624701
|
+
"## Silent Reflection Deliverables (must inform the attention decision)",
|
|
624702
|
+
`silent_disposition: ${reflectionNotes.silentDisposition ?? "heard and retained"}`,
|
|
624703
|
+
`mental_note: ${reflectionNotes.mentalNote ?? "no additional observation"}`,
|
|
624704
|
+
`memory_note: ${reflectionNotes.memoryNote ?? "message retained in scoped memory"}`,
|
|
624705
|
+
`relationship_note: ${reflectionNotes.relationshipNote ?? "no relationship change inferred"}`,
|
|
624706
|
+
`procedure_note: ${reflectionNotes.procedureNote ?? "active voice-soul tree loaded; no procedure change inferred"}`,
|
|
624707
|
+
`voice_note: ${reflectionNotes.voiceNote ?? "final voice unchanged unless reply is emitted"}`,
|
|
624708
|
+
`scenario_note: ${reflectionNotes.scenarioNote ?? "scenario classification unavailable"}`,
|
|
624709
|
+
`scenario_id: ${reflectionNotes.scenarioId ?? "unclassified"}`,
|
|
624710
|
+
`scenario_label: ${reflectionNotes.scenarioLabel ?? "Unclassified"}`,
|
|
624711
|
+
`scenario_confidence: ${reflectionNotes.scenarioConfidence !== void 0 ? reflectionNotes.scenarioConfidence.toFixed(2) : "0.00"}`,
|
|
624712
|
+
`scenario_objective: ${reflectionNotes.scenarioObjective ?? "pending model-derived classifier output"}`,
|
|
624713
|
+
`scenario_state_loop: ${reflectionNotes.scenarioStateLoop ?? "pending model-derived classifier output"}`
|
|
624714
|
+
].join("\n");
|
|
624715
|
+
}
|
|
623021
624716
|
const userPrompt = [
|
|
623022
624717
|
`You are the Telegram live routing and reply-discretion model.`,
|
|
623023
624718
|
`The attention decision must happen after reading the silent reflection deliverables below. The notes are not decorations: they are the decision substrate.`,
|
|
@@ -623049,10 +624744,13 @@ ${stimulationProbe.context}`,
|
|
|
623049
624744
|
``,
|
|
623050
624745
|
observationContext,
|
|
623051
624746
|
``,
|
|
624747
|
+
formatSystemObservations(sessionKey),
|
|
624748
|
+
``,
|
|
623052
624749
|
`Current Telegram message text (untrusted user data):
|
|
623053
624750
|
${this.quoteTelegramContextBlock(msg.text, 1200)}`
|
|
623054
624751
|
].filter(Boolean).join("\n");
|
|
623055
624752
|
const diagnostics = {};
|
|
624753
|
+
const routerStartMs = Date.now();
|
|
623056
624754
|
try {
|
|
623057
624755
|
const result = await this.telegramRouterJsonCompletion(backend, {
|
|
623058
624756
|
messages: [
|
|
@@ -623069,6 +624767,21 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
|
|
|
623069
624767
|
think: false
|
|
623070
624768
|
}, diagnostics);
|
|
623071
624769
|
const text = result.choices[0]?.message?.content ?? "";
|
|
624770
|
+
const routerLatencyMs = Date.now() - routerStartMs;
|
|
624771
|
+
try {
|
|
624772
|
+
const pidReg = getPidRegistry();
|
|
624773
|
+
const modelKey = this.agentConfig?.model ?? "?";
|
|
624774
|
+
pidReg.sample(`tier1.${modelKey}`, routerLatencyMs);
|
|
624775
|
+
pidReg.sample(`tier2.${modelKey}`, routerLatencyMs);
|
|
624776
|
+
} catch {
|
|
624777
|
+
}
|
|
624778
|
+
try {
|
|
624779
|
+
const samples = this.telegramComponentSamplesForSession(sessionKey);
|
|
624780
|
+
if (samples.length > 0) {
|
|
624781
|
+
getComponentBenefitRegistry().recordOutcome(sessionKey, samples, text);
|
|
624782
|
+
}
|
|
624783
|
+
} catch {
|
|
624784
|
+
}
|
|
623072
624785
|
const parsed = parseTelegramInteractionDecision(text, forcedRoute, {
|
|
623073
624786
|
defaultShouldReply: false
|
|
623074
624787
|
});
|
|
@@ -627303,11 +629016,18 @@ ${text}`.trim());
|
|
|
627303
629016
|
};
|
|
627304
629017
|
const replyParameters = idx === 0 ? telegramReplyParameters(replyToMessageId) : void 0;
|
|
627305
629018
|
if (replyParameters) body["reply_parameters"] = replyParameters;
|
|
629019
|
+
const sessionKeyForObs = String(chatId);
|
|
627306
629020
|
try {
|
|
627307
629021
|
const result = await this.apiCall("sendMessage", body);
|
|
627308
629022
|
if (result.ok === false) throw new Error(String(result.description || "Telegram sendMessage failed"));
|
|
627309
629023
|
this.state.messagesSent++;
|
|
627310
629024
|
if (sentId === null) sentId = result.result?.message_id ?? null;
|
|
629025
|
+
getSoulObservationStream().emit({
|
|
629026
|
+
kind: "telegram.send.success",
|
|
629027
|
+
sessionKey: sessionKeyForObs,
|
|
629028
|
+
messageId: result.result?.message_id ?? void 0,
|
|
629029
|
+
ts: Date.now()
|
|
629030
|
+
});
|
|
627311
629031
|
} catch {
|
|
627312
629032
|
const plain = chunk.replace(/<[^>]+>/g, "");
|
|
627313
629033
|
const fallbackBody = { chat_id: chatId, text: plain };
|
|
@@ -627317,8 +629037,32 @@ ${text}`.trim());
|
|
|
627317
629037
|
if (result.ok === false) throw new Error(String(result.description || "Telegram sendMessage failed"));
|
|
627318
629038
|
this.state.messagesSent++;
|
|
627319
629039
|
if (sentId === null) sentId = result.result?.message_id ?? null;
|
|
629040
|
+
getSoulObservationStream().emit({
|
|
629041
|
+
kind: "telegram.send.success",
|
|
629042
|
+
sessionKey: sessionKeyForObs,
|
|
629043
|
+
messageId: result.result?.message_id ?? void 0,
|
|
629044
|
+
ts: Date.now()
|
|
629045
|
+
});
|
|
627320
629046
|
} catch (err) {
|
|
627321
629047
|
this.tuiWrite(() => renderWarning(`Failed to send Telegram message: ${err instanceof Error ? err.message : String(err)}`));
|
|
629048
|
+
const errStr = err instanceof Error ? err.message : String(err);
|
|
629049
|
+
const lc = errStr.toLowerCase();
|
|
629050
|
+
if (/(not enough rights|forbidden|chat_write_forbidden|user_banned|kicked|chat_admin_required)/.test(lc)) {
|
|
629051
|
+
getSoulObservationStream().emit({
|
|
629052
|
+
kind: "telegram.send.forbidden",
|
|
629053
|
+
sessionKey: sessionKeyForObs,
|
|
629054
|
+
reason: errStr,
|
|
629055
|
+
ts: Date.now()
|
|
629056
|
+
});
|
|
629057
|
+
} else if (/too many requests|retry after/.test(lc)) {
|
|
629058
|
+
const m2 = lc.match(/retry after (\d+)/);
|
|
629059
|
+
getSoulObservationStream().emit({
|
|
629060
|
+
kind: "telegram.send.rate_limited",
|
|
629061
|
+
sessionKey: sessionKeyForObs,
|
|
629062
|
+
retryAfterSec: m2 ? parseInt(m2[1], 10) : void 0,
|
|
629063
|
+
ts: Date.now()
|
|
629064
|
+
});
|
|
629065
|
+
}
|
|
627322
629066
|
}
|
|
627323
629067
|
}
|
|
627324
629068
|
}
|
|
@@ -629030,12 +630774,12 @@ var direct_input_exports = {};
|
|
|
629030
630774
|
__export(direct_input_exports, {
|
|
629031
630775
|
DirectInput: () => DirectInput
|
|
629032
630776
|
});
|
|
629033
|
-
import { EventEmitter as
|
|
630777
|
+
import { EventEmitter as EventEmitter12 } from "node:events";
|
|
629034
630778
|
var DirectInput;
|
|
629035
630779
|
var init_direct_input = __esm({
|
|
629036
630780
|
"packages/cli/src/tui/direct-input.ts"() {
|
|
629037
630781
|
"use strict";
|
|
629038
|
-
DirectInput = class extends
|
|
630782
|
+
DirectInput = class extends EventEmitter12 {
|
|
629039
630783
|
/** Current input line text */
|
|
629040
630784
|
line = "";
|
|
629041
630785
|
/** Cursor position within .line (0-based) */
|
|
@@ -629754,8 +631498,8 @@ var voicechat_exports = {};
|
|
|
629754
631498
|
__export(voicechat_exports, {
|
|
629755
631499
|
VoiceChatSession: () => VoiceChatSession
|
|
629756
631500
|
});
|
|
629757
|
-
import { EventEmitter as
|
|
629758
|
-
function
|
|
631501
|
+
import { EventEmitter as EventEmitter13 } from "node:events";
|
|
631502
|
+
function clamp0111(x) {
|
|
629759
631503
|
return x < 0 ? 0 : x > 1 ? 1 : x;
|
|
629760
631504
|
}
|
|
629761
631505
|
function alnumRatio(s2) {
|
|
@@ -629794,9 +631538,9 @@ function computeSignalFromText(text, confidence2) {
|
|
|
629794
631538
|
else score = 0.15;
|
|
629795
631539
|
score -= repeatingCharPenalty(t2) * 0.4;
|
|
629796
631540
|
if (typeof confidence2 === "number" && !Number.isNaN(confidence2)) {
|
|
629797
|
-
score = 0.7 * score + 0.3 *
|
|
631541
|
+
score = 0.7 * score + 0.3 * clamp0111(confidence2);
|
|
629798
631542
|
}
|
|
629799
|
-
return
|
|
631543
|
+
return clamp0111(score);
|
|
629800
631544
|
}
|
|
629801
631545
|
function truncateForLog(s2, n2) {
|
|
629802
631546
|
return s2.length <= n2 ? s2 : s2.slice(0, n2 - 1) + "…";
|
|
@@ -629864,7 +631608,7 @@ Rules:
|
|
|
629864
631608
|
- Prefer tools for factual queries; otherwise, answer directly with a short reply.`;
|
|
629865
631609
|
MIN_SIGNAL_SCORE = 0.4;
|
|
629866
631610
|
NOISE_ONLY_RE = /^(?:[.·…\s,;:!?\-–—_()\[\]{}"'`]+|(?:uh|um|erm|hmm|mm+|uhh+|umm+)[\s.!?]*)+$/i;
|
|
629867
|
-
VoiceChatSession = class extends
|
|
631611
|
+
VoiceChatSession = class extends EventEmitter13 {
|
|
629868
631612
|
voice;
|
|
629869
631613
|
listen;
|
|
629870
631614
|
backendUrl;
|
|
@@ -630066,7 +631810,7 @@ Rules:
|
|
|
630066
631810
|
}, MAX_SEGMENT_MS);
|
|
630067
631811
|
}
|
|
630068
631812
|
this.captureBuffer = text;
|
|
630069
|
-
this.lastSignalScore = typeof snr === "number" && !Number.isNaN(snr) ?
|
|
631813
|
+
this.lastSignalScore = typeof snr === "number" && !Number.isNaN(snr) ? clamp0111(snr) : computeSignalFromText(text, confidence2);
|
|
630070
631814
|
this.emit("snr", { score: this.lastSignalScore });
|
|
630071
631815
|
this.onPartialTranscript(text);
|
|
630072
631816
|
if (this.silenceTimer) clearTimeout(this.silenceTimer);
|
|
@@ -630393,7 +632137,7 @@ __export(voice_runtime_exports, {
|
|
|
630393
632137
|
synthesizeToWav: () => synthesizeToWav,
|
|
630394
632138
|
unregisterClient: () => unregisterClient
|
|
630395
632139
|
});
|
|
630396
|
-
import { EventEmitter as
|
|
632140
|
+
import { EventEmitter as EventEmitter14 } from "node:events";
|
|
630397
632141
|
function getVoiceEngine() {
|
|
630398
632142
|
if (!_voiceEngine) {
|
|
630399
632143
|
_voiceEngine = new VoiceEngine();
|
|
@@ -630405,7 +632149,7 @@ function getDaemonListenEngine() {
|
|
|
630405
632149
|
return _listenEngine;
|
|
630406
632150
|
}
|
|
630407
632151
|
function getVoiceBus() {
|
|
630408
|
-
if (!_bus) _bus = new
|
|
632152
|
+
if (!_bus) _bus = new EventEmitter14();
|
|
630409
632153
|
return _bus;
|
|
630410
632154
|
}
|
|
630411
632155
|
function getRuntimeStatus() {
|
|
@@ -661368,8 +663112,8 @@ NEW TASK: ${fullInput}`;
|
|
|
661368
663112
|
const updateInfo = await checkForUpdate(version4);
|
|
661369
663113
|
if (updateInfo) {
|
|
661370
663114
|
_autoUpdatedThisSession = true;
|
|
661371
|
-
const { exec:
|
|
661372
|
-
|
|
663115
|
+
const { exec: exec6 } = await import("node:child_process");
|
|
663116
|
+
exec6(
|
|
661373
663117
|
`npm install -g omnius@latest --prefer-online`,
|
|
661374
663118
|
{ timeout: 18e4 },
|
|
661375
663119
|
(err) => {
|