omnius 1.0.133 → 1.0.135

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1328,6 +1328,893 @@ var init_tool_executor = __esm({
1328
1328
  }
1329
1329
  });
1330
1330
 
1331
+ // packages/execution/dist/model-broker.js
1332
+ import { EventEmitter } from "node:events";
1333
+ import { totalmem, freemem } from "node:os";
1334
+ import { exec } from "node:child_process";
1335
/**
 * Sample system memory through `os.totalmem()` / `os.freemem()`.
 *
 * @returns {{ total: number, free: number, used: number }} Sizes in MiB.
 *   `total` and `free` are rounded independently; `used` is `total - free`.
 */
function ramSnapshotMB() {
  const BYTES_PER_MB = 1024 * 1024;
  const toMB = (bytes) => Math.round(bytes / BYTES_PER_MB);
  const total = toMB(totalmem());
  const free = toMB(freemem());
  const used = total - free;
  return { total, free, used };
}
1340
/**
 * Sample GPU memory by running `nvidia-smi` and summing every reported GPU.
 *
 * The module-level `_nvSmiAvailable` flag caches whether the probe works:
 * once it is `false` this function returns `null` without spawning anything.
 *
 * @returns {Promise<{ total: number, used: number, free: number } | null>}
 *   Totals as printed by nvidia-smi with `nounits`, or `null` when the probe
 *   failed or reported no memory.
 */
async function vramSnapshotMB() {
  if (_nvSmiAvailable === false) {
    return null;
  }
  let stdout;
  try {
    stdout = await new Promise((resolve, reject) => {
      // No `2>/dev/null`: exec() already captures stderr into a buffer, and the
      // redirect makes the command fail under Windows cmd.exe.
      exec(
        "nvidia-smi --query-gpu=memory.total,memory.used,memory.free --format=csv,noheader,nounits",
        { encoding: "utf8", timeout: 3e3 },
        (err, out) => (err ? reject(err) : resolve(out))
      );
    });
  } catch (err) {
    // `killed` is set when exec() terminated the child itself (timeout or
    // maxBuffer). That is transient, so only other failures disable the probe.
    if (!err?.killed) {
      _nvSmiAvailable = false;
    }
    return null;
  }
  _nvSmiAvailable = true;
  const sums = { total: 0, used: 0, free: 0 };
  for (const line of stdout.trim().split("\n")) {
    const cells = line.split(",").map((cell) => cell.trim());
    if (cells.length < 3) {
      continue;
    }
    sums.total += Number.parseInt(cells[0] ?? "0", 10) || 0;
    sums.used += Number.parseInt(cells[1] ?? "0", 10) || 0;
    sums.free += Number.parseInt(cells[2] ?? "0", 10) || 0;
  }
  return sums.total > 0 ? sums : null;
}
1365
/** Convenience accessor: returns whatever `ModelBroker.getInstance()` returns. */
function getModelBroker() {
  const broker = ModelBroker.getInstance();
  return broker;
}
1368
+ var DEFAULT_RAM_HEADROOM_MB, DEFAULT_VRAM_HEADROOM_MB, DEFAULT_IDLE_EVICT_MS, DEFAULT_POLL_MS, DEFAULT_INFLIGHT_WAIT_MS, DEFAULT_SLOT_CAPACITY, DEFAULT_QUEUE_CAPACITY, THROUGHPUT_EMA_ALPHA, THROUGHPUT_INITIAL_TPS, STUCK_INFLIGHT_DIAGNOSTIC_MS, ModelBroker, _nvSmiAvailable;
1369
+ var init_model_broker = __esm({
1370
+ "packages/execution/dist/model-broker.js"() {
1371
+ "use strict";
1372
+ DEFAULT_RAM_HEADROOM_MB = 2048;
1373
+ DEFAULT_VRAM_HEADROOM_MB = 1024;
1374
+ DEFAULT_IDLE_EVICT_MS = 5 * 60 * 1e3;
1375
+ DEFAULT_POLL_MS = 4e3;
1376
+ DEFAULT_INFLIGHT_WAIT_MS = 6e4;
1377
+ DEFAULT_SLOT_CAPACITY = 4;
1378
+ DEFAULT_QUEUE_CAPACITY = 50;
1379
+ THROUGHPUT_EMA_ALPHA = 0.2;
1380
+ THROUGHPUT_INITIAL_TPS = 25;
1381
+ STUCK_INFLIGHT_DIAGNOSTIC_MS = 5 * 60 * 1e3;
1382
+ ModelBroker = class _ModelBroker {
1383
+ static _instance = null;
1384
+ /** Loaded model registry keyed by `${host}:${name}`. */
1385
+ _loaded = /* @__PURE__ */ new Map();
1386
+ /** In-flight load promises keyed by `${host}:${name}`. */
1387
+ _inflight = /* @__PURE__ */ new Map();
1388
+ /** Fallback chains keyed by domain. */
1389
+ _fallbacks = /* @__PURE__ */ new Map();
1390
+ /** Cached n_ctx_train per Ollama model. */
1391
+ _ctxTrainCache = /* @__PURE__ */ new Map();
1392
+ /** Cached "model exists in Ollama" probes (true / false). */
1393
+ _ollamaModelExists = /* @__PURE__ */ new Map();
1394
+ /** Event emitter — typed via `on<K>(event: K, listener: BrokerEvents[K])`. */
1395
+ _events = new EventEmitter();
1396
+ /** Poll timer. */
1397
+ _pollTimer = null;
1398
+ /** Last full snapshot. */
1399
+ _lastSnapshot = null;
1400
+ /** Configured Ollama base URL. */
1401
+ _ollamaBaseUrl = process.env["OLLAMA_HOST"] || "http://127.0.0.1:11434";
1402
+ /** Configured thresholds (mutable for /broker tuning). */
1403
+ ramHeadroomMB = DEFAULT_RAM_HEADROOM_MB;
1404
+ vramHeadroomMB = DEFAULT_VRAM_HEADROOM_MB;
1405
+ idleEvictMs = DEFAULT_IDLE_EVICT_MS;
1406
+ /** Inference slot capacity (auto-tunes from Ollama pool size when known). */
1407
+ slotCapacity = DEFAULT_SLOT_CAPACITY;
1408
+ /** Maximum queue depth before queue pressure is emitted. */
1409
+ queueCapacity = DEFAULT_QUEUE_CAPACITY;
1410
+ // ── Inference slot tracking ─────────────────────────────────────────
1411
+ /** Active slots keyed by slot id. */
1412
+ _activeSlots = /* @__PURE__ */ new Map();
1413
+ /** Reserved slots per sessionKey (1 reserved slot per active chat). */
1414
+ _reservedBySession = /* @__PURE__ */ new Map();
1415
+ // sessionKey -> slot id
1416
+ /** Shared (non-reserved) queue. FIFO with priority insertion. */
1417
+ _slotQueue = [];
1418
+ /** Per-model throughput tracking. */
1419
+ _throughput = /* @__PURE__ */ new Map();
1420
+ /** Monotonic counter for slot ids. */
1421
+ _slotIdSeq = 0;
1422
+ static getInstance() {
1423
+ if (!_ModelBroker._instance)
1424
+ _ModelBroker._instance = new _ModelBroker();
1425
+ return _ModelBroker._instance;
1426
+ }
1427
+ /** Reset (test-only). */
1428
+ static resetInstance() {
1429
+ if (_ModelBroker._instance?._pollTimer)
1430
+ clearInterval(_ModelBroker._instance._pollTimer);
1431
+ _ModelBroker._instance = null;
1432
+ }
1433
+ constructor() {
1434
+ this.registerDefaultFallbacks();
1435
+ }
1436
+ // ------------------------------------------------------------------
1437
+ // Public API — events
1438
+ // ------------------------------------------------------------------
1439
+ on(event, listener) {
1440
+ this._events.on(event, listener);
1441
+ return this;
1442
+ }
1443
+ off(event, listener) {
1444
+ this._events.off(event, listener);
1445
+ return this;
1446
+ }
1447
+ emit(event, ...args) {
1448
+ this._events.emit(event, ...args);
1449
+ }
1450
+ // ------------------------------------------------------------------
1451
+ // Public API — polling
1452
+ // ------------------------------------------------------------------
1453
+ /** Start background polling of Ollama /api/ps and nvidia-smi. */
1454
+ startPolling(intervalMs = DEFAULT_POLL_MS) {
1455
+ if (this._pollTimer)
1456
+ return;
1457
+ this._pollTimer = setInterval(() => {
1458
+ this.pollOnce().catch(() => {
1459
+ });
1460
+ }, intervalMs);
1461
+ this.pollOnce().catch(() => {
1462
+ });
1463
+ }
1464
+ stopPolling() {
1465
+ if (this._pollTimer) {
1466
+ clearInterval(this._pollTimer);
1467
+ this._pollTimer = null;
1468
+ }
1469
+ }
1470
+ /** Configure Ollama base URL (called from cli config wiring). */
1471
+ setOllamaBaseUrl(url) {
1472
+ this._ollamaBaseUrl = url;
1473
+ }
1474
+ /** One poll cycle — refreshes /api/ps and emits snapshot. */
1475
+ async pollOnce() {
1476
+ await Promise.all([
1477
+ this.refreshOllamaPs().catch(() => {
1478
+ })
1479
+ // VRAM total/free comes from system-metrics; broker computes its own snapshot
1480
+ ]);
1481
+ const snapshot = this.buildSnapshot();
1482
+ this._lastSnapshot = snapshot;
1483
+ this.emit("snapshot", snapshot);
1484
+ this.checkPressure(snapshot);
1485
+ return snapshot;
1486
+ }
1487
+ /** Best-known current snapshot. */
1488
+ snapshot() {
1489
+ return this._lastSnapshot ?? this.buildSnapshot();
1490
+ }
1491
+ // ------------------------------------------------------------------
1492
+ // Public API — fallback registry
1493
+ // ------------------------------------------------------------------
1494
+ /** Register a fallback chain for a domain. Later entries are tried later. */
1495
+ setFallbackChain(domain, chain) {
1496
+ this._fallbacks.set(domain, [...chain]);
1497
+ }
1498
+ getFallbackChain(domain) {
1499
+ return this._fallbacks.get(domain) ?? [];
1500
+ }
1501
+ // ------------------------------------------------------------------
1502
+ // Public API — load decisioning
1503
+ // ------------------------------------------------------------------
1504
+ /**
1505
+ * Pre-flight a model-load request. Always call this before allocating a model.
1506
+ *
1507
+ * Decisions:
1508
+ * - ok: proceed; use `effectiveNumCtx` if returned
1509
+ * - wait-for-inflight: another caller is loading the same model; await `promise`
1510
+ * - evict: caller should free `evictTargets` (broker calls evict
1511
+ * hooks itself when possible) then retry
1512
+ * - degrade: caller should reissue with `fallback`
1513
+ * - reject: nothing viable — caller should error out
1514
+ */
1515
+ async ensureModelLoadable(spec) {
1516
+ const key = this.keyOf(spec);
1517
+ const inflight = this._inflight.get(key);
1518
+ if (inflight) {
1519
+ return { kind: "wait-for-inflight", promise: inflight.promise };
1520
+ }
1521
+ const existing = this._loaded.get(key);
1522
+ if (existing) {
1523
+ existing.lastUsedAt = Date.now();
1524
+ return { kind: "ok", effectiveNumCtx: existing.numCtx, note: "already-loaded" };
1525
+ }
1526
+ let effectiveNumCtx;
1527
+ if (spec.host === "ollama" && spec.requestedNumCtx) {
1528
+ const trainCtx = await this.getNctxTrain(spec.name);
1529
+ if (trainCtx && spec.requestedNumCtx > trainCtx) {
1530
+ effectiveNumCtx = trainCtx;
1531
+ } else {
1532
+ effectiveNumCtx = spec.requestedNumCtx;
1533
+ }
1534
+ } else if (spec.host === "ollama") {
1535
+ const trainCtx = await this.getNctxTrain(spec.name);
1536
+ if (trainCtx)
1537
+ effectiveNumCtx = trainCtx;
1538
+ }
1539
+ const estVram = spec.estimatedVramMB ?? this.estimateFootprintVramMB(spec);
1540
+ const estRam = spec.estimatedRamMB ?? this.estimateFootprintRamMB(spec);
1541
+ const ram = ramSnapshotMB();
1542
+ const vram = await vramSnapshotMB();
1543
+ const ramFitsAfter = ram.free - estRam >= this.ramHeadroomMB;
1544
+ const vramFitsAfter = vram ? vram.free - estVram >= this.vramHeadroomMB : true;
1545
+ if (ramFitsAfter && vramFitsAfter) {
1546
+ const promise = Promise.resolve({ kind: "ok", effectiveNumCtx });
1547
+ this._inflight.set(key, { startedMs: Date.now(), owner: spec.owner, promise });
1548
+ setTimeout(() => this._inflight.delete(key), spec.loadTimeoutMs ?? DEFAULT_INFLIGHT_WAIT_MS).unref?.();
1549
+ return { kind: "ok", effectiveNumCtx };
1550
+ }
1551
+ const evictTargets = this.pickEvictionCandidates({
1552
+ needVramMB: vramFitsAfter ? 0 : estVram + this.vramHeadroomMB - (vram?.free ?? 0),
1553
+ needRamMB: ramFitsAfter ? 0 : estRam + this.ramHeadroomMB - ram.free,
1554
+ requestingPriority: spec.priority ?? 0,
1555
+ requestingDomain: spec.domain
1556
+ });
1557
+ if (evictTargets.length > 0) {
1558
+ return { kind: "evict", evictTargets, effectiveNumCtx };
1559
+ }
1560
+ const fallback = await this.findRunnableFallback(spec);
1561
+ if (fallback) {
1562
+ this.emit("degraded", spec, fallback, "insufficient-memory-no-evictable");
1563
+ return { kind: "degrade", fallback, reason: "insufficient-memory-no-evictable" };
1564
+ }
1565
+ const reason = `insufficient resources (need ~${estRam}MB RAM, ~${estVram}MB VRAM; free ${ram.free}MB RAM, ${vram ? vram.free : "?"}MB VRAM) and no evictable / fallback models`;
1566
+ this.emit("rejected", spec, reason);
1567
+ return { kind: "reject", reason };
1568
+ }
1569
+ /**
1570
+ * Register a model that has been successfully loaded.
1571
+ * Callers MUST call this after a successful load so the broker can track LRU.
1572
+ */
1573
+ registerLoaded(model) {
1574
+ const now = Date.now();
1575
+ const m2 = {
1576
+ ...model,
1577
+ loadedAt: model.loadedAt ?? now,
1578
+ lastUsedAt: model.lastUsedAt ?? now
1579
+ };
1580
+ this._loaded.set(m2.key, m2);
1581
+ this._inflight.delete(m2.key);
1582
+ this.emit("loaded", m2);
1583
+ return m2;
1584
+ }
1585
+ /** Update last-used timestamp on every successful inference. */
1586
+ touch(host, name10) {
1587
+ const m2 = this._loaded.get(`${host}:${name10}`);
1588
+ if (m2)
1589
+ m2.lastUsedAt = Date.now();
1590
+ }
1591
+ /** Unregister a model (called when caller knows it has unloaded). */
1592
+ unregisterLoaded(host, name10, reason = "caller-unloaded") {
1593
+ const key = `${host}:${name10}`;
1594
+ const m2 = this._loaded.get(key);
1595
+ if (m2) {
1596
+ this._loaded.delete(key);
1597
+ this.emit("evicted", m2, reason);
1598
+ }
1599
+ }
1600
+ /** Clear an inflight marker without registering a load (failed/aborted). */
1601
+ clearInflight(host, name10) {
1602
+ this._inflight.delete(`${host}:${name10}`);
1603
+ }
1604
+ /**
1605
+ * Best-effort eviction of a tracked model. Returns true if the broker was
1606
+ * able to actively unload (e.g. Ollama keep_alive=0); false if it just
1607
+ * unregistered (caller must clean up its own subprocess).
1608
+ */
1609
+ async evict(host, name10, reason = "broker-evict") {
1610
+ const key = `${host}:${name10}`;
1611
+ const m2 = this._loaded.get(key);
1612
+ if (!m2)
1613
+ return false;
1614
+ let actively = false;
1615
+ if (host === "ollama") {
1616
+ actively = await this.ollamaUnload(name10).catch(() => false);
1617
+ }
1618
+ this._loaded.delete(key);
1619
+ this.emit("evicted", m2, reason);
1620
+ return actively;
1621
+ }
1622
+ // ------------------------------------------------------------------
1623
+ // Internal — Ollama
1624
+ // ------------------------------------------------------------------
1625
+ /** Fetch Ollama's runtime model list and reconcile against our registry. */
1626
+ async refreshOllamaPs() {
1627
+ try {
1628
+ const res = await fetch(`${this._ollamaBaseUrl}/api/ps`, {
1629
+ signal: AbortSignal.timeout(3e3)
1630
+ });
1631
+ if (!res.ok)
1632
+ return;
1633
+ const data = await res.json();
1634
+ const seen = /* @__PURE__ */ new Set();
1635
+ const now = Date.now();
1636
+ for (const m2 of data.models ?? []) {
1637
+ const key = `ollama:${m2.name}`;
1638
+ seen.add(key);
1639
+ const vramMB = Math.round((m2.size_vram ?? 0) / (1024 * 1024));
1640
+ const ramMB = Math.round(((m2.size ?? 0) - (m2.size_vram ?? 0)) / (1024 * 1024));
1641
+ const existing = this._loaded.get(key);
1642
+ if (existing) {
1643
+ existing.vramMB = vramMB || existing.vramMB;
1644
+ existing.ramMB = ramMB || existing.ramMB;
1645
+ } else {
1646
+ const tracked = this.registerLoaded({
1647
+ key,
1648
+ name: m2.name,
1649
+ domain: this.guessOllamaDomain(m2.name),
1650
+ host: "ollama",
1651
+ owner: "external-ollama",
1652
+ vramMB,
1653
+ ramMB,
1654
+ priority: 0,
1655
+ loadedAt: now,
1656
+ lastUsedAt: now
1657
+ });
1658
+ void tracked;
1659
+ }
1660
+ }
1661
+ for (const [key, m2] of this._loaded) {
1662
+ if (m2.host === "ollama" && !seen.has(key)) {
1663
+ this._loaded.delete(key);
1664
+ this.emit("evicted", m2, "ollama-unloaded");
1665
+ }
1666
+ }
1667
+ } catch {
1668
+ }
1669
+ }
1670
+ /** Force Ollama to unload a model by calling /api/generate keep_alive=0. */
1671
+ async ollamaUnload(modelName) {
1672
+ try {
1673
+ const res = await fetch(`${this._ollamaBaseUrl}/api/generate`, {
1674
+ method: "POST",
1675
+ headers: { "Content-Type": "application/json" },
1676
+ body: JSON.stringify({ model: modelName, keep_alive: 0 }),
1677
+ signal: AbortSignal.timeout(5e3)
1678
+ });
1679
+ return res.ok;
1680
+ } catch {
1681
+ return false;
1682
+ }
1683
+ }
1684
+ /** Pull n_ctx_train for an Ollama model via /api/show; cached. */
1685
+ async getNctxTrain(modelName) {
1686
+ if (this._ctxTrainCache.has(modelName))
1687
+ return this._ctxTrainCache.get(modelName);
1688
+ try {
1689
+ const res = await fetch(`${this._ollamaBaseUrl}/api/show`, {
1690
+ method: "POST",
1691
+ headers: { "Content-Type": "application/json" },
1692
+ body: JSON.stringify({ name: modelName }),
1693
+ signal: AbortSignal.timeout(5e3)
1694
+ });
1695
+ if (!res.ok)
1696
+ return null;
1697
+ const data = await res.json();
1698
+ const info = data.model_info ?? {};
1699
+ const arch3 = info["general.architecture"];
1700
+ let trainCtx;
1701
+ if (arch3 && typeof info[`${arch3}.context_length`] === "number") {
1702
+ trainCtx = info[`${arch3}.context_length`];
1703
+ } else {
1704
+ for (const [k, v] of Object.entries(info)) {
1705
+ if (k.endsWith(".context_length") && typeof v === "number") {
1706
+ trainCtx = v;
1707
+ break;
1708
+ }
1709
+ }
1710
+ }
1711
+ if (trainCtx && Number.isFinite(trainCtx) && trainCtx > 0) {
1712
+ this._ctxTrainCache.set(modelName, trainCtx);
1713
+ return trainCtx;
1714
+ }
1715
+ return null;
1716
+ } catch {
1717
+ return null;
1718
+ }
1719
+ }
1720
+ /** Probe whether a model exists in Ollama (cached). */
1721
+ async ollamaModelExists(modelName) {
1722
+ if (this._ollamaModelExists.has(modelName))
1723
+ return this._ollamaModelExists.get(modelName);
1724
+ try {
1725
+ const res = await fetch(`${this._ollamaBaseUrl}/api/show`, {
1726
+ method: "POST",
1727
+ headers: { "Content-Type": "application/json" },
1728
+ body: JSON.stringify({ name: modelName }),
1729
+ signal: AbortSignal.timeout(3e3)
1730
+ });
1731
+ const exists2 = res.ok;
1732
+ this._ollamaModelExists.set(modelName, exists2);
1733
+ return exists2;
1734
+ } catch {
1735
+ this._ollamaModelExists.set(modelName, false);
1736
+ return false;
1737
+ }
1738
+ }
1739
+ // ------------------------------------------------------------------
1740
+ // Internal — LRU eviction selection
1741
+ // ------------------------------------------------------------------
1742
+ pickEvictionCandidates(req2) {
1743
+ const now = Date.now();
1744
+ const sameDomainOk = (m2) => (
1745
+ // never evict the requesting domain's only loaded model unless multiple exist
1746
+ m2.domain !== req2.requestingDomain || this.countByDomain(req2.requestingDomain) > 1
1747
+ );
1748
+ const idle = (m2) => now - m2.lastUsedAt > this.idleEvictMs;
1749
+ const evictable = [...this._loaded.values()].filter((m2) => m2.priority <= req2.requestingPriority).filter(sameDomainOk).sort((a2, b) => {
1750
+ const aIdle = idle(a2) ? 0 : 1;
1751
+ const bIdle = idle(b) ? 0 : 1;
1752
+ if (aIdle !== bIdle)
1753
+ return aIdle - bIdle;
1754
+ return a2.lastUsedAt - b.lastUsedAt;
1755
+ });
1756
+ const targets = [];
1757
+ let vramFreed = 0;
1758
+ let ramFreed = 0;
1759
+ for (const m2 of evictable) {
1760
+ if (vramFreed >= req2.needVramMB && ramFreed >= req2.needRamMB)
1761
+ break;
1762
+ targets.push(m2);
1763
+ vramFreed += m2.vramMB;
1764
+ ramFreed += m2.ramMB;
1765
+ }
1766
+ if (vramFreed >= req2.needVramMB && ramFreed >= req2.needRamMB)
1767
+ return targets;
1768
+ return [];
1769
+ }
1770
+ countByDomain(domain) {
1771
+ let n2 = 0;
1772
+ for (const m2 of this._loaded.values())
1773
+ if (m2.domain === domain)
1774
+ n2++;
1775
+ return n2;
1776
+ }
1777
+ // ------------------------------------------------------------------
1778
+ // Internal — fallback resolution
1779
+ // ------------------------------------------------------------------
1780
+ async findRunnableFallback(original) {
1781
+ const chain = this._fallbacks.get(original.domain) ?? [];
1782
+ for (const entry of chain) {
1783
+ if (entry.spec.host === original.host && entry.spec.name === original.name)
1784
+ continue;
1785
+ const ok3 = entry.available ? await Promise.resolve(entry.available()).catch(() => false) : true;
1786
+ if (!ok3)
1787
+ continue;
1788
+ return { ...entry.spec, owner: original.owner };
1789
+ }
1790
+ return null;
1791
+ }
1792
+ registerDefaultFallbacks() {
1793
+ this.setFallbackChain("vision", [
1794
+ { spec: { name: "moondream2", domain: "vision", host: "moondream-station" }, note: "local Moondream Station REST" },
1795
+ { spec: { name: "moondream", domain: "vision", host: "ollama", estimatedVramMB: 1800 }, note: "ollama moondream (small VRAM)" },
1796
+ { spec: { name: "tesseract-ocr-fallback", domain: "ocr", host: "subprocess", estimatedVramMB: 0, estimatedRamMB: 100 }, note: "OCR-only — no visual reasoning" }
1797
+ ]);
1798
+ this.setFallbackChain("image-gen", [
1799
+ { spec: { name: "flux1-schnell", domain: "image-gen", host: "diffusers-py", estimatedVramMB: 12e3 } },
1800
+ { spec: { name: "sdxl-turbo", domain: "image-gen", host: "diffusers-py", estimatedVramMB: 8e3 } },
1801
+ { spec: { name: "sd-turbo", domain: "image-gen", host: "diffusers-py", estimatedVramMB: 4e3 } }
1802
+ ]);
1803
+ this.setFallbackChain("music", [
1804
+ { spec: { name: "facebook/musicgen-medium", domain: "music", host: "audiocraft", estimatedVramMB: 6e3 } },
1805
+ { spec: { name: "facebook/musicgen-small", domain: "music", host: "audiocraft", estimatedVramMB: 3e3 } }
1806
+ ]);
1807
+ this.setFallbackChain("sound", [
1808
+ { spec: { name: "cvssp/audioldm-s-full-v2", domain: "sound", host: "diffusers-py", estimatedVramMB: 4e3 } },
1809
+ { spec: { name: "facebook/audiogen-medium", domain: "sound", host: "audiocraft", estimatedVramMB: 3e3 } }
1810
+ ]);
1811
+ this.setFallbackChain("asr", [
1812
+ { spec: { name: "base", domain: "asr", host: "whisper-cli", estimatedRamMB: 800 } },
1813
+ { spec: { name: "tiny", domain: "asr", host: "whisper-cli", estimatedRamMB: 300 } }
1814
+ ]);
1815
+ this.setFallbackChain("tts", [
1816
+ { spec: { name: "piper-default", domain: "tts", host: "piper", estimatedRamMB: 200 } }
1817
+ ]);
1818
+ }
1819
+ // ------------------------------------------------------------------
1820
+ // Internal — footprint estimation
1821
+ // ------------------------------------------------------------------
1822
+ estimateFootprintVramMB(spec) {
1823
+ if (spec.estimatedVramMB !== void 0)
1824
+ return spec.estimatedVramMB;
1825
+ switch (spec.domain) {
1826
+ case "embedding":
1827
+ return 300;
1828
+ case "vision":
1829
+ return spec.host === "moondream-station" ? 2e3 : 2500;
1830
+ case "image-gen":
1831
+ return 8e3;
1832
+ case "video-gen":
1833
+ return 12e3;
1834
+ case "music":
1835
+ return 4e3;
1836
+ case "sound":
1837
+ return 3e3;
1838
+ case "asr":
1839
+ return 1e3;
1840
+ case "tts":
1841
+ return 300;
1842
+ case "subagent":
1843
+ return 4e3;
1844
+ case "ocr":
1845
+ return 0;
1846
+ case "chat":
1847
+ default:
1848
+ return 5e3;
1849
+ }
1850
+ }
1851
+ estimateFootprintRamMB(spec) {
1852
+ if (spec.estimatedRamMB !== void 0)
1853
+ return spec.estimatedRamMB;
1854
+ switch (spec.domain) {
1855
+ case "ocr":
1856
+ return 100;
1857
+ case "tts":
1858
+ return 200;
1859
+ case "embedding":
1860
+ return 500;
1861
+ case "asr":
1862
+ return 800;
1863
+ case "music":
1864
+ case "sound":
1865
+ return 2e3;
1866
+ case "vision":
1867
+ return 1500;
1868
+ case "image-gen":
1869
+ return 4e3;
1870
+ case "video-gen":
1871
+ return 6e3;
1872
+ case "subagent":
1873
+ return 1500;
1874
+ case "chat":
1875
+ default:
1876
+ return 2e3;
1877
+ }
1878
+ }
1879
+ guessOllamaDomain(name10) {
1880
+ const n2 = name10.toLowerCase();
1881
+ if (/embed|nomic|bge|e5/.test(n2))
1882
+ return "embedding";
1883
+ if (/moondream|llava|vlm|vision|qwen.*vl|minicpm-v|gemma3|pixtral|cogvlm|internvl/.test(n2))
1884
+ return "vision";
1885
+ if (/flux|stable.diffusion|sdxl|z-image/.test(n2))
1886
+ return "image-gen";
1887
+ return "chat";
1888
+ }
1889
+ // ------------------------------------------------------------------
1890
+ // Internal — snapshot + pressure
1891
+ // ------------------------------------------------------------------
1892
+ buildSnapshot() {
1893
+ const ram = ramSnapshotMB();
1894
+ const vram = this._lastSnapshot?.vramMB ?? null;
1895
+ return {
1896
+ loaded: [...this._loaded.values()],
1897
+ inflight: [...this._inflight.entries()].map(([key, v]) => ({ key, owner: v.owner, startedMs: v.startedMs })),
1898
+ ramMB: ram,
1899
+ vramMB: vram,
1900
+ lastPollAt: Date.now(),
1901
+ slots: this.buildSlotsSnapshot()
1902
+ };
1903
+ }
1904
+ buildSlotsSnapshot() {
1905
+ const byModel = {};
1906
+ for (const slot of this._activeSlots.values()) {
1907
+ const k = slot.model;
1908
+ if (!byModel[k])
1909
+ byModel[k] = { inUse: 0, tokensPerSec: 0, samples: 0 };
1910
+ byModel[k].inUse += 1;
1911
+ }
1912
+ for (const [model, tp] of this._throughput) {
1913
+ if (!byModel[model])
1914
+ byModel[model] = { inUse: 0, tokensPerSec: 0, samples: 0 };
1915
+ byModel[model].tokensPerSec = tp.tokensPerSec;
1916
+ byModel[model].samples = tp.samples;
1917
+ }
1918
+ return {
1919
+ inUse: this._activeSlots.size,
1920
+ capacity: this.slotCapacity,
1921
+ queueDepth: this._slotQueue.length,
1922
+ queueCapacity: this.queueCapacity,
1923
+ byModel
1924
+ };
1925
+ }
1926
+ async checkPressure(snap) {
1927
+ if (snap.ramMB.free < this.ramHeadroomMB) {
1928
+ this.emit("pressure", "ram", snap.ramMB.free, this.ramHeadroomMB);
1929
+ }
1930
+ const v = await vramSnapshotMB();
1931
+ if (v) {
1932
+ snap.vramMB = v;
1933
+ if (v.free < this.vramHeadroomMB) {
1934
+ this.emit("pressure", "vram", v.free, this.vramHeadroomMB);
1935
+ }
1936
+ }
1937
+ const queueThreshold = Math.floor(this.queueCapacity * 0.8);
1938
+ if (this._slotQueue.length >= queueThreshold) {
1939
+ this.emit("pressure", "queue", this._slotQueue.length, queueThreshold);
1940
+ }
1941
+ const now = Date.now();
1942
+ for (const slot of this._activeSlots.values()) {
1943
+ if (now - slot.acquiredAt > STUCK_INFLIGHT_DIAGNOSTIC_MS) {
1944
+ }
1945
+ }
1946
+ }
1947
+ // ------------------------------------------------------------------
1948
+ // Inference slot admission control (replaces timeouts)
1949
+ // ------------------------------------------------------------------
1950
+ /**
1951
+ * Acquire an inference slot. Blocks (queues with backpressure) until a slot
1952
+ * is available. Never times out — work either completes or is cancelled
1953
+ * via the caller-provided AbortSignal before admission.
1954
+ *
1955
+ * Two-tier admission:
1956
+ * 1. Reserved: 1 slot per sessionKey kept warm even when shared pool full
1957
+ * 2. Shared: queue with FIFO+priority ordering; size-bounded by queueCapacity
1958
+ *
1959
+ * Backpressure: when queue exceeds 80% capacity, emit `pressure: "queue"` —
1960
+ * upstream callers (e.g. Telegram poll loop) should slow ingress.
1961
+ */
1962
+ acquireInferenceSlot(spec) {
1963
+ if (this._activeSlots.size < this.slotCapacity) {
1964
+ return Promise.resolve(this.admitSlot(
1965
+ spec,
1966
+ /*reserved*/
1967
+ false
1968
+ ));
1969
+ }
1970
+ if (spec.sessionKey && !this._reservedBySession.has(spec.sessionKey) && this._activeSlots.size < this.slotCapacity + 1) {
1971
+ const slot = this.admitSlot(
1972
+ spec,
1973
+ /*reserved*/
1974
+ true
1975
+ );
1976
+ this._reservedBySession.set(spec.sessionKey, slot.info.id);
1977
+ return Promise.resolve(slot);
1978
+ }
1979
+ return new Promise((resolve55, reject) => {
1980
+ const entry = { spec, resolve: resolve55, reject, enqueuedAt: Date.now() };
1981
+ if (spec.signal) {
1982
+ const onAbort = () => {
1983
+ const idx = this._slotQueue.indexOf(entry);
1984
+ if (idx >= 0)
1985
+ this._slotQueue.splice(idx, 1);
1986
+ reject(new Error("inference slot acquisition aborted by caller signal"));
1987
+ };
1988
+ if (spec.signal.aborted) {
1989
+ onAbort();
1990
+ return;
1991
+ }
1992
+ spec.signal.addEventListener("abort", onAbort, { once: true });
1993
+ entry.onSignalAbort = onAbort;
1994
+ }
1995
+ const prio = spec.priority ?? 0;
1996
+ let insertAt = this._slotQueue.length;
1997
+ for (let i2 = this._slotQueue.length - 1; i2 >= 0; i2--) {
1998
+ const p2 = this._slotQueue[i2].spec.priority ?? 0;
1999
+ if (p2 >= prio) {
2000
+ insertAt = i2 + 1;
2001
+ break;
2002
+ }
2003
+ if (i2 === 0)
2004
+ insertAt = 0;
2005
+ }
2006
+ this._slotQueue.splice(insertAt, 0, entry);
2007
+ const threshold = Math.floor(this.queueCapacity * 0.8);
2008
+ if (this._slotQueue.length === threshold) {
2009
+ this.emit("pressure", "queue", this._slotQueue.length, threshold);
2010
+ }
2011
+ });
2012
+ }
2013
+ /** Admit a slot — internal, called from acquire fast path and from drainQueue. */
2014
+ admitSlot(spec, reserved) {
2015
+ const id = `slot-${++this._slotIdSeq}-${Date.now().toString(36)}`;
2016
+ const info = {
2017
+ id,
2018
+ model: spec.model,
2019
+ domain: spec.domain,
2020
+ owner: spec.owner,
2021
+ sessionKey: spec.sessionKey,
2022
+ acquiredAt: Date.now(),
2023
+ promptTokens: spec.promptTokens ?? 0,
2024
+ reserved
2025
+ };
2026
+ this._activeSlots.set(id, info);
2027
+ this.emit("slotAcquired", info);
2028
+ let released = false;
2029
+ const broker = this;
2030
+ return {
2031
+ info,
2032
+ release(outcome) {
2033
+ if (released)
2034
+ return;
2035
+ released = true;
2036
+ broker.releaseSlot(info, outcome);
2037
+ }
2038
+ };
2039
+ }
2040
+ releaseSlot(info, outcome) {
2041
+ this._activeSlots.delete(info.id);
2042
+ if (info.sessionKey && this._reservedBySession.get(info.sessionKey) === info.id) {
2043
+ this._reservedBySession.delete(info.sessionKey);
2044
+ }
2045
+ if (outcome.ok && (outcome.completionTokens ?? 0) > 0) {
2046
+ const wallMs = Date.now() - info.acquiredAt;
2047
+ if (wallMs > 100) {
2048
+ const tps = outcome.completionTokens * 1e3 / wallMs;
2049
+ const cur = this._throughput.get(info.model) ?? {
2050
+ tokensPerSec: THROUGHPUT_INITIAL_TPS,
2051
+ samples: 0,
2052
+ lastReleaseAt: 0
2053
+ };
2054
+ cur.tokensPerSec = cur.samples === 0 ? tps : cur.tokensPerSec * (1 - THROUGHPUT_EMA_ALPHA) + tps * THROUGHPUT_EMA_ALPHA;
2055
+ cur.samples += 1;
2056
+ cur.lastReleaseAt = Date.now();
2057
+ this._throughput.set(info.model, cur);
2058
+ this.emit("throughputUpdated", info.model, cur.tokensPerSec);
2059
+ }
2060
+ }
2061
+ this.emit("slotReleased", info, outcome);
2062
+ this.drainSlotQueue();
2063
+ }
2064
+ drainSlotQueue() {
2065
+ while (this._slotQueue.length > 0 && this._activeSlots.size < this.slotCapacity) {
2066
+ const entry = this._slotQueue.shift();
2067
+ if (entry.onSignalAbort && entry.spec.signal) {
2068
+ entry.spec.signal.removeEventListener("abort", entry.onSignalAbort);
2069
+ }
2070
+ if (entry.spec.signal?.aborted) {
2071
+ try {
2072
+ entry.reject(new Error("aborted before admission"));
2073
+ } catch {
2074
+ }
2075
+ continue;
2076
+ }
2077
+ const slot = this.admitSlot(
2078
+ entry.spec,
2079
+ /*reserved*/
2080
+ false
2081
+ );
2082
+ try {
2083
+ entry.resolve(slot);
2084
+ } catch {
2085
+ }
2086
+ }
2087
+ }
2088
+ /** Snapshot of throughput EMAs (for /broker and debugging). */
2089
+ throughputByModel() {
2090
+ const out = {};
2091
+ for (const [model, tp] of this._throughput) {
2092
+ out[model] = { tokensPerSec: tp.tokensPerSec, samples: tp.samples };
2093
+ }
2094
+ return out;
2095
+ }
2096
+ /** Tune the shared slot capacity at runtime (e.g. when Ollama pool resizes). */
2097
+ setSlotCapacity(n2) {
2098
+ this.slotCapacity = Math.max(1, Math.floor(n2));
2099
+ this.drainSlotQueue();
2100
+ }
2101
+ keyOf(spec) {
2102
+ return `${spec.host}:${spec.name}`;
2103
+ }
2104
+ };
2105
+ _nvSmiAvailable = null;
2106
+ }
2107
+ });
2108
+
2109
+ // packages/execution/dist/broker-mediated-backend.js
2110
/**
 * Wrap a chat backend so every completion is admitted through the model
 * broker's inference-slot control.
 *
 * The wrapper inherits from `backend` via `Object.create` and overrides
 * `chatCompletion`, plus `chatCompletionStream` when the backend has one.
 * Before each call it may clamp `numCtx` to the model's training context,
 * then it acquires a slot and releases it with the outcome afterwards.
 *
 * The streaming wrapper releases its slot in a `finally`, so the slot is
 * freed even when the consumer stops iterating early (`break` or `return()`).
 *
 * @param {object} backend Backend exposing `chatCompletion(request)` and
 *   optionally `chatCompletionStream(request)`.
 * @param {object} options2 Reads `clampNumCtx` (clamping is on unless it is
 *   `false`), `domain`, `owner`, `sessionKey` and `priority`.
 * @returns {object} The wrapped backend.
 */
function wrapWithBroker(backend, options2) {
  const broker = getModelBroker();
  const clamp7 = options2.clampNumCtx !== false;
  const wrapped = Object.create(backend);

  const modelFor = (request) => backend.model || request.model || "unknown";

  // Clamp numCtx to the training context when known; otherwise pass through.
  const clampRequest = async (model, request) => {
    if (!clamp7) {
      return request;
    }
    const trainCtx = await broker.getNctxTrain(model).catch(() => null);
    if (!(trainCtx && trainCtx > 0)) {
      return request;
    }
    const requestedNumCtx = request.numCtx;
    const target = requestedNumCtx
      ? Math.min(requestedNumCtx, trainCtx)
      : Math.min(trainCtx, estimateContextNeed(request));
    return target > 0 ? { ...request, numCtx: target } : request;
  };

  // Queue for, and obtain, an inference slot for this request.
  const acquireSlot = (model, request) => broker.acquireInferenceSlot({
    model,
    domain: options2.domain,
    owner: options2.owner,
    sessionKey: options2.sessionKey,
    promptTokens: estimatePromptTokens(request),
    priority: options2.priority ?? 0
  });

  const describeError = (err) => (err instanceof Error ? err.message : String(err));

  wrapped.chatCompletion = async (request) => {
    const model = modelFor(request);
    const effectiveRequest = await clampRequest(model, request);
    const slot = await acquireSlot(model, request);
    try {
      const result = await backend.chatCompletion(effectiveRequest);
      slot.release({ ok: true, completionTokens: result.usage?.completion_tokens ?? 0 });
      return result;
    } catch (err) {
      slot.release({ ok: false, error: describeError(err) });
      throw err;
    }
  };

  if (typeof backend.chatCompletionStream === "function") {
    const streamFn = backend.chatCompletionStream.bind(backend);
    wrapped.chatCompletionStream = async function* (request) {
      const model = modelFor(request);
      const effectiveRequest = await clampRequest(model, request);
      const slot = await acquireSlot(model, request);
      let completionTokens = 0;
      // Reported if the consumer abandons the stream before it finishes.
      let outcome = { ok: false, error: "stream closed by consumer before completion" };
      try {
        for await (const chunk of streamFn(effectiveRequest)) {
          const usage = chunk.usage;
          if (usage?.completion_tokens) {
            completionTokens = usage.completion_tokens;
          }
          yield chunk;
        }
        outcome = { ok: true, completionTokens };
      } catch (err) {
        outcome = { ok: false, error: describeError(err) };
        throw err;
      } finally {
        slot.release(outcome);
      }
    };
  }
  return wrapped;
}
2186
/**
 * Rough prompt-size estimate at about 4 characters per token.
 *
 * Counts the text of string contents and of `text` parts in array contents,
 * adds 8 characters of overhead per message, and 600 characters per tool.
 * Null or non-object message entries are skipped rather than throwing.
 *
 * @param {{ messages?: Array, tools?: Array } | null | undefined} request
 * @returns {number} Estimated token count (0 for an empty or missing request).
 */
function estimatePromptTokens(request) {
  const CHARS_PER_TOKEN = 4;
  const MESSAGE_OVERHEAD_CHARS = 8;
  const TOOL_OVERHEAD_CHARS = 600;

  let chars = 0;
  const messages = Array.isArray(request?.messages) ? request.messages : [];
  for (const message of messages) {
    if (message === null || typeof message !== "object") {
      continue;
    }
    const { content } = message;
    if (typeof content === "string") {
      chars += content.length;
    } else if (Array.isArray(content)) {
      for (const part of content) {
        if (typeof part?.text === "string") {
          chars += part.text.length;
        }
      }
    }
    chars += MESSAGE_OVERHEAD_CHARS;
  }
  if (Array.isArray(request?.tools) && request.tools.length > 0) {
    chars += request.tools.length * TOOL_OVERHEAD_CHARS;
  }
  return Math.ceil(chars / CHARS_PER_TOKEN);
}
2206
/**
 * Estimate the context window a request needs: the prompt estimate plus
 * `maxTokens` (1024 when absent) plus a 512-token margin, never below 2048.
 */
function estimateContextNeed(request) {
  const completionBudget = request?.maxTokens ?? 1024;
  const needed = estimatePromptTokens(request) + completionBudget + 512;
  return Math.max(2048, needed);
}
2211
+ var init_broker_mediated_backend = __esm({
2212
+ "packages/execution/dist/broker-mediated-backend.js"() {
2213
+ "use strict";
2214
+ init_model_broker();
2215
+ }
2216
+ });
2217
+
1331
2218
  // packages/execution/dist/tools/security-classifier.js
1332
2219
  function classifyTool(name10) {
1333
2220
  for (const rule of RULES) {
@@ -19513,6 +20400,20 @@ import { existsSync as existsSync25, mkdirSync as mkdirSync10, writeFileSync as
19513
20400
  import { join as join28, basename as basename5, extname as extname3, resolve as resolve16 } from "node:path";
19514
20401
  import { homedir as homedir9 } from "node:os";
19515
20402
  import { execFileSync as execFileSync3, execSync as execSync15 } from "node:child_process";
20403
/**
 * Map a Whisper model name to a RAM estimate (the caller passes the result
 * as `estimatedRamMB`).
 *
 * The lower-cased name is matched by substring against size keywords in a
 * fixed order (large, medium, small, base, tiny); the first keyword found
 * wins. Names containing none of the keywords get 800.
 *
 * @param {string} model - Whisper model name, e.g. "base" or "large-v3".
 * @returns {number} RAM estimate for the matched size tier.
 */
function whisperRamEstimate(model) {
  const lowered = model.toLowerCase();
  // Order matters: earlier keywords take precedence when several match.
  const tiers = [
    ["large", 3200],
    ["medium", 1700],
    ["small", 800],
    ["base", 400],
    ["tiny", 200]
  ];
  const hit = tiers.find(([keyword]) => lowered.includes(keyword));
  return hit ? hit[1] : 800;
}
19516
20417
  function isTranscribable(path12) {
19517
20418
  const ext = extname3(path12).toLowerCase();
19518
20419
  return AUDIO_EXTS.has(ext) || VIDEO_EXTS.has(ext);
@@ -19582,6 +20483,7 @@ var AUDIO_EXTS, VIDEO_EXTS, MAX_TRANSCRIBE_URL_BYTES, _tcModule, _tcChecked, Tra
19582
20483
  var init_transcribe_tool = __esm({
19583
20484
  "packages/execution/dist/tools/transcribe-tool.js"() {
19584
20485
  "use strict";
20486
+ init_model_broker();
19585
20487
  init_network_egress_policy();
19586
20488
  AUDIO_EXTS = /* @__PURE__ */ new Set([
19587
20489
  ".mp3",
@@ -19636,7 +20538,7 @@ var init_transcribe_tool = __esm({
19636
20538
  async execute(args) {
19637
20539
  const start2 = performance.now();
19638
20540
  const filePath = resolve16(this.workingDir, String(args["path"] ?? ""));
19639
- const model = String(args["model"] ?? "base");
20541
+ let model = String(args["model"] ?? "base");
19640
20542
  const diarize = Boolean(args["diarize"] ?? false);
19641
20543
  if (!existsSync25(filePath)) {
19642
20544
  return {
@@ -19654,6 +20556,32 @@ var init_transcribe_tool = __esm({
19654
20556
  durationMs: performance.now() - start2
19655
20557
  };
19656
20558
  }
20559
+ const broker = getModelBroker();
20560
+ const askedModel = model;
20561
+ let effectiveModel = model;
20562
+ const whisperDecision = await broker.ensureModelLoadable({
20563
+ name: askedModel,
20564
+ domain: "asr",
20565
+ host: "whisper-cli",
20566
+ owner: "transcribe-file-tool",
20567
+ estimatedRamMB: whisperRamEstimate(askedModel)
20568
+ });
20569
+ if (whisperDecision.kind === "degrade") {
20570
+ effectiveModel = whisperDecision.fallback.name;
20571
+ } else if (whisperDecision.kind === "evict") {
20572
+ for (const target of whisperDecision.evictTargets) {
20573
+ await broker.evict(target.host, target.name, "asr-needs-room");
20574
+ }
20575
+ } else if (whisperDecision.kind === "reject") {
20576
+ return {
20577
+ success: false,
20578
+ output: "",
20579
+ error: `Transcription blocked by resource broker: ${whisperDecision.reason}`,
20580
+ durationMs: performance.now() - start2
20581
+ };
20582
+ }
20583
+ if (effectiveModel !== askedModel)
20584
+ model = effectiveModel;
19657
20585
  const tc = await loadTranscribeCli();
19658
20586
  if (!tc) {
19659
20587
  return this.execViaCli(filePath, model, diarize, start2);
@@ -46186,11 +47114,11 @@ var require_eventemitter3 = __commonJS({
46186
47114
  if (--emitter._eventsCount === 0) emitter._events = new Events();
46187
47115
  else delete emitter._events[evt];
46188
47116
  }
46189
- function EventEmitter14() {
47117
+ function EventEmitter15() {
46190
47118
  this._events = new Events();
46191
47119
  this._eventsCount = 0;
46192
47120
  }
46193
- EventEmitter14.prototype.eventNames = function eventNames() {
47121
+ EventEmitter15.prototype.eventNames = function eventNames() {
46194
47122
  var names = [], events, name10;
46195
47123
  if (this._eventsCount === 0) return names;
46196
47124
  for (name10 in events = this._events) {
@@ -46201,7 +47129,7 @@ var require_eventemitter3 = __commonJS({
46201
47129
  }
46202
47130
  return names;
46203
47131
  };
46204
- EventEmitter14.prototype.listeners = function listeners(event) {
47132
+ EventEmitter15.prototype.listeners = function listeners(event) {
46205
47133
  var evt = prefix ? prefix + event : event, handlers = this._events[evt];
46206
47134
  if (!handlers) return [];
46207
47135
  if (handlers.fn) return [handlers.fn];
@@ -46210,13 +47138,13 @@ var require_eventemitter3 = __commonJS({
46210
47138
  }
46211
47139
  return ee;
46212
47140
  };
46213
- EventEmitter14.prototype.listenerCount = function listenerCount(event) {
47141
+ EventEmitter15.prototype.listenerCount = function listenerCount(event) {
46214
47142
  var evt = prefix ? prefix + event : event, listeners = this._events[evt];
46215
47143
  if (!listeners) return 0;
46216
47144
  if (listeners.fn) return 1;
46217
47145
  return listeners.length;
46218
47146
  };
46219
- EventEmitter14.prototype.emit = function emit2(event, a1, a2, a3, a4, a5) {
47147
+ EventEmitter15.prototype.emit = function emit2(event, a1, a2, a3, a4, a5) {
46220
47148
  var evt = prefix ? prefix + event : event;
46221
47149
  if (!this._events[evt]) return false;
46222
47150
  var listeners = this._events[evt], len = arguments.length, args, i2;
@@ -46267,13 +47195,13 @@ var require_eventemitter3 = __commonJS({
46267
47195
  }
46268
47196
  return true;
46269
47197
  };
46270
- EventEmitter14.prototype.on = function on2(event, fn, context2) {
47198
+ EventEmitter15.prototype.on = function on2(event, fn, context2) {
46271
47199
  return addListener2(this, event, fn, context2, false);
46272
47200
  };
46273
- EventEmitter14.prototype.once = function once(event, fn, context2) {
47201
+ EventEmitter15.prototype.once = function once(event, fn, context2) {
46274
47202
  return addListener2(this, event, fn, context2, true);
46275
47203
  };
46276
- EventEmitter14.prototype.removeListener = function removeListener2(event, fn, context2, once) {
47204
+ EventEmitter15.prototype.removeListener = function removeListener2(event, fn, context2, once) {
46277
47205
  var evt = prefix ? prefix + event : event;
46278
47206
  if (!this._events[evt]) return this;
46279
47207
  if (!fn) {
@@ -46296,7 +47224,7 @@ var require_eventemitter3 = __commonJS({
46296
47224
  }
46297
47225
  return this;
46298
47226
  };
46299
- EventEmitter14.prototype.removeAllListeners = function removeAllListeners(event) {
47227
+ EventEmitter15.prototype.removeAllListeners = function removeAllListeners(event) {
46300
47228
  var evt;
46301
47229
  if (event) {
46302
47230
  evt = prefix ? prefix + event : event;
@@ -46307,12 +47235,12 @@ var require_eventemitter3 = __commonJS({
46307
47235
  }
46308
47236
  return this;
46309
47237
  };
46310
- EventEmitter14.prototype.off = EventEmitter14.prototype.removeListener;
46311
- EventEmitter14.prototype.addListener = EventEmitter14.prototype.on;
46312
- EventEmitter14.prefixed = prefix;
46313
- EventEmitter14.EventEmitter = EventEmitter14;
47238
+ EventEmitter15.prototype.off = EventEmitter15.prototype.removeListener;
47239
+ EventEmitter15.prototype.addListener = EventEmitter15.prototype.on;
47240
+ EventEmitter15.prefixed = prefix;
47241
+ EventEmitter15.EventEmitter = EventEmitter15;
46314
47242
  if ("undefined" !== typeof module) {
46315
- module.exports = EventEmitter14;
47243
+ module.exports = EventEmitter15;
46316
47244
  }
46317
47245
  }
46318
47246
  });
@@ -119084,10 +120012,10 @@ var require_wrap_handler = __commonJS({
119084
120012
  var require_dispatcher = __commonJS({
119085
120013
  "../node_modules/undici/lib/dispatcher/dispatcher.js"(exports, module) {
119086
120014
  "use strict";
119087
- var EventEmitter14 = __require("node:events");
120015
+ var EventEmitter15 = __require("node:events");
119088
120016
  var WrapHandler = require_wrap_handler();
119089
120017
  var wrapInterceptor = (dispatch) => (opts, handler) => dispatch(opts, WrapHandler.wrap(handler));
119090
- var Dispatcher2 = class extends EventEmitter14 {
120018
+ var Dispatcher2 = class extends EventEmitter15 {
119091
120019
  dispatch() {
119092
120020
  throw new Error("not implemented");
119093
120021
  }
@@ -126370,7 +127298,7 @@ var require_socks5_utils = __commonJS({
126370
127298
  var require_socks5_client = __commonJS({
126371
127299
  "../node_modules/undici/lib/core/socks5-client.js"(exports, module) {
126372
127300
  "use strict";
126373
- var { EventEmitter: EventEmitter14 } = __require("node:events");
127301
+ var { EventEmitter: EventEmitter15 } = __require("node:events");
126374
127302
  var { Buffer: Buffer7 } = __require("node:buffer");
126375
127303
  var { InvalidArgumentError, Socks5ProxyError } = require_errors2();
126376
127304
  var { debuglog } = __require("node:util");
@@ -126413,7 +127341,7 @@ var require_socks5_client = __commonJS({
126413
127341
  ERROR: "error",
126414
127342
  CLOSED: "closed"
126415
127343
  };
126416
- var Socks5Client = class extends EventEmitter14 {
127344
+ var Socks5Client = class extends EventEmitter15 {
126417
127345
  constructor(socket, options2 = {}) {
126418
127346
  super();
126419
127347
  if (!socket) {
@@ -132798,9 +133726,9 @@ var require_memory_cache_store = __commonJS({
132798
133726
  "../node_modules/undici/lib/cache/memory-cache-store.js"(exports, module) {
132799
133727
  "use strict";
132800
133728
  var { Writable } = __require("node:stream");
132801
- var { EventEmitter: EventEmitter14 } = __require("node:events");
133729
+ var { EventEmitter: EventEmitter15 } = __require("node:events");
132802
133730
  var { assertCacheKey, assertCacheValue } = require_cache();
132803
- var MemoryCacheStore = class extends EventEmitter14 {
133731
+ var MemoryCacheStore = class extends EventEmitter15 {
132804
133732
  #maxCount = 1024;
132805
133733
  #maxSize = 104857600;
132806
133734
  // 100MB
@@ -229661,7 +230589,7 @@ var init_send_ssdp_message = __esm({
229661
230589
  });
229662
230590
 
229663
230591
  // ../node_modules/@achingbrain/ssdp/dist/src/ssdp.js
229664
- import { EventEmitter as EventEmitter2, on } from "node:events";
230592
+ import { EventEmitter as EventEmitter3, on } from "node:events";
229665
230593
  import { createRequire } from "node:module";
229666
230594
  var req, name8, version2, DEFAULT_SSDP_SIGNATURE, SSDP;
229667
230595
  var init_ssdp = __esm({
@@ -229679,7 +230607,7 @@ var init_ssdp = __esm({
229679
230607
  req = createRequire(import.meta.url);
229680
230608
  ({ name: name8, version: version2 } = req("../../package.json"));
229681
230609
  DEFAULT_SSDP_SIGNATURE = `node.js/${process.version.substring(1)} UPnP/1.1 ${name8}/${version2}`;
229682
- SSDP = class extends EventEmitter2 {
230610
+ SSDP = class extends EventEmitter3 {
229683
230611
  udn;
229684
230612
  signature;
229685
230613
  sockets;
@@ -236837,7 +237765,7 @@ var require_extension2 = __commonJS({
236837
237765
  var require_websocket2 = __commonJS({
236838
237766
  "../node_modules/ws/lib/websocket.js"(exports, module) {
236839
237767
  "use strict";
236840
- var EventEmitter14 = __require("events");
237768
+ var EventEmitter15 = __require("events");
236841
237769
  var https4 = __require("https");
236842
237770
  var http6 = __require("http");
236843
237771
  var net5 = __require("net");
@@ -236869,7 +237797,7 @@ var require_websocket2 = __commonJS({
236869
237797
  var protocolVersions = [8, 13];
236870
237798
  var readyStates = ["CONNECTING", "OPEN", "CLOSING", "CLOSED"];
236871
237799
  var subprotocolRegex = /^[!#$%&'*+\-.0-9A-Z^_`|a-z~]+$/;
236872
- var WebSocket6 = class _WebSocket extends EventEmitter14 {
237800
+ var WebSocket6 = class _WebSocket extends EventEmitter15 {
236873
237801
  /**
236874
237802
  * Create a new `WebSocket`.
236875
237803
  *
@@ -237866,7 +238794,7 @@ var require_subprotocol = __commonJS({
237866
238794
  var require_websocket_server = __commonJS({
237867
238795
  "../node_modules/ws/lib/websocket-server.js"(exports, module) {
237868
238796
  "use strict";
237869
- var EventEmitter14 = __require("events");
238797
+ var EventEmitter15 = __require("events");
237870
238798
  var http6 = __require("http");
237871
238799
  var { Duplex: Duplex3 } = __require("stream");
237872
238800
  var { createHash: createHash31 } = __require("crypto");
@@ -237879,7 +238807,7 @@ var require_websocket_server = __commonJS({
237879
238807
  var RUNNING = 0;
237880
238808
  var CLOSING = 1;
237881
238809
  var CLOSED = 2;
237882
- var WebSocketServer4 = class extends EventEmitter14 {
238810
+ var WebSocketServer4 = class extends EventEmitter15 {
237883
238811
  /**
237884
238812
  * Create a `WebSocketServer` instance.
237885
238813
  *
@@ -254016,6 +254944,7 @@ var DEFAULT_DIFFUSERS_IMAGE_MODEL, DEFAULT_OLLAMA_IMAGE_MODEL, LEGACY_SDXL_TURBO
254016
254944
  var init_image_generate = __esm({
254017
254945
  "packages/execution/dist/tools/image-generate.js"() {
254018
254946
  "use strict";
254947
+ init_model_broker();
254019
254948
  init_venv_paths();
254020
254949
  init_model_store();
254021
254950
  DEFAULT_DIFFUSERS_IMAGE_MODEL = "Efficient-Large-Model/SANA1.5_1.6B_1024px_diffusers";
@@ -254830,6 +255759,28 @@ if __name__ == "__main__":
254830
255759
  const requestedBackend = args["backend"] ? String(args["backend"]) : this.defaultBackend;
254831
255760
  const seed = optionalNumberArg(args["seed"]);
254832
255761
  const candidates = imageGenerationFallbackCandidates(requestedModel, requestedBackend, generationFallbackEnabled(args));
255762
+ const broker = getModelBroker();
255763
+ const firstCandidate = candidates[0];
255764
+ if (firstCandidate) {
255765
+ const decision2 = await broker.ensureModelLoadable({
255766
+ name: firstCandidate.model,
255767
+ domain: "image-gen",
255768
+ host: firstCandidate.backend === "ollama" ? "ollama" : "diffusers-py",
255769
+ owner: "image-generate-tool"
255770
+ });
255771
+ if (decision2.kind === "evict") {
255772
+ for (const target of decision2.evictTargets) {
255773
+ await broker.evict(target.host, target.name, "image-gen-needs-room");
255774
+ }
255775
+ } else if (decision2.kind === "reject") {
255776
+ return {
255777
+ success: false,
255778
+ output: "",
255779
+ error: `Image generation blocked by resource broker: ${decision2.reason}`,
255780
+ durationMs: performance.now() - start2
255781
+ };
255782
+ }
255783
+ }
254833
255784
  try {
254834
255785
  return await this.generateCandidateLadder({ candidates, prompt, args, seed, start: start2 });
254835
255786
  } catch (err) {
@@ -256270,6 +257221,7 @@ var DEFAULT_SOUND_MODEL, DEFAULT_MUSIC_MODEL, DIFFUSERS_AUDIO_PACKAGES, TRANSFOR
256270
257221
  var init_audio_generate = __esm({
256271
257222
  "packages/execution/dist/tools/audio-generate.js"() {
256272
257223
  "use strict";
257224
+ init_model_broker();
256273
257225
  init_venv_paths();
256274
257226
  init_model_store();
256275
257227
  DEFAULT_SOUND_MODEL = "cvssp/audioldm-s-full-v2";
@@ -257217,6 +258169,28 @@ if __name__ == "__main__":
257217
258169
  const candidates = audioGenerationFallbackCandidates(kind, requestedModel, requestedBackend, generationFallbackEnabled2(args));
257218
258170
  const seed = optionalNumberArg2(args["seed"]);
257219
258171
  const playback = playbackRequested(args);
258172
+ const broker = getModelBroker();
258173
+ const firstCandidate = candidates[0];
258174
+ if (firstCandidate) {
258175
+ const decision2 = await broker.ensureModelLoadable({
258176
+ name: firstCandidate.model,
258177
+ domain: kind === "music" ? "music" : "sound",
258178
+ host: firstCandidate.backend === "audiocraft" ? "audiocraft" : firstCandidate.backend === "tangoflux" ? "tangoflux" : firstCandidate.backend === "transformers" ? "diffusers-py" : "diffusers-py",
258179
+ owner: `audio-generate-tool/${kind}`
258180
+ });
258181
+ if (decision2.kind === "evict") {
258182
+ for (const target of decision2.evictTargets) {
258183
+ await broker.evict(target.host, target.name, `${kind}-gen-needs-room`);
258184
+ }
258185
+ } else if (decision2.kind === "reject") {
258186
+ return {
258187
+ success: false,
258188
+ output: "",
258189
+ error: `${kind === "music" ? "Music" : "Sound"} generation blocked by resource broker: ${decision2.reason}`,
258190
+ durationMs: performance.now() - start2
258191
+ };
258192
+ }
258193
+ }
257220
258194
  try {
257221
258195
  return await this.generateCandidateLadder({ kind, candidates, prompt, args, seed, playback, start: start2 });
257222
258196
  } catch (err) {
@@ -258160,6 +259134,7 @@ var DEFAULT_DIFFUSERS_VIDEO_MODEL, SANA_VIDEO_480P_MODEL, SANA_VIDEO_720P_MODEL,
258160
259134
  var init_video_generate = __esm({
258161
259135
  "packages/execution/dist/tools/video-generate.js"() {
258162
259136
  "use strict";
259137
+ init_model_broker();
258163
259138
  init_venv_paths();
258164
259139
  init_model_store();
258165
259140
  DEFAULT_DIFFUSERS_VIDEO_MODEL = "Efficient-Large-Model/SANA-Video_2B_480p";
@@ -259497,6 +260472,28 @@ if __name__ == "__main__":
259497
260472
  const withAudio = booleanArg3(args["with_audio"], false);
259498
260473
  const audioInput = typeof args["audio_input"] === "string" && args["audio_input"].trim() ? String(args["audio_input"]).trim() : void 0;
259499
260474
  const candidates = videoGenerationFallbackCandidates(requestedModel, requestedBackend, inferredKind, generationFallbackEnabled3(args), { preferNativeAudioVideo: withAudio || Boolean(audioInput) });
260475
+ const broker = getModelBroker();
260476
+ const firstCandidate = candidates[0];
260477
+ if (firstCandidate) {
260478
+ const decision2 = await broker.ensureModelLoadable({
260479
+ name: firstCandidate.model,
260480
+ domain: "video-gen",
260481
+ host: firstCandidate.backend === "comfyui" ? "comfyui" : "diffusers-py",
260482
+ owner: "video-generate-tool"
260483
+ });
260484
+ if (decision2.kind === "evict") {
260485
+ for (const target of decision2.evictTargets) {
260486
+ await broker.evict(target.host, target.name, "video-gen-needs-room");
260487
+ }
260488
+ } else if (decision2.kind === "reject") {
260489
+ return {
260490
+ success: false,
260491
+ output: "",
260492
+ error: `Video generation blocked by resource broker: ${decision2.reason}`,
260493
+ durationMs: performance.now() - start2
260494
+ };
260495
+ }
260496
+ }
259500
260497
  if (candidates.length === 0) {
259501
260498
  return {
259502
260499
  success: false,
@@ -261097,6 +262094,7 @@ var moondreamClient, moondreamError, stationProcess, hfPointUnavailable, IMAGE_E
261097
262094
  var init_vision = __esm({
261098
262095
  "packages/execution/dist/tools/vision.js"() {
261099
262096
  "use strict";
262097
+ init_model_broker();
261100
262098
  moondreamClient = null;
261101
262099
  moondreamError = null;
261102
262100
  stationProcess = null;
@@ -261195,14 +262193,43 @@ var init_vision = __esm({
261195
262193
  };
261196
262194
  }
261197
262195
  }
262196
+ const broker = getModelBroker();
262197
+ const moondreamDecision = await broker.ensureModelLoadable({
262198
+ name: "moondream2",
262199
+ domain: "vision",
262200
+ host: "moondream-station",
262201
+ owner: "vision-tool"
262202
+ });
262203
+ const forceDegradeToOllama = moondreamDecision.kind === "degrade" && moondreamDecision.fallback.host === "ollama";
262204
+ const forceReject = moondreamDecision.kind === "reject" && this._activeModelHasVision !== true;
261198
262205
  let client = null;
261199
- try {
261200
- client = await getMoondreamClient();
261201
- } catch {
262206
+ if (!forceDegradeToOllama) {
262207
+ try {
262208
+ client = await getMoondreamClient();
262209
+ } catch {
262210
+ }
261202
262211
  }
261203
262212
  if (client) {
262213
+ broker.registerLoaded({
262214
+ key: "moondream-station:moondream2",
262215
+ name: "moondream2",
262216
+ domain: "vision",
262217
+ host: "moondream-station",
262218
+ owner: "vision-tool",
262219
+ vramMB: 2e3,
262220
+ ramMB: 1500,
262221
+ priority: 1
262222
+ });
261204
262223
  return await this.runMoondream(client, buffer2, filename, action, prompt, length4, start2);
261205
262224
  }
262225
+ if (forceReject) {
262226
+ return {
262227
+ success: false,
262228
+ output: "",
262229
+ error: `Vision blocked by resource broker: ${moondreamDecision.kind === "reject" ? moondreamDecision.reason : "insufficient memory"}`,
262230
+ durationMs: performance.now() - start2
262231
+ };
262232
+ }
261206
262233
  const ollamaResult = await this.tryOllamaVision(buffer2, filename, action, prompt, length4, start2);
261207
262234
  if (ollamaResult)
261208
262235
  return ollamaResult;
@@ -261300,7 +262327,41 @@ Coordinates are normalized (0-1). Multiply by image width/height for pixel value
261300
262327
  async tryOllamaVision(buffer2, filename, action, prompt, length4, start2) {
261301
262328
  const ollamaHost = process.env["OLLAMA_HOST"] || "http://127.0.0.1:11434";
261302
262329
  const envModel = process.env["OLLAMA_VISION_MODEL"];
261303
- const model = envModel || (this._activeModelHasVision && this._activeModel ? this._activeModel : "moondream");
262330
+ let model = envModel || (this._activeModelHasVision && this._activeModel ? this._activeModel : "moondream");
262331
+ const broker = getModelBroker();
262332
+ const decision2 = await broker.ensureModelLoadable({
262333
+ name: model,
262334
+ domain: "vision",
262335
+ host: "ollama",
262336
+ owner: "vision-tool/ollama",
262337
+ requestedNumCtx: 2048,
262338
+ estimatedVramMB: 2e3
262339
+ });
262340
+ let numCtx;
262341
+ if (decision2.kind === "reject")
262342
+ return null;
262343
+ if (decision2.kind === "degrade") {
262344
+ model = decision2.fallback.name;
262345
+ } else if (decision2.kind === "evict") {
262346
+ for (const target of decision2.evictTargets) {
262347
+ await broker.evict(target.host, target.name, "vision-needs-room");
262348
+ }
262349
+ numCtx = decision2.effectiveNumCtx;
262350
+ } else if (decision2.kind === "ok") {
262351
+ numCtx = decision2.effectiveNumCtx;
262352
+ } else if (decision2.kind === "wait-for-inflight") {
262353
+ const inner = await decision2.promise;
262354
+ if (inner.kind === "ok")
262355
+ numCtx = inner.effectiveNumCtx;
262356
+ else if (inner.kind === "degrade")
262357
+ model = inner.fallback.name;
262358
+ else if (inner.kind === "reject")
262359
+ return null;
262360
+ }
262361
+ if (numCtx === void 0) {
262362
+ const trainCtx = await broker.getNctxTrain(model);
262363
+ numCtx = trainCtx && trainCtx > 0 ? Math.min(trainCtx, 4096) : 2048;
262364
+ }
261304
262365
  const imageBase64 = buffer2.toString("base64");
261305
262366
  let ollamaPrompt;
261306
262367
  switch (action) {
@@ -261323,7 +262384,7 @@ Coordinates are normalized (0-1). Multiply by image width/height for pixel value
261323
262384
  let res = await fetch(`${ollamaHost}/api/generate`, {
261324
262385
  method: "POST",
261325
262386
  headers: { "Content-Type": "application/json" },
261326
- body: JSON.stringify({ model, prompt: ollamaPrompt, images: [imageBase64], stream: false }),
262387
+ body: JSON.stringify({ model, prompt: ollamaPrompt, images: [imageBase64], stream: false, options: { num_ctx: numCtx } }),
261327
262388
  signal: AbortSignal.timeout(6e4)
261328
262389
  });
261329
262390
  if (!res.ok && model === "moondream") {
@@ -261335,15 +262396,18 @@ Coordinates are normalized (0-1). Multiply by image width/height for pixel value
261335
262396
  res = await fetch(`${ollamaHost}/api/generate`, {
261336
262397
  method: "POST",
261337
262398
  headers: { "Content-Type": "application/json" },
261338
- body: JSON.stringify({ model, prompt: ollamaPrompt, images: [imageBase64], stream: false }),
262399
+ body: JSON.stringify({ model, prompt: ollamaPrompt, images: [imageBase64], stream: false, options: { num_ctx: numCtx } }),
261339
262400
  signal: AbortSignal.timeout(6e4)
261340
262401
  });
261341
262402
  } catch {
261342
262403
  }
261343
262404
  }
261344
262405
  }
261345
- if (!res.ok)
262406
+ if (!res.ok) {
262407
+ broker.clearInflight("ollama", model);
261346
262408
  return null;
262409
+ }
262410
+ broker.touch("ollama", model);
261347
262411
  const data = await res.json();
261348
262412
  const response = data.response || "";
261349
262413
  if (!response)
@@ -513030,7 +514094,7 @@ var init_dist4 = __esm({
513030
514094
  });
513031
514095
 
513032
514096
  // packages/execution/dist/tools/code-graph-events.js
513033
- import { EventEmitter as EventEmitter3 } from "node:events";
514097
+ import { EventEmitter as EventEmitter4 } from "node:events";
513034
514098
  function emitIndexed(payload) {
513035
514099
  codeGraphEvents.publish({ type: "indexed", timestamp: Date.now(), ...payload });
513036
514100
  }
@@ -513048,7 +514112,7 @@ var init_code_graph_events = __esm({
513048
514112
  "packages/execution/dist/tools/code-graph-events.js"() {
513049
514113
  "use strict";
513050
514114
  MAX_RECENT = 256;
513051
- CodeGraphEventBus = class extends EventEmitter3 {
514115
+ CodeGraphEventBus = class extends EventEmitter4 {
513052
514116
  ring = [];
513053
514117
  constructor() {
513054
514118
  super();
@@ -519570,6 +520634,7 @@ var init_full_sub_agent = __esm({
519570
520634
  "packages/execution/dist/tools/full-sub-agent.js"() {
519571
520635
  "use strict";
519572
520636
  init_process_kill();
520637
+ init_model_broker();
519573
520638
  _activeSubProcesses = /* @__PURE__ */ new Map();
519574
520639
  FullSubAgentTool = class {
519575
520640
  name = "full_sub_agent";
@@ -519631,10 +520696,45 @@ var init_full_sub_agent = __esm({
519631
520696
  if (!task)
519632
520697
  return { success: false, output: "", error: "task is required", durationMs: performance.now() - start2 };
519633
520698
  const model = String(args["model"] ?? this.model);
520699
+ const broker = getModelBroker();
520700
+ const decision2 = await broker.ensureModelLoadable({
520701
+ name: model || "default",
520702
+ domain: "subagent",
520703
+ host: model ? "ollama" : "subprocess",
520704
+ owner: "full-sub-agent-tool",
520705
+ estimatedVramMB: 4e3,
520706
+ estimatedRamMB: 1500,
520707
+ priority: 1
520708
+ // sub-agents are higher priority than idle background models
520709
+ });
520710
+ if (decision2.kind === "evict") {
520711
+ for (const target of decision2.evictTargets) {
520712
+ await broker.evict(target.host, target.name, "sub-agent-spawn-needs-room");
520713
+ }
520714
+ } else if (decision2.kind === "reject") {
520715
+ return {
520716
+ success: false,
520717
+ output: "",
520718
+ error: `Sub-agent spawn blocked by resource broker: ${decision2.reason}`,
520719
+ durationMs: performance.now() - start2
520720
+ };
520721
+ }
519634
520722
  const entry = spawnFullSubAgent(task, { model, backendUrl: this.backendUrl, workingDir: this.workingDir }, (text) => this.onViewWrite?.(entry.id, text), (id, exitCode, output) => {
519635
520723
  this.onViewStatus?.(id, exitCode === 0 ? "completed" : "failed");
520724
+ broker.unregisterLoaded("subprocess", id, "sub-agent-exited");
519636
520725
  this.onComplete?.(id, task, exitCode, output);
519637
520726
  });
520727
+ broker.registerLoaded({
520728
+ key: `subprocess:${entry.id}`,
520729
+ name: entry.id,
520730
+ domain: "subagent",
520731
+ host: "subprocess",
520732
+ owner: "full-sub-agent-tool",
520733
+ vramMB: 4e3,
520734
+ ramMB: 1500,
520735
+ pid: entry.pid,
520736
+ priority: 1
520737
+ });
519638
520738
  this.onViewRegister?.(entry.id, entry.id, "full");
519639
520739
  return {
519640
520740
  success: true,
@@ -519708,6 +520808,7 @@ var _idCounter, AgentTool;
519708
520808
  var init_agent_tool = __esm({
519709
520809
  "packages/execution/dist/tools/agent-tool.js"() {
519710
520810
  "use strict";
520811
+ init_model_broker();
519711
520812
  _idCounter = 0;
519712
520813
  AgentTool = class {
519713
520814
  name = "agent";
@@ -519798,6 +520899,31 @@ var init_agent_tool = __esm({
519798
520899
  }
519799
520900
  const model = modelOverride ?? this.config.model;
519800
520901
  const agentId = generateAgentId(subagentType);
520902
+ {
520903
+ const broker = getModelBroker();
520904
+ const decision2 = await broker.ensureModelLoadable({
520905
+ name: model || "default",
520906
+ domain: "subagent",
520907
+ host: "ollama",
520908
+ owner: `agent-tool/${subagentType}`,
520909
+ estimatedVramMB: 2500,
520910
+ estimatedRamMB: 1e3,
520911
+ priority: 1
520912
+ });
520913
+ if (decision2.kind === "evict") {
520914
+ for (const target of decision2.evictTargets) {
520915
+ await broker.evict(target.host, target.name, "agent-tool-needs-room");
520916
+ }
520917
+ } else if (decision2.kind === "reject") {
520918
+ return {
520919
+ success: false,
520920
+ output: "",
520921
+ error: `Agent spawn blocked by resource broker: ${decision2.reason}`,
520922
+ durationMs: performance.now() - start2
520923
+ };
520924
+ }
520925
+ broker.touch("ollama", model);
520926
+ }
519801
520927
  const label = description ?? `${subagentType}: ${prompt.slice(0, 40)}`;
519802
520928
  const preloadedFiles = [];
519803
520929
  if (relevantFilePaths.length > 0) {
@@ -522473,13 +523599,13 @@ var init_notebook_edit = __esm({
522473
523599
 
522474
523600
  // packages/execution/dist/tools/environment-snapshot.js
522475
523601
  import { execSync as execSync42 } from "node:child_process";
522476
- import { cpus, totalmem, freemem, hostname as hostname2, platform as platform2, arch, uptime } from "node:os";
523602
+ import { cpus, totalmem as totalmem2, freemem as freemem2, hostname as hostname2, platform as platform2, arch, uptime } from "node:os";
522477
523603
  import { statfsSync as statfsSync2 } from "node:fs";
522478
523604
  function collectSnapshot(workingDir) {
522479
523605
  const now = /* @__PURE__ */ new Date();
522480
523606
  const cpuInfo = cpus();
522481
- const totalRAM = totalmem();
522482
- const freeRAM = freemem();
523607
+ const totalRAM = totalmem2();
523608
+ const freeRAM = freemem2();
522483
523609
  let load1 = 0, load5 = 0, load15 = 0;
522484
523610
  try {
522485
523611
  const loadavg4 = __require("node:os").loadavg();
@@ -522674,6 +523800,7 @@ var VideoUnderstandTool;
522674
523800
  var init_video_understand = __esm({
522675
523801
  "packages/execution/dist/tools/video-understand.js"() {
522676
523802
  "use strict";
523803
+ init_model_broker();
522677
523804
  VideoUnderstandTool = class {
522678
523805
  name = "video_understand";
522679
523806
  description = "Analyze a video from URL or local file. Produces timestamped transcript aligned with keyframe descriptions. Supports YouTube URLs and direct video files. Pipeline: download → transcribe (Whisper) → extract keyframes (scene detection) → describe frames → align timestamps → save structured output.";
@@ -522704,6 +523831,36 @@ var init_video_understand = __esm({
522704
523831
  if (!url && !localPath) {
522705
523832
  return { success: false, output: "", error: "url or path required", durationMs: performance.now() - start2 };
522706
523833
  }
523834
+ const broker = getModelBroker();
523835
+ const asrDecision = await broker.ensureModelLoadable({
523836
+ name: whisperModel,
523837
+ domain: "asr",
523838
+ host: "whisper-cli",
523839
+ owner: "video-understand-tool"
523840
+ });
523841
+ if (asrDecision.kind === "evict") {
523842
+ for (const target of asrDecision.evictTargets) {
523843
+ await broker.evict(target.host, target.name, "video-understand-needs-asr-room");
523844
+ }
523845
+ } else if (asrDecision.kind === "reject") {
523846
+ return {
523847
+ success: false,
523848
+ output: "",
523849
+ error: `Video understanding blocked by resource broker (ASR): ${asrDecision.reason}`,
523850
+ durationMs: performance.now() - start2
523851
+ };
523852
+ }
523853
+ const visionDecision = await broker.ensureModelLoadable({
523854
+ name: "moondream2",
523855
+ domain: "vision",
523856
+ host: "moondream-station",
523857
+ owner: "video-understand-tool"
523858
+ });
523859
+ if (visionDecision.kind === "evict") {
523860
+ for (const target of visionDecision.evictTargets) {
523861
+ await broker.evict(target.host, target.name, "video-understand-needs-vision-room");
523862
+ }
523863
+ }
522707
523864
  const outDir = join71(this.workingDir, ".omnius", "video-analysis");
522708
523865
  mkdirSync28(outDir, { recursive: true });
522709
523866
  const tmpDir = join71(outDir, `tmp-${Date.now()}`);
@@ -524314,6 +525471,7 @@ __export(dist_exports, {
524314
525471
  MemorySearchTool: () => MemorySearchTool,
524315
525472
  MemoryWriteTool: () => MemoryWriteTool,
524316
525473
  MeshtasticTool: () => MeshtasticTool,
525474
+ ModelBroker: () => ModelBroker,
524317
525475
  MultimodalMemoryTool: () => MultimodalMemoryTool,
524318
525476
  NetworkEgressPolicyError: () => NetworkEgressPolicyError,
524319
525477
  NexusTool: () => NexusTool,
@@ -524451,6 +525609,7 @@ __export(dist_exports, {
524451
525609
  getFileNotes: () => getFileNotes,
524452
525610
  getFullSubAgent: () => getFullSubAgent,
524453
525611
  getImageGenerationPreset: () => getImageGenerationPreset,
525612
+ getModelBroker: () => getModelBroker,
524454
525613
  getModelStoreDiskInfo: () => getModelStoreDiskInfo,
524455
525614
  getRecentChangesSummary: () => getRecentChangesSummary,
524456
525615
  getSecretsFilePath: () => getSecretsFilePath,
@@ -524573,6 +525732,7 @@ __export(dist_exports, {
524573
525732
  videoGenerationQualityLadder: () => videoGenerationQualityLadder,
524574
525733
  videoGenerationSetupPlan: () => videoGenerationSetupPlan,
524575
525734
  worktreeHasChanges: () => worktreeHasChanges,
525735
+ wrapWithBroker: () => wrapWithBroker,
524576
525736
  writeProvenanceFile: () => writeProvenanceFile,
524577
525737
  writeTodos: () => writeTodos
524578
525738
  });
@@ -524580,6 +525740,8 @@ var init_dist5 = __esm({
524580
525740
  "packages/execution/dist/index.js"() {
524581
525741
  "use strict";
524582
525742
  init_tool_executor();
525743
+ init_model_broker();
525744
+ init_broker_mediated_backend();
524583
525745
  init_security_classifier();
524584
525746
  init_tool_manifest();
524585
525747
  init_provenance();
@@ -527339,12 +528501,12 @@ var init_ollama_pool_cleanup = __esm({
527339
528501
  });
527340
528502
 
527341
528503
  // packages/orchestrator/dist/ollama-pool.js
527342
- import { spawn as spawn23, exec } from "node:child_process";
528504
+ import { spawn as spawn23, exec as exec2 } from "node:child_process";
527343
528505
  import { existsSync as existsSync63, readFileSync as readFileSync50, readdirSync as readdirSync20, statfsSync as statfsSync3, statSync as statSync25 } from "node:fs";
527344
528506
  import { homedir as homedir28 } from "node:os";
527345
528507
  import { join as join77 } from "node:path";
527346
528508
  import { createServer as createServer3 } from "node:net";
527347
- import { EventEmitter as EventEmitter4 } from "node:events";
528509
+ import { EventEmitter as EventEmitter5 } from "node:events";
527348
528510
  function discoverSystemOllamaModelStore() {
527349
528511
  const fromEnv = process.env["OLLAMA_MODELS"]?.trim();
527350
528512
  if (fromEnv && isDirectory(fromEnv))
@@ -527436,7 +528598,7 @@ async function detectGpus() {
527436
528598
  if (_nvidiaSmiAvailable === false)
527437
528599
  return [];
527438
528600
  return new Promise((resolve55) => {
527439
- exec("nvidia-smi --query-gpu=index,uuid,name,memory.total,memory.free,utilization.gpu --format=csv,noheader,nounits 2>/dev/null", { encoding: "utf8", timeout: 3e3 }, (err, stdout) => {
528601
+ exec2("nvidia-smi --query-gpu=index,uuid,name,memory.total,memory.free,utilization.gpu --format=csv,noheader,nounits 2>/dev/null", { encoding: "utf8", timeout: 3e3 }, (err, stdout) => {
527440
528602
  if (err) {
527441
528603
  _nvidiaSmiAvailable = false;
527442
528604
  resolve55([]);
@@ -527465,7 +528627,7 @@ async function detectGpus() {
527465
528627
  });
527466
528628
  }
527467
528629
  async function getHardwareSnapshot() {
527468
- const { totalmem: totalmem7, freemem: freemem6, cpus: cpus5 } = await import("node:os");
528630
+ const { totalmem: totalmem8, freemem: freemem7, cpus: cpus5 } = await import("node:os");
527469
528631
  const gpus = await detectGpus();
527470
528632
  const diskPath = discoverSystemOllamaModelStore() ?? homedir28();
527471
528633
  const disk = snapshotDisk(diskPath);
@@ -527473,8 +528635,8 @@ async function getHardwareSnapshot() {
527473
528635
  return {
527474
528636
  gpus,
527475
528637
  cpuCores: cpus5().length,
527476
- ramTotalMB: Math.round(totalmem7() / (1024 * 1024)),
527477
- ramFreeMB: Math.round(freemem6() / (1024 * 1024)),
528638
+ ramTotalMB: Math.round(totalmem8() / (1024 * 1024)),
528639
+ ramFreeMB: Math.round(freemem7() / (1024 * 1024)),
527478
528640
  disk,
527479
528641
  network,
527480
528642
  takenAtMs: Date.now()
@@ -527741,7 +528903,7 @@ var init_ollama_pool = __esm({
527741
528903
  return { proc, ready };
527742
528904
  };
527743
528905
  _gpuCursor = 0;
527744
- OllamaPool = class extends EventEmitter4 {
528906
+ OllamaPool = class extends EventEmitter5 {
527745
528907
  config;
527746
528908
  instances = [];
527747
528909
  reaperHandle = null;
@@ -540331,8 +541493,8 @@ var init_streaming_executor = __esm({
540331
541493
  startExecution(entry) {
540332
541494
  entry.state = "executing";
540333
541495
  entry.startedAt = Date.now();
540334
- const exec5 = this.executeFn;
540335
- entry.promise = exec5(entry.name, entry.args).then((result) => {
541496
+ const exec6 = this.executeFn;
541497
+ entry.promise = exec6(entry.name, entry.args).then((result) => {
540336
541498
  entry.state = "completed";
540337
541499
  entry.result = result;
540338
541500
  entry.completedAt = Date.now();
@@ -540621,7 +541783,7 @@ __export(preflightSnapshot_exports, {
540621
541783
  });
540622
541784
  import { existsSync as existsSync79, readFileSync as readFileSync62, statSync as statSync31 } from "node:fs";
540623
541785
  import { execSync as execSync46 } from "node:child_process";
540624
- import { homedir as homedir29, platform as platform3, arch as arch2, totalmem as totalmem2, freemem as freemem2, hostname as hostname3 } from "node:os";
541786
+ import { homedir as homedir29, platform as platform3, arch as arch2, totalmem as totalmem3, freemem as freemem3, hostname as hostname3 } from "node:os";
540625
541787
  import { join as join92 } from "node:path";
540626
541788
  import { createHash as createHash17 } from "node:crypto";
540627
541789
  function capturePreflightSnapshot(workingDir) {
@@ -540668,8 +541830,8 @@ function capturePreflightSnapshot(workingDir) {
540668
541830
  platform: platform3(),
540669
541831
  arch: arch2(),
540670
541832
  nodeVersion: process.version,
540671
- totalMemBytes: totalmem2(),
540672
- freeMemBytes: freemem2(),
541833
+ totalMemBytes: totalmem3(),
541834
+ freeMemBytes: freemem3(),
540673
541835
  hostname: hostname3()
540674
541836
  },
540675
541837
  toolchain: captureToolchainVersions(),
@@ -554431,6 +555593,13 @@ ${description}`
554431
555593
  if (responseFormat !== void 0) {
554432
555594
  body["response_format"] = responseFormat;
554433
555595
  }
555596
+ const reqNumCtx = request.numCtx;
555597
+ if (Number.isFinite(reqNumCtx) && (reqNumCtx ?? 0) > 0) {
555598
+ const opts = body["options"] ?? {};
555599
+ opts["num_ctx"] = reqNumCtx;
555600
+ body["options"] = opts;
555601
+ body["num_ctx"] = reqNumCtx;
555602
+ }
554434
555603
  let poolSlot = shouldUseOllamaPoolForBaseUrl(this.baseUrl) ? await getOllamaPool({ baseInstanceUrl: this.baseUrl }).acquire({
554435
555604
  model: this.model
554436
555605
  }) : null;
@@ -559212,7 +560381,7 @@ import { existsSync as existsSync85, mkdirSync as mkdirSync46, writeFileSync as
559212
560381
  import { join as join100, dirname as dirname27 } from "node:path";
559213
560382
  import { homedir as homedir32 } from "node:os";
559214
560383
  import { fileURLToPath as fileURLToPath11 } from "node:url";
559215
- import { EventEmitter as EventEmitter5 } from "node:events";
560384
+ import { EventEmitter as EventEmitter6 } from "node:events";
559216
560385
  import { createInterface as createInterface2 } from "node:readline";
559217
560386
  function isAudioPath(path12) {
559218
560387
  const ext = path12.toLowerCase().split(".").pop();
@@ -559451,9 +560620,9 @@ function ensureTranscribeCliBackground() {
559451
560620
  } catch {
559452
560621
  }
559453
560622
  try {
559454
- const { exec: exec5 } = await import("node:child_process");
560623
+ const { exec: exec6 } = await import("node:child_process");
559455
560624
  return new Promise((resolve55) => {
559456
- exec5("npm i -g transcribe-cli", { timeout: 18e4 }, (err) => {
560625
+ exec6("npm i -g transcribe-cli", { timeout: 18e4 }, (err) => {
559457
560626
  resolve55(!err);
559458
560627
  });
559459
560628
  });
@@ -559501,7 +560670,7 @@ var init_listen = __esm({
559501
560670
  ".m4v",
559502
560671
  ".ts"
559503
560672
  ]);
559504
- WhisperFallbackTranscriber = class extends EventEmitter5 {
560673
+ WhisperFallbackTranscriber = class extends EventEmitter6 {
559505
560674
  constructor(model, scriptPath2) {
559506
560675
  super();
559507
560676
  this.model = model;
@@ -559610,7 +560779,7 @@ var init_listen = __esm({
559610
560779
  this._ready = false;
559611
560780
  }
559612
560781
  };
559613
- ListenEngine = class extends EventEmitter5 {
560782
+ ListenEngine = class extends EventEmitter6 {
559614
560783
  config;
559615
560784
  micProcess = null;
559616
560785
  liveTranscriber = null;
@@ -562355,7 +563524,7 @@ var require_extension3 = __commonJS({
562355
563524
  var require_websocket3 = __commonJS({
562356
563525
  "node_modules/.pnpm/ws@8.20.1/node_modules/ws/lib/websocket.js"(exports, module) {
562357
563526
  "use strict";
562358
- var EventEmitter14 = __require("events");
563527
+ var EventEmitter15 = __require("events");
562359
563528
  var https4 = __require("https");
562360
563529
  var http6 = __require("http");
562361
563530
  var net5 = __require("net");
@@ -562387,7 +563556,7 @@ var require_websocket3 = __commonJS({
562387
563556
  var protocolVersions = [8, 13];
562388
563557
  var readyStates = ["CONNECTING", "OPEN", "CLOSING", "CLOSED"];
562389
563558
  var subprotocolRegex = /^[!#$%&'*+\-.0-9A-Z^_`|a-z~]+$/;
562390
- var WebSocket6 = class _WebSocket extends EventEmitter14 {
563559
+ var WebSocket6 = class _WebSocket extends EventEmitter15 {
562391
563560
  /**
562392
563561
  * Create a new `WebSocket`.
562393
563562
  *
@@ -563384,7 +564553,7 @@ var require_subprotocol2 = __commonJS({
563384
564553
  var require_websocket_server2 = __commonJS({
563385
564554
  "node_modules/.pnpm/ws@8.20.1/node_modules/ws/lib/websocket-server.js"(exports, module) {
563386
564555
  "use strict";
563387
- var EventEmitter14 = __require("events");
564556
+ var EventEmitter15 = __require("events");
563388
564557
  var http6 = __require("http");
563389
564558
  var { Duplex: Duplex3 } = __require("stream");
563390
564559
  var { createHash: createHash31 } = __require("crypto");
@@ -563397,7 +564566,7 @@ var require_websocket_server2 = __commonJS({
563397
564566
  var RUNNING = 0;
563398
564567
  var CLOSING = 1;
563399
564568
  var CLOSED = 2;
563400
- var WebSocketServer4 = class extends EventEmitter14 {
564569
+ var WebSocketServer4 = class extends EventEmitter15 {
563401
564570
  /**
563402
564571
  * Create a `WebSocketServer` instance.
563403
564572
  *
@@ -564285,6 +565454,9 @@ var init_command_registry = __esm({
564285
565454
  ["/wizard", "Alias for /setup"],
564286
565455
  ["/parallel", "Show current Ollama parallel inference slots"],
564287
565456
  ["/parallel <1-15>", "Set parallel slots (restarts Ollama, max 15)"],
565457
+ ["/broker", "Show resource broker status — loaded models, in-flight loads, RAM/VRAM headroom"],
565458
+ ["/broker evict <host>:<name>", "Force eviction of a tracked model (e.g. /broker evict ollama:moondream)"],
565459
+ ["/broker threshold <ram|vram|idle> <v>", "Tune broker thresholds (RAM/VRAM in MB, idle in seconds)"],
564288
565460
  ["/ollama cleanup", "Dry-run stale Ollama pool process cleanup"],
564289
565461
  ["/ollama cleanup --execute", "Terminate guarded stale Ollama pool runners"],
564290
565462
  ["/mcp", "Show MCP server/tool status and controls"],
@@ -564554,6 +565726,7 @@ var init_command_registry = __esm({
564554
565726
  endpoint: "network",
564555
565727
  provider: "network",
564556
565728
  parallel: "runtime",
565729
+ broker: "runtime",
564557
565730
  setup: "runtime",
564558
565731
  wizard: "runtime",
564559
565732
  mcp: "tools",
@@ -564695,6 +565868,7 @@ var init_command_registry = __esm({
564695
565868
  "model",
564696
565869
  "endpoint",
564697
565870
  "parallel",
565871
+ "broker",
564698
565872
  "commands",
564699
565873
  "cmds",
564700
565874
  "selfmodify",
@@ -565588,7 +566762,7 @@ var init_task_complete_box = __esm({
565588
566762
  });
565589
566763
 
565590
566764
  // packages/cli/src/tui/model-picker.ts
565591
- import { totalmem as totalmem3 } from "node:os";
566765
+ import { totalmem as totalmem4 } from "node:os";
565592
566766
  function isImageGenModel(name10, family) {
565593
566767
  return IMAGE_GEN_PATTERNS.some((p2) => p2.test(name10) || family && p2.test(family));
565594
566768
  }
@@ -565977,7 +567151,7 @@ async function queryModelContextSize(baseUrl, modelName) {
565977
567151
  }
565978
567152
  }
565979
567153
  function estimateRealisticContext(kvBytesPerToken, archMax, modelSizeGB2) {
565980
- const totalMemGB = totalmem3() / 1024 ** 3;
567154
+ const totalMemGB = totalmem4() / 1024 ** 3;
565981
567155
  const usableBytes = totalMemGB * 0.7 * 1024 ** 3;
565982
567156
  const maxTokens = Math.floor(usableBytes / kvBytesPerToken);
565983
567157
  let numCtx = Math.max(2048, Math.floor(maxTokens / 1024) * 1024);
@@ -567316,7 +568490,7 @@ var init_render = __esm({
567316
568490
  // packages/cli/src/tui/voice-session.ts
567317
568491
  import { createServer as createServer4 } from "node:http";
567318
568492
  import { spawn as spawn25, execSync as execSync49 } from "node:child_process";
567319
- import { EventEmitter as EventEmitter6 } from "node:events";
568493
+ import { EventEmitter as EventEmitter7 } from "node:events";
567320
568494
  function generateFrontendHTML() {
567321
568495
  return `<!DOCTYPE html>
567322
568496
  <html lang="en">
@@ -568051,7 +569225,7 @@ var init_voice_session = __esm({
568051
569225
  init_wrapper2();
568052
569226
  init_render();
568053
569227
  init_typed_node_events();
568054
- VoiceSession = class extends EventEmitter6 {
569228
+ VoiceSession = class extends EventEmitter7 {
568055
569229
  state;
568056
569230
  server = null;
568057
569231
  wss = null;
@@ -569166,11 +570340,11 @@ var init_voice_soul = __esm({
569166
570340
 
569167
570341
  // packages/cli/src/tui/expose.ts
569168
570342
  import { createServer as createServer5, request as httpRequest } from "node:http";
569169
- import { spawn as spawn26, exec as exec2 } from "node:child_process";
569170
- import { EventEmitter as EventEmitter7 } from "node:events";
570343
+ import { spawn as spawn26, exec as exec3 } from "node:child_process";
570344
+ import { EventEmitter as EventEmitter8 } from "node:events";
569171
570345
  import { randomBytes as randomBytes19, timingSafeEqual } from "node:crypto";
569172
570346
  import { URL as URL2 } from "node:url";
569173
- import { loadavg, cpus as cpus2, totalmem as totalmem4, freemem as freemem3 } from "node:os";
570347
+ import { loadavg, cpus as cpus2, totalmem as totalmem5, freemem as freemem4 } from "node:os";
569174
570348
  import { existsSync as existsSync88, readFileSync as readFileSync70, writeFileSync as writeFileSync44, unlinkSync as unlinkSync14, mkdirSync as mkdirSync48, readdirSync as readdirSync29, statSync as statSync34, statfsSync as statfsSync4 } from "node:fs";
569175
570349
  import { join as join103 } from "node:path";
569176
570350
  function cleanForwardHeaders(raw, targetHost) {
@@ -569276,8 +570450,8 @@ function parseRateLimitHeaders(headers) {
569276
570450
  async function collectSystemMetricsAsync() {
569277
570451
  const [l1, l5, l15] = loadavg();
569278
570452
  const cores = cpus2().length;
569279
- const totalMem = totalmem4();
569280
- const freeMem = freemem3();
570453
+ const totalMem = totalmem5();
570454
+ const freeMem = freemem4();
569281
570455
  const usedMem = totalMem - freeMem;
569282
570456
  let disk = {
569283
570457
  path: process.cwd(),
@@ -569310,7 +570484,7 @@ async function collectSystemMetricsAsync() {
569310
570484
  };
569311
570485
  try {
569312
570486
  const smi = await new Promise((resolve55, reject) => {
569313
- exec2(
570487
+ exec3(
569314
570488
  "nvidia-smi --query-gpu=utilization.gpu,memory.used,memory.total,name --format=csv,noheader,nounits 2>/dev/null",
569315
570489
  { encoding: "utf8", timeout: 3e3 },
569316
570490
  (err, stdout) => err ? reject(err) : resolve55(stdout)
@@ -569398,7 +570572,7 @@ var init_expose = __esm({
569398
570572
  custom: "http://127.0.0.1:11434"
569399
570573
  };
569400
570574
  STATE_FILE_NAME = "expose-state.json";
569401
- ExposeGateway = class _ExposeGateway extends EventEmitter7 {
570575
+ ExposeGateway = class _ExposeGateway extends EventEmitter8 {
569402
570576
  constructor(options2) {
569403
570577
  super();
569404
570578
  this.options = options2;
@@ -570277,7 +571451,7 @@ ${this.formatConnectionInfo()}`);
570277
571451
  }
570278
571452
  };
570279
571453
  P2P_STATE_FILE_NAME = "expose-p2p-state.json";
570280
- ExposeP2PGateway = class _ExposeP2PGateway extends EventEmitter7 {
571454
+ ExposeP2PGateway = class _ExposeP2PGateway extends EventEmitter8 {
570281
571455
  _nexusTool;
570282
571456
  // NexusTool instance
570283
571457
  _kind;
@@ -571048,7 +572222,7 @@ var init_secret_vault = __esm({
571048
572222
  });
571049
572223
 
571050
572224
  // packages/cli/src/tui/p2p/peer-mesh.ts
571051
- import { EventEmitter as EventEmitter8 } from "node:events";
572225
+ import { EventEmitter as EventEmitter9 } from "node:events";
571052
572226
  import { createServer as createServer6 } from "node:http";
571053
572227
  import { randomBytes as randomBytes21, createHash as createHash21, generateKeyPairSync } from "node:crypto";
571054
572228
  var PING_INTERVAL_MS, PEER_TIMEOUT_MS, GOSSIP_INTERVAL_MS, MAX_PEERS, PeerMesh;
@@ -571060,7 +572234,7 @@ var init_peer_mesh = __esm({
571060
572234
  PEER_TIMEOUT_MS = 9e4;
571061
572235
  GOSSIP_INTERVAL_MS = 6e4;
571062
572236
  MAX_PEERS = 50;
571063
- PeerMesh = class extends EventEmitter8 {
572237
+ PeerMesh = class extends EventEmitter9 {
571064
572238
  constructor(options2) {
571065
572239
  super();
571066
572240
  this.options = options2;
@@ -571506,7 +572680,7 @@ var init_peer_mesh = __esm({
571506
572680
  });
571507
572681
 
571508
572682
  // packages/cli/src/tui/p2p/inference-router.ts
571509
- import { EventEmitter as EventEmitter9 } from "node:events";
572683
+ import { EventEmitter as EventEmitter10 } from "node:events";
571510
572684
  var TRUST_WEIGHTS, InferenceRouter;
571511
572685
  var init_inference_router = __esm({
571512
572686
  "packages/cli/src/tui/p2p/inference-router.ts"() {
@@ -571518,7 +572692,7 @@ var init_inference_router = __esm({
571518
572692
  verified: 5,
571519
572693
  public: 2
571520
572694
  };
571521
- InferenceRouter = class extends EventEmitter9 {
572695
+ InferenceRouter = class extends EventEmitter10 {
571522
572696
  mesh;
571523
572697
  vault;
571524
572698
  defaultTimeoutMs;
@@ -571716,7 +572890,7 @@ var init_p2p = __esm({
571716
572890
  });
571717
572891
 
571718
572892
  // packages/cli/src/tui/call-agent.ts
571719
- import { EventEmitter as EventEmitter10 } from "node:events";
572893
+ import { EventEmitter as EventEmitter11 } from "node:events";
571720
572894
  import crypto13 from "node:crypto";
571721
572895
  function adaptTool(tool) {
571722
572896
  return {
@@ -571774,7 +572948,7 @@ var init_call_agent = __esm({
571774
572948
  }
571775
572949
  };
571776
572950
  _globalFeed = null;
571777
- CallSubAgent = class extends EventEmitter10 {
572951
+ CallSubAgent = class extends EventEmitter11 {
571778
572952
  tier;
571779
572953
  clientId;
571780
572954
  runner = null;
@@ -573876,8 +575050,8 @@ __export(system_metrics_exports, {
573876
575050
  getInstantSnapshot: () => getInstantSnapshot,
573877
575051
  instantaneousCpuPct: () => instantaneousCpuPct
573878
575052
  });
573879
- import { loadavg as loadavg2, cpus as cpus3, totalmem as totalmem5, freemem as freemem4, platform as platform4 } from "node:os";
573880
- import { exec as exec3 } from "node:child_process";
575053
+ import { loadavg as loadavg2, cpus as cpus3, totalmem as totalmem6, freemem as freemem5, platform as platform4 } from "node:os";
575054
+ import { exec as exec4 } from "node:child_process";
573881
575055
  import { readFile as readFile22 } from "node:fs/promises";
573882
575056
  function formatRate(bytesPerSec) {
573883
575057
  if (bytesPerSec < 1024) return `${Math.round(bytesPerSec)}B`;
@@ -573924,7 +575098,7 @@ async function collectNetworkMetrics() {
573924
575098
  if (plat === "darwin") {
573925
575099
  try {
573926
575100
  const output = await new Promise((resolve55, reject) => {
573927
- exec3("netstat -ib 2>/dev/null | head -30", { encoding: "utf8", timeout: 3e3 }, (err, stdout) => err ? reject(err) : resolve55(stdout));
575101
+ exec4("netstat -ib 2>/dev/null | head -30", { encoding: "utf8", timeout: 3e3 }, (err, stdout) => err ? reject(err) : resolve55(stdout));
573928
575102
  });
573929
575103
  let rxBytes = 0, txBytes = 0;
573930
575104
  for (const line of output.split("\n")) {
@@ -573967,7 +575141,7 @@ async function collectGpuMetrics() {
573967
575141
  if (_nvidiaSmiAvailable2 === false) return noGpu;
573968
575142
  try {
573969
575143
  const smi = await new Promise((resolve55, reject) => {
573970
- exec3(
575144
+ exec4(
573971
575145
  "nvidia-smi --query-gpu=index,uuid,utilization.gpu,memory.used,memory.total,name --format=csv,noheader,nounits 2>/dev/null",
573972
575146
  { encoding: "utf8", timeout: 3e3 },
573973
575147
  (err, stdout) => err ? reject(err) : resolve55(stdout)
@@ -574066,8 +575240,8 @@ function instantaneousCpuPct() {
574066
575240
  function collectCpuRam() {
574067
575241
  const cores = cpus3().length;
574068
575242
  const cpuModel = cpus3()[0]?.model ?? "";
574069
- const totalMem = totalmem5();
574070
- const usedMem = totalMem - freemem4();
575243
+ const totalMem = totalmem6();
575244
+ const usedMem = totalMem - freemem5();
574071
575245
  let cpuUtil = instantaneousCpuPct();
574072
575246
  if (cpuUtil < 0) {
574073
575247
  const [l1] = loadavg2();
@@ -574862,6 +576036,7 @@ var init_status_bar = __esm({
574862
576036
  init_text_selection();
574863
576037
  init_daemon_registry();
574864
576038
  init_overlay_lock();
576039
+ init_dist5();
574865
576040
  init_theme();
574866
576041
  init_layout2();
574867
576042
  EXPERT_TOOL_BASELINES = {
@@ -576073,6 +577248,10 @@ var init_status_bar = __esm({
576073
577248
  this._unifiedMetrics = m2;
576074
577249
  if (this.active) this.renderFooterPreserveCursor();
576075
577250
  }, intervalMs);
577251
+ try {
577252
+ getModelBroker().startPolling(Math.max(2e3, intervalMs * 2));
577253
+ } catch {
577254
+ }
576076
577255
  }
576077
577256
  /** Stop all metrics collection (local and remote) */
576078
577257
  stopAllMetrics() {
@@ -580498,7 +581677,7 @@ __export(setup_exports, {
580498
581677
  updateOllama: () => updateOllama
580499
581678
  });
580500
581679
  import * as readline from "node:readline";
580501
- import { execSync as execSync51, spawn as spawn28, exec as exec4 } from "node:child_process";
581680
+ import { execSync as execSync51, spawn as spawn28, exec as exec5 } from "node:child_process";
580502
581681
  import { promisify as promisify6 } from "node:util";
580503
581682
  import { existsSync as existsSync95, writeFileSync as writeFileSync49, readFileSync as readFileSync78, appendFileSync as appendFileSync6, mkdirSync as mkdirSync53 } from "node:fs";
580504
581683
  import { join as join111 } from "node:path";
@@ -583250,7 +584429,7 @@ var init_setup = __esm({
583250
584429
  init_dist();
583251
584430
  init_tui_select();
583252
584431
  init_listen();
583253
- execAsync2 = promisify6(exec4);
584432
+ execAsync2 = promisify6(exec5);
583254
584433
  OMNIUS_FIRST_RUN_BANNER = [
583255
584434
  " ░▒▓██████▓▒░░▒▓██████████████▓▒░░▒▓███████▓▒░░▒▓█▓▒░▒▓█▓▒░░▒▓█▓▒░░▒▓███████▓▒░ ",
583256
584435
  "░▒▓█▓▒░░▒▓█▓▒░▒▓█▓▒░░▒▓█▓▒░░▒▓█▓▒░▒▓█▓▒░░▒▓█▓▒░▒▓█▓▒░▒▓█▓▒░░▒▓█▓▒░▒▓█▓▒░ ",
@@ -595704,6 +596883,9 @@ async function handleSlashCommand(input, ctx3) {
595704
596883
  case "parallel":
595705
596884
  await handleParallel(arg, ctx3);
595706
596885
  return "handled";
596886
+ case "broker":
596887
+ await handleBroker(arg, ctx3);
596888
+ return "handled";
595707
596889
  case "ollama":
595708
596890
  await handleOllama(arg, ctx3);
595709
596891
  return "handled";
@@ -603161,6 +604343,83 @@ async function handlePeerEndpoint(peerId, authKey, ctx3, local) {
603161
604343
  );
603162
604344
  }
603163
604345
  }
604346
/**
 * Slash-command handler for `/broker`.
 *
 * Supported forms:
 *   - `/broker` | `status` | `list` | `ls` — print RAM/VRAM usage, the
 *     broker's current thresholds, tracked loaded models and in-flight loads.
 *   - `/broker evict <host>:<name>` — ask the broker to evict one model.
 *   - `/broker threshold <ram|vram|idle> <value>` — update a broker threshold
 *     (ram/vram in MB, idle in seconds).
 *
 * @param {string} arg  Raw argument text following `/broker` (may be empty).
 * @param {object} _ctx Command context; not used by this handler.
 * @returns {Promise<void>}
 */
async function handleBroker(arg, _ctx) {
  const broker = getModelBroker();
  const sub = (arg || "").trim().toLowerCase();
  // A fresh snapshot is taken before every subcommand, not only for status.
  const snap = await broker.pollOnce();
  if (!sub || sub === "status" || sub === "list" || sub === "ls") {
    safeLog("");
    safeLog(` ${c3.bold("Resource Broker")}`);
    safeLog("");
    safeLog(` ${c3.dim("RAM:")} ${snap.ramMB.used} / ${snap.ramMB.total} MB used (${snap.ramMB.free} MB free)`);
    if (snap.vramMB) {
      safeLog(` ${c3.dim("VRAM:")} ${snap.vramMB.used} / ${snap.vramMB.total} MB used (${snap.vramMB.free} MB free)`);
    } else {
      safeLog(` ${c3.dim("VRAM:")} ${c3.dim("(no GPU detected)")}`);
    }
    safeLog(` ${c3.dim("RAM headroom threshold:")} ${broker.ramHeadroomMB} MB`);
    safeLog(` ${c3.dim("VRAM headroom threshold:")} ${broker.vramHeadroomMB} MB`);
    safeLog(` ${c3.dim("Idle-evict threshold:")} ${Math.round(broker.idleEvictMs / 1e3)}s`);
    safeLog("");
    if (snap.loaded.length === 0) {
      safeLog(` ${c3.dim("No loaded models tracked.")}`);
    } else {
      safeLog(` ${c3.bold("Loaded models:")}`);
      const now = Date.now();
      for (const m2 of snap.loaded) {
        const idle = Math.round((now - m2.lastUsedAt) / 1e3);
        const owner = m2.owner ? c3.dim(` [owner=${m2.owner}]`) : "";
        const ctx3 = m2.numCtx ? c3.dim(` n_ctx=${m2.numCtx}`) : "";
        safeLog(` ${c3.cyan(m2.name)} (${m2.host}/${m2.domain}) vram=${m2.vramMB}MB ram=${m2.ramMB}MB idle=${idle}s${ctx3}${owner}`);
      }
    }
    if (snap.inflight.length > 0) {
      safeLog("");
      safeLog(` ${c3.bold("In-flight loads:")}`);
      for (const f2 of snap.inflight) {
        const age = Math.round((Date.now() - f2.startedMs) / 1e3);
        safeLog(` ${c3.yellow(f2.key)} owner=${f2.owner} ${age}s ago`);
      }
    }
    safeLog("");
    safeLog(` ${c3.dim("Subcommands: /broker [status|evict <key>|threshold ram|vram|idle <value>]")}`);
    safeLog("");
    return;
  }
  // Re-split the raw (non-lowercased) argument so the key keeps its casing.
  const parts = (arg || "").trim().split(/\s+/);
  const op = parts[0]?.toLowerCase();
  if (op === "evict") {
    const key = parts[1];
    // Split on the FIRST ":" only, so names that themselves contain ":"
    // (e.g. "ollama:llama3:8b") stay intact.
    const sep = key ? key.indexOf(":") : -1;
    const host = sep > 0 ? key.slice(0, sep) : "";
    const name10 = sep > 0 ? key.slice(sep + 1) : "";
    // Both halves must be non-empty: "ollama:" and ":moondream" are rejected
    // instead of being forwarded to the broker with an empty host/name.
    if (!host || !name10) {
      renderWarning("Usage: /broker evict <host>:<name> e.g. /broker evict ollama:moondream");
      return;
    }
    const ok3 = await broker.evict(host, name10, "user-requested");
    if (ok3) renderInfo(`Evicted ${key} (actively unloaded)`);
    else renderInfo(`Unregistered ${key} (could not actively unload; subprocess may need manual cleanup)`);
    return;
  }
  if (op === "threshold") {
    const which3 = parts[1]?.toLowerCase();
    const value2 = parts[2] ? Number(parts[2]) : NaN;
    if (!which3 || !Number.isFinite(value2) || value2 < 0) {
      renderWarning("Usage: /broker threshold <ram|vram|idle> <value> (ram/vram in MB, idle in seconds)");
      return;
    }
    if (which3 === "ram") broker.ramHeadroomMB = value2;
    else if (which3 === "vram") broker.vramHeadroomMB = value2;
    // The idle threshold is entered in seconds but stored in milliseconds.
    else if (which3 === "idle") broker.idleEvictMs = value2 * 1e3;
    else {
      renderWarning("Unknown threshold; use ram|vram|idle");
      return;
    }
    renderInfo(`Updated broker threshold ${which3} = ${value2}`);
    return;
  }
  renderWarning("Unknown /broker subcommand. Try: status | evict <host>:<name> | threshold <ram|vram|idle> <value>");
}
603164
604423
  async function handleParallel(arg, ctx3) {
603165
604424
  const { execSync: execSync61 } = await import("node:child_process");
603166
604425
  const baseUrl = ctx3.config.backendUrl || "http://localhost:11434";
@@ -604173,9 +605432,9 @@ async function handleUpdate(subcommand, ctx3) {
604173
605432
  }
604174
605433
  };
604175
605434
  }
604176
- const { exec: exec5, spawn: spawn34, execSync: es2 } = await import("node:child_process");
605435
+ const { exec: exec6, spawn: spawn34, execSync: es2 } = await import("node:child_process");
604177
605436
  const execA = (cmd, opts) => new Promise(
604178
- (res, rej) => exec5(
605437
+ (res, rej) => exec6(
604179
605438
  cmd,
604180
605439
  {
604181
605440
  encoding: "utf8",
@@ -604869,7 +606128,7 @@ async function handleUpdate(subcommand, ctx3) {
604869
606128
  installOverlay.setPhase("Native Modules");
604870
606129
  installOverlay.setStatus("Rebuilding native modules...");
604871
606130
  await new Promise((resolve55) => {
604872
- const child = exec5(
606131
+ const child = exec6(
604873
606132
  `${sudoPrefix}npm rebuild -g omnius 2>/dev/null || true`,
604874
606133
  { timeout: 12e4 },
604875
606134
  () => resolve55(true)
@@ -604911,7 +606170,7 @@ async function handleUpdate(subcommand, ctx3) {
604911
606170
  if (fsExists(venvPip2)) {
604912
606171
  installOverlay.setStatus("Upgrading Python packages...");
604913
606172
  await new Promise((resolve55) => {
604914
- const child = exec5(
606173
+ const child = exec6(
604915
606174
  `"${venvPip2}" install --upgrade moondream-station pytesseract Pillow opencv-python-headless numpy 2>/dev/null || true`,
604916
606175
  { timeout: 3e5 },
604917
606176
  (err) => resolve55(!err)
@@ -610886,6 +612145,9 @@ var init_bless_engine = __esm({
610886
612145
  async pingModel() {
610887
612146
  try {
610888
612147
  const url = `${this.config.backendUrl}/api/chat`;
612148
+ const broker = getModelBroker();
612149
+ const trainCtx = await broker.getNctxTrain(this.config.model);
612150
+ const numCtx = trainCtx && trainCtx > 0 ? Math.min(trainCtx, 8192) : void 0;
610889
612151
  await fetch(url, {
610890
612152
  method: "POST",
610891
612153
  headers: { "Content-Type": "application/json" },
@@ -610893,11 +612155,12 @@ var init_bless_engine = __esm({
610893
612155
  model: this.config.model,
610894
612156
  messages: [{ role: "user", content: "." }],
610895
612157
  stream: false,
610896
- options: { num_predict: 1 },
612158
+ options: numCtx ? { num_predict: 1, num_ctx: numCtx } : { num_predict: 1 },
610897
612159
  keep_alive: "30m"
610898
612160
  }),
610899
612161
  signal: AbortSignal.timeout(15e3)
610900
612162
  });
612163
+ broker.touch("ollama", this.config.model);
610901
612164
  this.state.keepAlivePings++;
610902
612165
  } catch {
610903
612166
  }
@@ -614240,6 +615503,300 @@ var init_stimulation = __esm({
614240
615503
  }
614241
615504
  });
614242
615505
 
615506
+ // packages/cli/src/tui/pid-controller.ts
615507
/**
 * Clamp a value to the closed interval [0, 1].
 *
 * @param {number} x - Candidate value.
 * @returns {number} 0 for non-finite input (NaN, ±Infinity) or values below 0,
 *   1 for values above 1, otherwise `x` unchanged.
 */
function clamp018(x) {
  if (!Number.isFinite(x)) {
    return 0;
  }
  return x < 0 ? 0 : x > 1 ? 1 : x;
}
615513
/**
 * Return the shared PidRegistry, creating it on first use.
 *
 * @returns {PidRegistry} The module-level singleton stored in `_registry2`.
 */
function getPidRegistry() {
  if (_registry2) {
    return _registry2;
  }
  _registry2 = new PidRegistry();
  return _registry2;
}
615517
var DEFAULT_PID_CONFIG, PidRegistry, _registry2;
var init_pid_controller = __esm({
  "packages/cli/src/tui/pid-controller.ts"() {
    "use strict";
    // Default tuning used by every controller created without an override.
    DEFAULT_PID_CONFIG = {
      kp: 1e-4,
      ki: 1e-5,
      kd: 0,
      // Target latency in milliseconds.
      setpointMs: 8e3,
      // Output a freshly created controller starts from.
      initialOutput: 1,
      // Smoothing factor for the exponential moving average of the latency.
      pvEmaAlpha: 0.3,
      // Symmetric bound on the accumulated error. sample() adds the raw error
      // (ms) once per sample; the sum is not weighted by elapsed time.
      integralClamp: 5e3
    };
    /**
     * Keyed collection of incremental PID controllers that turn latency
     * samples into an output ratio.
     */
    PidRegistry = class {
      _controllers = /* @__PURE__ */ new Map();
      /**
       * Get or create the controller state for `key`.
       *
       * @param {string} key - Controller identifier.
       * @param {object} [configOverride] - Merged over DEFAULT_PID_CONFIG, but
       *   only when the controller is created; ignored for existing keys.
       * @returns {object} The mutable controller state.
       */
      get(key, configOverride) {
        let st = this._controllers.get(key);
        if (!st) {
          const config = { ...DEFAULT_PID_CONFIG, ...configOverride ?? {} };
          st = {
            output: config.initialOutput,
            // Assume the process variable is on target until a sample arrives.
            pv: config.setpointMs,
            integral: 0,
            lastError: 0,
            lastSampleAt: 0,
            samples: 0,
            config
          };
          this._controllers.set(key, st);
        }
        return st;
      }
      /**
       * Read the current output without creating a controller.
       *
       * @param {string} key - Controller identifier.
       * @returns {number} The stored output, or the default initial output
       *   when no controller exists for `key`.
       */
      output(key) {
        return this._controllers.get(key)?.output ?? DEFAULT_PID_CONFIG.initialOutput;
      }
      /**
       * Record a latency sample and update the controller.
       *
       * The PID term is added to the previous output and the sum is passed
       * through clamp018, so the stored output stays within [0, 1] after the
       * first accepted sample.
       *
       * @param {string} key - Controller identifier.
       * @param {number} latencyMs - Observed latency in milliseconds.
       * @param {object} [configOverride] - Forwarded to get().
       * @returns {number} The controller output after this call.
       */
      sample(key, latencyMs, configOverride) {
        const st = this.get(key, configOverride);
        // A NaN/Infinity sample would turn pv and integral into NaN for good
        // (NaN survives the EMA and both clamp comparisons), pinning the output
        // at 0 for every later sample. Ignore such samples instead.
        if (!Number.isFinite(latencyMs)) {
          return st.output;
        }
        const now = Date.now();
        const { config } = st;
        // The first accepted sample seeds the moving average directly.
        st.pv = st.samples === 0 ? latencyMs : st.pv * (1 - config.pvEmaAlpha) + latencyMs * config.pvEmaAlpha;
        const error = config.setpointMs - st.pv;
        st.integral = Math.max(-config.integralClamp, Math.min(config.integralClamp, st.integral + error));
        // Without a previous sample, assume one second elapsed.
        const dt = st.lastSampleAt > 0 ? now - st.lastSampleAt : 1e3;
        const derivative = dt > 0 ? (error - st.lastError) / dt : 0;
        const u = config.kp * error + config.ki * st.integral + config.kd * derivative;
        st.output = clamp018(st.output + u);
        st.lastError = error;
        st.lastSampleAt = now;
        st.samples += 1;
        return st.output;
      }
      /**
       * Snapshot every controller for the /broker debug surface.
       *
       * @returns {Array<{key: string, output: number, pv: number, setpoint: number, samples: number}>}
       */
      snapshot() {
        return [...this._controllers.entries()].map(([key, st]) => ({
          key,
          output: st.output,
          pv: st.pv,
          setpoint: st.config.setpointMs,
          samples: st.samples
        }));
      }
      /** Drop all controllers (test-only). */
      reset() {
        this._controllers.clear();
      }
    };
    _registry2 = null;
  }
});
615595
+
615596
+ // packages/cli/src/tui/component-benefit.ts
615597
/**
 * Return the shared ComponentBenefitRegistry, creating it on first use.
 *
 * @returns {ComponentBenefitRegistry} The module-level singleton stored in `_registry3`.
 */
function getComponentBenefitRegistry() {
  if (_registry3) {
    return _registry3;
  }
  _registry3 = new ComponentBenefitRegistry();
  return _registry3;
}
615601
var EMA_ALPHA, MIN_SAMPLES_TO_TRUST, ComponentBenefitRegistry, _registry3;
var init_component_benefit = __esm({
  "packages/cli/src/tui/component-benefit.ts"() {
    "use strict";
    // Weight given to the newest outcome in the moving-average score.
    EMA_ALPHA = 0.2;
    // Below this many samples score() reports the neutral value instead.
    MIN_SAMPLES_TO_TRUST = 3;
    /**
     * Tracks, per chat and per component, how often a component's needle
     * string shows up in the decision text it was sampled for.
     */
    ComponentBenefitRegistry = class {
      /** Two-tier map: chatKey → componentKey → state. */
      _byChat = /* @__PURE__ */ new Map();
      /**
       * Score for a component in a chat.
       *
       * @param {string} chatKey
       * @param {string} componentKey
       * @returns {number} The stored score, or 0.5 (neutral) when the component
       *   is unknown or has fewer than MIN_SAMPLES_TO_TRUST samples.
       */
      score(chatKey, componentKey) {
        const st = this._byChat.get(chatKey)?.get(componentKey);
        if (!st || st.samples < MIN_SAMPLES_TO_TRUST) return 0.5;
        return st.score;
      }
      /**
       * Record one batch: for each sampled component, did the decision text
       * contain its needle (case-insensitive)?
       *
       * @param {string} chatKey
       * @param {Array<{key: string, needle: *}>} samples - A missing list is
       *   treated as empty.
       * @param {*} decisionText - Coerced to a string; null/undefined count as
       *   empty text, so every sample in the batch is a miss.
       */
      recordOutcome(chatKey, samples, decisionText) {
        // Coerce rather than call .toLowerCase() directly: a missing decision
        // or a numeric needle must not throw and abort the whole batch.
        const haystack = String(decisionText ?? "").toLowerCase();
        let perChat = this._byChat.get(chatKey);
        if (!perChat) {
          perChat = /* @__PURE__ */ new Map();
          this._byChat.set(chatKey, perChat);
        }
        const now = Date.now();
        for (const sample of samples ?? []) {
          const needle = String(sample.needle ?? "").toLowerCase();
          // Needles shorter than three characters never count as a hit.
          const hit = needle.length >= 3 && haystack.includes(needle);
          let st = perChat.get(sample.key);
          if (!st) {
            st = { score: 0.5, samples: 0, hits: 0, lastSeenAt: now };
            perChat.set(sample.key, st);
          }
          const outcome = hit ? 1 : 0;
          // The first sample replaces the neutral prior; later ones are blended in.
          st.score = st.samples === 0 ? outcome : st.score * (1 - EMA_ALPHA) + outcome * EMA_ALPHA;
          st.samples += 1;
          st.hits += outcome;
          st.lastSeenAt = now;
        }
      }
      /**
       * Snapshot for the /broker debug surface.
       *
       * @param {string} [chatKey] - Restrict the snapshot to one chat; when
       *   falsy, every chat is included.
       * @returns {Array<{chatKey: string, componentKey: string, score: number, samples: number, hits: number}>}
       */
      snapshot(chatKey) {
        const out = [];
        const chats = chatKey ? [[chatKey, this._byChat.get(chatKey)]] : [...this._byChat.entries()];
        for (const [cKey, components] of chats) {
          if (!components) continue;
          for (const [comp, st] of components) {
            out.push({ chatKey: cKey, componentKey: comp, score: st.score, samples: st.samples, hits: st.hits });
          }
        }
        return out;
      }
      /** Drop all recorded state. */
      reset() {
        this._byChat.clear();
      }
    };
    _registry3 = null;
  }
});
615658
+
615659
+ // packages/cli/src/tui/soul-observations.ts
615660
/**
 * Return the shared SoulObservationStream. On first use the stream is created
 * and handed to subscribeBrokerEvents.
 *
 * @returns {SoulObservationStream} The module-level singleton stored in `_stream`.
 */
function getSoulObservationStream() {
  if (_stream) {
    return _stream;
  }
  _stream = new SoulObservationStream();
  subscribeBrokerEvents(_stream);
  return _stream;
}
615667
/**
 * Forward model-broker events into an observation stream.
 *
 * Only the first call subscribes; `_brokerSubscribed` turns later calls into
 * no-ops.
 *
 * @param {{emit: Function}} stream - Receives one event object per broker event.
 */
function subscribeBrokerEvents(stream) {
  if (_brokerSubscribed) return;
  _brokerSubscribed = true;
  const broker = getModelBroker();

  // A released slot becomes either a completion or a degradation record.
  const onSlotReleased = (info, outcome) => {
    const event = outcome.ok ? {
      kind: "inference.completed",
      model: info.model,
      sessionKey: info.sessionKey,
      latencyMs: Date.now() - info.acquiredAt,
      promptTokens: info.promptTokens,
      completionTokens: outcome.completionTokens ?? 0,
      ts: Date.now()
    } : {
      kind: "inference.degraded",
      model: info.model,
      sessionKey: info.sessionKey,
      reason: outcome.error ?? "unknown",
      ts: Date.now()
    };
    stream.emit(event);
  };
  const onPressure = (kind, value2, threshold) => {
    stream.emit({ kind: "broker.pressure", pressure: kind, value: value2, threshold, ts: Date.now() });
  };
  const onEvicted = (m2, reason) => {
    stream.emit({ kind: "model.evicted", host: m2.host, name: m2.name, reason, ts: Date.now() });
  };

  broker.on("slotReleased", onSlotReleased);
  broker.on("pressure", onPressure);
  broker.on("evicted", onEvicted);
}
615699
/**
 * Build the "System Observations" text block from the broker snapshot, the
 * PID registry snapshot and, optionally, recent per-session events.
 *
 * @param {string} [sessionKey] - When truthy, the last 15 events recorded for
 *   this session are summarised (refused/rate-limited sends, inbound reactions).
 * @returns {string} A heading followed by one line per observation, or "" when
 *   there is nothing to report.
 */
function formatSystemObservations(sessionKey) {
  const stream = getSoulObservationStream();
  const broker = getModelBroker();
  const snap = broker.snapshot();
  const pidSnap = getPidRegistry().snapshot();
  const lines = [];
  const slots = snap.slots;

  // Slot utilisation and per-model throughput.
  const utilPct = slots.capacity > 0 ? Math.round(slots.inUse / slots.capacity * 100) : 0;
  const tpsByModel = Object.entries(slots.byModel).filter(([, m2]) => m2.samples > 0).map(([model, m2]) => `${model}=${m2.tokensPerSec.toFixed(1)}t/s (${m2.samples}s)`).join(", ");
  if (slots.inUse > 0 || slots.queueDepth > 0 || tpsByModel) {
    lines.push(`Capacity: ${slots.inUse}/${slots.capacity} slots in use (${utilPct}%), queue=${slots.queueDepth}/${slots.queueCapacity}${tpsByModel ? `; throughput: ${tpsByModel}` : ""}.`);
  }

  // Memory headroom warnings; a missing headroom setting is treated as 0.
  if (snap.ramMB.free < (broker.ramHeadroomMB ?? 0)) {
    lines.push(`RAM pressure: ${snap.ramMB.free}MB free (below ${broker.ramHeadroomMB}MB headroom).`);
  }
  if (snap.vramMB && snap.vramMB.free < (broker.vramHeadroomMB ?? 0)) {
    lines.push(`VRAM pressure: ${snap.vramMB.free}MB free (below ${broker.vramHeadroomMB}MB headroom).`);
  }

  // Queue pressure at 80% of capacity. The threshold floors to 0 for a
  // capacity of 0 or 1, so also require a non-empty queue; otherwise an idle
  // system would be told to keep replies brief.
  const queueThreshold = Math.floor(slots.queueCapacity * 0.8);
  if (slots.queueDepth > 0 && slots.queueDepth >= queueThreshold) {
    lines.push(`Queue pressure: ${slots.queueDepth}/${slots.queueCapacity} entries — prefer brief replies or single-emoji reactions to keep the queue draining.`);
  }

  // Report controllers with at least 3 samples whose output left [0.95, 1.05].
  const interesting = pidSnap.filter((p2) => p2.samples >= 3 && (p2.output < 0.95 || p2.output > 1.05));
  if (interesting.length > 0) {
    const pidLines = interesting.slice(0, 4).map((p2) => `${p2.key}: u=${p2.output.toFixed(2)} (pv=${Math.round(p2.pv)}ms, sp=${p2.setpoint}ms)`).join(", ");
    lines.push(`Context tier PID state: ${pidLines}.`);
  }

  if (sessionKey) {
    const recent = stream.recentForSession(sessionKey, 15);
    if (recent.length > 0) {
      const sends = recent.filter((e2) => e2.kind.startsWith("telegram.send."));
      const reactions = recent.filter((e2) => e2.kind.startsWith("emoji."));
      const forbidden = sends.filter((e2) => e2.kind === "telegram.send.forbidden").length;
      const rateLimited = sends.filter((e2) => e2.kind === "telegram.send.rate_limited").length;
      if (forbidden > 0) lines.push(`This chat has refused ${forbidden} recent send attempt(s) (e.g. no rights to post). Treat as a strong silence signal.`);
      if (rateLimited > 0) lines.push(`This chat rate-limited ${rateLimited} recent send(s). Slow cadence.`);
      if (reactions.length > 0) {
        const reactSummary = reactions.filter((e2) => e2.kind === "emoji.reaction.received").map((e2) => e2.emoji).join("");
        if (reactSummary) lines.push(`Recent inbound reactions in this chat: ${reactSummary}`);
      }
    }
  }

  if (lines.length === 0) return "";
  return ["## System Observations (broker, PID, capacity, send outcomes)", ...lines].join("\n");
}
615744
var PER_SESSION_BUFFER, GLOBAL_BUFFER, SoulObservationStream, _stream, _brokerSubscribed;
var init_soul_observations = __esm({
  "packages/cli/src/tui/soul-observations.ts"() {
    "use strict";
    init_dist5();
    init_pid_controller();
    // Maximum number of events retained per session and overall.
    PER_SESSION_BUFFER = 60;
    GLOBAL_BUFFER = 200;
    // Return the last `limit` entries of `buf` as a new array. A limit that is
    // not at least 1 yields an empty array: `slice(-0)` is `slice(0)`, which
    // would hand back the whole buffer for a limit of 0.
    const lastEntries = (buf, limit) => {
      const count = Math.floor(limit);
      return count >= 1 ? buf.slice(-count) : [];
    };
    /**
     * In-memory event log with a bounded buffer per session, a bounded global
     * buffer, and live listeners.
     */
    SoulObservationStream = class {
      _bySession = /* @__PURE__ */ new Map();
      _global = [];
      _listeners = /* @__PURE__ */ new Set();
      /**
       * Record an event and notify listeners.
       *
       * Events with a truthy `sessionKey` are also stored in that session's
       * buffer. The oldest entry is dropped once a buffer exceeds its limit.
       *
       * @param {object} event
       */
      emit(event) {
        if ("sessionKey" in event && event.sessionKey) {
          let buf = this._bySession.get(event.sessionKey);
          if (!buf) {
            buf = [];
            this._bySession.set(event.sessionKey, buf);
          }
          buf.push(event);
          if (buf.length > PER_SESSION_BUFFER) buf.shift();
        }
        this._global.push(event);
        if (this._global.length > GLOBAL_BUFFER) this._global.shift();
        for (const listener of this._listeners) {
          try {
            listener(event);
          } catch {
            // Best effort: a throwing listener must not block recording or
            // delivery to the remaining listeners.
          }
        }
      }
      /**
       * Subscribe to all events (live tail).
       *
       * @param {Function} listener - Called with every event passed to emit().
       * @returns {Function} Call to unsubscribe.
       */
      subscribe(listener) {
        this._listeners.add(listener);
        return () => this._listeners.delete(listener);
      }
      /**
       * Read recent events for a session (most recent last).
       *
       * @param {string} sessionKey
       * @param {number} [limit=20] - Maximum number of events; values below 1
       *   (or NaN) return an empty array.
       * @returns {object[]} A new array.
       */
      recentForSession(sessionKey, limit = 20) {
        return lastEntries(this._bySession.get(sessionKey) ?? [], limit);
      }
      /**
       * Read recent global events (most recent last).
       *
       * @param {number} [limit=30] - Maximum number of events; values below 1
       *   (or NaN) return an empty array.
       * @returns {object[]} A new array.
       */
      recentGlobal(limit = 30) {
        return lastEntries(this._global, limit);
      }
      /** Drop all buffered events. Listeners stay subscribed. */
      reset() {
        this._bySession.clear();
        this._global.length = 0;
      }
    };
    _stream = null;
    _brokerSubscribed = false;
  }
});
615799
+
614243
615800
  // packages/cli/src/tui/telegram-channel-dmn.ts
614244
615801
  import { existsSync as existsSync115, mkdirSync as mkdirSync65, readdirSync as readdirSync40, readFileSync as readFileSync94, writeFileSync as writeFileSync59 } from "node:fs";
614245
615802
  import { join as join129 } from "node:path";
@@ -614338,7 +615895,7 @@ function buildReplyOpportunities(input, openQuestions) {
614338
615895
  function daydreamOpportunityId(input, trigger) {
614339
615896
  return createHash23("sha1").update(`${input.sessionKey}:${input.generatedAtMs}:${trigger}`).digest("hex").slice(0, 16);
614340
615897
  }
614341
/**
 * Clamp a value to [0, 1]; non-finite input (NaN, ±Infinity) yields 0.
 *
 * @param {number} value2
 * @returns {number}
 */
function clamp019(value2) {
  return Number.isFinite(value2) ? Math.max(0, Math.min(1, value2)) : 0;
}
@@ -614349,7 +615906,7 @@ function pushStimulationSignal(signals, signal, source, weight) {
614349
615906
  const cleanSignal = compactLine2(signal, 120);
614350
615907
  const cleanSource = compactLine2(source, 180);
614351
615908
  if (!cleanSignal || signals.some((entry) => entry.signal === cleanSignal && entry.source === cleanSource)) return;
614352
- signals.push({ signal: cleanSignal, source: cleanSource, weight: clamp018(weight) });
615909
+ signals.push({ signal: cleanSignal, source: cleanSource, weight: clamp019(weight) });
614353
615910
  }
614354
615911
  function buildMetaAnalysisSignals(input) {
614355
615912
  const chatLabel = input.chatTitle || input.chatId;
@@ -614424,7 +615981,7 @@ function buildCuriosityThreads(input, openQuestions, stimulationSignals) {
614424
615981
  question: text.endsWith("?") || text.endsWith("?") ? text : `What should be learned or clarified from: ${text || entry.mediaSummary || "recent media"}?`,
614425
615982
  rationale: "Human curiosity, uncertainty, or multimodal content makes this a useful idle exploration target.",
614426
615983
  sourceMessages: messageId,
614427
- intensity: clamp018(0.5 + replyBoost + mediaBoost + questionBoost)
615984
+ intensity: clamp019(0.5 + replyBoost + mediaBoost + questionBoost)
614428
615985
  });
614429
615986
  }
614430
615987
  for (const question of openQuestions.slice(-4)) {
@@ -614444,7 +616001,7 @@ function buildCuriosityThreads(input, openQuestions, stimulationSignals) {
614444
616001
  question: `Is there a useful clarification or memory consolidation around ${strongest.source}?`,
614445
616002
  rationale: "Strongest stimulation signal can seed a low-intrusion reflection target.",
614446
616003
  sourceMessages: [],
614447
- intensity: clamp018(strongest.weight * 0.72)
616004
+ intensity: clamp019(strongest.weight * 0.72)
614448
616005
  });
614449
616006
  }
614450
616007
  return threads.sort((a2, b) => b.intensity - a2.intensity).slice(0, 8);
@@ -614518,7 +616075,7 @@ function buildOutreachPlans(input, curiosityThreads) {
614518
616075
  purpose: "Continue the public thread only when the live model judges that the group would benefit from a concise follow-up.",
614519
616076
  draftIntent: "Ask one concrete clarification, offer one useful synthesis, or stay silent if the room has moved on.",
614520
616077
  gate: "model_decision",
614521
- confidence: clamp018(thread.intensity * 0.86)
616078
+ confidence: clamp019(thread.intensity * 0.86)
614522
616079
  });
614523
616080
  const participant = participantForThread(input, thread);
614524
616081
  if (!participant) continue;
@@ -614530,7 +616087,7 @@ function buildOutreachPlans(input, curiosityThreads) {
614530
616087
  purpose: "Offer a one-to-one follow-up only if private contact is allowed and the issue is personal, unresolved, or better handled outside the group.",
614531
616088
  draftIntent: "Reference the public thread briefly, ask permission to continue privately, and do not reveal hidden meta-analysis.",
614532
616089
  gate: "admin_review",
614533
- confidence: clamp018(thread.intensity * 0.58)
616090
+ confidence: clamp019(thread.intensity * 0.58)
614534
616091
  });
614535
616092
  }
614536
616093
  return plans.slice(0, 8);
@@ -615633,7 +617190,7 @@ function numberOr(value2, fallback) {
615633
617190
  function isNumber(value2) {
615634
617191
  return typeof value2 === "number" && Number.isFinite(value2);
615635
617192
  }
615636
/**
 * Clamp a value to [0, 1], treating non-finite input (NaN, ±Infinity) as 0.
 *
 * @param {number} value2
 * @returns {number}
 */
function clamp0110(value2) {
  const finite = Number.isFinite(value2) ? value2 : 0;
  const capped = Math.min(1, finite);
  return Math.max(0, capped);
}
615639
617196
  function iso(ts) {
@@ -615780,8 +617337,8 @@ function normalizeRelationship(raw) {
615780
617337
  kind: value2.kind,
615781
617338
  fromKey: String(value2.fromKey),
615782
617339
  toKey: String(value2.toKey),
615783
- confidence: clamp019(numberOr(value2.confidence, 0)),
615784
- weight: clamp019(numberOr(value2.weight, 0)),
617340
+ confidence: clamp0110(numberOr(value2.confidence, 0)),
617341
+ weight: clamp0110(numberOr(value2.weight, 0)),
615785
617342
  firstSeenAt: numberOr(value2.firstSeenAt, Date.now()),
615786
617343
  lastSeenAt: numberOr(value2.lastSeenAt, Date.now()),
615787
617344
  evidenceMessageIds: Array.isArray(value2.evidenceMessageIds) ? value2.evidenceMessageIds.filter(isNumber).slice(-40) : [],
@@ -615800,7 +617357,7 @@ function normalizePreferences(raw) {
615800
617357
  if (!evidence || typeof evidence !== "object") continue;
615801
617358
  out[actorKey][key] = {
615802
617359
  value: Math.max(-1, Math.min(1, numberOr(evidence.value, 0))),
615803
- confidence: clamp019(numberOr(evidence.confidence, 0)),
617360
+ confidence: clamp0110(numberOr(evidence.confidence, 0)),
615804
617361
  updatedAt: numberOr(evidence.updatedAt, Date.now()),
615805
617362
  evidenceMessageIds: Array.isArray(evidence.evidenceMessageIds) ? evidence.evidenceMessageIds.filter(isNumber).slice(-12) : [],
615806
617363
  note: compactOptional(evidence.note, 220)
@@ -615858,7 +617415,7 @@ function normalizeOutcome(raw) {
615858
617415
  replyToMessageId: typeof value2.replyToMessageId === "number" ? value2.replyToMessageId : void 0,
615859
617416
  route: value2.route === "action" ? "action" : "chat",
615860
617417
  shouldReply: value2.shouldReply === true,
615861
- confidence: clamp019(numberOr(value2.confidence, 0)),
617418
+ confidence: clamp0110(numberOr(value2.confidence, 0)),
615862
617419
  reason: compact2(value2.reason || "", 280),
615863
617420
  source: compact2(value2.source || "unknown", 80),
615864
617421
  silentDisposition: compactOptional(value2.silentDisposition, 280),
@@ -615870,7 +617427,7 @@ function normalizeOutcome(raw) {
615870
617427
  scenarioNote: compactOptional(value2.scenarioNote, 360),
615871
617428
  scenarioId: compactOptional(value2.scenarioId, 160),
615872
617429
  scenarioLabel: compactOptional(value2.scenarioLabel, 160),
615873
- scenarioConfidence: typeof value2.scenarioConfidence === "number" && Number.isFinite(value2.scenarioConfidence) ? clamp019(value2.scenarioConfidence) : void 0,
617430
+ scenarioConfidence: typeof value2.scenarioConfidence === "number" && Number.isFinite(value2.scenarioConfidence) ? clamp0110(value2.scenarioConfidence) : void 0,
615874
617431
  scenarioObjective: compactOptional(value2.scenarioObjective, 360),
615875
617432
  scenarioStateLoop: compactOptional(value2.scenarioStateLoop, 360),
615876
617433
  salienceSignals: Array.isArray(value2.salienceSignals) ? value2.salienceSignals.map(String).slice(0, 16) : [],
@@ -615888,7 +617445,7 @@ function normalizeDaydreamOpportunity(raw) {
615888
617445
  artifactId: String(value2.artifactId || "unknown"),
615889
617446
  generatedAt: String(value2.generatedAt || (/* @__PURE__ */ new Date()).toISOString()),
615890
617447
  trigger: compact2(value2.trigger || "", 240),
615891
- confidence: clamp019(numberOr(value2.confidence, 0)),
617448
+ confidence: clamp0110(numberOr(value2.confidence, 0)),
615892
617449
  lifecycle,
615893
617450
  firstSeenAt: numberOr(value2.firstSeenAt, Date.now()),
615894
617451
  updatedAt: numberOr(value2.updatedAt, Date.now()),
@@ -615945,7 +617502,7 @@ function commitTelegramSocialDecision(state, input) {
615945
617502
  replyToMessageId: input.replyToMessageId,
615946
617503
  route: input.route,
615947
617504
  shouldReply: input.shouldReply,
615948
- confidence: clamp019(input.confidence),
617505
+ confidence: clamp0110(input.confidence),
615949
617506
  reason: compact2(input.reason, 280),
615950
617507
  source: compact2(input.source, 80),
615951
617508
  silentDisposition: compactOptional(input.silentDisposition, 280),
@@ -615957,7 +617514,7 @@ function commitTelegramSocialDecision(state, input) {
615957
617514
  scenarioNote: compactOptional(input.scenarioNote, 360),
615958
617515
  scenarioId: compactOptional(input.scenarioId, 160),
615959
617516
  scenarioLabel: compactOptional(input.scenarioLabel, 160),
615960
- scenarioConfidence: input.scenarioConfidence === void 0 ? void 0 : clamp019(input.scenarioConfidence),
617517
+ scenarioConfidence: input.scenarioConfidence === void 0 ? void 0 : clamp0110(input.scenarioConfidence),
615961
617518
  scenarioObjective: compactOptional(input.scenarioObjective, 360),
615962
617519
  scenarioStateLoop: compactOptional(input.scenarioStateLoop, 360),
615963
617520
  salienceSignals: [...new Set((input.salienceSignals ?? []).map(String))].slice(0, 16),
@@ -615981,7 +617538,7 @@ function registerDaydreamOpportunities(state, opportunities, now = Date.now()) {
615981
617538
  artifactId: opportunity.artifactId || "unknown",
615982
617539
  generatedAt: opportunity.generatedAt || new Date(now).toISOString(),
615983
617540
  trigger: compact2(opportunity.trigger, 240),
615984
- confidence: clamp019(opportunity.confidence),
617541
+ confidence: clamp0110(opportunity.confidence),
615985
617542
  lifecycle: "proposed",
615986
617543
  firstSeenAt: now,
615987
617544
  updatedAt: now,
@@ -615991,7 +617548,7 @@ function registerDaydreamOpportunities(state, opportunities, now = Date.now()) {
615991
617548
  };
615992
617549
  if (existing) {
615993
617550
  item.trigger = compact2(opportunity.trigger, 240) || item.trigger;
615994
- item.confidence = clamp019(opportunity.confidence);
617551
+ item.confidence = clamp0110(opportunity.confidence);
615995
617552
  item.updatedAt = now;
615996
617553
  }
615997
617554
  state.daydreamOpportunities[id] = item;
@@ -616131,8 +617688,8 @@ function upsertRelationship(state, kind, fromKey, toKey, messageId, confidence2,
616131
617688
  evidenceMessageIds: [],
616132
617689
  source
616133
617690
  };
616134
- edge.confidence = Math.max(edge.confidence, clamp019(confidence2));
616135
- edge.weight = Math.min(1, edge.weight + 0.12 + clamp019(confidence2) * 0.2);
617691
+ edge.confidence = Math.max(edge.confidence, clamp0110(confidence2));
617692
+ edge.weight = Math.min(1, edge.weight + 0.12 + clamp0110(confidence2) * 0.2);
616136
617693
  edge.lastSeenAt = now;
616137
617694
  edge.evidenceMessageIds = appendUnique(edge.evidenceMessageIds, messageId, 40);
616138
617695
  edge.note = compactOptional(note, 260) || edge.note;
@@ -616174,7 +617731,7 @@ function setPreference(vector, key, value2, confidence2, messageId, now, note) {
616174
617731
  const existing = vector[key];
616175
617732
  vector[key] = {
616176
617733
  value: existing ? existing.value * 0.7 + value2 * 0.3 : value2,
616177
- confidence: Math.max(existing?.confidence ?? 0, clamp019(confidence2)),
617734
+ confidence: Math.max(existing?.confidence ?? 0, clamp0110(confidence2)),
616178
617735
  updatedAt: now,
616179
617736
  evidenceMessageIds: appendUnique(existing?.evidenceMessageIds ?? [], messageId, 12),
616180
617737
  note
@@ -616279,19 +617836,51 @@ async function queryVisionModel(modelName, imagePath, prompt = "Describe what yo
616279
617836
  if (!existsSync116(imagePath)) return "";
616280
617837
  const imageBuffer = readFileSync95(imagePath);
616281
617838
  const base64Image = imageBuffer.toString("base64");
617839
+ const broker = getModelBroker();
617840
+ const decision2 = await broker.ensureModelLoadable({
617841
+ name: modelName,
617842
+ domain: "vision",
617843
+ host: "ollama",
617844
+ owner: "vision-ingress",
617845
+ requestedNumCtx: 2048
617846
+ });
617847
+ let effectiveModel = modelName;
617848
+ let numCtx;
617849
+ if (decision2.kind === "reject") {
617850
+ return "";
617851
+ } else if (decision2.kind === "degrade") {
617852
+ effectiveModel = decision2.fallback.name;
617853
+ } else if (decision2.kind === "evict") {
617854
+ for (const target of decision2.evictTargets) {
617855
+ await broker.evict(target.host, target.name, "make-room-for-vision");
617856
+ }
617857
+ numCtx = decision2.effectiveNumCtx;
617858
+ } else if (decision2.kind === "ok") {
617859
+ numCtx = decision2.effectiveNumCtx;
617860
+ } else if (decision2.kind === "wait-for-inflight") {
617861
+ const inner = await decision2.promise;
617862
+ if (inner.kind === "ok") numCtx = inner.effectiveNumCtx;
617863
+ else if (inner.kind === "degrade") effectiveModel = inner.fallback.name;
617864
+ else if (inner.kind === "reject") return "";
617865
+ }
617866
+ if (numCtx === void 0) {
617867
+ const trainCtx = await broker.getNctxTrain(effectiveModel);
617868
+ numCtx = trainCtx && trainCtx > 0 ? Math.min(trainCtx, 4096) : 2048;
617869
+ }
616282
617870
  try {
616283
617871
  const response = await fetch("http://localhost:11434/api/generate", {
616284
617872
  method: "POST",
616285
617873
  headers: { "Content-Type": "application/json" },
616286
617874
  body: JSON.stringify({
616287
- model: modelName,
617875
+ model: effectiveModel,
616288
617876
  prompt,
616289
617877
  images: [base64Image],
616290
617878
  stream: false,
616291
- options: { temperature: 0.3, num_predict: 1024 }
617879
+ options: { temperature: 0.3, num_predict: 1024, num_ctx: numCtx }
616292
617880
  })
616293
617881
  });
616294
617882
  if (!response.ok) return "";
617883
+ broker.touch("ollama", effectiveModel);
616295
617884
  const data = await response.json();
616296
617885
  return (data.response || "").trim();
616297
617886
  } catch {
@@ -616344,6 +617933,7 @@ function formatImageContextPrefix(result) {
616344
617933
  var init_vision_ingress = __esm({
616345
617934
  "packages/cli/src/tui/vision-ingress.ts"() {
616346
617935
  "use strict";
617936
+ init_dist5();
616347
617937
  }
616348
617938
  });
616349
617939
 
@@ -616532,9 +618122,31 @@ function parseTelegramSilentReflectionNotes(text) {
616532
618122
  }
616533
618123
  return null;
616534
618124
  }
616535
- function telegramRouterTimeoutMs(configTimeoutMs, minMs = 12e4, _legacyMaxMs) {
616536
- const configured = Number.isFinite(configTimeoutMs) && (configTimeoutMs ?? 0) > 0 ? configTimeoutMs : 3e5;
616537
- return Math.max(configured, minMs, 12e4);
618125
/**
 * Roughly estimate the prompt size of a chat request in tokens.
 *
 * Counts the characters of string message contents and of the `text` field of
 * array content parts, adds a flat allowance per message and per tool, then
 * divides by four and rounds up.
 *
 * @param {{messages?: Array, tools?: Array}} [request] - A missing request or
 *   a non-array `messages`/`tools` contributes nothing; null message entries
 *   count only the per-message allowance.
 * @returns {number} Estimated token count (0 for an empty request).
 */
function estimatePromptTokensFromRequest(request) {
  const perMessageChars = 8;
  const perToolChars = 600;
  const charsPerToken = 4;
  // Same guard as telegramThinkSuppressedRequest: only iterate a real array,
  // never the characters of a stray string.
  const messages = Array.isArray(request?.messages) ? request.messages : [];
  let chars = 0;
  for (const message of messages) {
    const content = message?.content;
    if (typeof content === "string") {
      chars += content.length;
    } else if (Array.isArray(content)) {
      for (const part of content) {
        if (typeof part?.text === "string") chars += part.text.length;
      }
    }
    chars += perMessageChars;
  }
  if (Array.isArray(request?.tools) && request.tools.length > 0) {
    chars += request.tools.length * perToolChars;
  }
  return Math.ceil(chars / charsPerToken);
}
618141
/**
 * Resolve the inference timeout for the Telegram router.
 *
 * The liveness floor is read from OMNIUS_TG_INFERENCE_LIVENESS_MS when that
 * parses to an integer of at least 10000, and is 600000 otherwise. A finite
 * configured timeout is returned only when it is at least the floor.
 *
 * @param {number} [configTimeoutMs] - Configured timeout in milliseconds.
 * @param {number} [_minMs] - Accepted for compatibility; not used.
 * @param {number} [_legacyMaxMs] - Accepted for compatibility; not used.
 * @returns {number} Timeout in milliseconds.
 */
function telegramRouterTimeoutMs(configTimeoutMs, _minMs, _legacyMaxMs) {
  const override = Number.parseInt(process.env["OMNIUS_TG_INFERENCE_LIVENESS_MS"] ?? "", 10);
  let livenessMs = 6e5;
  if (Number.isFinite(override) && override >= 1e4) {
    livenessMs = override;
  }
  const configWins = Number.isFinite(configTimeoutMs) && configTimeoutMs >= livenessMs;
  return configWins ? configTimeoutMs : livenessMs;
}
616539
618151
  function telegramThinkSuppressedRequest(request) {
616540
618152
  const messages2 = Array.isArray(request.messages) ? request.messages.slice() : [];
@@ -617989,6 +619601,9 @@ var init_telegram_bridge = __esm({
617989
619601
  init_telegram_creative_tools();
617990
619602
  init_omnius_directory();
617991
619603
  init_stimulation();
619604
+ init_pid_controller();
619605
+ init_component_benefit();
619606
+ init_soul_observations();
617992
619607
  init_identity_memory_tool();
617993
619608
  init_visual_identity_association();
617994
619609
  init_telegram_channel_dmn();
@@ -621954,6 +623569,12 @@ ${lines.join("\n")}`);
621954
623569
  this.ensureTelegramConversationLoaded(sessionKey);
621955
623570
  const history = this.chatHistory.get(sessionKey) ?? [];
621956
623571
  const participants = [...this.chatParticipants.get(sessionKey)?.values() ?? []].sort((a2, b) => b.lastSeenTs - a2.lastSeenTs);
623572
+ const modelKey = this.agentConfig?.model ?? "?";
623573
+ const pidReg = getPidRegistry();
623574
+ const tier1Ratio = pidReg.output(`tier1.${modelKey}`);
623575
+ const tier2Ratio = pidReg.output(`tier2.${modelKey}`);
623576
+ const benefitReg = getComponentBenefitRegistry();
623577
+ const sampledComponents = [];
621957
623578
  const isGroup = msg.chatType !== "private";
621958
623579
  const retainedCount = history.length;
621959
623580
  const olderCount = Math.max(0, retainedCount - maxRecent);
@@ -621987,14 +623608,28 @@ ${lines.join("\n")}`);
621987
623608
  sections.push(socialStateContext);
621988
623609
  }
621989
623610
  if (participants.length > 0) {
621990
- const participantLines = participants.slice(0, 12).map((profile) => {
623611
+ const fullCount = Math.min(12, participants.length);
623612
+ const tier1Count = Math.max(1, Math.round(fullCount * tier1Ratio));
623613
+ const sortedByBenefit = participants.slice(0, fullCount).sort((a2, b) => {
623614
+ const scoreA = benefitReg.score(sessionKey, `tier1.participant.${a2.username ?? a2.fromUserId}`);
623615
+ const scoreB = benefitReg.score(sessionKey, `tier1.participant.${b.username ?? b.fromUserId}`);
623616
+ return scoreB - scoreA;
623617
+ });
623618
+ const selected = sortedByBenefit.slice(0, tier1Count);
623619
+ const participantLines = selected.map((profile) => {
621991
623620
  const label = profile.username && profile.username !== "unknown" ? `@${profile.username}` : profile.firstName || `user:${profile.fromUserId}`;
621992
623621
  const tones = [...profile.toneTags].slice(0, 5).join(", ") || "neutral";
621993
623622
  const direct = profile.directAddressCount ? `, direct-addresses:${profile.directAddressCount}` : "";
621994
623623
  const replies = profile.replyCount ? `, replies:${profile.replyCount}` : "";
623624
+ sampledComponents.push({
623625
+ key: `tier1.participant.${profile.username ?? profile.fromUserId}`,
623626
+ needle: profile.username ?? String(profile.fromUserId)
623627
+ });
621995
623628
  return `- ${label} [${telegramActorKindLabel(profile)}]: messages:${profile.messageCount}${direct}${replies}; tone:${tones}; last=${telegramContextJsonString(profile.lastMessage, 180)}`;
621996
623629
  });
621997
- sections.push(`### Participants And Relationship Signals
623630
+ const shed = fullCount - tier1Count;
623631
+ const tierNote = shed > 0 ? ` (tier1 u=${tier1Ratio.toFixed(2)}; ${shed} participants shed by benefit)` : "";
623632
+ sections.push(`### Participants And Relationship Signals${tierNote}
621998
623633
  ${participantLines.join("\n")}`);
621999
623634
  }
622000
623635
  const associativeContext = this.relevantTelegramAssociativeMemoryContext(
@@ -622026,16 +623661,32 @@ ${participantLines.join("\n")}`);
622026
623661
  }
622027
623662
  const memoryCards = this.relevantTelegramMemoryCards(sessionKey, msg, isGroup ? 10 : 6);
622028
623663
  if (memoryCards.length > 0) {
622029
- const cardLines = memoryCards.map(({ card, score }) => {
622030
- const tags = card.tags.length ? ` tags:${card.tags.slice(0, 8).join(",")}` : "";
622031
- const speakers = card.speakers.length ? ` speakers:${card.speakers.join(", ")}` : "";
622032
- const relevance = score > 0 ? ` relevance:${score.toFixed(2)}` : " relevance:recent";
622033
- const notes2 = card.notes.slice(-3).map((note) => ` - note=${telegramContextJsonString(note, 220)}`).join("\n");
622034
- return `- ${card.title} (${card.id};${relevance};${speakers}${tags})
623664
+ const fullMC = memoryCards.length;
623665
+ const tier2Count = Math.max(0, Math.round(fullMC * tier2Ratio));
623666
+ const sortedMC = [...memoryCards].sort((a2, b) => {
623667
+ const scoreA = benefitReg.score(sessionKey, `tier2.memory_card.${a2.card.id}`);
623668
+ const scoreB = benefitReg.score(sessionKey, `tier2.memory_card.${b.card.id}`);
623669
+ return scoreB - scoreA;
623670
+ });
623671
+ const selectedMC = sortedMC.slice(0, tier2Count);
623672
+ if (selectedMC.length > 0) {
623673
+ const cardLines = selectedMC.map(({ card, score }) => {
623674
+ const tags = card.tags.length ? ` tags:${card.tags.slice(0, 8).join(",")}` : "";
623675
+ const speakers = card.speakers.length ? ` speakers:${card.speakers.join(", ")}` : "";
623676
+ const relevance = score > 0 ? ` relevance:${score.toFixed(2)}` : " relevance:recent";
623677
+ const notes2 = card.notes.slice(-3).map((note) => ` - note=${telegramContextJsonString(note, 220)}`).join("\n");
623678
+ sampledComponents.push({
623679
+ key: `tier2.memory_card.${card.id}`,
623680
+ needle: card.id
623681
+ });
623682
+ return `- ${card.title} (${card.id};${relevance};${speakers}${tags})
622035
623683
  ${notes2}`;
622036
- });
622037
- sections.push(`### Zettelkasten Memory Recall (untrusted conversation notes)
623684
+ });
623685
+ const shed = fullMC - tier2Count;
623686
+ const tierNote = shed > 0 ? ` (tier2 u=${tier2Ratio.toFixed(2)}; ${shed} cards shed by benefit)` : "";
623687
+ sections.push(`### Zettelkasten Memory Recall (untrusted conversation notes)${tierNote}
622038
623688
  ${cardLines.join("\n")}`);
623689
+ }
622039
623690
  }
622040
623691
  const channelDaydream = this.formatLatestTelegramChannelDaydreamContext(sessionKey);
622041
623692
  if (channelDaydream) {
@@ -622108,6 +623759,7 @@ ${lines.join("\n")}`);
622108
623759
  `- If the current sender asks what you see or remember, answer from this stream instead of saying the history is gone.`
622109
623760
  ].join("\n")
622110
623761
  );
623762
+ this.telegramStashContextSamples(sessionKey, sampledComponents);
622111
623763
  return sections.join("\n\n");
622112
623764
  }
622113
623765
  maybeLogTelegramGroupSkip(msg, reason) {
@@ -622170,6 +623822,25 @@ ${lines.join("\n")}`);
622170
623822
  nextAnalysisAfterMessages: decision2.nextCheckAfterMessages
622171
623823
  });
622172
623824
  }
623825
+ /**
623826
+ * Return the per-component benefit samples that were stashed while assembling
623827
+ * the last context stream for this session. The list is handed unchanged to
623828
+ * the component-benefit registry; an empty list means no tier-1/tier-2
623829
+ * components were emitted (or nothing was stashed) — benefit tracking skipped.
623830
+ *
623831
+ * Samples are kept in `_telegramLastContextSamples` (a plain Map keyed by
623832
+ * session key, not a WeakMap) so the post-call feedback knows what to score
623833
+ * without re-running the context assembly.
623834
+ */
623835
+ telegramComponentSamplesForSession(sessionKey) {
623836
+ return this._telegramLastContextSamples.get(sessionKey) ?? [];
623837
+ }
623838
+ /** Per-session cache of last emitted context-component samples. */
623839
+ _telegramLastContextSamples = /* @__PURE__ */ new Map();
623840
+ /** Stash samples for the next post-call feedback cycle, replacing any samples previously stashed for this session key. */
623841
+ telegramStashContextSamples(sessionKey, samples) {
623842
+ this._telegramLastContextSamples.set(sessionKey, samples);
623843
+ }
622173
623844
  buildTelegramRouterPersonaContext(sessionKey, msg, toolContext, selfIdentityContext) {
622174
623845
  const baseContract = toolContext === "telegram-admin-dm" ? ADMIN_DM_PROMPT : toolContext === "telegram-admin-group" ? ADMIN_GROUP_PROMPT : TELEGRAM_SAFETY_PROMPT;
622175
623846
  return buildSoulContext({
@@ -622364,30 +624035,55 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
622364
624035
  * hard-deadline retire path becomes diagnosable instead of opaque
622365
624036
  */
622366
624037
  async telegramObservableInference(backend, request, kind, sessionKey) {
624038
+ const model = this.agentConfig?.model ?? "?";
624039
+ const promptTokens = estimatePromptTokensFromRequest(request);
624040
+ const broker = getModelBroker();
624041
+ const trainCtx = await broker.getNctxTrain(model).catch(() => null);
624042
+ const targetCtx = trainCtx && trainCtx > 0 ? Math.min(trainCtx, Math.max(2048, promptTokens + 1024)) : Math.max(2048, promptTokens + 1024);
624043
+ const requestWithCtx = { ...request, numCtx: targetCtx };
624044
+ const slot = await broker.acquireInferenceSlot({
624045
+ model,
624046
+ domain: "chat",
624047
+ owner: `telegram-bridge/${kind}`,
624048
+ sessionKey,
624049
+ promptTokens,
624050
+ priority: kind === "router" || kind === "router-repair" || kind === "router-strict-retry" ? 1 : 0
624051
+ });
624052
+ this.tuiWrite(() => renderTelegramSubAgentEvent(
624053
+ sessionKey,
624054
+ `inference admitted [${kind}] model=${model} prompt~${promptTokens}t num_ctx=${targetCtx} slot=${slot.info.id}${slot.info.reserved ? " reserved" : ""}`
624055
+ ));
622367
624056
  const streamFn = backend.chatCompletionStream;
622368
- const id = this.registerTelegramInference(kind, sessionKey, this.agentConfig?.model ?? "?");
624057
+ const id = this.registerTelegramInference(kind, sessionKey, model);
624058
+ let completionTokens = 0;
622369
624059
  try {
624060
+ let result;
622370
624061
  if (typeof streamFn !== "function") {
622371
- const r2 = await backend.chatCompletion(request);
622372
- this.updateTelegramInferenceFinal(id, r2);
622373
- return r2;
622374
- }
622375
- try {
622376
- const result = await this.streamTelegramInferenceToCompletion(
622377
- streamFn.bind(backend),
622378
- request,
622379
- id
622380
- );
622381
- return result;
622382
- } catch (streamErr) {
622383
- const r2 = await backend.chatCompletion(request);
622384
- this.updateTelegramInferenceFinal(id, r2);
622385
- this.tuiWrite(() => renderTelegramSubAgentEvent(
622386
- sessionKey,
622387
- `inference ${id}: stream errored (${streamErr instanceof Error ? streamErr.message : String(streamErr)}); fell back to non-stream`
622388
- ));
622389
- return r2;
624062
+ result = await backend.chatCompletion(requestWithCtx);
624063
+ this.updateTelegramInferenceFinal(id, result);
624064
+ } else {
624065
+ try {
624066
+ result = await this.streamTelegramInferenceToCompletion(
624067
+ streamFn.bind(backend),
624068
+ requestWithCtx,
624069
+ id
624070
+ );
624071
+ } catch (streamErr) {
624072
+ result = await backend.chatCompletion(requestWithCtx);
624073
+ this.updateTelegramInferenceFinal(id, result);
624074
+ this.tuiWrite(() => renderTelegramSubAgentEvent(
624075
+ sessionKey,
624076
+ `inference ${id}: stream errored (${streamErr instanceof Error ? streamErr.message : String(streamErr)}); fell back to non-stream`
624077
+ ));
624078
+ }
622390
624079
  }
624080
+ const usage = result.usage;
624081
+ completionTokens = usage?.completion_tokens ?? 0;
624082
+ slot.release({ ok: true, completionTokens });
624083
+ return result;
624084
+ } catch (err) {
624085
+ slot.release({ ok: false, error: err instanceof Error ? err.message : String(err) });
624086
+ throw err;
622391
624087
  } finally {
622392
624088
  this.deregisterTelegramInference(id);
622393
624089
  }
@@ -622747,33 +624443,15 @@ ${retryText}`,
622747
624443
  /**
622748
624444
  * Internal: start an actual router inference for a sessionKey, store its
622749
624445
  * in-flight promise, and on completion fire any queued trailing call.
624446
+ *
624447
+ * No watchdog timeout — the broker's admission control guarantees the
624448
+ * inference fits available compute. Inflight work always completes; new
624449
+ * work waits in the broker's bounded queue with backpressure to upstream.
624450
+ * Only the fetch-level I/O liveness AbortSignal can interrupt, and only
624451
+ * on TCP-dead.
622750
624452
  */
622751
624453
  startCoalescedTelegramRouterCall(sessionKey, msg, toolContext) {
622752
- const HARD_DEADLINE_MS = this.telegramRouterHardDeadlineMs();
622753
- const inner = this.inferTelegramInteractionDecision(msg, toolContext);
622754
- const promise = new Promise((resolve55, reject) => {
622755
- let settled = false;
622756
- const guard = setTimeout(() => {
622757
- if (settled) return;
622758
- settled = true;
622759
- reject(new Error(`router-coalescer: hard deadline exceeded (${Math.round(HARD_DEADLINE_MS / 1e3)}s); inner inference did not settle`));
622760
- }, HARD_DEADLINE_MS);
622761
- if (typeof guard.unref === "function") guard.unref();
622762
- inner.then(
622763
- (v) => {
622764
- if (settled) return;
622765
- settled = true;
622766
- clearTimeout(guard);
622767
- resolve55(v);
622768
- },
622769
- (e2) => {
622770
- if (settled) return;
622771
- settled = true;
622772
- clearTimeout(guard);
622773
- reject(e2);
622774
- }
622775
- );
622776
- });
624454
+ const promise = this.inferTelegramInteractionDecision(msg, toolContext);
622777
624455
  this.telegramRouterSessionState.set(sessionKey, { inFlight: promise });
622778
624456
  const onSettled = () => {
622779
624457
  let state;
@@ -622794,11 +624472,6 @@ ${retryText}`,
622794
624472
  promise.then(onSettled, onSettled);
622795
624473
  return promise;
622796
624474
  }
622797
- telegramRouterHardDeadlineMs() {
622798
- const raw = Number.parseInt(process.env["OMNIUS_TG_ROUTER_HARD_DEADLINE_MS"] ?? "", 10);
622799
- if (Number.isFinite(raw) && raw >= 5e3 && raw <= 18e4) return raw;
622800
- return 6e4;
622801
- }
622802
624475
  /**
622803
624476
  * Forcibly cancel every in-flight + trailing router-coalescer entry.
622804
624477
  * Used on bridge stop() and by the watchdog if it detects the coalescer
@@ -622994,30 +624667,52 @@ ${stimulationProbe.context}`,
622994
624667
  "",
622995
624668
  context2
622996
624669
  ].filter(Boolean).join("\n");
622997
- const reflectionNotes = await this.inferTelegramSilentReflectionNotes(
622998
- backend,
622999
- sessionKey,
623000
- msg,
623001
- toolContext,
623002
- personaContext,
623003
- observationContext,
623004
- config.timeoutMs
623005
- );
623006
- const reflectionContext = [
623007
- "## Silent Reflection Deliverables (must inform the attention decision)",
623008
- `silent_disposition: ${reflectionNotes.silentDisposition ?? "heard and retained"}`,
623009
- `mental_note: ${reflectionNotes.mentalNote ?? "no additional observation"}`,
623010
- `memory_note: ${reflectionNotes.memoryNote ?? "message retained in scoped memory"}`,
623011
- `relationship_note: ${reflectionNotes.relationshipNote ?? "no relationship change inferred"}`,
623012
- `procedure_note: ${reflectionNotes.procedureNote ?? "active voice-soul tree loaded; no procedure change inferred"}`,
623013
- `voice_note: ${reflectionNotes.voiceNote ?? "final voice unchanged unless reply is emitted"}`,
623014
- `scenario_note: ${reflectionNotes.scenarioNote ?? "scenario classification unavailable"}`,
623015
- `scenario_id: ${reflectionNotes.scenarioId ?? "unclassified"}`,
623016
- `scenario_label: ${reflectionNotes.scenarioLabel ?? "Unclassified"}`,
623017
- `scenario_confidence: ${reflectionNotes.scenarioConfidence !== void 0 ? reflectionNotes.scenarioConfidence.toFixed(2) : "0.00"}`,
623018
- `scenario_objective: ${reflectionNotes.scenarioObjective ?? "pending model-derived classifier output"}`,
623019
- `scenario_state_loop: ${reflectionNotes.scenarioStateLoop ?? "pending model-derived classifier output"}`
623020
- ].join("\n");
624670
+ const brokerSnap = getModelBroker().snapshot();
624671
+ const idleSlotRatio = brokerSnap.slots.capacity > 0 ? 1 - brokerSnap.slots.inUse / brokerSnap.slots.capacity : 1;
624672
+ const consolidatedMode = idleSlotRatio < 0.5 || process.env["OMNIUS_TG_FORCE_CONSOLIDATED"] === "1";
624673
+ let reflectionNotes;
624674
+ let reflectionContext;
624675
+ if (consolidatedMode) {
624676
+ reflectionNotes = this.fallbackTelegramSilentReflectionNotes(msg, "consolidated mode: reflection computed inline by router");
624677
+ reflectionContext = [
624678
+ "## Consolidated Reflection (you produce these fields as part of the same JSON)",
624679
+ "Before emitting your final decision, internally reflect on:",
624680
+ " silent_disposition: what happens silently with this message",
624681
+ " mental_note: concise observation of the turn",
624682
+ " memory_note: what scoped memory should retain or connect",
624683
+ " relationship_note: relationship/thread implication",
624684
+ " procedure_note: active tree/branch/abort implication",
624685
+ " voice_note: final voice implication if a reply happens",
624686
+ " scenario_note: identified scenario and transition state",
624687
+ " scenario_id / scenario_label / scenario_confidence / scenario_objective / scenario_state_loop",
624688
+ "Use these as your attention substrate, then decide route/should_reply/confidence. Return all fields in ONE JSON."
624689
+ ].join("\n");
624690
+ } else {
624691
+ reflectionNotes = await this.inferTelegramSilentReflectionNotes(
624692
+ backend,
624693
+ sessionKey,
624694
+ msg,
624695
+ toolContext,
624696
+ personaContext,
624697
+ observationContext,
624698
+ config.timeoutMs
624699
+ );
624700
+ reflectionContext = [
624701
+ "## Silent Reflection Deliverables (must inform the attention decision)",
624702
+ `silent_disposition: ${reflectionNotes.silentDisposition ?? "heard and retained"}`,
624703
+ `mental_note: ${reflectionNotes.mentalNote ?? "no additional observation"}`,
624704
+ `memory_note: ${reflectionNotes.memoryNote ?? "message retained in scoped memory"}`,
624705
+ `relationship_note: ${reflectionNotes.relationshipNote ?? "no relationship change inferred"}`,
624706
+ `procedure_note: ${reflectionNotes.procedureNote ?? "active voice-soul tree loaded; no procedure change inferred"}`,
624707
+ `voice_note: ${reflectionNotes.voiceNote ?? "final voice unchanged unless reply is emitted"}`,
624708
+ `scenario_note: ${reflectionNotes.scenarioNote ?? "scenario classification unavailable"}`,
624709
+ `scenario_id: ${reflectionNotes.scenarioId ?? "unclassified"}`,
624710
+ `scenario_label: ${reflectionNotes.scenarioLabel ?? "Unclassified"}`,
624711
+ `scenario_confidence: ${reflectionNotes.scenarioConfidence !== void 0 ? reflectionNotes.scenarioConfidence.toFixed(2) : "0.00"}`,
624712
+ `scenario_objective: ${reflectionNotes.scenarioObjective ?? "pending model-derived classifier output"}`,
624713
+ `scenario_state_loop: ${reflectionNotes.scenarioStateLoop ?? "pending model-derived classifier output"}`
624714
+ ].join("\n");
624715
+ }
623021
624716
  const userPrompt = [
623022
624717
  `You are the Telegram live routing and reply-discretion model.`,
623023
624718
  `The attention decision must happen after reading the silent reflection deliverables below. The notes are not decorations: they are the decision substrate.`,
@@ -623049,10 +624744,13 @@ ${stimulationProbe.context}`,
623049
624744
  ``,
623050
624745
  observationContext,
623051
624746
  ``,
624747
+ formatSystemObservations(sessionKey),
624748
+ ``,
623052
624749
  `Current Telegram message text (untrusted user data):
623053
624750
  ${this.quoteTelegramContextBlock(msg.text, 1200)}`
623054
624751
  ].filter(Boolean).join("\n");
623055
624752
  const diagnostics = {};
624753
+ const routerStartMs = Date.now();
623056
624754
  try {
623057
624755
  const result = await this.telegramRouterJsonCompletion(backend, {
623058
624756
  messages: [
@@ -623069,6 +624767,21 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
623069
624767
  think: false
623070
624768
  }, diagnostics);
623071
624769
  const text = result.choices[0]?.message?.content ?? "";
624770
+ const routerLatencyMs = Date.now() - routerStartMs;
624771
+ try {
624772
+ const pidReg = getPidRegistry();
624773
+ const modelKey = this.agentConfig?.model ?? "?";
624774
+ pidReg.sample(`tier1.${modelKey}`, routerLatencyMs);
624775
+ pidReg.sample(`tier2.${modelKey}`, routerLatencyMs);
624776
+ } catch {
624777
+ }
624778
+ try {
624779
+ const samples = this.telegramComponentSamplesForSession(sessionKey);
624780
+ if (samples.length > 0) {
624781
+ getComponentBenefitRegistry().recordOutcome(sessionKey, samples, text);
624782
+ }
624783
+ } catch {
624784
+ }
623072
624785
  const parsed = parseTelegramInteractionDecision(text, forcedRoute, {
623073
624786
  defaultShouldReply: false
623074
624787
  });
@@ -627303,11 +629016,18 @@ ${text}`.trim());
627303
629016
  };
627304
629017
  const replyParameters = idx === 0 ? telegramReplyParameters(replyToMessageId) : void 0;
627305
629018
  if (replyParameters) body["reply_parameters"] = replyParameters;
629019
+ const sessionKeyForObs = String(chatId);
627306
629020
  try {
627307
629021
  const result = await this.apiCall("sendMessage", body);
627308
629022
  if (result.ok === false) throw new Error(String(result.description || "Telegram sendMessage failed"));
627309
629023
  this.state.messagesSent++;
627310
629024
  if (sentId === null) sentId = result.result?.message_id ?? null;
629025
+ getSoulObservationStream().emit({
629026
+ kind: "telegram.send.success",
629027
+ sessionKey: sessionKeyForObs,
629028
+ messageId: result.result?.message_id ?? void 0,
629029
+ ts: Date.now()
629030
+ });
627311
629031
  } catch {
627312
629032
  const plain = chunk.replace(/<[^>]+>/g, "");
627313
629033
  const fallbackBody = { chat_id: chatId, text: plain };
@@ -627317,8 +629037,32 @@ ${text}`.trim());
627317
629037
  if (result.ok === false) throw new Error(String(result.description || "Telegram sendMessage failed"));
627318
629038
  this.state.messagesSent++;
627319
629039
  if (sentId === null) sentId = result.result?.message_id ?? null;
629040
+ getSoulObservationStream().emit({
629041
+ kind: "telegram.send.success",
629042
+ sessionKey: sessionKeyForObs,
629043
+ messageId: result.result?.message_id ?? void 0,
629044
+ ts: Date.now()
629045
+ });
627320
629046
  } catch (err) {
627321
629047
  this.tuiWrite(() => renderWarning(`Failed to send Telegram message: ${err instanceof Error ? err.message : String(err)}`));
629048
+ const errStr = err instanceof Error ? err.message : String(err);
629049
+ const lc = errStr.toLowerCase();
629050
+ if (/(not enough rights|forbidden|chat_write_forbidden|user_banned|kicked|chat_admin_required)/.test(lc)) {
629051
+ getSoulObservationStream().emit({
629052
+ kind: "telegram.send.forbidden",
629053
+ sessionKey: sessionKeyForObs,
629054
+ reason: errStr,
629055
+ ts: Date.now()
629056
+ });
629057
+ } else if (/too many requests|retry after/.test(lc)) {
629058
+ const m2 = lc.match(/retry after (\d+)/);
629059
+ getSoulObservationStream().emit({
629060
+ kind: "telegram.send.rate_limited",
629061
+ sessionKey: sessionKeyForObs,
629062
+ retryAfterSec: m2 ? parseInt(m2[1], 10) : void 0,
629063
+ ts: Date.now()
629064
+ });
629065
+ }
627322
629066
  }
627323
629067
  }
627324
629068
  }
@@ -629030,12 +630774,12 @@ var direct_input_exports = {};
629030
630774
  __export(direct_input_exports, {
629031
630775
  DirectInput: () => DirectInput
629032
630776
  });
629033
- import { EventEmitter as EventEmitter11 } from "node:events";
630777
+ import { EventEmitter as EventEmitter12 } from "node:events";
629034
630778
  var DirectInput;
629035
630779
  var init_direct_input = __esm({
629036
630780
  "packages/cli/src/tui/direct-input.ts"() {
629037
630781
  "use strict";
629038
- DirectInput = class extends EventEmitter11 {
630782
+ DirectInput = class extends EventEmitter12 {
629039
630783
  /** Current input line text */
629040
630784
  line = "";
629041
630785
  /** Cursor position within .line (0-based) */
@@ -629754,8 +631498,8 @@ var voicechat_exports = {};
629754
631498
  __export(voicechat_exports, {
629755
631499
  VoiceChatSession: () => VoiceChatSession
629756
631500
  });
629757
- import { EventEmitter as EventEmitter12 } from "node:events";
629758
- function clamp0110(x) {
631501
+ import { EventEmitter as EventEmitter13 } from "node:events";
631502
+ function clamp0111(x) {
629759
631503
  return x < 0 ? 0 : x > 1 ? 1 : x;
629760
631504
  }
629761
631505
  function alnumRatio(s2) {
@@ -629794,9 +631538,9 @@ function computeSignalFromText(text, confidence2) {
629794
631538
  else score = 0.15;
629795
631539
  score -= repeatingCharPenalty(t2) * 0.4;
629796
631540
  if (typeof confidence2 === "number" && !Number.isNaN(confidence2)) {
629797
- score = 0.7 * score + 0.3 * clamp0110(confidence2);
631541
+ score = 0.7 * score + 0.3 * clamp0111(confidence2);
629798
631542
  }
629799
- return clamp0110(score);
631543
+ return clamp0111(score);
629800
631544
  }
629801
631545
  function truncateForLog(s2, n2) {
629802
631546
  return s2.length <= n2 ? s2 : s2.slice(0, n2 - 1) + "…";
@@ -629864,7 +631608,7 @@ Rules:
629864
631608
  - Prefer tools for factual queries; otherwise, answer directly with a short reply.`;
629865
631609
  MIN_SIGNAL_SCORE = 0.4;
629866
631610
  NOISE_ONLY_RE = /^(?:[.·…\s,;:!?\-–—_()\[\]{}"'`]+|(?:uh|um|erm|hmm|mm+|uhh+|umm+)[\s.!?]*)+$/i;
629867
- VoiceChatSession = class extends EventEmitter12 {
631611
+ VoiceChatSession = class extends EventEmitter13 {
629868
631612
  voice;
629869
631613
  listen;
629870
631614
  backendUrl;
@@ -630066,7 +631810,7 @@ Rules:
630066
631810
  }, MAX_SEGMENT_MS);
630067
631811
  }
630068
631812
  this.captureBuffer = text;
630069
- this.lastSignalScore = typeof snr === "number" && !Number.isNaN(snr) ? clamp0110(snr) : computeSignalFromText(text, confidence2);
631813
+ this.lastSignalScore = typeof snr === "number" && !Number.isNaN(snr) ? clamp0111(snr) : computeSignalFromText(text, confidence2);
630070
631814
  this.emit("snr", { score: this.lastSignalScore });
630071
631815
  this.onPartialTranscript(text);
630072
631816
  if (this.silenceTimer) clearTimeout(this.silenceTimer);
@@ -630393,7 +632137,7 @@ __export(voice_runtime_exports, {
630393
632137
  synthesizeToWav: () => synthesizeToWav,
630394
632138
  unregisterClient: () => unregisterClient
630395
632139
  });
630396
- import { EventEmitter as EventEmitter13 } from "node:events";
632140
+ import { EventEmitter as EventEmitter14 } from "node:events";
630397
632141
  function getVoiceEngine() {
630398
632142
  if (!_voiceEngine) {
630399
632143
  _voiceEngine = new VoiceEngine();
@@ -630405,7 +632149,7 @@ function getDaemonListenEngine() {
630405
632149
  return _listenEngine;
630406
632150
  }
630407
632151
  function getVoiceBus() {
630408
- if (!_bus) _bus = new EventEmitter13();
632152
+ if (!_bus) _bus = new EventEmitter14();
630409
632153
  return _bus;
630410
632154
  }
630411
632155
  function getRuntimeStatus() {
@@ -661368,8 +663112,8 @@ NEW TASK: ${fullInput}`;
661368
663112
  const updateInfo = await checkForUpdate(version4);
661369
663113
  if (updateInfo) {
661370
663114
  _autoUpdatedThisSession = true;
661371
- const { exec: exec5 } = await import("node:child_process");
661372
- exec5(
663115
+ const { exec: exec6 } = await import("node:child_process");
663116
+ exec6(
661373
663117
  `npm install -g omnius@latest --prefer-online`,
661374
663118
  { timeout: 18e4 },
661375
663119
  (err) => {