omnius 1.0.133 → 1.0.134

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1328,6 +1328,577 @@ var init_tool_executor = __esm({
1328
1328
  }
1329
1329
  });
1330
1330
 
1331
+ // packages/execution/dist/model-broker.js
1332
+ import { EventEmitter } from "node:events";
1333
+ import { totalmem, freemem } from "node:os";
1334
+ import { exec } from "node:child_process";
1335
/**
 * Sample system memory via `os.totalmem()` / `os.freemem()`.
 *
 * @returns {{ total: number, free: number, used: number }} Whole megabytes
 *   (MiB). `total` and `free` are rounded independently and `used` is their
 *   difference, so the three fields are always mutually consistent.
 */
function ramSnapshotMB() {
  const BYTES_PER_MB = 1024 * 1024;
  const toWholeMB = (bytes) => Math.round(bytes / BYTES_PER_MB);
  const total = toWholeMB(totalmem());
  const free = toWholeMB(freemem());
  const used = total - free;
  return { total, free, used };
}
1340
/**
 * Sample GPU memory by running `nvidia-smi` and summing every reported GPU.
 *
 * Uses the module-level `_nvSmiAvailable` latch: once it is `false` the
 * function returns `null` immediately without spawning a process.
 *
 * @returns {Promise<{ total: number, used: number, free: number } | null>}
 *   Values exactly as printed by `nvidia-smi` with `nounits` (presumably MiB —
 *   TODO confirm), or `null` when the tool is unavailable, failed, timed out,
 *   or reported no memory. Never rejects.
 */
async function vramSnapshotMB() {
  if (_nvSmiAvailable === false) {
    return null;
  }
  // No `2>/dev/null`: exec() already captures stderr instead of printing it,
  // and that POSIX-only redirect makes the command fail under Windows cmd.exe.
  const command = "nvidia-smi --query-gpu=memory.total,memory.used,memory.free --format=csv,noheader,nounits";
  let stdoutText;
  try {
    stdoutText = await new Promise((resolve, reject) => {
      exec(command, { encoding: "utf8", timeout: 3e3, windowsHide: true }, (err, stdout) => err ? reject(err) : resolve(stdout));
    });
  } catch (err) {
    // A timeout (child killed by exec) can be transient, so it only disables
    // probing after three in a row; any other failure latches immediately.
    const timedOut = err?.killed === true;
    const streak = timedOut ? (vramSnapshotMB.timeoutStreak ?? 0) + 1 : 0;
    vramSnapshotMB.timeoutStreak = streak;
    if (!timedOut || streak >= 3) {
      _nvSmiAvailable = false;
    }
    return null;
  }
  vramSnapshotMB.timeoutStreak = 0;
  _nvSmiAvailable = true;
  let total = 0;
  let used = 0;
  let free = 0;
  for (const line of stdoutText.trim().split("\n")) {
    const parts = line.split(",").map((cell) => cell.trim());
    if (parts.length < 3) {
      continue;
    }
    // Non-numeric cells (e.g. "[N/A]") parse to NaN and count as 0.
    total += Number.parseInt(parts[0] ?? "0", 10) || 0;
    used += Number.parseInt(parts[1] ?? "0", 10) || 0;
    free += Number.parseInt(parts[2] ?? "0", 10) || 0;
  }
  if (total <= 0) {
    return null;
  }
  return { total, used, free };
}
1365
/**
 * Convenience accessor for the process-wide broker.
 *
 * @returns The object returned by `ModelBroker.getInstance()`.
 */
function getModelBroker() {
  const broker = ModelBroker.getInstance();
  return broker;
}
1368
var DEFAULT_RAM_HEADROOM_MB, DEFAULT_VRAM_HEADROOM_MB, DEFAULT_IDLE_EVICT_MS, DEFAULT_POLL_MS, DEFAULT_INFLIGHT_WAIT_MS, ModelBroker, _nvSmiAvailable;
var init_model_broker = __esm({
  "packages/execution/dist/model-broker.js"() {
    "use strict";
    // Free RAM / VRAM (MB) that must remain after a load for it to "fit".
    DEFAULT_RAM_HEADROOM_MB = 2048;
    DEFAULT_VRAM_HEADROOM_MB = 1024;
    // A model unused for longer than this is preferred for eviction.
    DEFAULT_IDLE_EVICT_MS = 5 * 60 * 1e3;
    DEFAULT_POLL_MS = 4e3;
    // How long an in-flight marker lives when nobody clears it.
    DEFAULT_INFLIGHT_WAIT_MS = 6e4;
    /**
     * Singleton registry of loaded models. Decides whether a new model fits
     * in RAM/VRAM, which tracked models to evict, and which fallback to use.
     */
    ModelBroker = class _ModelBroker {
      static _instance = null;
      /** Loaded model registry keyed by `${host}:${name}`. */
      _loaded = /* @__PURE__ */ new Map();
      /** In-flight load markers keyed by `${host}:${name}`. */
      _inflight = /* @__PURE__ */ new Map();
      /** Fallback chains keyed by domain. */
      _fallbacks = /* @__PURE__ */ new Map();
      /** Cached n_ctx_train per Ollama model. */
      _ctxTrainCache = /* @__PURE__ */ new Map();
      /** Cached "model exists in Ollama" probes (true / false). */
      _ollamaModelExists = /* @__PURE__ */ new Map();
      /** Event emitter — typed via `on<K>(event: K, listener: BrokerEvents[K])`. */
      _events = new EventEmitter();
      /** Poll timer. */
      _pollTimer = null;
      /** Last full snapshot. */
      _lastSnapshot = null;
      /** Configured Ollama base URL (always carries a scheme, never a trailing slash). */
      _ollamaBaseUrl = _ModelBroker.normalizeOllamaBaseUrl(process.env["OLLAMA_HOST"]);
      /** Configured thresholds (mutable for /broker tuning). */
      ramHeadroomMB = DEFAULT_RAM_HEADROOM_MB;
      vramHeadroomMB = DEFAULT_VRAM_HEADROOM_MB;
      idleEvictMs = DEFAULT_IDLE_EVICT_MS;
      /** Return the shared broker, creating it on first use. */
      static getInstance() {
        if (!_ModelBroker._instance) {
          _ModelBroker._instance = new _ModelBroker();
        }
        return _ModelBroker._instance;
      }
      /** Reset (test-only). Stops the old instance's poll timer, if any. */
      static resetInstance() {
        if (_ModelBroker._instance?._pollTimer) {
          clearInterval(_ModelBroker._instance._pollTimer);
        }
        _ModelBroker._instance = null;
      }
      /**
       * Turn an Ollama address into a base URL usable with `fetch`.
       *
       * `OLLAMA_HOST` is commonly written as `host:port` without a scheme,
       * which `fetch` rejects. Scheme-less values get `http://` and, when no
       * port is present, Ollama's default port 11434. Values that already have
       * a scheme only lose trailing slashes. Empty or non-string input yields
       * the local default.
       *
       * @param {unknown} raw Address from the environment or configuration.
       * @returns {string} Base URL without a trailing slash.
       */
      static normalizeOllamaBaseUrl(raw) {
        const text = typeof raw === "string" ? raw.trim().replace(/\/+$/, "") : "";
        if (text === "") {
          return "http://127.0.0.1:11434";
        }
        if (/^[a-z][a-z0-9+.-]*:\/\//i.test(text)) {
          return text;
        }
        const slashAt = text.indexOf("/");
        const authority = slashAt === -1 ? text : text.slice(0, slashAt);
        const path = slashAt === -1 ? "" : text.slice(slashAt);
        const withPort = /:\d+$/.test(authority) ? authority : `${authority}:11434`;
        return `http://${withPort}${path}`;
      }
      constructor() {
        this.registerDefaultFallbacks();
      }
      // ------------------------------------------------------------------
      // Public API — events
      // ------------------------------------------------------------------
      /** Subscribe to a broker event; returns `this` for chaining. */
      on(event, listener) {
        this._events.on(event, listener);
        return this;
      }
      /** Unsubscribe a listener; returns `this` for chaining. */
      off(event, listener) {
        this._events.off(event, listener);
        return this;
      }
      /** Emit a broker event to all listeners. */
      emit(event, ...args) {
        this._events.emit(event, ...args);
      }
      // ------------------------------------------------------------------
      // Public API — polling
      // ------------------------------------------------------------------
      /**
       * Start background polling of Ollama /api/ps and nvidia-smi.
       * No-op when a poll timer already exists. Polls once immediately.
       */
      startPolling(intervalMs = DEFAULT_POLL_MS) {
        if (this._pollTimer) {
          return;
        }
        // Polling is best-effort: a failed cycle is dropped and retried next tick.
        const tick = () => {
          this.pollOnce().catch(() => {
          });
        };
        this._pollTimer = setInterval(tick, intervalMs);
        tick();
      }
      /** Stop background polling (safe to call when not polling). */
      stopPolling() {
        if (this._pollTimer) {
          clearInterval(this._pollTimer);
          this._pollTimer = null;
        }
      }
      /** Configure Ollama base URL (called from cli config wiring). */
      setOllamaBaseUrl(url) {
        this._ollamaBaseUrl = _ModelBroker.normalizeOllamaBaseUrl(url);
      }
      /**
       * One poll cycle: reconcile with Ollama's /api/ps, sample VRAM, emit a
       * "snapshot" event, then emit any "pressure" events.
       *
       * VRAM is sampled before the snapshot is emitted so listeners see the
       * current reading rather than the previous cycle's (or null).
       *
       * @returns {Promise<object>} The snapshot that was emitted.
       */
      async pollOnce() {
        const [, vram] = await Promise.all([
          this.refreshOllamaPs().catch(() => {
          }),
          vramSnapshotMB().catch(() => null)
        ]);
        const snapshot = this.buildSnapshot();
        if (vram) {
          snapshot.vramMB = vram;
        }
        this._lastSnapshot = snapshot;
        this.emit("snapshot", snapshot);
        // Awaited so a throwing listener rejects pollOnce instead of becoming
        // an unhandled rejection.
        await this.checkPressure(snapshot, vram);
        return snapshot;
      }
      /** Best-known current snapshot (last poll result, else built on demand). */
      snapshot() {
        return this._lastSnapshot ?? this.buildSnapshot();
      }
      // ------------------------------------------------------------------
      // Public API — fallback registry
      // ------------------------------------------------------------------
      /** Register a fallback chain for a domain. Later entries are tried later. */
      setFallbackChain(domain, chain) {
        this._fallbacks.set(domain, [...chain]);
      }
      /** Fallback chain registered for `domain`, or an empty array. */
      getFallbackChain(domain) {
        return this._fallbacks.get(domain) ?? [];
      }
      // ------------------------------------------------------------------
      // Public API — load decisioning
      // ------------------------------------------------------------------
      /**
       * Pre-flight a model-load request. Always call this before allocating a model.
       *
       * Decisions:
       *   - ok:                proceed; use `effectiveNumCtx` if returned
       *   - wait-for-inflight: another caller is loading the same model; await `promise`
       *   - evict:             caller should free `evictTargets` (broker calls evict
       *                        hooks itself when possible) then retry
       *   - degrade:           caller should reissue with `fallback`
       *   - reject:            nothing viable — caller should error out
       *
       * On "ok" for a not-yet-loaded model an in-flight marker is recorded. It
       * is removed by `registerLoaded`, `clearInflight`, or after
       * `spec.loadTimeoutMs` (default DEFAULT_INFLIGHT_WAIT_MS).
       */
      async ensureModelLoadable(spec) {
        const key = this.keyOf(spec);
        const inflight = this._inflight.get(key);
        if (inflight) {
          return { kind: "wait-for-inflight", promise: inflight.promise };
        }
        const existing = this._loaded.get(key);
        if (existing) {
          existing.lastUsedAt = Date.now();
          return { kind: "ok", effectiveNumCtx: existing.numCtx, note: "already-loaded" };
        }
        // For Ollama, clamp a requested context to the model's training
        // context; with no request, use the training context when known.
        let effectiveNumCtx;
        if (spec.host === "ollama") {
          const trainCtx = await this.getNctxTrain(spec.name);
          if (spec.requestedNumCtx) {
            effectiveNumCtx = trainCtx && spec.requestedNumCtx > trainCtx ? trainCtx : spec.requestedNumCtx;
          } else if (trainCtx) {
            effectiveNumCtx = trainCtx;
          }
        }
        // The estimators return the caller's explicit estimate when one is set.
        const estVram = this.estimateFootprintVramMB(spec);
        const estRam = this.estimateFootprintRamMB(spec);
        const ram = ramSnapshotMB();
        const vram = await vramSnapshotMB();
        const ramFitsAfter = ram.free - estRam >= this.ramHeadroomMB;
        // No VRAM reading means VRAM is not treated as a constraint.
        const vramFitsAfter = vram ? vram.free - estVram >= this.vramHeadroomMB : true;
        if (ramFitsAfter && vramFitsAfter) {
          // Re-check: another caller may have claimed this key during the awaits above.
          const racing = this._inflight.get(key);
          if (racing) {
            return { kind: "wait-for-inflight", promise: racing.promise };
          }
          // NOTE(review): this promise is already resolved, so "wait-for-inflight"
          // callers do not actually wait for the load to finish — confirm intent.
          const promise = Promise.resolve({ kind: "ok", effectiveNumCtx });
          const marker = { startedMs: Date.now(), owner: spec.owner, promise };
          this._inflight.set(key, marker);
          // Expire only our own marker, never a newer one stored under the same key.
          setTimeout(() => {
            if (this._inflight.get(key) === marker) {
              this._inflight.delete(key);
            }
          }, spec.loadTimeoutMs ?? DEFAULT_INFLIGHT_WAIT_MS).unref?.();
          return { kind: "ok", effectiveNumCtx };
        }
        const evictTargets = this.pickEvictionCandidates({
          needVramMB: vramFitsAfter ? 0 : estVram + this.vramHeadroomMB - (vram?.free ?? 0),
          needRamMB: ramFitsAfter ? 0 : estRam + this.ramHeadroomMB - ram.free,
          requestingPriority: spec.priority ?? 0,
          requestingDomain: spec.domain
        });
        if (evictTargets.length > 0) {
          return { kind: "evict", evictTargets, effectiveNumCtx };
        }
        const fallback = await this.findRunnableFallback(spec);
        if (fallback) {
          this.emit("degraded", spec, fallback, "insufficient-memory-no-evictable");
          return { kind: "degrade", fallback, reason: "insufficient-memory-no-evictable" };
        }
        const reason = `insufficient resources (need ~${estRam}MB RAM, ~${estVram}MB VRAM; free ${ram.free}MB RAM, ${vram ? vram.free : "?"}MB VRAM) and no evictable / fallback models`;
        this.emit("rejected", spec, reason);
        return { kind: "reject", reason };
      }
      /**
       * Register a model that has been successfully loaded.
       * Callers MUST call this after a successful load so the broker can track LRU.
       *
       * Clears the model's in-flight marker and emits "loaded". When `model.key`
       * is missing it is derived as `${host}:${name}` so the entry is not stored
       * under `undefined`.
       *
       * @returns {object} The tracked copy (with key, loadedAt, lastUsedAt filled in).
       */
      registerLoaded(model) {
        const now = Date.now();
        const tracked = {
          ...model,
          key: model.key ?? this.keyOf(model),
          loadedAt: model.loadedAt ?? now,
          lastUsedAt: model.lastUsedAt ?? now
        };
        this._loaded.set(tracked.key, tracked);
        this._inflight.delete(tracked.key);
        this.emit("loaded", tracked);
        return tracked;
      }
      /** Update last-used timestamp on every successful inference. */
      touch(host, name10) {
        const tracked = this._loaded.get(`${host}:${name10}`);
        if (tracked) {
          tracked.lastUsedAt = Date.now();
        }
      }
      /** Unregister a model (called when caller knows it has unloaded). Emits "evicted". */
      unregisterLoaded(host, name10, reason = "caller-unloaded") {
        const key = `${host}:${name10}`;
        const tracked = this._loaded.get(key);
        if (!tracked) {
          return;
        }
        this._loaded.delete(key);
        this.emit("evicted", tracked, reason);
      }
      /** Clear an inflight marker without registering a load (failed/aborted). */
      clearInflight(host, name10) {
        this._inflight.delete(`${host}:${name10}`);
      }
      /**
       * Best-effort eviction of a tracked model. Returns true if the broker was
       * able to actively unload (e.g. Ollama keep_alive=0); false if it just
       * unregistered (caller must clean up its own subprocess) or the model
       * was not tracked at all.
       */
      async evict(host, name10, reason = "broker-evict") {
        const key = `${host}:${name10}`;
        const tracked = this._loaded.get(key);
        if (!tracked) {
          return false;
        }
        const actively = host === "ollama" ? await this.ollamaUnload(name10).catch(() => false) : false;
        this._loaded.delete(key);
        this.emit("evicted", tracked, reason);
        return actively;
      }
      // ------------------------------------------------------------------
      // Internal — Ollama
      // ------------------------------------------------------------------
      /**
       * Fetch Ollama's runtime model list and reconcile against our registry:
       * refresh sizes of known models, register unknown ones as
       * "external-ollama", and drop tracked Ollama models that are no longer
       * listed. Best-effort: network and parse errors are ignored.
       */
      async refreshOllamaPs() {
        try {
          const res = await fetch(`${this._ollamaBaseUrl}/api/ps`, {
            signal: AbortSignal.timeout(3e3)
          });
          if (!res.ok) {
            return;
          }
          const data = await res.json();
          const seen = /* @__PURE__ */ new Set();
          const now = Date.now();
          const bytesPerMB = 1024 * 1024;
          for (const running of data.models ?? []) {
            const key = `ollama:${running.name}`;
            seen.add(key);
            const vramMB = Math.round((running.size_vram ?? 0) / bytesPerMB);
            // Clamped: a missing `size` would otherwise give a negative RAM figure.
            const ramMB = Math.max(0, Math.round(((running.size ?? 0) - (running.size_vram ?? 0)) / bytesPerMB));
            const existing = this._loaded.get(key);
            if (existing) {
              // A reported 0 keeps the previously known figure.
              existing.vramMB = vramMB || existing.vramMB;
              existing.ramMB = ramMB || existing.ramMB;
              continue;
            }
            this.registerLoaded({
              key,
              name: running.name,
              domain: this.guessOllamaDomain(running.name),
              host: "ollama",
              owner: "external-ollama",
              vramMB,
              ramMB,
              priority: 0,
              loadedAt: now,
              lastUsedAt: now
            });
          }
          for (const [key, tracked] of this._loaded) {
            if (tracked.host === "ollama" && !seen.has(key)) {
              this._loaded.delete(key);
              this.emit("evicted", tracked, "ollama-unloaded");
            }
          }
        } catch {
          // Best-effort: Ollama may simply not be running.
        }
      }
      /**
       * Force Ollama to unload a model by calling /api/generate keep_alive=0.
       * @returns {Promise<boolean>} true when Ollama answered with a 2xx status.
       */
      async ollamaUnload(modelName) {
        try {
          const res = await fetch(`${this._ollamaBaseUrl}/api/generate`, {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({ model: modelName, keep_alive: 0 }),
            signal: AbortSignal.timeout(5e3)
          });
          return res.ok;
        } catch {
          return false;
        }
      }
      /**
       * Pull n_ctx_train for an Ollama model via /api/show; cached.
       * Reads `<architecture>.context_length` from `model_info`, falling back
       * to the first numeric `*.context_length` entry.
       * @returns {Promise<number|null>} Positive context length, or null when
       *   unknown (failures are not cached).
       */
      async getNctxTrain(modelName) {
        if (this._ctxTrainCache.has(modelName)) {
          return this._ctxTrainCache.get(modelName);
        }
        try {
          const res = await fetch(`${this._ollamaBaseUrl}/api/show`, {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({ name: modelName }),
            signal: AbortSignal.timeout(5e3)
          });
          if (!res.ok) {
            return null;
          }
          const data = await res.json();
          const info = data.model_info ?? {};
          const arch3 = info["general.architecture"];
          let trainCtx;
          if (arch3 && typeof info[`${arch3}.context_length`] === "number") {
            trainCtx = info[`${arch3}.context_length`];
          } else {
            const match = Object.entries(info).find(([k, v]) => k.endsWith(".context_length") && typeof v === "number");
            trainCtx = match?.[1];
          }
          if (trainCtx && Number.isFinite(trainCtx) && trainCtx > 0) {
            this._ctxTrainCache.set(modelName, trainCtx);
            return trainCtx;
          }
          return null;
        } catch {
          return null;
        }
      }
      /**
       * Probe whether a model exists in Ollama (cached).
       *
       * Only definitive answers are cached (2xx = exists, 404 = missing). A
       * network error or other status returns false without caching, so a
       * temporary outage does not mark the model as missing for good.
       */
      async ollamaModelExists(modelName) {
        if (this._ollamaModelExists.has(modelName)) {
          return this._ollamaModelExists.get(modelName);
        }
        try {
          const res = await fetch(`${this._ollamaBaseUrl}/api/show`, {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({ name: modelName }),
            signal: AbortSignal.timeout(3e3)
          });
          if (res.ok || res.status === 404) {
            this._ollamaModelExists.set(modelName, res.ok);
          }
          return res.ok;
        } catch {
          return false;
        }
      }
      // ------------------------------------------------------------------
      // Internal — LRU eviction selection
      // ------------------------------------------------------------------
      /**
       * Choose tracked models to evict so that at least `needVramMB` /
       * `needRamMB` would be freed.
       *
       * Candidates have priority <= the requester's (missing priority counts
       * as 0). Idle models come first, then least-recently-used. A candidate
       * that frees none of the still-missing resource is skipped rather than
       * evicted for nothing.
       *
       * @returns {object[]} Models to evict, or [] when the need cannot be met.
       */
      pickEvictionCandidates(req2) {
        const now = Date.now();
        const isIdle = (m2) => now - m2.lastUsedAt > this.idleEvictMs;
        // never evict the requesting domain's only loaded model unless multiple exist
        const sameDomainOk = (m2) => m2.domain !== req2.requestingDomain || this.countByDomain(req2.requestingDomain) > 1;
        const evictable = [...this._loaded.values()].filter((m2) => (m2.priority ?? 0) <= req2.requestingPriority).filter(sameDomainOk).sort((a2, b) => {
          const idleRank = (isIdle(a2) ? 0 : 1) - (isIdle(b) ? 0 : 1);
          return idleRank !== 0 ? idleRank : a2.lastUsedAt - b.lastUsedAt;
        });
        const targets = [];
        let vramFreed = 0;
        let ramFreed = 0;
        for (const m2 of evictable) {
          const vramShort = req2.needVramMB - vramFreed;
          const ramShort = req2.needRamMB - ramFreed;
          if (vramShort <= 0 && ramShort <= 0) {
            break;
          }
          // Missing figures count as 0 so the running totals never become NaN.
          const vramGain = m2.vramMB ?? 0;
          const ramGain = m2.ramMB ?? 0;
          const helps = vramShort > 0 && vramGain > 0 || ramShort > 0 && ramGain > 0;
          if (!helps) {
            continue;
          }
          targets.push(m2);
          vramFreed += vramGain;
          ramFreed += ramGain;
        }
        const satisfied = vramFreed >= req2.needVramMB && ramFreed >= req2.needRamMB;
        return satisfied ? targets : [];
      }
      /** Number of tracked models whose domain equals `domain`. */
      countByDomain(domain) {
        let count = 0;
        for (const tracked of this._loaded.values()) {
          if (tracked.domain === domain) {
            count++;
          }
        }
        return count;
      }
      // ------------------------------------------------------------------
      // Internal — fallback resolution
      // ------------------------------------------------------------------
      /**
       * First entry in the original's domain chain that is not the original
       * model and whose optional `available()` probe passes. A probe that
       * throws or rejects counts as unavailable.
       *
       * @returns {Promise<object|null>} Fallback spec carrying the original owner, or null.
       */
      async findRunnableFallback(original) {
        const chain = this._fallbacks.get(original.domain) ?? [];
        for (const entry of chain) {
          if (entry.spec.host === original.host && entry.spec.name === original.name) {
            continue;
          }
          let usable = true;
          if (entry.available) {
            try {
              usable = await entry.available();
            } catch {
              usable = false;
            }
          }
          if (!usable) {
            continue;
          }
          return { ...entry.spec, owner: original.owner };
        }
        return null;
      }
      /** Install the built-in fallback chains (called from the constructor). */
      registerDefaultFallbacks() {
        this.setFallbackChain("vision", [
          { spec: { name: "moondream2", domain: "vision", host: "moondream-station" }, note: "local Moondream Station REST" },
          { spec: { name: "moondream", domain: "vision", host: "ollama", estimatedVramMB: 1800 }, note: "ollama moondream (small VRAM)" },
          { spec: { name: "tesseract-ocr-fallback", domain: "ocr", host: "subprocess", estimatedVramMB: 0, estimatedRamMB: 100 }, note: "OCR-only — no visual reasoning" }
        ]);
        this.setFallbackChain("image-gen", [
          { spec: { name: "flux1-schnell", domain: "image-gen", host: "diffusers-py", estimatedVramMB: 12e3 } },
          { spec: { name: "sdxl-turbo", domain: "image-gen", host: "diffusers-py", estimatedVramMB: 8e3 } },
          { spec: { name: "sd-turbo", domain: "image-gen", host: "diffusers-py", estimatedVramMB: 4e3 } }
        ]);
        this.setFallbackChain("music", [
          { spec: { name: "facebook/musicgen-medium", domain: "music", host: "audiocraft", estimatedVramMB: 6e3 } },
          { spec: { name: "facebook/musicgen-small", domain: "music", host: "audiocraft", estimatedVramMB: 3e3 } }
        ]);
        this.setFallbackChain("sound", [
          { spec: { name: "cvssp/audioldm-s-full-v2", domain: "sound", host: "diffusers-py", estimatedVramMB: 4e3 } },
          { spec: { name: "facebook/audiogen-medium", domain: "sound", host: "audiocraft", estimatedVramMB: 3e3 } }
        ]);
        this.setFallbackChain("asr", [
          { spec: { name: "base", domain: "asr", host: "whisper-cli", estimatedRamMB: 800 } },
          { spec: { name: "tiny", domain: "asr", host: "whisper-cli", estimatedRamMB: 300 } }
        ]);
        this.setFallbackChain("tts", [
          { spec: { name: "piper-default", domain: "tts", host: "piper", estimatedRamMB: 200 } }
        ]);
      }
      // ------------------------------------------------------------------
      // Internal — footprint estimation
      // ------------------------------------------------------------------
      /** VRAM estimate in MB: the spec's own figure when set, else a per-domain default. */
      estimateFootprintVramMB(spec) {
        if (spec.estimatedVramMB !== void 0) {
          return spec.estimatedVramMB;
        }
        switch (spec.domain) {
          case "embedding":
            return 300;
          case "vision":
            return spec.host === "moondream-station" ? 2e3 : 2500;
          case "image-gen":
            return 8e3;
          case "video-gen":
            return 12e3;
          case "music":
            return 4e3;
          case "sound":
            return 3e3;
          case "asr":
            return 1e3;
          case "tts":
            return 300;
          case "subagent":
            return 4e3;
          case "ocr":
            return 0;
          case "chat":
          default:
            return 5e3;
        }
      }
      /** RAM estimate in MB: the spec's own figure when set, else a per-domain default. */
      estimateFootprintRamMB(spec) {
        if (spec.estimatedRamMB !== void 0) {
          return spec.estimatedRamMB;
        }
        switch (spec.domain) {
          case "ocr":
            return 100;
          case "tts":
            return 200;
          case "embedding":
            return 500;
          case "asr":
            return 800;
          case "music":
          case "sound":
            return 2e3;
          case "vision":
            return 1500;
          case "image-gen":
            return 4e3;
          case "video-gen":
            return 6e3;
          case "subagent":
            return 1500;
          case "chat":
          default:
            return 2e3;
        }
      }
      /** Guess a domain from an Ollama model name by keyword; defaults to "chat". */
      guessOllamaDomain(name10) {
        const lowered = name10.toLowerCase();
        if (/embed|nomic|bge|e5/.test(lowered)) {
          return "embedding";
        }
        if (/moondream|llava|vlm|vision|qwen.*vl|minicpm-v|gemma3|pixtral|cogvlm|internvl/.test(lowered)) {
          return "vision";
        }
        if (/flux|stable.diffusion|sdxl|z-image/.test(lowered)) {
          return "image-gen";
        }
        return "chat";
      }
      // ------------------------------------------------------------------
      // Internal — snapshot + pressure
      // ------------------------------------------------------------------
      /**
       * Assemble a snapshot from the registry and a fresh RAM sample. VRAM is
       * carried over from the previous snapshot (null when there is none);
       * `pollOnce` overwrites it with a fresh reading when one is available.
       */
      buildSnapshot() {
        const ram = ramSnapshotMB();
        const vram = this._lastSnapshot?.vramMB ?? null;
        return {
          loaded: [...this._loaded.values()],
          inflight: [...this._inflight.entries()].map(([key, v]) => ({ key, owner: v.owner, startedMs: v.startedMs })),
          ramMB: ram,
          vramMB: vram,
          lastPollAt: Date.now()
        };
      }
      /**
       * Emit "pressure" events when free RAM / VRAM is below headroom, and
       * store the VRAM reading on `snap`.
       *
       * @param {object} snap Snapshot to check and update.
       * @param {object|null} [sampledVram] VRAM reading taken by the caller;
       *   when omitted, nvidia-smi is queried here.
       */
      async checkPressure(snap, sampledVram) {
        if (snap.ramMB.free < this.ramHeadroomMB) {
          this.emit("pressure", "ram", snap.ramMB.free, this.ramHeadroomMB);
        }
        const vram = sampledVram === void 0 ? await vramSnapshotMB() : sampledVram;
        if (!vram) {
          return;
        }
        snap.vramMB = vram;
        if (vram.free < this.vramHeadroomMB) {
          this.emit("pressure", "vram", vram.free, this.vramHeadroomMB);
        }
      }
      /** Registry key for a spec: `${host}:${name}`. */
      keyOf(spec) {
        return `${spec.host}:${spec.name}`;
      }
    };
    // null = nvidia-smi not probed yet; the VRAM sampler sets true/false.
    _nvSmiAvailable = null;
  }
});
1901
+
1331
1902
  // packages/execution/dist/tools/security-classifier.js
1332
1903
  function classifyTool(name10) {
1333
1904
  for (const rule of RULES) {
@@ -19513,6 +20084,20 @@ import { existsSync as existsSync25, mkdirSync as mkdirSync10, writeFileSync as
19513
20084
  import { join as join28, basename as basename5, extname as extname3, resolve as resolve16 } from "node:path";
19514
20085
  import { homedir as homedir9 } from "node:os";
19515
20086
  import { execFileSync as execFileSync3, execSync as execSync15 } from "node:child_process";
20087
/**
 * Rough RAM estimate (MB) for a Whisper model, chosen by size keyword.
 *
 * The name is matched case-insensitively by substring; keywords are tried
 * from largest to smallest and the first match wins.
 *
 * @param {string} model Whisper model name, e.g. "base" or "large-v3".
 * @returns {number} Estimated RAM in MB; 800 when no keyword matches.
 */
function whisperRamEstimate(model) {
  const lowered = model.toLowerCase();
  const sizeTiers = [
    ["large", 3200],
    ["medium", 1700],
    ["small", 800],
    ["base", 400],
    ["tiny", 200]
  ];
  const matched = sizeTiers.find(([keyword]) => lowered.includes(keyword));
  return matched ? matched[1] : 800;
}
19516
20101
  function isTranscribable(path12) {
19517
20102
  const ext = extname3(path12).toLowerCase();
19518
20103
  return AUDIO_EXTS.has(ext) || VIDEO_EXTS.has(ext);
@@ -19582,6 +20167,7 @@ var AUDIO_EXTS, VIDEO_EXTS, MAX_TRANSCRIBE_URL_BYTES, _tcModule, _tcChecked, Tra
19582
20167
  var init_transcribe_tool = __esm({
19583
20168
  "packages/execution/dist/tools/transcribe-tool.js"() {
19584
20169
  "use strict";
20170
+ init_model_broker();
19585
20171
  init_network_egress_policy();
19586
20172
  AUDIO_EXTS = /* @__PURE__ */ new Set([
19587
20173
  ".mp3",
@@ -19636,7 +20222,7 @@ var init_transcribe_tool = __esm({
19636
20222
  async execute(args) {
19637
20223
  const start2 = performance.now();
19638
20224
  const filePath = resolve16(this.workingDir, String(args["path"] ?? ""));
19639
- const model = String(args["model"] ?? "base");
20225
+ let model = String(args["model"] ?? "base");
19640
20226
  const diarize = Boolean(args["diarize"] ?? false);
19641
20227
  if (!existsSync25(filePath)) {
19642
20228
  return {
@@ -19654,6 +20240,32 @@ var init_transcribe_tool = __esm({
19654
20240
  durationMs: performance.now() - start2
19655
20241
  };
19656
20242
  }
20243
+ const broker = getModelBroker();
20244
+ const askedModel = model;
20245
+ let effectiveModel = model;
20246
+ const whisperDecision = await broker.ensureModelLoadable({
20247
+ name: askedModel,
20248
+ domain: "asr",
20249
+ host: "whisper-cli",
20250
+ owner: "transcribe-file-tool",
20251
+ estimatedRamMB: whisperRamEstimate(askedModel)
20252
+ });
20253
+ if (whisperDecision.kind === "degrade") {
20254
+ effectiveModel = whisperDecision.fallback.name;
20255
+ } else if (whisperDecision.kind === "evict") {
20256
+ for (const target of whisperDecision.evictTargets) {
20257
+ await broker.evict(target.host, target.name, "asr-needs-room");
20258
+ }
20259
+ } else if (whisperDecision.kind === "reject") {
20260
+ return {
20261
+ success: false,
20262
+ output: "",
20263
+ error: `Transcription blocked by resource broker: ${whisperDecision.reason}`,
20264
+ durationMs: performance.now() - start2
20265
+ };
20266
+ }
20267
+ if (effectiveModel !== askedModel)
20268
+ model = effectiveModel;
19657
20269
  const tc = await loadTranscribeCli();
19658
20270
  if (!tc) {
19659
20271
  return this.execViaCli(filePath, model, diarize, start2);
@@ -46186,11 +46798,11 @@ var require_eventemitter3 = __commonJS({
46186
46798
  if (--emitter._eventsCount === 0) emitter._events = new Events();
46187
46799
  else delete emitter._events[evt];
46188
46800
  }
46189
- function EventEmitter14() {
46801
+ function EventEmitter15() {
46190
46802
  this._events = new Events();
46191
46803
  this._eventsCount = 0;
46192
46804
  }
46193
- EventEmitter14.prototype.eventNames = function eventNames() {
46805
+ EventEmitter15.prototype.eventNames = function eventNames() {
46194
46806
  var names = [], events, name10;
46195
46807
  if (this._eventsCount === 0) return names;
46196
46808
  for (name10 in events = this._events) {
@@ -46201,7 +46813,7 @@ var require_eventemitter3 = __commonJS({
46201
46813
  }
46202
46814
  return names;
46203
46815
  };
46204
- EventEmitter14.prototype.listeners = function listeners(event) {
46816
+ EventEmitter15.prototype.listeners = function listeners(event) {
46205
46817
  var evt = prefix ? prefix + event : event, handlers = this._events[evt];
46206
46818
  if (!handlers) return [];
46207
46819
  if (handlers.fn) return [handlers.fn];
@@ -46210,13 +46822,13 @@ var require_eventemitter3 = __commonJS({
46210
46822
  }
46211
46823
  return ee;
46212
46824
  };
46213
- EventEmitter14.prototype.listenerCount = function listenerCount(event) {
46825
+ EventEmitter15.prototype.listenerCount = function listenerCount(event) {
46214
46826
  var evt = prefix ? prefix + event : event, listeners = this._events[evt];
46215
46827
  if (!listeners) return 0;
46216
46828
  if (listeners.fn) return 1;
46217
46829
  return listeners.length;
46218
46830
  };
46219
- EventEmitter14.prototype.emit = function emit2(event, a1, a2, a3, a4, a5) {
46831
+ EventEmitter15.prototype.emit = function emit2(event, a1, a2, a3, a4, a5) {
46220
46832
  var evt = prefix ? prefix + event : event;
46221
46833
  if (!this._events[evt]) return false;
46222
46834
  var listeners = this._events[evt], len = arguments.length, args, i2;
@@ -46267,13 +46879,13 @@ var require_eventemitter3 = __commonJS({
46267
46879
  }
46268
46880
  return true;
46269
46881
  };
46270
- EventEmitter14.prototype.on = function on2(event, fn, context2) {
46882
+ EventEmitter15.prototype.on = function on2(event, fn, context2) {
46271
46883
  return addListener2(this, event, fn, context2, false);
46272
46884
  };
46273
- EventEmitter14.prototype.once = function once(event, fn, context2) {
46885
+ EventEmitter15.prototype.once = function once(event, fn, context2) {
46274
46886
  return addListener2(this, event, fn, context2, true);
46275
46887
  };
46276
- EventEmitter14.prototype.removeListener = function removeListener2(event, fn, context2, once) {
46888
+ EventEmitter15.prototype.removeListener = function removeListener2(event, fn, context2, once) {
46277
46889
  var evt = prefix ? prefix + event : event;
46278
46890
  if (!this._events[evt]) return this;
46279
46891
  if (!fn) {
@@ -46296,7 +46908,7 @@ var require_eventemitter3 = __commonJS({
46296
46908
  }
46297
46909
  return this;
46298
46910
  };
46299
- EventEmitter14.prototype.removeAllListeners = function removeAllListeners(event) {
46911
+ EventEmitter15.prototype.removeAllListeners = function removeAllListeners(event) {
46300
46912
  var evt;
46301
46913
  if (event) {
46302
46914
  evt = prefix ? prefix + event : event;
@@ -46307,12 +46919,12 @@ var require_eventemitter3 = __commonJS({
46307
46919
  }
46308
46920
  return this;
46309
46921
  };
46310
- EventEmitter14.prototype.off = EventEmitter14.prototype.removeListener;
46311
- EventEmitter14.prototype.addListener = EventEmitter14.prototype.on;
46312
- EventEmitter14.prefixed = prefix;
46313
- EventEmitter14.EventEmitter = EventEmitter14;
46922
+ EventEmitter15.prototype.off = EventEmitter15.prototype.removeListener;
46923
+ EventEmitter15.prototype.addListener = EventEmitter15.prototype.on;
46924
+ EventEmitter15.prefixed = prefix;
46925
+ EventEmitter15.EventEmitter = EventEmitter15;
46314
46926
  if ("undefined" !== typeof module) {
46315
- module.exports = EventEmitter14;
46927
+ module.exports = EventEmitter15;
46316
46928
  }
46317
46929
  }
46318
46930
  });
@@ -119084,10 +119696,10 @@ var require_wrap_handler = __commonJS({
119084
119696
  var require_dispatcher = __commonJS({
119085
119697
  "../node_modules/undici/lib/dispatcher/dispatcher.js"(exports, module) {
119086
119698
  "use strict";
119087
- var EventEmitter14 = __require("node:events");
119699
+ var EventEmitter15 = __require("node:events");
119088
119700
  var WrapHandler = require_wrap_handler();
119089
119701
  var wrapInterceptor = (dispatch) => (opts, handler) => dispatch(opts, WrapHandler.wrap(handler));
119090
- var Dispatcher2 = class extends EventEmitter14 {
119702
+ var Dispatcher2 = class extends EventEmitter15 {
119091
119703
  dispatch() {
119092
119704
  throw new Error("not implemented");
119093
119705
  }
@@ -126370,7 +126982,7 @@ var require_socks5_utils = __commonJS({
126370
126982
  var require_socks5_client = __commonJS({
126371
126983
  "../node_modules/undici/lib/core/socks5-client.js"(exports, module) {
126372
126984
  "use strict";
126373
- var { EventEmitter: EventEmitter14 } = __require("node:events");
126985
+ var { EventEmitter: EventEmitter15 } = __require("node:events");
126374
126986
  var { Buffer: Buffer7 } = __require("node:buffer");
126375
126987
  var { InvalidArgumentError, Socks5ProxyError } = require_errors2();
126376
126988
  var { debuglog } = __require("node:util");
@@ -126413,7 +127025,7 @@ var require_socks5_client = __commonJS({
126413
127025
  ERROR: "error",
126414
127026
  CLOSED: "closed"
126415
127027
  };
126416
- var Socks5Client = class extends EventEmitter14 {
127028
+ var Socks5Client = class extends EventEmitter15 {
126417
127029
  constructor(socket, options2 = {}) {
126418
127030
  super();
126419
127031
  if (!socket) {
@@ -132798,9 +133410,9 @@ var require_memory_cache_store = __commonJS({
132798
133410
  "../node_modules/undici/lib/cache/memory-cache-store.js"(exports, module) {
132799
133411
  "use strict";
132800
133412
  var { Writable } = __require("node:stream");
132801
- var { EventEmitter: EventEmitter14 } = __require("node:events");
133413
+ var { EventEmitter: EventEmitter15 } = __require("node:events");
132802
133414
  var { assertCacheKey, assertCacheValue } = require_cache();
132803
- var MemoryCacheStore = class extends EventEmitter14 {
133415
+ var MemoryCacheStore = class extends EventEmitter15 {
132804
133416
  #maxCount = 1024;
132805
133417
  #maxSize = 104857600;
132806
133418
  // 100MB
@@ -229661,7 +230273,7 @@ var init_send_ssdp_message = __esm({
229661
230273
  });
229662
230274
 
229663
230275
  // ../node_modules/@achingbrain/ssdp/dist/src/ssdp.js
229664
- import { EventEmitter as EventEmitter2, on } from "node:events";
230276
+ import { EventEmitter as EventEmitter3, on } from "node:events";
229665
230277
  import { createRequire } from "node:module";
229666
230278
  var req, name8, version2, DEFAULT_SSDP_SIGNATURE, SSDP;
229667
230279
  var init_ssdp = __esm({
@@ -229679,7 +230291,7 @@ var init_ssdp = __esm({
229679
230291
  req = createRequire(import.meta.url);
229680
230292
  ({ name: name8, version: version2 } = req("../../package.json"));
229681
230293
  DEFAULT_SSDP_SIGNATURE = `node.js/${process.version.substring(1)} UPnP/1.1 ${name8}/${version2}`;
229682
- SSDP = class extends EventEmitter2 {
230294
+ SSDP = class extends EventEmitter3 {
229683
230295
  udn;
229684
230296
  signature;
229685
230297
  sockets;
@@ -236837,7 +237449,7 @@ var require_extension2 = __commonJS({
236837
237449
  var require_websocket2 = __commonJS({
236838
237450
  "../node_modules/ws/lib/websocket.js"(exports, module) {
236839
237451
  "use strict";
236840
- var EventEmitter14 = __require("events");
237452
+ var EventEmitter15 = __require("events");
236841
237453
  var https4 = __require("https");
236842
237454
  var http6 = __require("http");
236843
237455
  var net5 = __require("net");
@@ -236869,7 +237481,7 @@ var require_websocket2 = __commonJS({
236869
237481
  var protocolVersions = [8, 13];
236870
237482
  var readyStates = ["CONNECTING", "OPEN", "CLOSING", "CLOSED"];
236871
237483
  var subprotocolRegex = /^[!#$%&'*+\-.0-9A-Z^_`|a-z~]+$/;
236872
- var WebSocket6 = class _WebSocket extends EventEmitter14 {
237484
+ var WebSocket6 = class _WebSocket extends EventEmitter15 {
236873
237485
  /**
236874
237486
  * Create a new `WebSocket`.
236875
237487
  *
@@ -237866,7 +238478,7 @@ var require_subprotocol = __commonJS({
237866
238478
  var require_websocket_server = __commonJS({
237867
238479
  "../node_modules/ws/lib/websocket-server.js"(exports, module) {
237868
238480
  "use strict";
237869
- var EventEmitter14 = __require("events");
238481
+ var EventEmitter15 = __require("events");
237870
238482
  var http6 = __require("http");
237871
238483
  var { Duplex: Duplex3 } = __require("stream");
237872
238484
  var { createHash: createHash31 } = __require("crypto");
@@ -237879,7 +238491,7 @@ var require_websocket_server = __commonJS({
237879
238491
  var RUNNING = 0;
237880
238492
  var CLOSING = 1;
237881
238493
  var CLOSED = 2;
237882
- var WebSocketServer4 = class extends EventEmitter14 {
238494
+ var WebSocketServer4 = class extends EventEmitter15 {
237883
238495
  /**
237884
238496
  * Create a `WebSocketServer` instance.
237885
238497
  *
@@ -254016,6 +254628,7 @@ var DEFAULT_DIFFUSERS_IMAGE_MODEL, DEFAULT_OLLAMA_IMAGE_MODEL, LEGACY_SDXL_TURBO
254016
254628
  var init_image_generate = __esm({
254017
254629
  "packages/execution/dist/tools/image-generate.js"() {
254018
254630
  "use strict";
254631
+ init_model_broker();
254019
254632
  init_venv_paths();
254020
254633
  init_model_store();
254021
254634
  DEFAULT_DIFFUSERS_IMAGE_MODEL = "Efficient-Large-Model/SANA1.5_1.6B_1024px_diffusers";
@@ -254830,6 +255443,28 @@ if __name__ == "__main__":
254830
255443
  const requestedBackend = args["backend"] ? String(args["backend"]) : this.defaultBackend;
254831
255444
  const seed = optionalNumberArg(args["seed"]);
254832
255445
  const candidates = imageGenerationFallbackCandidates(requestedModel, requestedBackend, generationFallbackEnabled(args));
255446
+ const broker = getModelBroker();
255447
+ const firstCandidate = candidates[0];
255448
+ if (firstCandidate) {
255449
+ const decision2 = await broker.ensureModelLoadable({
255450
+ name: firstCandidate.model,
255451
+ domain: "image-gen",
255452
+ host: firstCandidate.backend === "ollama" ? "ollama" : "diffusers-py",
255453
+ owner: "image-generate-tool"
255454
+ });
255455
+ if (decision2.kind === "evict") {
255456
+ for (const target of decision2.evictTargets) {
255457
+ await broker.evict(target.host, target.name, "image-gen-needs-room");
255458
+ }
255459
+ } else if (decision2.kind === "reject") {
255460
+ return {
255461
+ success: false,
255462
+ output: "",
255463
+ error: `Image generation blocked by resource broker: ${decision2.reason}`,
255464
+ durationMs: performance.now() - start2
255465
+ };
255466
+ }
255467
+ }
254833
255468
  try {
254834
255469
  return await this.generateCandidateLadder({ candidates, prompt, args, seed, start: start2 });
254835
255470
  } catch (err) {
@@ -256270,6 +256905,7 @@ var DEFAULT_SOUND_MODEL, DEFAULT_MUSIC_MODEL, DIFFUSERS_AUDIO_PACKAGES, TRANSFOR
256270
256905
  var init_audio_generate = __esm({
256271
256906
  "packages/execution/dist/tools/audio-generate.js"() {
256272
256907
  "use strict";
256908
+ init_model_broker();
256273
256909
  init_venv_paths();
256274
256910
  init_model_store();
256275
256911
  DEFAULT_SOUND_MODEL = "cvssp/audioldm-s-full-v2";
@@ -257217,6 +257853,28 @@ if __name__ == "__main__":
257217
257853
  const candidates = audioGenerationFallbackCandidates(kind, requestedModel, requestedBackend, generationFallbackEnabled2(args));
257218
257854
  const seed = optionalNumberArg2(args["seed"]);
257219
257855
  const playback = playbackRequested(args);
257856
+ const broker = getModelBroker();
257857
+ const firstCandidate = candidates[0];
257858
+ if (firstCandidate) {
257859
+ const decision2 = await broker.ensureModelLoadable({
257860
+ name: firstCandidate.model,
257861
+ domain: kind === "music" ? "music" : "sound",
257862
+ host: firstCandidate.backend === "audiocraft" ? "audiocraft" : firstCandidate.backend === "tangoflux" ? "tangoflux" : firstCandidate.backend === "transformers" ? "diffusers-py" : "diffusers-py",
257863
+ owner: `audio-generate-tool/${kind}`
257864
+ });
257865
+ if (decision2.kind === "evict") {
257866
+ for (const target of decision2.evictTargets) {
257867
+ await broker.evict(target.host, target.name, `${kind}-gen-needs-room`);
257868
+ }
257869
+ } else if (decision2.kind === "reject") {
257870
+ return {
257871
+ success: false,
257872
+ output: "",
257873
+ error: `${kind === "music" ? "Music" : "Sound"} generation blocked by resource broker: ${decision2.reason}`,
257874
+ durationMs: performance.now() - start2
257875
+ };
257876
+ }
257877
+ }
257220
257878
  try {
257221
257879
  return await this.generateCandidateLadder({ kind, candidates, prompt, args, seed, playback, start: start2 });
257222
257880
  } catch (err) {
@@ -258160,6 +258818,7 @@ var DEFAULT_DIFFUSERS_VIDEO_MODEL, SANA_VIDEO_480P_MODEL, SANA_VIDEO_720P_MODEL,
258160
258818
  var init_video_generate = __esm({
258161
258819
  "packages/execution/dist/tools/video-generate.js"() {
258162
258820
  "use strict";
258821
+ init_model_broker();
258163
258822
  init_venv_paths();
258164
258823
  init_model_store();
258165
258824
  DEFAULT_DIFFUSERS_VIDEO_MODEL = "Efficient-Large-Model/SANA-Video_2B_480p";
@@ -259497,6 +260156,28 @@ if __name__ == "__main__":
259497
260156
  const withAudio = booleanArg3(args["with_audio"], false);
259498
260157
  const audioInput = typeof args["audio_input"] === "string" && args["audio_input"].trim() ? String(args["audio_input"]).trim() : void 0;
259499
260158
  const candidates = videoGenerationFallbackCandidates(requestedModel, requestedBackend, inferredKind, generationFallbackEnabled3(args), { preferNativeAudioVideo: withAudio || Boolean(audioInput) });
260159
+ const broker = getModelBroker();
260160
+ const firstCandidate = candidates[0];
260161
+ if (firstCandidate) {
260162
+ const decision2 = await broker.ensureModelLoadable({
260163
+ name: firstCandidate.model,
260164
+ domain: "video-gen",
260165
+ host: firstCandidate.backend === "comfyui" ? "comfyui" : "diffusers-py",
260166
+ owner: "video-generate-tool"
260167
+ });
260168
+ if (decision2.kind === "evict") {
260169
+ for (const target of decision2.evictTargets) {
260170
+ await broker.evict(target.host, target.name, "video-gen-needs-room");
260171
+ }
260172
+ } else if (decision2.kind === "reject") {
260173
+ return {
260174
+ success: false,
260175
+ output: "",
260176
+ error: `Video generation blocked by resource broker: ${decision2.reason}`,
260177
+ durationMs: performance.now() - start2
260178
+ };
260179
+ }
260180
+ }
259500
260181
  if (candidates.length === 0) {
259501
260182
  return {
259502
260183
  success: false,
@@ -261097,6 +261778,7 @@ var moondreamClient, moondreamError, stationProcess, hfPointUnavailable, IMAGE_E
261097
261778
  var init_vision = __esm({
261098
261779
  "packages/execution/dist/tools/vision.js"() {
261099
261780
  "use strict";
261781
+ init_model_broker();
261100
261782
  moondreamClient = null;
261101
261783
  moondreamError = null;
261102
261784
  stationProcess = null;
@@ -261195,14 +261877,43 @@ var init_vision = __esm({
261195
261877
  };
261196
261878
  }
261197
261879
  }
261880
+ const broker = getModelBroker();
261881
+ const moondreamDecision = await broker.ensureModelLoadable({
261882
+ name: "moondream2",
261883
+ domain: "vision",
261884
+ host: "moondream-station",
261885
+ owner: "vision-tool"
261886
+ });
261887
+ const forceDegradeToOllama = moondreamDecision.kind === "degrade" && moondreamDecision.fallback.host === "ollama";
261888
+ const forceReject = moondreamDecision.kind === "reject" && this._activeModelHasVision !== true;
261198
261889
  let client = null;
261199
- try {
261200
- client = await getMoondreamClient();
261201
- } catch {
261890
+ if (!forceDegradeToOllama) {
261891
+ try {
261892
+ client = await getMoondreamClient();
261893
+ } catch {
261894
+ }
261202
261895
  }
261203
261896
  if (client) {
261897
+ broker.registerLoaded({
261898
+ key: "moondream-station:moondream2",
261899
+ name: "moondream2",
261900
+ domain: "vision",
261901
+ host: "moondream-station",
261902
+ owner: "vision-tool",
261903
+ vramMB: 2e3,
261904
+ ramMB: 1500,
261905
+ priority: 1
261906
+ });
261204
261907
  return await this.runMoondream(client, buffer2, filename, action, prompt, length4, start2);
261205
261908
  }
261909
+ if (forceReject) {
261910
+ return {
261911
+ success: false,
261912
+ output: "",
261913
+ error: `Vision blocked by resource broker: ${moondreamDecision.kind === "reject" ? moondreamDecision.reason : "insufficient memory"}`,
261914
+ durationMs: performance.now() - start2
261915
+ };
261916
+ }
261206
261917
  const ollamaResult = await this.tryOllamaVision(buffer2, filename, action, prompt, length4, start2);
261207
261918
  if (ollamaResult)
261208
261919
  return ollamaResult;
@@ -261300,7 +262011,41 @@ Coordinates are normalized (0-1). Multiply by image width/height for pixel value
261300
262011
  async tryOllamaVision(buffer2, filename, action, prompt, length4, start2) {
261301
262012
  const ollamaHost = process.env["OLLAMA_HOST"] || "http://127.0.0.1:11434";
261302
262013
  const envModel = process.env["OLLAMA_VISION_MODEL"];
261303
- const model = envModel || (this._activeModelHasVision && this._activeModel ? this._activeModel : "moondream");
262014
+ let model = envModel || (this._activeModelHasVision && this._activeModel ? this._activeModel : "moondream");
262015
+ const broker = getModelBroker();
262016
+ const decision2 = await broker.ensureModelLoadable({
262017
+ name: model,
262018
+ domain: "vision",
262019
+ host: "ollama",
262020
+ owner: "vision-tool/ollama",
262021
+ requestedNumCtx: 2048,
262022
+ estimatedVramMB: 2e3
262023
+ });
262024
+ let numCtx;
262025
+ if (decision2.kind === "reject")
262026
+ return null;
262027
+ if (decision2.kind === "degrade") {
262028
+ model = decision2.fallback.name;
262029
+ } else if (decision2.kind === "evict") {
262030
+ for (const target of decision2.evictTargets) {
262031
+ await broker.evict(target.host, target.name, "vision-needs-room");
262032
+ }
262033
+ numCtx = decision2.effectiveNumCtx;
262034
+ } else if (decision2.kind === "ok") {
262035
+ numCtx = decision2.effectiveNumCtx;
262036
+ } else if (decision2.kind === "wait-for-inflight") {
262037
+ const inner = await decision2.promise;
262038
+ if (inner.kind === "ok")
262039
+ numCtx = inner.effectiveNumCtx;
262040
+ else if (inner.kind === "degrade")
262041
+ model = inner.fallback.name;
262042
+ else if (inner.kind === "reject")
262043
+ return null;
262044
+ }
262045
+ if (numCtx === void 0) {
262046
+ const trainCtx = await broker.getNctxTrain(model);
262047
+ numCtx = trainCtx && trainCtx > 0 ? Math.min(trainCtx, 4096) : 2048;
262048
+ }
261304
262049
  const imageBase64 = buffer2.toString("base64");
261305
262050
  let ollamaPrompt;
261306
262051
  switch (action) {
@@ -261323,7 +262068,7 @@ Coordinates are normalized (0-1). Multiply by image width/height for pixel value
261323
262068
  let res = await fetch(`${ollamaHost}/api/generate`, {
261324
262069
  method: "POST",
261325
262070
  headers: { "Content-Type": "application/json" },
261326
- body: JSON.stringify({ model, prompt: ollamaPrompt, images: [imageBase64], stream: false }),
262071
+ body: JSON.stringify({ model, prompt: ollamaPrompt, images: [imageBase64], stream: false, options: { num_ctx: numCtx } }),
261327
262072
  signal: AbortSignal.timeout(6e4)
261328
262073
  });
261329
262074
  if (!res.ok && model === "moondream") {
@@ -261335,15 +262080,18 @@ Coordinates are normalized (0-1). Multiply by image width/height for pixel value
261335
262080
  res = await fetch(`${ollamaHost}/api/generate`, {
261336
262081
  method: "POST",
261337
262082
  headers: { "Content-Type": "application/json" },
261338
- body: JSON.stringify({ model, prompt: ollamaPrompt, images: [imageBase64], stream: false }),
262083
+ body: JSON.stringify({ model, prompt: ollamaPrompt, images: [imageBase64], stream: false, options: { num_ctx: numCtx } }),
261339
262084
  signal: AbortSignal.timeout(6e4)
261340
262085
  });
261341
262086
  } catch {
261342
262087
  }
261343
262088
  }
261344
262089
  }
261345
- if (!res.ok)
262090
+ if (!res.ok) {
262091
+ broker.clearInflight("ollama", model);
261346
262092
  return null;
262093
+ }
262094
+ broker.touch("ollama", model);
261347
262095
  const data = await res.json();
261348
262096
  const response = data.response || "";
261349
262097
  if (!response)
@@ -513030,7 +513778,7 @@ var init_dist4 = __esm({
513030
513778
  });
513031
513779
 
513032
513780
  // packages/execution/dist/tools/code-graph-events.js
513033
- import { EventEmitter as EventEmitter3 } from "node:events";
513781
+ import { EventEmitter as EventEmitter4 } from "node:events";
513034
513782
  function emitIndexed(payload) {
513035
513783
  codeGraphEvents.publish({ type: "indexed", timestamp: Date.now(), ...payload });
513036
513784
  }
@@ -513048,7 +513796,7 @@ var init_code_graph_events = __esm({
513048
513796
  "packages/execution/dist/tools/code-graph-events.js"() {
513049
513797
  "use strict";
513050
513798
  MAX_RECENT = 256;
513051
- CodeGraphEventBus = class extends EventEmitter3 {
513799
+ CodeGraphEventBus = class extends EventEmitter4 {
513052
513800
  ring = [];
513053
513801
  constructor() {
513054
513802
  super();
@@ -519570,6 +520318,7 @@ var init_full_sub_agent = __esm({
519570
520318
  "packages/execution/dist/tools/full-sub-agent.js"() {
519571
520319
  "use strict";
519572
520320
  init_process_kill();
520321
+ init_model_broker();
519573
520322
  _activeSubProcesses = /* @__PURE__ */ new Map();
519574
520323
  FullSubAgentTool = class {
519575
520324
  name = "full_sub_agent";
@@ -519631,10 +520380,45 @@ var init_full_sub_agent = __esm({
519631
520380
  if (!task)
519632
520381
  return { success: false, output: "", error: "task is required", durationMs: performance.now() - start2 };
519633
520382
  const model = String(args["model"] ?? this.model);
520383
+ const broker = getModelBroker();
520384
+ const decision2 = await broker.ensureModelLoadable({
520385
+ name: model || "default",
520386
+ domain: "subagent",
520387
+ host: model ? "ollama" : "subprocess",
520388
+ owner: "full-sub-agent-tool",
520389
+ estimatedVramMB: 4e3,
520390
+ estimatedRamMB: 1500,
520391
+ priority: 1
520392
+ // sub-agents are higher priority than idle background models
520393
+ });
520394
+ if (decision2.kind === "evict") {
520395
+ for (const target of decision2.evictTargets) {
520396
+ await broker.evict(target.host, target.name, "sub-agent-spawn-needs-room");
520397
+ }
520398
+ } else if (decision2.kind === "reject") {
520399
+ return {
520400
+ success: false,
520401
+ output: "",
520402
+ error: `Sub-agent spawn blocked by resource broker: ${decision2.reason}`,
520403
+ durationMs: performance.now() - start2
520404
+ };
520405
+ }
519634
520406
  const entry = spawnFullSubAgent(task, { model, backendUrl: this.backendUrl, workingDir: this.workingDir }, (text) => this.onViewWrite?.(entry.id, text), (id, exitCode, output) => {
519635
520407
  this.onViewStatus?.(id, exitCode === 0 ? "completed" : "failed");
520408
+ broker.unregisterLoaded("subprocess", id, "sub-agent-exited");
519636
520409
  this.onComplete?.(id, task, exitCode, output);
519637
520410
  });
520411
+ broker.registerLoaded({
520412
+ key: `subprocess:${entry.id}`,
520413
+ name: entry.id,
520414
+ domain: "subagent",
520415
+ host: "subprocess",
520416
+ owner: "full-sub-agent-tool",
520417
+ vramMB: 4e3,
520418
+ ramMB: 1500,
520419
+ pid: entry.pid,
520420
+ priority: 1
520421
+ });
519638
520422
  this.onViewRegister?.(entry.id, entry.id, "full");
519639
520423
  return {
519640
520424
  success: true,
@@ -519708,6 +520492,7 @@ var _idCounter, AgentTool;
519708
520492
  var init_agent_tool = __esm({
519709
520493
  "packages/execution/dist/tools/agent-tool.js"() {
519710
520494
  "use strict";
520495
+ init_model_broker();
519711
520496
  _idCounter = 0;
519712
520497
  AgentTool = class {
519713
520498
  name = "agent";
@@ -519798,6 +520583,31 @@ var init_agent_tool = __esm({
519798
520583
  }
519799
520584
  const model = modelOverride ?? this.config.model;
519800
520585
  const agentId = generateAgentId(subagentType);
520586
+ {
520587
+ const broker = getModelBroker();
520588
+ const decision2 = await broker.ensureModelLoadable({
520589
+ name: model || "default",
520590
+ domain: "subagent",
520591
+ host: "ollama",
520592
+ owner: `agent-tool/${subagentType}`,
520593
+ estimatedVramMB: 2500,
520594
+ estimatedRamMB: 1e3,
520595
+ priority: 1
520596
+ });
520597
+ if (decision2.kind === "evict") {
520598
+ for (const target of decision2.evictTargets) {
520599
+ await broker.evict(target.host, target.name, "agent-tool-needs-room");
520600
+ }
520601
+ } else if (decision2.kind === "reject") {
520602
+ return {
520603
+ success: false,
520604
+ output: "",
520605
+ error: `Agent spawn blocked by resource broker: ${decision2.reason}`,
520606
+ durationMs: performance.now() - start2
520607
+ };
520608
+ }
520609
+ broker.touch("ollama", model);
520610
+ }
519801
520611
  const label = description ?? `${subagentType}: ${prompt.slice(0, 40)}`;
519802
520612
  const preloadedFiles = [];
519803
520613
  if (relevantFilePaths.length > 0) {
@@ -522473,13 +523283,13 @@ var init_notebook_edit = __esm({
522473
523283
 
522474
523284
  // packages/execution/dist/tools/environment-snapshot.js
522475
523285
  import { execSync as execSync42 } from "node:child_process";
522476
- import { cpus, totalmem, freemem, hostname as hostname2, platform as platform2, arch, uptime } from "node:os";
523286
+ import { cpus, totalmem as totalmem2, freemem as freemem2, hostname as hostname2, platform as platform2, arch, uptime } from "node:os";
522477
523287
  import { statfsSync as statfsSync2 } from "node:fs";
522478
523288
  function collectSnapshot(workingDir) {
522479
523289
  const now = /* @__PURE__ */ new Date();
522480
523290
  const cpuInfo = cpus();
522481
- const totalRAM = totalmem();
522482
- const freeRAM = freemem();
523291
+ const totalRAM = totalmem2();
523292
+ const freeRAM = freemem2();
522483
523293
  let load1 = 0, load5 = 0, load15 = 0;
522484
523294
  try {
522485
523295
  const loadavg4 = __require("node:os").loadavg();
@@ -522674,6 +523484,7 @@ var VideoUnderstandTool;
522674
523484
  var init_video_understand = __esm({
522675
523485
  "packages/execution/dist/tools/video-understand.js"() {
522676
523486
  "use strict";
523487
+ init_model_broker();
522677
523488
  VideoUnderstandTool = class {
522678
523489
  name = "video_understand";
522679
523490
  description = "Analyze a video from URL or local file. Produces timestamped transcript aligned with keyframe descriptions. Supports YouTube URLs and direct video files. Pipeline: download → transcribe (Whisper) → extract keyframes (scene detection) → describe frames → align timestamps → save structured output.";
@@ -522704,6 +523515,36 @@ var init_video_understand = __esm({
522704
523515
  if (!url && !localPath) {
522705
523516
  return { success: false, output: "", error: "url or path required", durationMs: performance.now() - start2 };
522706
523517
  }
523518
+ const broker = getModelBroker();
523519
+ const asrDecision = await broker.ensureModelLoadable({
523520
+ name: whisperModel,
523521
+ domain: "asr",
523522
+ host: "whisper-cli",
523523
+ owner: "video-understand-tool"
523524
+ });
523525
+ if (asrDecision.kind === "evict") {
523526
+ for (const target of asrDecision.evictTargets) {
523527
+ await broker.evict(target.host, target.name, "video-understand-needs-asr-room");
523528
+ }
523529
+ } else if (asrDecision.kind === "reject") {
523530
+ return {
523531
+ success: false,
523532
+ output: "",
523533
+ error: `Video understanding blocked by resource broker (ASR): ${asrDecision.reason}`,
523534
+ durationMs: performance.now() - start2
523535
+ };
523536
+ }
523537
+ const visionDecision = await broker.ensureModelLoadable({
523538
+ name: "moondream2",
523539
+ domain: "vision",
523540
+ host: "moondream-station",
523541
+ owner: "video-understand-tool"
523542
+ });
523543
+ if (visionDecision.kind === "evict") {
523544
+ for (const target of visionDecision.evictTargets) {
523545
+ await broker.evict(target.host, target.name, "video-understand-needs-vision-room");
523546
+ }
523547
+ }
522707
523548
  const outDir = join71(this.workingDir, ".omnius", "video-analysis");
522708
523549
  mkdirSync28(outDir, { recursive: true });
522709
523550
  const tmpDir = join71(outDir, `tmp-${Date.now()}`);
@@ -524314,6 +525155,7 @@ __export(dist_exports, {
524314
525155
  MemorySearchTool: () => MemorySearchTool,
524315
525156
  MemoryWriteTool: () => MemoryWriteTool,
524316
525157
  MeshtasticTool: () => MeshtasticTool,
525158
+ ModelBroker: () => ModelBroker,
524317
525159
  MultimodalMemoryTool: () => MultimodalMemoryTool,
524318
525160
  NetworkEgressPolicyError: () => NetworkEgressPolicyError,
524319
525161
  NexusTool: () => NexusTool,
@@ -524451,6 +525293,7 @@ __export(dist_exports, {
524451
525293
  getFileNotes: () => getFileNotes,
524452
525294
  getFullSubAgent: () => getFullSubAgent,
524453
525295
  getImageGenerationPreset: () => getImageGenerationPreset,
525296
+ getModelBroker: () => getModelBroker,
524454
525297
  getModelStoreDiskInfo: () => getModelStoreDiskInfo,
524455
525298
  getRecentChangesSummary: () => getRecentChangesSummary,
524456
525299
  getSecretsFilePath: () => getSecretsFilePath,
@@ -524580,6 +525423,7 @@ var init_dist5 = __esm({
524580
525423
  "packages/execution/dist/index.js"() {
524581
525424
  "use strict";
524582
525425
  init_tool_executor();
525426
+ init_model_broker();
524583
525427
  init_security_classifier();
524584
525428
  init_tool_manifest();
524585
525429
  init_provenance();
@@ -527339,12 +528183,12 @@ var init_ollama_pool_cleanup = __esm({
527339
528183
  });
527340
528184
 
527341
528185
  // packages/orchestrator/dist/ollama-pool.js
527342
- import { spawn as spawn23, exec } from "node:child_process";
528186
+ import { spawn as spawn23, exec as exec2 } from "node:child_process";
527343
528187
  import { existsSync as existsSync63, readFileSync as readFileSync50, readdirSync as readdirSync20, statfsSync as statfsSync3, statSync as statSync25 } from "node:fs";
527344
528188
  import { homedir as homedir28 } from "node:os";
527345
528189
  import { join as join77 } from "node:path";
527346
528190
  import { createServer as createServer3 } from "node:net";
527347
- import { EventEmitter as EventEmitter4 } from "node:events";
528191
+ import { EventEmitter as EventEmitter5 } from "node:events";
527348
528192
  function discoverSystemOllamaModelStore() {
527349
528193
  const fromEnv = process.env["OLLAMA_MODELS"]?.trim();
527350
528194
  if (fromEnv && isDirectory(fromEnv))
@@ -527436,7 +528280,7 @@ async function detectGpus() {
527436
528280
  if (_nvidiaSmiAvailable === false)
527437
528281
  return [];
527438
528282
  return new Promise((resolve55) => {
527439
- exec("nvidia-smi --query-gpu=index,uuid,name,memory.total,memory.free,utilization.gpu --format=csv,noheader,nounits 2>/dev/null", { encoding: "utf8", timeout: 3e3 }, (err, stdout) => {
528283
+ exec2("nvidia-smi --query-gpu=index,uuid,name,memory.total,memory.free,utilization.gpu --format=csv,noheader,nounits 2>/dev/null", { encoding: "utf8", timeout: 3e3 }, (err, stdout) => {
527440
528284
  if (err) {
527441
528285
  _nvidiaSmiAvailable = false;
527442
528286
  resolve55([]);
@@ -527465,7 +528309,7 @@ async function detectGpus() {
527465
528309
  });
527466
528310
  }
527467
528311
  async function getHardwareSnapshot() {
527468
- const { totalmem: totalmem7, freemem: freemem6, cpus: cpus5 } = await import("node:os");
528312
+ const { totalmem: totalmem8, freemem: freemem7, cpus: cpus5 } = await import("node:os");
527469
528313
  const gpus = await detectGpus();
527470
528314
  const diskPath = discoverSystemOllamaModelStore() ?? homedir28();
527471
528315
  const disk = snapshotDisk(diskPath);
@@ -527473,8 +528317,8 @@ async function getHardwareSnapshot() {
527473
528317
  return {
527474
528318
  gpus,
527475
528319
  cpuCores: cpus5().length,
527476
- ramTotalMB: Math.round(totalmem7() / (1024 * 1024)),
527477
- ramFreeMB: Math.round(freemem6() / (1024 * 1024)),
528320
+ ramTotalMB: Math.round(totalmem8() / (1024 * 1024)),
528321
+ ramFreeMB: Math.round(freemem7() / (1024 * 1024)),
527478
528322
  disk,
527479
528323
  network,
527480
528324
  takenAtMs: Date.now()
@@ -527741,7 +528585,7 @@ var init_ollama_pool = __esm({
527741
528585
  return { proc, ready };
527742
528586
  };
527743
528587
  _gpuCursor = 0;
527744
- OllamaPool = class extends EventEmitter4 {
528588
+ OllamaPool = class extends EventEmitter5 {
527745
528589
  config;
527746
528590
  instances = [];
527747
528591
  reaperHandle = null;
@@ -540331,8 +541175,8 @@ var init_streaming_executor = __esm({
540331
541175
  startExecution(entry) {
540332
541176
  entry.state = "executing";
540333
541177
  entry.startedAt = Date.now();
540334
- const exec5 = this.executeFn;
540335
- entry.promise = exec5(entry.name, entry.args).then((result) => {
541178
+ const exec6 = this.executeFn;
541179
+ entry.promise = exec6(entry.name, entry.args).then((result) => {
540336
541180
  entry.state = "completed";
540337
541181
  entry.result = result;
540338
541182
  entry.completedAt = Date.now();
@@ -540621,7 +541465,7 @@ __export(preflightSnapshot_exports, {
540621
541465
  });
540622
541466
  import { existsSync as existsSync79, readFileSync as readFileSync62, statSync as statSync31 } from "node:fs";
540623
541467
  import { execSync as execSync46 } from "node:child_process";
540624
- import { homedir as homedir29, platform as platform3, arch as arch2, totalmem as totalmem2, freemem as freemem2, hostname as hostname3 } from "node:os";
541468
+ import { homedir as homedir29, platform as platform3, arch as arch2, totalmem as totalmem3, freemem as freemem3, hostname as hostname3 } from "node:os";
540625
541469
  import { join as join92 } from "node:path";
540626
541470
  import { createHash as createHash17 } from "node:crypto";
540627
541471
  function capturePreflightSnapshot(workingDir) {
@@ -540668,8 +541512,8 @@ function capturePreflightSnapshot(workingDir) {
540668
541512
  platform: platform3(),
540669
541513
  arch: arch2(),
540670
541514
  nodeVersion: process.version,
540671
- totalMemBytes: totalmem2(),
540672
- freeMemBytes: freemem2(),
541515
+ totalMemBytes: totalmem3(),
541516
+ freeMemBytes: freemem3(),
540673
541517
  hostname: hostname3()
540674
541518
  },
540675
541519
  toolchain: captureToolchainVersions(),
@@ -559212,7 +560056,7 @@ import { existsSync as existsSync85, mkdirSync as mkdirSync46, writeFileSync as
559212
560056
  import { join as join100, dirname as dirname27 } from "node:path";
559213
560057
  import { homedir as homedir32 } from "node:os";
559214
560058
  import { fileURLToPath as fileURLToPath11 } from "node:url";
559215
- import { EventEmitter as EventEmitter5 } from "node:events";
560059
+ import { EventEmitter as EventEmitter6 } from "node:events";
559216
560060
  import { createInterface as createInterface2 } from "node:readline";
559217
560061
  function isAudioPath(path12) {
559218
560062
  const ext = path12.toLowerCase().split(".").pop();
@@ -559451,9 +560295,9 @@ function ensureTranscribeCliBackground() {
559451
560295
  } catch {
559452
560296
  }
559453
560297
  try {
559454
- const { exec: exec5 } = await import("node:child_process");
560298
+ const { exec: exec6 } = await import("node:child_process");
559455
560299
  return new Promise((resolve55) => {
559456
- exec5("npm i -g transcribe-cli", { timeout: 18e4 }, (err) => {
560300
+ exec6("npm i -g transcribe-cli", { timeout: 18e4 }, (err) => {
559457
560301
  resolve55(!err);
559458
560302
  });
559459
560303
  });
@@ -559501,7 +560345,7 @@ var init_listen = __esm({
559501
560345
  ".m4v",
559502
560346
  ".ts"
559503
560347
  ]);
559504
- WhisperFallbackTranscriber = class extends EventEmitter5 {
560348
+ WhisperFallbackTranscriber = class extends EventEmitter6 {
559505
560349
  constructor(model, scriptPath2) {
559506
560350
  super();
559507
560351
  this.model = model;
@@ -559610,7 +560454,7 @@ var init_listen = __esm({
559610
560454
  this._ready = false;
559611
560455
  }
559612
560456
  };
559613
- ListenEngine = class extends EventEmitter5 {
560457
+ ListenEngine = class extends EventEmitter6 {
559614
560458
  config;
559615
560459
  micProcess = null;
559616
560460
  liveTranscriber = null;
@@ -562355,7 +563199,7 @@ var require_extension3 = __commonJS({
562355
563199
  var require_websocket3 = __commonJS({
562356
563200
  "node_modules/.pnpm/ws@8.20.1/node_modules/ws/lib/websocket.js"(exports, module) {
562357
563201
  "use strict";
562358
- var EventEmitter14 = __require("events");
563202
+ var EventEmitter15 = __require("events");
562359
563203
  var https4 = __require("https");
562360
563204
  var http6 = __require("http");
562361
563205
  var net5 = __require("net");
@@ -562387,7 +563231,7 @@ var require_websocket3 = __commonJS({
562387
563231
  var protocolVersions = [8, 13];
562388
563232
  var readyStates = ["CONNECTING", "OPEN", "CLOSING", "CLOSED"];
562389
563233
  var subprotocolRegex = /^[!#$%&'*+\-.0-9A-Z^_`|a-z~]+$/;
562390
- var WebSocket6 = class _WebSocket extends EventEmitter14 {
563234
+ var WebSocket6 = class _WebSocket extends EventEmitter15 {
562391
563235
  /**
562392
563236
  * Create a new `WebSocket`.
562393
563237
  *
@@ -563384,7 +564228,7 @@ var require_subprotocol2 = __commonJS({
563384
564228
  var require_websocket_server2 = __commonJS({
563385
564229
  "node_modules/.pnpm/ws@8.20.1/node_modules/ws/lib/websocket-server.js"(exports, module) {
563386
564230
  "use strict";
563387
- var EventEmitter14 = __require("events");
564231
+ var EventEmitter15 = __require("events");
563388
564232
  var http6 = __require("http");
563389
564233
  var { Duplex: Duplex3 } = __require("stream");
563390
564234
  var { createHash: createHash31 } = __require("crypto");
@@ -563397,7 +564241,7 @@ var require_websocket_server2 = __commonJS({
563397
564241
  var RUNNING = 0;
563398
564242
  var CLOSING = 1;
563399
564243
  var CLOSED = 2;
563400
- var WebSocketServer4 = class extends EventEmitter14 {
564244
+ var WebSocketServer4 = class extends EventEmitter15 {
563401
564245
  /**
563402
564246
  * Create a `WebSocketServer` instance.
563403
564247
  *
@@ -564285,6 +565129,9 @@ var init_command_registry = __esm({
564285
565129
  ["/wizard", "Alias for /setup"],
564286
565130
  ["/parallel", "Show current Ollama parallel inference slots"],
564287
565131
  ["/parallel <1-15>", "Set parallel slots (restarts Ollama, max 15)"],
565132
+ ["/broker", "Show resource broker status — loaded models, in-flight loads, RAM/VRAM headroom"],
565133
+ ["/broker evict <host>:<name>", "Force eviction of a tracked model (e.g. /broker evict ollama:moondream)"],
565134
+ ["/broker threshold <ram|vram|idle> <v>", "Tune broker thresholds (RAM/VRAM in MB, idle in seconds)"],
564288
565135
  ["/ollama cleanup", "Dry-run stale Ollama pool process cleanup"],
564289
565136
  ["/ollama cleanup --execute", "Terminate guarded stale Ollama pool runners"],
564290
565137
  ["/mcp", "Show MCP server/tool status and controls"],
@@ -564554,6 +565401,7 @@ var init_command_registry = __esm({
564554
565401
  endpoint: "network",
564555
565402
  provider: "network",
564556
565403
  parallel: "runtime",
565404
+ broker: "runtime",
564557
565405
  setup: "runtime",
564558
565406
  wizard: "runtime",
564559
565407
  mcp: "tools",
@@ -564695,6 +565543,7 @@ var init_command_registry = __esm({
564695
565543
  "model",
564696
565544
  "endpoint",
564697
565545
  "parallel",
565546
+ "broker",
564698
565547
  "commands",
564699
565548
  "cmds",
564700
565549
  "selfmodify",
@@ -565588,7 +566437,7 @@ var init_task_complete_box = __esm({
565588
566437
  });
565589
566438
 
565590
566439
  // packages/cli/src/tui/model-picker.ts
565591
- import { totalmem as totalmem3 } from "node:os";
566440
+ import { totalmem as totalmem4 } from "node:os";
565592
566441
  function isImageGenModel(name10, family) {
565593
566442
  return IMAGE_GEN_PATTERNS.some((p2) => p2.test(name10) || family && p2.test(family));
565594
566443
  }
@@ -565977,7 +566826,7 @@ async function queryModelContextSize(baseUrl, modelName) {
565977
566826
  }
565978
566827
  }
565979
566828
  function estimateRealisticContext(kvBytesPerToken, archMax, modelSizeGB2) {
565980
- const totalMemGB = totalmem3() / 1024 ** 3;
566829
+ const totalMemGB = totalmem4() / 1024 ** 3;
565981
566830
  const usableBytes = totalMemGB * 0.7 * 1024 ** 3;
565982
566831
  const maxTokens = Math.floor(usableBytes / kvBytesPerToken);
565983
566832
  let numCtx = Math.max(2048, Math.floor(maxTokens / 1024) * 1024);
@@ -567316,7 +568165,7 @@ var init_render = __esm({
567316
568165
  // packages/cli/src/tui/voice-session.ts
567317
568166
  import { createServer as createServer4 } from "node:http";
567318
568167
  import { spawn as spawn25, execSync as execSync49 } from "node:child_process";
567319
- import { EventEmitter as EventEmitter6 } from "node:events";
568168
+ import { EventEmitter as EventEmitter7 } from "node:events";
567320
568169
  function generateFrontendHTML() {
567321
568170
  return `<!DOCTYPE html>
567322
568171
  <html lang="en">
@@ -568051,7 +568900,7 @@ var init_voice_session = __esm({
568051
568900
  init_wrapper2();
568052
568901
  init_render();
568053
568902
  init_typed_node_events();
568054
- VoiceSession = class extends EventEmitter6 {
568903
+ VoiceSession = class extends EventEmitter7 {
568055
568904
  state;
568056
568905
  server = null;
568057
568906
  wss = null;
@@ -569166,11 +570015,11 @@ var init_voice_soul = __esm({
569166
570015
 
569167
570016
  // packages/cli/src/tui/expose.ts
569168
570017
  import { createServer as createServer5, request as httpRequest } from "node:http";
569169
- import { spawn as spawn26, exec as exec2 } from "node:child_process";
569170
- import { EventEmitter as EventEmitter7 } from "node:events";
570018
+ import { spawn as spawn26, exec as exec3 } from "node:child_process";
570019
+ import { EventEmitter as EventEmitter8 } from "node:events";
569171
570020
  import { randomBytes as randomBytes19, timingSafeEqual } from "node:crypto";
569172
570021
  import { URL as URL2 } from "node:url";
569173
- import { loadavg, cpus as cpus2, totalmem as totalmem4, freemem as freemem3 } from "node:os";
570022
+ import { loadavg, cpus as cpus2, totalmem as totalmem5, freemem as freemem4 } from "node:os";
569174
570023
  import { existsSync as existsSync88, readFileSync as readFileSync70, writeFileSync as writeFileSync44, unlinkSync as unlinkSync14, mkdirSync as mkdirSync48, readdirSync as readdirSync29, statSync as statSync34, statfsSync as statfsSync4 } from "node:fs";
569175
570024
  import { join as join103 } from "node:path";
569176
570025
  function cleanForwardHeaders(raw, targetHost) {
@@ -569276,8 +570125,8 @@ function parseRateLimitHeaders(headers) {
569276
570125
  async function collectSystemMetricsAsync() {
569277
570126
  const [l1, l5, l15] = loadavg();
569278
570127
  const cores = cpus2().length;
569279
- const totalMem = totalmem4();
569280
- const freeMem = freemem3();
570128
+ const totalMem = totalmem5();
570129
+ const freeMem = freemem4();
569281
570130
  const usedMem = totalMem - freeMem;
569282
570131
  let disk = {
569283
570132
  path: process.cwd(),
@@ -569310,7 +570159,7 @@ async function collectSystemMetricsAsync() {
569310
570159
  };
569311
570160
  try {
569312
570161
  const smi = await new Promise((resolve55, reject) => {
569313
- exec2(
570162
+ exec3(
569314
570163
  "nvidia-smi --query-gpu=utilization.gpu,memory.used,memory.total,name --format=csv,noheader,nounits 2>/dev/null",
569315
570164
  { encoding: "utf8", timeout: 3e3 },
569316
570165
  (err, stdout) => err ? reject(err) : resolve55(stdout)
@@ -569398,7 +570247,7 @@ var init_expose = __esm({
569398
570247
  custom: "http://127.0.0.1:11434"
569399
570248
  };
569400
570249
  STATE_FILE_NAME = "expose-state.json";
569401
- ExposeGateway = class _ExposeGateway extends EventEmitter7 {
570250
+ ExposeGateway = class _ExposeGateway extends EventEmitter8 {
569402
570251
  constructor(options2) {
569403
570252
  super();
569404
570253
  this.options = options2;
@@ -570277,7 +571126,7 @@ ${this.formatConnectionInfo()}`);
570277
571126
  }
570278
571127
  };
570279
571128
  P2P_STATE_FILE_NAME = "expose-p2p-state.json";
570280
- ExposeP2PGateway = class _ExposeP2PGateway extends EventEmitter7 {
571129
+ ExposeP2PGateway = class _ExposeP2PGateway extends EventEmitter8 {
570281
571130
  _nexusTool;
570282
571131
  // NexusTool instance
570283
571132
  _kind;
@@ -571048,7 +571897,7 @@ var init_secret_vault = __esm({
571048
571897
  });
571049
571898
 
571050
571899
  // packages/cli/src/tui/p2p/peer-mesh.ts
571051
- import { EventEmitter as EventEmitter8 } from "node:events";
571900
+ import { EventEmitter as EventEmitter9 } from "node:events";
571052
571901
  import { createServer as createServer6 } from "node:http";
571053
571902
  import { randomBytes as randomBytes21, createHash as createHash21, generateKeyPairSync } from "node:crypto";
571054
571903
  var PING_INTERVAL_MS, PEER_TIMEOUT_MS, GOSSIP_INTERVAL_MS, MAX_PEERS, PeerMesh;
@@ -571060,7 +571909,7 @@ var init_peer_mesh = __esm({
571060
571909
  PEER_TIMEOUT_MS = 9e4;
571061
571910
  GOSSIP_INTERVAL_MS = 6e4;
571062
571911
  MAX_PEERS = 50;
571063
- PeerMesh = class extends EventEmitter8 {
571912
+ PeerMesh = class extends EventEmitter9 {
571064
571913
  constructor(options2) {
571065
571914
  super();
571066
571915
  this.options = options2;
@@ -571506,7 +572355,7 @@ var init_peer_mesh = __esm({
571506
572355
  });
571507
572356
 
571508
572357
  // packages/cli/src/tui/p2p/inference-router.ts
571509
- import { EventEmitter as EventEmitter9 } from "node:events";
572358
+ import { EventEmitter as EventEmitter10 } from "node:events";
571510
572359
  var TRUST_WEIGHTS, InferenceRouter;
571511
572360
  var init_inference_router = __esm({
571512
572361
  "packages/cli/src/tui/p2p/inference-router.ts"() {
@@ -571518,7 +572367,7 @@ var init_inference_router = __esm({
571518
572367
  verified: 5,
571519
572368
  public: 2
571520
572369
  };
571521
- InferenceRouter = class extends EventEmitter9 {
572370
+ InferenceRouter = class extends EventEmitter10 {
571522
572371
  mesh;
571523
572372
  vault;
571524
572373
  defaultTimeoutMs;
@@ -571716,7 +572565,7 @@ var init_p2p = __esm({
571716
572565
  });
571717
572566
 
571718
572567
  // packages/cli/src/tui/call-agent.ts
571719
- import { EventEmitter as EventEmitter10 } from "node:events";
572568
+ import { EventEmitter as EventEmitter11 } from "node:events";
571720
572569
  import crypto13 from "node:crypto";
571721
572570
  function adaptTool(tool) {
571722
572571
  return {
@@ -571774,7 +572623,7 @@ var init_call_agent = __esm({
571774
572623
  }
571775
572624
  };
571776
572625
  _globalFeed = null;
571777
- CallSubAgent = class extends EventEmitter10 {
572626
+ CallSubAgent = class extends EventEmitter11 {
571778
572627
  tier;
571779
572628
  clientId;
571780
572629
  runner = null;
@@ -573876,8 +574725,8 @@ __export(system_metrics_exports, {
573876
574725
  getInstantSnapshot: () => getInstantSnapshot,
573877
574726
  instantaneousCpuPct: () => instantaneousCpuPct
573878
574727
  });
573879
- import { loadavg as loadavg2, cpus as cpus3, totalmem as totalmem5, freemem as freemem4, platform as platform4 } from "node:os";
573880
- import { exec as exec3 } from "node:child_process";
574728
+ import { loadavg as loadavg2, cpus as cpus3, totalmem as totalmem6, freemem as freemem5, platform as platform4 } from "node:os";
574729
+ import { exec as exec4 } from "node:child_process";
573881
574730
  import { readFile as readFile22 } from "node:fs/promises";
573882
574731
  function formatRate(bytesPerSec) {
573883
574732
  if (bytesPerSec < 1024) return `${Math.round(bytesPerSec)}B`;
@@ -573924,7 +574773,7 @@ async function collectNetworkMetrics() {
573924
574773
  if (plat === "darwin") {
573925
574774
  try {
573926
574775
  const output = await new Promise((resolve55, reject) => {
573927
- exec3("netstat -ib 2>/dev/null | head -30", { encoding: "utf8", timeout: 3e3 }, (err, stdout) => err ? reject(err) : resolve55(stdout));
574776
+ exec4("netstat -ib 2>/dev/null | head -30", { encoding: "utf8", timeout: 3e3 }, (err, stdout) => err ? reject(err) : resolve55(stdout));
573928
574777
  });
573929
574778
  let rxBytes = 0, txBytes = 0;
573930
574779
  for (const line of output.split("\n")) {
@@ -573967,7 +574816,7 @@ async function collectGpuMetrics() {
573967
574816
  if (_nvidiaSmiAvailable2 === false) return noGpu;
573968
574817
  try {
573969
574818
  const smi = await new Promise((resolve55, reject) => {
573970
- exec3(
574819
+ exec4(
573971
574820
  "nvidia-smi --query-gpu=index,uuid,utilization.gpu,memory.used,memory.total,name --format=csv,noheader,nounits 2>/dev/null",
573972
574821
  { encoding: "utf8", timeout: 3e3 },
573973
574822
  (err, stdout) => err ? reject(err) : resolve55(stdout)
@@ -574066,8 +574915,8 @@ function instantaneousCpuPct() {
574066
574915
  function collectCpuRam() {
574067
574916
  const cores = cpus3().length;
574068
574917
  const cpuModel = cpus3()[0]?.model ?? "";
574069
- const totalMem = totalmem5();
574070
- const usedMem = totalMem - freemem4();
574918
+ const totalMem = totalmem6();
574919
+ const usedMem = totalMem - freemem5();
574071
574920
  let cpuUtil = instantaneousCpuPct();
574072
574921
  if (cpuUtil < 0) {
574073
574922
  const [l1] = loadavg2();
@@ -574862,6 +575711,7 @@ var init_status_bar = __esm({
574862
575711
  init_text_selection();
574863
575712
  init_daemon_registry();
574864
575713
  init_overlay_lock();
575714
+ init_dist5();
574865
575715
  init_theme();
574866
575716
  init_layout2();
574867
575717
  EXPERT_TOOL_BASELINES = {
@@ -576073,6 +576923,10 @@ var init_status_bar = __esm({
576073
576923
  this._unifiedMetrics = m2;
576074
576924
  if (this.active) this.renderFooterPreserveCursor();
576075
576925
  }, intervalMs);
576926
+ try {
576927
+ getModelBroker().startPolling(Math.max(2e3, intervalMs * 2));
576928
+ } catch {
576929
+ }
576076
576930
  }
576077
576931
  /** Stop all metrics collection (local and remote) */
576078
576932
  stopAllMetrics() {
@@ -580498,7 +581352,7 @@ __export(setup_exports, {
580498
581352
  updateOllama: () => updateOllama
580499
581353
  });
580500
581354
  import * as readline from "node:readline";
580501
- import { execSync as execSync51, spawn as spawn28, exec as exec4 } from "node:child_process";
581355
+ import { execSync as execSync51, spawn as spawn28, exec as exec5 } from "node:child_process";
580502
581356
  import { promisify as promisify6 } from "node:util";
580503
581357
  import { existsSync as existsSync95, writeFileSync as writeFileSync49, readFileSync as readFileSync78, appendFileSync as appendFileSync6, mkdirSync as mkdirSync53 } from "node:fs";
580504
581358
  import { join as join111 } from "node:path";
@@ -583250,7 +584104,7 @@ var init_setup = __esm({
583250
584104
  init_dist();
583251
584105
  init_tui_select();
583252
584106
  init_listen();
583253
- execAsync2 = promisify6(exec4);
584107
+ execAsync2 = promisify6(exec5);
583254
584108
  OMNIUS_FIRST_RUN_BANNER = [
583255
584109
  " ░▒▓██████▓▒░░▒▓██████████████▓▒░░▒▓███████▓▒░░▒▓█▓▒░▒▓█▓▒░░▒▓█▓▒░░▒▓███████▓▒░ ",
583256
584110
  "░▒▓█▓▒░░▒▓█▓▒░▒▓█▓▒░░▒▓█▓▒░░▒▓█▓▒░▒▓█▓▒░░▒▓█▓▒░▒▓█▓▒░▒▓█▓▒░░▒▓█▓▒░▒▓█▓▒░ ",
@@ -595704,6 +596558,9 @@ async function handleSlashCommand(input, ctx3) {
595704
596558
  case "parallel":
595705
596559
  await handleParallel(arg, ctx3);
595706
596560
  return "handled";
596561
+ case "broker":
596562
+ await handleBroker(arg, ctx3);
596563
+ return "handled";
595707
596564
  case "ollama":
595708
596565
  await handleOllama(arg, ctx3);
595709
596566
  return "handled";
@@ -603161,6 +604018,83 @@ async function handlePeerEndpoint(peerId, authKey, ctx3, local) {
603161
604018
  );
603162
604019
  }
603163
604020
  }
604021
/**
 * Handle the `/broker` slash command.
 *
 * Supported forms (dispatch is on the first whitespace-separated token,
 * case-insensitively):
 *   - `/broker`, `/broker status|list|ls`  — print RAM/VRAM usage, the current
 *     thresholds, tracked loaded models and in-flight loads.
 *   - `/broker evict <host>:<name>`        — ask the broker to evict one model.
 *     The key is split at the FIRST colon, so names that themselves contain a
 *     colon (e.g. `ollama:llava:7b`) are preserved.
 *   - `/broker threshold <ram|vram|idle> <value>` — update a broker threshold
 *     (ram/vram in MB, idle in seconds; stored as milliseconds).
 *
 * @param {string} arg  Raw argument text following `/broker` (may be empty).
 * @param {*} _ctx      Command context; unused by this handler.
 * @returns {Promise<void>}
 */
async function handleBroker(arg, _ctx) {
  const broker = getModelBroker();
  const parts = (arg || "").trim().split(/\s+/).filter(Boolean);
  const op = parts[0]?.toLowerCase() ?? "";
  // The snapshot is refreshed before every subcommand, exactly as before.
  // NOTE(review): only the status view reads `snap`; whether evict/threshold
  // depend on the refresh as a side effect is not visible here — confirm
  // before moving this call into the status branch.
  const snap = await broker.pollOnce();
  if (op === "" || op === "status" || op === "list" || op === "ls") {
    safeLog("");
    safeLog(` ${c3.bold("Resource Broker")}`);
    safeLog("");
    safeLog(` ${c3.dim("RAM:")} ${snap.ramMB.used} / ${snap.ramMB.total} MB used (${snap.ramMB.free} MB free)`);
    if (snap.vramMB) {
      safeLog(` ${c3.dim("VRAM:")} ${snap.vramMB.used} / ${snap.vramMB.total} MB used (${snap.vramMB.free} MB free)`);
    } else {
      safeLog(` ${c3.dim("VRAM:")} ${c3.dim("(no GPU detected)")}`);
    }
    safeLog(` ${c3.dim("RAM headroom threshold:")} ${broker.ramHeadroomMB} MB`);
    safeLog(` ${c3.dim("VRAM headroom threshold:")} ${broker.vramHeadroomMB} MB`);
    safeLog(` ${c3.dim("Idle-evict threshold:")} ${Math.round(broker.idleEvictMs / 1e3)}s`);
    safeLog("");
    // One timestamp for the whole report so idle times and ages are consistent.
    const now = Date.now();
    if (snap.loaded.length === 0) {
      safeLog(` ${c3.dim("No loaded models tracked.")}`);
    } else {
      safeLog(` ${c3.bold("Loaded models:")}`);
      for (const model of snap.loaded) {
        const idle = Math.round((now - model.lastUsedAt) / 1e3);
        const ownerTag = model.owner ? c3.dim(` [owner=${model.owner}]`) : "";
        const ctxTag = model.numCtx ? c3.dim(` n_ctx=${model.numCtx}`) : "";
        safeLog(` ${c3.cyan(model.name)} (${model.host}/${model.domain}) vram=${model.vramMB}MB ram=${model.ramMB}MB idle=${idle}s${ctxTag}${ownerTag}`);
      }
    }
    if (snap.inflight.length > 0) {
      safeLog("");
      safeLog(` ${c3.bold("In-flight loads:")}`);
      for (const load of snap.inflight) {
        const age = Math.round((now - load.startedMs) / 1e3);
        safeLog(` ${c3.yellow(load.key)} owner=${load.owner} ${age}s ago`);
      }
    }
    safeLog("");
    safeLog(` ${c3.dim("Subcommands: /broker [status|evict <key>|threshold ram|vram|idle <value>]")}`);
    safeLog("");
    return;
  }
  if (op === "evict") {
    const key = parts[1] ?? "";
    const sep = key.indexOf(":");
    // Split at the first colon; both sides must be non-empty. Previously
    // "ollama:" or ":moondream" slipped through and evict() was called with
    // an empty host or name.
    const host = sep > 0 ? key.slice(0, sep) : "";
    const name10 = sep > 0 ? key.slice(sep + 1) : "";
    if (!host || !name10) {
      renderWarning("Usage: /broker evict <host>:<name> e.g. /broker evict ollama:moondream");
      return;
    }
    const ok3 = await broker.evict(host, name10, "user-requested");
    if (ok3) {
      renderInfo(`Evicted ${key} (actively unloaded)`);
    } else {
      renderInfo(`Unregistered ${key} (could not actively unload; subprocess may need manual cleanup)`);
    }
    return;
  }
  if (op === "threshold") {
    const which3 = parts[1]?.toLowerCase();
    const value2 = parts[2] ? Number(parts[2]) : NaN;
    if (!which3 || !Number.isFinite(value2) || value2 < 0) {
      renderWarning("Usage: /broker threshold <ram|vram|idle> <value> (ram/vram in MB, idle in seconds)");
      return;
    }
    if (which3 === "ram") {
      broker.ramHeadroomMB = value2;
    } else if (which3 === "vram") {
      broker.vramHeadroomMB = value2;
    } else if (which3 === "idle") {
      // The user supplies seconds; the broker field is in milliseconds.
      broker.idleEvictMs = value2 * 1e3;
    } else {
      renderWarning("Unknown threshold; use ram|vram|idle");
      return;
    }
    renderInfo(`Updated broker threshold ${which3} = ${value2}`);
    return;
  }
  renderWarning("Unknown /broker subcommand. Try: status | evict <host>:<name> | threshold <ram|vram|idle> <value>");
}
603164
604098
  async function handleParallel(arg, ctx3) {
603165
604099
  const { execSync: execSync61 } = await import("node:child_process");
603166
604100
  const baseUrl = ctx3.config.backendUrl || "http://localhost:11434";
@@ -604173,9 +605107,9 @@ async function handleUpdate(subcommand, ctx3) {
604173
605107
  }
604174
605108
  };
604175
605109
  }
604176
- const { exec: exec5, spawn: spawn34, execSync: es2 } = await import("node:child_process");
605110
+ const { exec: exec6, spawn: spawn34, execSync: es2 } = await import("node:child_process");
604177
605111
  const execA = (cmd, opts) => new Promise(
604178
- (res, rej) => exec5(
605112
+ (res, rej) => exec6(
604179
605113
  cmd,
604180
605114
  {
604181
605115
  encoding: "utf8",
@@ -604869,7 +605803,7 @@ async function handleUpdate(subcommand, ctx3) {
604869
605803
  installOverlay.setPhase("Native Modules");
604870
605804
  installOverlay.setStatus("Rebuilding native modules...");
604871
605805
  await new Promise((resolve55) => {
604872
- const child = exec5(
605806
+ const child = exec6(
604873
605807
  `${sudoPrefix}npm rebuild -g omnius 2>/dev/null || true`,
604874
605808
  { timeout: 12e4 },
604875
605809
  () => resolve55(true)
@@ -604911,7 +605845,7 @@ async function handleUpdate(subcommand, ctx3) {
604911
605845
  if (fsExists(venvPip2)) {
604912
605846
  installOverlay.setStatus("Upgrading Python packages...");
604913
605847
  await new Promise((resolve55) => {
604914
- const child = exec5(
605848
+ const child = exec6(
604915
605849
  `"${venvPip2}" install --upgrade moondream-station pytesseract Pillow opencv-python-headless numpy 2>/dev/null || true`,
604916
605850
  { timeout: 3e5 },
604917
605851
  (err) => resolve55(!err)
@@ -610886,6 +611820,9 @@ var init_bless_engine = __esm({
610886
611820
  async pingModel() {
610887
611821
  try {
610888
611822
  const url = `${this.config.backendUrl}/api/chat`;
611823
+ const broker = getModelBroker();
611824
+ const trainCtx = await broker.getNctxTrain(this.config.model);
611825
+ const numCtx = trainCtx && trainCtx > 0 ? Math.min(trainCtx, 8192) : void 0;
610889
611826
  await fetch(url, {
610890
611827
  method: "POST",
610891
611828
  headers: { "Content-Type": "application/json" },
@@ -610893,11 +611830,12 @@ var init_bless_engine = __esm({
610893
611830
  model: this.config.model,
610894
611831
  messages: [{ role: "user", content: "." }],
610895
611832
  stream: false,
610896
- options: { num_predict: 1 },
611833
+ options: numCtx ? { num_predict: 1, num_ctx: numCtx } : { num_predict: 1 },
610897
611834
  keep_alive: "30m"
610898
611835
  }),
610899
611836
  signal: AbortSignal.timeout(15e3)
610900
611837
  });
611838
+ broker.touch("ollama", this.config.model);
610901
611839
  this.state.keepAlivePings++;
610902
611840
  } catch {
610903
611841
  }
@@ -616279,19 +617217,51 @@ async function queryVisionModel(modelName, imagePath, prompt = "Describe what yo
616279
617217
  if (!existsSync116(imagePath)) return "";
616280
617218
  const imageBuffer = readFileSync95(imagePath);
616281
617219
  const base64Image = imageBuffer.toString("base64");
617220
+ const broker = getModelBroker();
617221
+ const decision2 = await broker.ensureModelLoadable({
617222
+ name: modelName,
617223
+ domain: "vision",
617224
+ host: "ollama",
617225
+ owner: "vision-ingress",
617226
+ requestedNumCtx: 2048
617227
+ });
617228
+ let effectiveModel = modelName;
617229
+ let numCtx;
617230
+ if (decision2.kind === "reject") {
617231
+ return "";
617232
+ } else if (decision2.kind === "degrade") {
617233
+ effectiveModel = decision2.fallback.name;
617234
+ } else if (decision2.kind === "evict") {
617235
+ for (const target of decision2.evictTargets) {
617236
+ await broker.evict(target.host, target.name, "make-room-for-vision");
617237
+ }
617238
+ numCtx = decision2.effectiveNumCtx;
617239
+ } else if (decision2.kind === "ok") {
617240
+ numCtx = decision2.effectiveNumCtx;
617241
+ } else if (decision2.kind === "wait-for-inflight") {
617242
+ const inner = await decision2.promise;
617243
+ if (inner.kind === "ok") numCtx = inner.effectiveNumCtx;
617244
+ else if (inner.kind === "degrade") effectiveModel = inner.fallback.name;
617245
+ else if (inner.kind === "reject") return "";
617246
+ }
617247
+ if (numCtx === void 0) {
617248
+ const trainCtx = await broker.getNctxTrain(effectiveModel);
617249
+ numCtx = trainCtx && trainCtx > 0 ? Math.min(trainCtx, 4096) : 2048;
617250
+ }
616282
617251
  try {
616283
617252
  const response = await fetch("http://localhost:11434/api/generate", {
616284
617253
  method: "POST",
616285
617254
  headers: { "Content-Type": "application/json" },
616286
617255
  body: JSON.stringify({
616287
- model: modelName,
617256
+ model: effectiveModel,
616288
617257
  prompt,
616289
617258
  images: [base64Image],
616290
617259
  stream: false,
616291
- options: { temperature: 0.3, num_predict: 1024 }
617260
+ options: { temperature: 0.3, num_predict: 1024, num_ctx: numCtx }
616292
617261
  })
616293
617262
  });
616294
617263
  if (!response.ok) return "";
617264
+ broker.touch("ollama", effectiveModel);
616295
617265
  const data = await response.json();
616296
617266
  return (data.response || "").trim();
616297
617267
  } catch {
@@ -616344,6 +617314,7 @@ function formatImageContextPrefix(result) {
616344
617314
  var init_vision_ingress = __esm({
616345
617315
  "packages/cli/src/tui/vision-ingress.ts"() {
616346
617316
  "use strict";
617317
+ init_dist5();
616347
617318
  }
616348
617319
  });
616349
617320
 
@@ -629030,12 +630001,12 @@ var direct_input_exports = {};
629030
630001
  __export(direct_input_exports, {
629031
630002
  DirectInput: () => DirectInput
629032
630003
  });
629033
- import { EventEmitter as EventEmitter11 } from "node:events";
630004
+ import { EventEmitter as EventEmitter12 } from "node:events";
629034
630005
  var DirectInput;
629035
630006
  var init_direct_input = __esm({
629036
630007
  "packages/cli/src/tui/direct-input.ts"() {
629037
630008
  "use strict";
629038
- DirectInput = class extends EventEmitter11 {
630009
+ DirectInput = class extends EventEmitter12 {
629039
630010
  /** Current input line text */
629040
630011
  line = "";
629041
630012
  /** Cursor position within .line (0-based) */
@@ -629754,7 +630725,7 @@ var voicechat_exports = {};
629754
630725
  __export(voicechat_exports, {
629755
630726
  VoiceChatSession: () => VoiceChatSession
629756
630727
  });
629757
- import { EventEmitter as EventEmitter12 } from "node:events";
630728
+ import { EventEmitter as EventEmitter13 } from "node:events";
629758
630729
  function clamp0110(x) {
629759
630730
  return x < 0 ? 0 : x > 1 ? 1 : x;
629760
630731
  }
@@ -629864,7 +630835,7 @@ Rules:
629864
630835
  - Prefer tools for factual queries; otherwise, answer directly with a short reply.`;
629865
630836
  MIN_SIGNAL_SCORE = 0.4;
629866
630837
  NOISE_ONLY_RE = /^(?:[.·…\s,;:!?\-–—_()\[\]{}"'`]+|(?:uh|um|erm|hmm|mm+|uhh+|umm+)[\s.!?]*)+$/i;
629867
- VoiceChatSession = class extends EventEmitter12 {
630838
+ VoiceChatSession = class extends EventEmitter13 {
629868
630839
  voice;
629869
630840
  listen;
629870
630841
  backendUrl;
@@ -630393,7 +631364,7 @@ __export(voice_runtime_exports, {
630393
631364
  synthesizeToWav: () => synthesizeToWav,
630394
631365
  unregisterClient: () => unregisterClient
630395
631366
  });
630396
- import { EventEmitter as EventEmitter13 } from "node:events";
631367
+ import { EventEmitter as EventEmitter14 } from "node:events";
630397
631368
  function getVoiceEngine() {
630398
631369
  if (!_voiceEngine) {
630399
631370
  _voiceEngine = new VoiceEngine();
@@ -630405,7 +631376,7 @@ function getDaemonListenEngine() {
630405
631376
  return _listenEngine;
630406
631377
  }
630407
631378
  function getVoiceBus() {
630408
- if (!_bus) _bus = new EventEmitter13();
631379
+ if (!_bus) _bus = new EventEmitter14();
630409
631380
  return _bus;
630410
631381
  }
630411
631382
  function getRuntimeStatus() {
@@ -661368,8 +662339,8 @@ NEW TASK: ${fullInput}`;
661368
662339
  const updateInfo = await checkForUpdate(version4);
661369
662340
  if (updateInfo) {
661370
662341
  _autoUpdatedThisSession = true;
661371
- const { exec: exec5 } = await import("node:child_process");
661372
- exec5(
662342
+ const { exec: exec6 } = await import("node:child_process");
662343
+ exec6(
661373
662344
  `npm install -g omnius@latest --prefer-online`,
661374
662345
  { timeout: 18e4 },
661375
662346
  (err) => {