omnius 1.0.136 → 1.0.138
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +547 -346
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -1337,29 +1337,36 @@ function ramSnapshotMB() {
|
|
|
1337
1337
|
const free = Math.round(freemem() / (1024 * 1024));
|
|
1338
1338
|
return { total, free, used: total - free };
|
|
1339
1339
|
}
|
|
1340
|
-
async function
|
|
1340
|
+
async function vramSnapshotPerDevice() {
|
|
1341
1341
|
if (_nvSmiAvailable === false)
|
|
1342
|
-
return
|
|
1342
|
+
return [];
|
|
1343
1343
|
try {
|
|
1344
1344
|
const out = await new Promise((resolve55, reject) => {
|
|
1345
|
-
exec("nvidia-smi --query-gpu=memory.total,memory.used,memory.free --format=csv,noheader,nounits 2>/dev/null", { encoding: "utf8", timeout: 3e3 }, (err, stdout) => err ? reject(err) : resolve55(stdout));
|
|
1345
|
+
exec("nvidia-smi --query-gpu=index,uuid,memory.total,memory.used,memory.free --format=csv,noheader,nounits 2>/dev/null", { encoding: "utf8", timeout: 3e3 }, (err, stdout) => err ? reject(err) : resolve55(stdout));
|
|
1346
1346
|
});
|
|
1347
1347
|
_nvSmiAvailable = true;
|
|
1348
|
-
|
|
1348
|
+
const devices = [];
|
|
1349
1349
|
for (const line of out.trim().split("\n")) {
|
|
1350
|
+
if (!line.trim())
|
|
1351
|
+
continue;
|
|
1350
1352
|
const parts = line.split(",").map((s2) => s2.trim());
|
|
1351
|
-
if (parts.length <
|
|
1353
|
+
if (parts.length < 5)
|
|
1352
1354
|
continue;
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
|
|
1355
|
+
const index = parseInt(parts[0] ?? "-1", 10);
|
|
1356
|
+
if (!Number.isFinite(index) || index < 0)
|
|
1357
|
+
continue;
|
|
1358
|
+
devices.push({
|
|
1359
|
+
index,
|
|
1360
|
+
uuid: parts[1] ?? "",
|
|
1361
|
+
total: parseInt(parts[2] ?? "0", 10) || 0,
|
|
1362
|
+
used: parseInt(parts[3] ?? "0", 10) || 0,
|
|
1363
|
+
free: parseInt(parts[4] ?? "0", 10) || 0
|
|
1364
|
+
});
|
|
1356
1365
|
}
|
|
1357
|
-
|
|
1358
|
-
return null;
|
|
1359
|
-
return { total, used, free };
|
|
1366
|
+
return devices;
|
|
1360
1367
|
} catch {
|
|
1361
1368
|
_nvSmiAvailable = false;
|
|
1362
|
-
return
|
|
1369
|
+
return [];
|
|
1363
1370
|
}
|
|
1364
1371
|
}
|
|
1365
1372
|
function getModelBroker() {
|
|
@@ -1403,7 +1410,9 @@ var init_model_broker = __esm({
|
|
|
1403
1410
|
ramHeadroomMB = DEFAULT_RAM_HEADROOM_MB;
|
|
1404
1411
|
vramHeadroomMB = DEFAULT_VRAM_HEADROOM_MB;
|
|
1405
1412
|
idleEvictMs = DEFAULT_IDLE_EVICT_MS;
|
|
1406
|
-
/** Inference slot capacity (auto-tunes from Ollama
|
|
1413
|
+
/** Inference slot capacity (shared pool aggregate; auto-tunes from Ollama
|
|
1414
|
+
* pool size when known). Per-device cap defaults to ceil(slotCapacity/N)
|
|
1415
|
+
* unless overridden via setPerGpuSlotCapacity. */
|
|
1407
1416
|
slotCapacity = DEFAULT_SLOT_CAPACITY;
|
|
1408
1417
|
/** Maximum queue depth before queue pressure is emitted. */
|
|
1409
1418
|
queueCapacity = DEFAULT_QUEUE_CAPACITY;
|
|
@@ -1419,6 +1428,15 @@ var init_model_broker = __esm({
|
|
|
1419
1428
|
_throughput = /* @__PURE__ */ new Map();
|
|
1420
1429
|
/** Monotonic counter for slot ids. */
|
|
1421
1430
|
_slotIdSeq = 0;
|
|
1431
|
+
/** Per-GPU slot capacity override. When unset, broker derives a per-GPU
|
|
1432
|
+
* cap from slotCapacity / detected device count. */
|
|
1433
|
+
_perGpuSlotCapacity = /* @__PURE__ */ new Map();
|
|
1434
|
+
/** Cached per-device VRAM (refreshed by pollOnce). */
|
|
1435
|
+
_vramByDevice = [];
|
|
1436
|
+
/** Optional provider that maps an Ollama model name to its current GPU.
|
|
1437
|
+
* Wired by the CLI/orchestrator at startup so the broker can copy pool
|
|
1438
|
+
* affinity onto LoadedModel records without importing the pool directly. */
|
|
1439
|
+
_ollamaAffinityProvider = null;
|
|
1422
1440
|
static getInstance() {
|
|
1423
1441
|
if (!_ModelBroker._instance)
|
|
1424
1442
|
_ModelBroker._instance = new _ModelBroker();
|
|
@@ -1471,6 +1489,18 @@ var init_model_broker = __esm({
|
|
|
1471
1489
|
setOllamaBaseUrl(url) {
|
|
1472
1490
|
this._ollamaBaseUrl = url;
|
|
1473
1491
|
}
|
|
1492
|
+
/**
|
|
1493
|
+
* Wire a function that resolves an Ollama model name to its current GPU
|
|
1494
|
+
* affinity (from the Ollama pool's per-instance state). The CLI calls
|
|
1495
|
+
* this at startup with a closure over `getOllamaPool().status()` so the
|
|
1496
|
+
* broker can copy gpuIndex/gpuUuid onto LoadedModel records without
|
|
1497
|
+
* importing from @omnius/orchestrator (which would create a circular dep).
|
|
1498
|
+
*
|
|
1499
|
+
* Pass null to clear.
|
|
1500
|
+
*/
|
|
1501
|
+
setOllamaAffinityProvider(provider) {
|
|
1502
|
+
this._ollamaAffinityProvider = provider;
|
|
1503
|
+
}
|
|
1474
1504
|
/** One poll cycle — refreshes /api/ps and emits snapshot. */
|
|
1475
1505
|
async pollOnce() {
|
|
1476
1506
|
await Promise.all([
|
|
@@ -1539,30 +1569,44 @@ var init_model_broker = __esm({
|
|
|
1539
1569
|
const estVram = spec.estimatedVramMB ?? this.estimateFootprintVramMB(spec);
|
|
1540
1570
|
const estRam = spec.estimatedRamMB ?? this.estimateFootprintRamMB(spec);
|
|
1541
1571
|
const ram = ramSnapshotMB();
|
|
1542
|
-
const vram = await vramSnapshotMB();
|
|
1543
1572
|
const ramFitsAfter = ram.free - estRam >= this.ramHeadroomMB;
|
|
1544
|
-
const
|
|
1573
|
+
const devices = await vramSnapshotPerDevice();
|
|
1574
|
+
this._vramByDevice = devices;
|
|
1575
|
+
let chosenGpu = null;
|
|
1576
|
+
let vramFitsAfter = devices.length === 0;
|
|
1577
|
+
if (devices.length > 0) {
|
|
1578
|
+
const candidates = devices.filter((d2) => spec.preferredGpuIndex === void 0 || d2.index === spec.preferredGpuIndex).filter((d2) => d2.free - estVram >= this.vramHeadroomMB).sort((a2, b) => b.free - a2.free);
|
|
1579
|
+
if (candidates.length > 0) {
|
|
1580
|
+
chosenGpu = candidates[0].index;
|
|
1581
|
+
vramFitsAfter = true;
|
|
1582
|
+
}
|
|
1583
|
+
}
|
|
1545
1584
|
if (ramFitsAfter && vramFitsAfter) {
|
|
1546
|
-
const
|
|
1585
|
+
const decision2 = { kind: "ok", effectiveNumCtx, gpuIndex: chosenGpu };
|
|
1586
|
+
const promise = Promise.resolve(decision2);
|
|
1547
1587
|
this._inflight.set(key, { startedMs: Date.now(), owner: spec.owner, promise });
|
|
1548
1588
|
setTimeout(() => this._inflight.delete(key), spec.loadTimeoutMs ?? DEFAULT_INFLIGHT_WAIT_MS).unref?.();
|
|
1549
|
-
return
|
|
1589
|
+
return decision2;
|
|
1550
1590
|
}
|
|
1591
|
+
const targetGpu = chosenGpu ?? this.deviceWithMostPressureRelativeTo(devices, estVram);
|
|
1592
|
+
const needVramMB = vramFitsAfter ? 0 : targetGpu !== null ? estVram + this.vramHeadroomMB - (devices.find((d2) => d2.index === targetGpu)?.free ?? 0) : estVram + this.vramHeadroomMB;
|
|
1551
1593
|
const evictTargets = this.pickEvictionCandidates({
|
|
1552
|
-
needVramMB
|
|
1594
|
+
needVramMB,
|
|
1553
1595
|
needRamMB: ramFitsAfter ? 0 : estRam + this.ramHeadroomMB - ram.free,
|
|
1554
1596
|
requestingPriority: spec.priority ?? 0,
|
|
1555
|
-
requestingDomain: spec.domain
|
|
1597
|
+
requestingDomain: spec.domain,
|
|
1598
|
+
targetGpu
|
|
1556
1599
|
});
|
|
1557
1600
|
if (evictTargets.length > 0) {
|
|
1558
|
-
return { kind: "evict", evictTargets, effectiveNumCtx };
|
|
1601
|
+
return { kind: "evict", evictTargets, effectiveNumCtx, gpuIndex: targetGpu };
|
|
1559
1602
|
}
|
|
1560
1603
|
const fallback = await this.findRunnableFallback(spec);
|
|
1561
1604
|
if (fallback) {
|
|
1562
1605
|
this.emit("degraded", spec, fallback, "insufficient-memory-no-evictable");
|
|
1563
1606
|
return { kind: "degrade", fallback, reason: "insufficient-memory-no-evictable" };
|
|
1564
1607
|
}
|
|
1565
|
-
const
|
|
1608
|
+
const perDeviceSummary = devices.length === 0 ? "no GPU" : devices.map((d2) => `gpu${d2.index}=${d2.free}MB`).join(", ");
|
|
1609
|
+
const reason = `insufficient resources (need ~${estRam}MB RAM, ~${estVram}MB VRAM; free ${ram.free}MB RAM; VRAM ${perDeviceSummary}) and no evictable / fallback models`;
|
|
1566
1610
|
this.emit("rejected", spec, reason);
|
|
1567
1611
|
return { kind: "reject", reason };
|
|
1568
1612
|
}
|
|
@@ -1638,10 +1682,22 @@ var init_model_broker = __esm({
|
|
|
1638
1682
|
seen.add(key);
|
|
1639
1683
|
const vramMB = Math.round((m2.size_vram ?? 0) / (1024 * 1024));
|
|
1640
1684
|
const ramMB = Math.round(((m2.size ?? 0) - (m2.size_vram ?? 0)) / (1024 * 1024));
|
|
1685
|
+
let affinity = null;
|
|
1686
|
+
try {
|
|
1687
|
+
affinity = this._ollamaAffinityProvider ? this._ollamaAffinityProvider(m2.name) : null;
|
|
1688
|
+
} catch {
|
|
1689
|
+
affinity = null;
|
|
1690
|
+
}
|
|
1641
1691
|
const existing = this._loaded.get(key);
|
|
1642
1692
|
if (existing) {
|
|
1643
1693
|
existing.vramMB = vramMB || existing.vramMB;
|
|
1644
1694
|
existing.ramMB = ramMB || existing.ramMB;
|
|
1695
|
+
if (affinity) {
|
|
1696
|
+
if (affinity.gpuIndex !== null)
|
|
1697
|
+
existing.gpuIndex = affinity.gpuIndex;
|
|
1698
|
+
if (affinity.gpuUuid !== null)
|
|
1699
|
+
existing.gpuUuid = affinity.gpuUuid;
|
|
1700
|
+
}
|
|
1645
1701
|
} else {
|
|
1646
1702
|
const tracked = this.registerLoaded({
|
|
1647
1703
|
key,
|
|
@@ -1653,7 +1709,9 @@ var init_model_broker = __esm({
|
|
|
1653
1709
|
ramMB,
|
|
1654
1710
|
priority: 0,
|
|
1655
1711
|
loadedAt: now,
|
|
1656
|
-
lastUsedAt: now
|
|
1712
|
+
lastUsedAt: now,
|
|
1713
|
+
gpuIndex: affinity?.gpuIndex ?? null,
|
|
1714
|
+
gpuUuid: affinity?.gpuUuid ?? null
|
|
1657
1715
|
});
|
|
1658
1716
|
void tracked;
|
|
1659
1717
|
}
|
|
@@ -1746,7 +1804,8 @@ var init_model_broker = __esm({
|
|
|
1746
1804
|
m2.domain !== req2.requestingDomain || this.countByDomain(req2.requestingDomain) > 1
|
|
1747
1805
|
);
|
|
1748
1806
|
const idle = (m2) => now - m2.lastUsedAt > this.idleEvictMs;
|
|
1749
|
-
const
|
|
1807
|
+
const onTargetGpu = (m2) => req2.targetGpu === void 0 || req2.targetGpu === null ? true : m2.gpuIndex === req2.targetGpu;
|
|
1808
|
+
const evictable = [...this._loaded.values()].filter((m2) => m2.priority <= req2.requestingPriority).filter(sameDomainOk).filter(onTargetGpu).sort((a2, b) => {
|
|
1750
1809
|
const aIdle = idle(a2) ? 0 : 1;
|
|
1751
1810
|
const bIdle = idle(b) ? 0 : 1;
|
|
1752
1811
|
if (aIdle !== bIdle)
|
|
@@ -1767,6 +1826,24 @@ var init_model_broker = __esm({
|
|
|
1767
1826
|
return targets;
|
|
1768
1827
|
return [];
|
|
1769
1828
|
}
|
|
1829
|
+
/** Pick the GPU whose free-VRAM gap to the requested footprint is smallest
|
|
1830
|
+
* (i.e. closest to fitting). Used when no device cleanly fits — eviction
|
|
1831
|
+
* on this device has the best chance of opening room. Returns null when
|
|
1832
|
+
* no GPUs are detected. */
|
|
1833
|
+
deviceWithMostPressureRelativeTo(devices, needMB) {
|
|
1834
|
+
if (devices.length === 0)
|
|
1835
|
+
return null;
|
|
1836
|
+
let best = null;
|
|
1837
|
+
let bestGap = Infinity;
|
|
1838
|
+
for (const d2 of devices) {
|
|
1839
|
+
const gap = needMB - d2.free;
|
|
1840
|
+
if (gap < bestGap) {
|
|
1841
|
+
bestGap = gap;
|
|
1842
|
+
best = d2;
|
|
1843
|
+
}
|
|
1844
|
+
}
|
|
1845
|
+
return best?.index ?? null;
|
|
1846
|
+
}
|
|
1770
1847
|
countByDomain(domain) {
|
|
1771
1848
|
let n2 = 0;
|
|
1772
1849
|
for (const m2 of this._loaded.values())
|
|
@@ -1897,17 +1974,31 @@ var init_model_broker = __esm({
|
|
|
1897
1974
|
inflight: [...this._inflight.entries()].map(([key, v]) => ({ key, owner: v.owner, startedMs: v.startedMs })),
|
|
1898
1975
|
ramMB: ram,
|
|
1899
1976
|
vramMB: vram,
|
|
1977
|
+
vramPerDevice: [...this._vramByDevice],
|
|
1900
1978
|
lastPollAt: Date.now(),
|
|
1901
1979
|
slots: this.buildSlotsSnapshot()
|
|
1902
1980
|
};
|
|
1903
1981
|
}
|
|
1904
1982
|
buildSlotsSnapshot() {
|
|
1905
1983
|
const byModel = {};
|
|
1984
|
+
const byGpu = {};
|
|
1906
1985
|
for (const slot of this._activeSlots.values()) {
|
|
1907
1986
|
const k = slot.model;
|
|
1908
1987
|
if (!byModel[k])
|
|
1909
1988
|
byModel[k] = { inUse: 0, tokensPerSec: 0, samples: 0 };
|
|
1910
1989
|
byModel[k].inUse += 1;
|
|
1990
|
+
if (slot.gpuIndex !== null && slot.gpuIndex !== void 0) {
|
|
1991
|
+
if (!byGpu[slot.gpuIndex])
|
|
1992
|
+
byGpu[slot.gpuIndex] = { inUse: 0, capacity: this.perGpuSlotCapacity(slot.gpuIndex), loadedMB: 0 };
|
|
1993
|
+
byGpu[slot.gpuIndex].inUse += 1;
|
|
1994
|
+
}
|
|
1995
|
+
}
|
|
1996
|
+
for (const m2 of this._loaded.values()) {
|
|
1997
|
+
if (m2.gpuIndex !== null && m2.gpuIndex !== void 0) {
|
|
1998
|
+
if (!byGpu[m2.gpuIndex])
|
|
1999
|
+
byGpu[m2.gpuIndex] = { inUse: 0, capacity: this.perGpuSlotCapacity(m2.gpuIndex), loadedMB: 0 };
|
|
2000
|
+
byGpu[m2.gpuIndex].loadedMB += m2.vramMB;
|
|
2001
|
+
}
|
|
1911
2002
|
}
|
|
1912
2003
|
for (const [model, tp] of this._throughput) {
|
|
1913
2004
|
if (!byModel[model])
|
|
@@ -1915,23 +2006,46 @@ var init_model_broker = __esm({
|
|
|
1915
2006
|
byModel[model].tokensPerSec = tp.tokensPerSec;
|
|
1916
2007
|
byModel[model].samples = tp.samples;
|
|
1917
2008
|
}
|
|
2009
|
+
for (const d2 of this._vramByDevice) {
|
|
2010
|
+
if (!byGpu[d2.index])
|
|
2011
|
+
byGpu[d2.index] = { inUse: 0, capacity: this.perGpuSlotCapacity(d2.index), loadedMB: 0 };
|
|
2012
|
+
}
|
|
1918
2013
|
return {
|
|
1919
2014
|
inUse: this._activeSlots.size,
|
|
1920
2015
|
capacity: this.slotCapacity,
|
|
1921
2016
|
queueDepth: this._slotQueue.length,
|
|
1922
2017
|
queueCapacity: this.queueCapacity,
|
|
1923
|
-
byModel
|
|
2018
|
+
byModel,
|
|
2019
|
+
byGpu
|
|
1924
2020
|
};
|
|
1925
2021
|
}
|
|
2022
|
+
/** Per-GPU slot capacity. Returns the override when set, else ceil(slotCapacity / deviceCount). */
|
|
2023
|
+
perGpuSlotCapacity(gpuIndex) {
|
|
2024
|
+
const override = this._perGpuSlotCapacity.get(gpuIndex);
|
|
2025
|
+
if (override !== void 0)
|
|
2026
|
+
return override;
|
|
2027
|
+
const n2 = Math.max(1, this._vramByDevice.length);
|
|
2028
|
+
return Math.max(1, Math.ceil(this.slotCapacity / n2));
|
|
2029
|
+
}
|
|
1926
2030
|
async checkPressure(snap) {
|
|
1927
2031
|
if (snap.ramMB.free < this.ramHeadroomMB) {
|
|
1928
2032
|
this.emit("pressure", "ram", snap.ramMB.free, this.ramHeadroomMB);
|
|
1929
2033
|
}
|
|
1930
|
-
const
|
|
1931
|
-
|
|
1932
|
-
|
|
1933
|
-
|
|
1934
|
-
|
|
2034
|
+
const devices = await vramSnapshotPerDevice();
|
|
2035
|
+
this._vramByDevice = devices;
|
|
2036
|
+
if (devices.length > 0) {
|
|
2037
|
+
let total = 0, used = 0, free = 0;
|
|
2038
|
+
for (const d2 of devices) {
|
|
2039
|
+
total += d2.total;
|
|
2040
|
+
used += d2.used;
|
|
2041
|
+
free += d2.free;
|
|
2042
|
+
}
|
|
2043
|
+
snap.vramMB = { total, used, free };
|
|
2044
|
+
snap.vramPerDevice = devices;
|
|
2045
|
+
for (const d2 of devices) {
|
|
2046
|
+
if (d2.free < this.vramHeadroomMB) {
|
|
2047
|
+
this.emit("pressure", "vram", d2.free, this.vramHeadroomMB);
|
|
2048
|
+
}
|
|
1935
2049
|
}
|
|
1936
2050
|
}
|
|
1937
2051
|
const queueThreshold = Math.floor(this.queueCapacity * 0.8);
|
|
@@ -1960,18 +2074,22 @@ var init_model_broker = __esm({
|
|
|
1960
2074
|
* upstream callers (e.g. Telegram poll loop) should slow ingress.
|
|
1961
2075
|
*/
|
|
1962
2076
|
acquireInferenceSlot(spec) {
|
|
1963
|
-
|
|
2077
|
+
const chosenGpu = this.pickGpuForSlot(spec);
|
|
2078
|
+
const gpuOk = chosenGpu === null || this.activeSlotsOnGpu(chosenGpu) < this.perGpuSlotCapacity(chosenGpu);
|
|
2079
|
+
if (gpuOk && this._activeSlots.size < this.slotCapacity) {
|
|
1964
2080
|
return Promise.resolve(this.admitSlot(
|
|
1965
2081
|
spec,
|
|
1966
2082
|
/*reserved*/
|
|
1967
|
-
false
|
|
2083
|
+
false,
|
|
2084
|
+
chosenGpu
|
|
1968
2085
|
));
|
|
1969
2086
|
}
|
|
1970
2087
|
if (spec.sessionKey && !this._reservedBySession.has(spec.sessionKey) && this._activeSlots.size < this.slotCapacity + 1) {
|
|
1971
2088
|
const slot = this.admitSlot(
|
|
1972
2089
|
spec,
|
|
1973
2090
|
/*reserved*/
|
|
1974
|
-
true
|
|
2091
|
+
true,
|
|
2092
|
+
chosenGpu
|
|
1975
2093
|
);
|
|
1976
2094
|
this._reservedBySession.set(spec.sessionKey, slot.info.id);
|
|
1977
2095
|
return Promise.resolve(slot);
|
|
@@ -2038,7 +2156,7 @@ var init_model_broker = __esm({
|
|
|
2038
2156
|
});
|
|
2039
2157
|
}
|
|
2040
2158
|
/** Admit a slot — internal, called from acquire fast path and from drainQueue. */
|
|
2041
|
-
admitSlot(spec, reserved) {
|
|
2159
|
+
admitSlot(spec, reserved, gpuIndex = null) {
|
|
2042
2160
|
const id = `slot-${++this._slotIdSeq}-${Date.now().toString(36)}`;
|
|
2043
2161
|
const info = {
|
|
2044
2162
|
id,
|
|
@@ -2048,7 +2166,8 @@ var init_model_broker = __esm({
|
|
|
2048
2166
|
sessionKey: spec.sessionKey,
|
|
2049
2167
|
acquiredAt: Date.now(),
|
|
2050
2168
|
promptTokens: spec.promptTokens ?? 0,
|
|
2051
|
-
reserved
|
|
2169
|
+
reserved,
|
|
2170
|
+
gpuIndex
|
|
2052
2171
|
};
|
|
2053
2172
|
this._activeSlots.set(id, info);
|
|
2054
2173
|
this.emit("slotAcquired", info);
|
|
@@ -2064,6 +2183,35 @@ var init_model_broker = __esm({
|
|
|
2064
2183
|
}
|
|
2065
2184
|
};
|
|
2066
2185
|
}
|
|
2186
|
+
/** Count of active slots pinned to a given GPU. */
|
|
2187
|
+
activeSlotsOnGpu(gpuIndex) {
|
|
2188
|
+
let n2 = 0;
|
|
2189
|
+
for (const s2 of this._activeSlots.values()) {
|
|
2190
|
+
if (s2.gpuIndex === gpuIndex)
|
|
2191
|
+
n2++;
|
|
2192
|
+
}
|
|
2193
|
+
return n2;
|
|
2194
|
+
}
|
|
2195
|
+
/**
|
|
2196
|
+
* Pick a GPU for a new inference slot. Honors caller's preferredGpuIndex
|
|
2197
|
+
* when set; otherwise picks the GPU with the highest free VRAM that has
|
|
2198
|
+
* room for the estimated footprint and an open per-device slot.
|
|
2199
|
+
*
|
|
2200
|
+
* Returns null when no GPU is detected (CPU-only) or no device fits — in
|
|
2201
|
+
* the latter case the slot is admitted unpinned and the underlying
|
|
2202
|
+
* subprocess will pick whatever CUDA exposes by default.
|
|
2203
|
+
*/
|
|
2204
|
+
pickGpuForSlot(spec) {
|
|
2205
|
+
if (this._vramByDevice.length === 0)
|
|
2206
|
+
return null;
|
|
2207
|
+
const candidates = this._vramByDevice.filter((d2) => spec.preferredGpuIndex === void 0 || d2.index === spec.preferredGpuIndex).filter((d2) => this.activeSlotsOnGpu(d2.index) < this.perGpuSlotCapacity(d2.index)).filter((d2) => spec.estimatedVramMB === void 0 || d2.free >= spec.estimatedVramMB).sort((a2, b) => b.free - a2.free);
|
|
2208
|
+
return candidates[0]?.index ?? null;
|
|
2209
|
+
}
|
|
2210
|
+
/** Configure per-GPU slot capacity. Overrides the slotCapacity-derived default. */
|
|
2211
|
+
setPerGpuSlotCapacity(gpuIndex, capacity) {
|
|
2212
|
+
this._perGpuSlotCapacity.set(gpuIndex, Math.max(1, Math.floor(capacity)));
|
|
2213
|
+
this.drainSlotQueue();
|
|
2214
|
+
}
|
|
2067
2215
|
releaseSlot(info, outcome) {
|
|
2068
2216
|
this._activeSlots.delete(info.id);
|
|
2069
2217
|
if (info.sessionKey && this._reservedBySession.get(info.sessionKey) === info.id) {
|
|
@@ -2089,8 +2237,18 @@ var init_model_broker = __esm({
|
|
|
2089
2237
|
this.drainSlotQueue();
|
|
2090
2238
|
}
|
|
2091
2239
|
drainSlotQueue() {
|
|
2092
|
-
|
|
2093
|
-
|
|
2240
|
+
const queueCopy = [...this._slotQueue];
|
|
2241
|
+
for (const entry of queueCopy) {
|
|
2242
|
+
if (this._activeSlots.size >= this.slotCapacity)
|
|
2243
|
+
break;
|
|
2244
|
+
const chosenGpu = this.pickGpuForSlot(entry.spec);
|
|
2245
|
+
const gpuOk = chosenGpu === null || this.activeSlotsOnGpu(chosenGpu) < this.perGpuSlotCapacity(chosenGpu);
|
|
2246
|
+
if (!gpuOk)
|
|
2247
|
+
continue;
|
|
2248
|
+
const idx = this._slotQueue.indexOf(entry);
|
|
2249
|
+
if (idx < 0)
|
|
2250
|
+
continue;
|
|
2251
|
+
this._slotQueue.splice(idx, 1);
|
|
2094
2252
|
if (entry.onSignalAbort && entry.spec.signal) {
|
|
2095
2253
|
entry.spec.signal.removeEventListener("abort", entry.onSignalAbort);
|
|
2096
2254
|
}
|
|
@@ -2104,7 +2262,8 @@ var init_model_broker = __esm({
|
|
|
2104
2262
|
const slot = this.admitSlot(
|
|
2105
2263
|
entry.spec,
|
|
2106
2264
|
/*reserved*/
|
|
2107
|
-
false
|
|
2265
|
+
false,
|
|
2266
|
+
chosenGpu
|
|
2108
2267
|
);
|
|
2109
2268
|
try {
|
|
2110
2269
|
entry.resolve(slot);
|
|
@@ -19608,26 +19767,16 @@ function extractSkillForQuery(skill, content, query, budgetTokens = 900) {
|
|
|
19608
19767
|
function buildSkillsSummary(skills) {
|
|
19609
19768
|
if (skills.length === 0)
|
|
19610
19769
|
return "";
|
|
19611
|
-
const lines = [
|
|
19612
|
-
"## Skills Index",
|
|
19613
|
-
"",
|
|
19614
|
-
`${skills.length} skills available. Call \`skill_list\` to search, \`skill_execute <name>\` to load full instructions.`,
|
|
19615
|
-
""
|
|
19616
|
-
];
|
|
19617
19770
|
const bySource = /* @__PURE__ */ new Map();
|
|
19618
19771
|
for (const s2 of skills) {
|
|
19619
|
-
|
|
19620
|
-
group.push(s2);
|
|
19621
|
-
bySource.set(s2.source, group);
|
|
19622
|
-
}
|
|
19623
|
-
for (const [source, group] of bySource) {
|
|
19624
|
-
const names = group.map((s2) => {
|
|
19625
|
-
const t2 = s2.triggers[0];
|
|
19626
|
-
return t2 ? `${s2.name}(${t2})` : s2.name;
|
|
19627
|
-
});
|
|
19628
|
-
lines.push(`**${source}** (${group.length}): ${names.join(", ")}`);
|
|
19772
|
+
bySource.set(s2.source, (bySource.get(s2.source) ?? 0) + 1);
|
|
19629
19773
|
}
|
|
19630
|
-
|
|
19774
|
+
const sourcesSummary = [...bySource.entries()].sort((a2, b) => b[1] - a2[1]).map(([source, count]) => `${source}=${count}`).join(", ");
|
|
19775
|
+
return [
|
|
19776
|
+
"## Skills Index",
|
|
19777
|
+
`${skills.length} skills available across ${bySource.size} sources (${sourcesSummary}).`,
|
|
19778
|
+
"Use `skill_list` (with optional `filter` or `source`) to search; `skill_execute <name>` to load full instructions."
|
|
19779
|
+
].join("\n");
|
|
19631
19780
|
}
|
|
19632
19781
|
function safeReaddir2(dir, dirsOnly = false) {
|
|
19633
19782
|
try {
|
|
@@ -255439,6 +255588,11 @@ import sys
|
|
|
255439
255588
|
import time
|
|
255440
255589
|
from pathlib import Path
|
|
255441
255590
|
|
|
255591
|
+
# Broker-picked GPU pinning — MUST run before importing torch.
|
|
255592
|
+
_omnius_gpu = os.environ.get("OMNIUS_GPU_INDEX", "").strip()
|
|
255593
|
+
if _omnius_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ:
|
|
255594
|
+
os.environ["CUDA_VISIBLE_DEVICES"] = _omnius_gpu
|
|
255595
|
+
|
|
255442
255596
|
def _progress(stage, message, percent=None):
|
|
255443
255597
|
payload = {"omnius_progress": True, "stage": stage, "message": message}
|
|
255444
255598
|
if percent is not None:
|
|
@@ -255597,9 +255751,15 @@ if __name__ == "__main__":
|
|
|
255597
255751
|
SDCPP_RUNNER = String.raw`#!/usr/bin/env python3
|
|
255598
255752
|
import argparse
|
|
255599
255753
|
import json
|
|
255754
|
+
import os
|
|
255600
255755
|
import time
|
|
255601
255756
|
from pathlib import Path
|
|
255602
255757
|
|
|
255758
|
+
# Broker-picked GPU pinning — sd-cpp's CUDA backend honors CUDA_VISIBLE_DEVICES.
|
|
255759
|
+
_omnius_gpu = os.environ.get("OMNIUS_GPU_INDEX", "").strip()
|
|
255760
|
+
if _omnius_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ:
|
|
255761
|
+
os.environ["CUDA_VISIBLE_DEVICES"] = _omnius_gpu
|
|
255762
|
+
|
|
255603
255763
|
def main():
|
|
255604
255764
|
parser = argparse.ArgumentParser()
|
|
255605
255765
|
parser.add_argument("--model-path", required=True)
|
|
@@ -255713,6 +255873,9 @@ if __name__ == "__main__":
|
|
|
255713
255873
|
defaultModel;
|
|
255714
255874
|
defaultBackend;
|
|
255715
255875
|
promptExpander = null;
|
|
255876
|
+
/** Broker-chosen GPU pinning for the in-flight generation. Read by the
|
|
255877
|
+
* spawn path to set OMNIUS_GPU_INDEX in the subprocess env. */
|
|
255878
|
+
_brokerGpuIndex = null;
|
|
255716
255879
|
constructor(cwd4, ollamaUrl = "http://localhost:11434", defaults3 = {}) {
|
|
255717
255880
|
this.cwd = cwd4;
|
|
255718
255881
|
this.ollamaUrl = ollamaUrl.replace(/\/v1\/?$/, "").replace(/\/$/, "");
|
|
@@ -255788,6 +255951,7 @@ if __name__ == "__main__":
|
|
|
255788
255951
|
const candidates = imageGenerationFallbackCandidates(requestedModel, requestedBackend, generationFallbackEnabled(args));
|
|
255789
255952
|
const broker = getModelBroker();
|
|
255790
255953
|
const firstCandidate = candidates[0];
|
|
255954
|
+
let brokerGpuIndex = null;
|
|
255791
255955
|
if (firstCandidate) {
|
|
255792
255956
|
const decision2 = await broker.ensureModelLoadable({
|
|
255793
255957
|
name: firstCandidate.model,
|
|
@@ -255799,6 +255963,9 @@ if __name__ == "__main__":
|
|
|
255799
255963
|
for (const target of decision2.evictTargets) {
|
|
255800
255964
|
await broker.evict(target.host, target.name, "image-gen-needs-room");
|
|
255801
255965
|
}
|
|
255966
|
+
brokerGpuIndex = decision2.gpuIndex ?? null;
|
|
255967
|
+
} else if (decision2.kind === "ok") {
|
|
255968
|
+
brokerGpuIndex = decision2.gpuIndex ?? null;
|
|
255802
255969
|
} else if (decision2.kind === "reject") {
|
|
255803
255970
|
return {
|
|
255804
255971
|
success: false,
|
|
@@ -255808,6 +255975,7 @@ if __name__ == "__main__":
|
|
|
255808
255975
|
};
|
|
255809
255976
|
}
|
|
255810
255977
|
}
|
|
255978
|
+
this._brokerGpuIndex = brokerGpuIndex;
|
|
255811
255979
|
try {
|
|
255812
255980
|
return await this.generateCandidateLadder({ candidates, prompt, args, seed, start: start2 });
|
|
255813
255981
|
} catch (err) {
|
|
@@ -256310,10 +256478,14 @@ ${errText.slice(0, 800)}`,
|
|
|
256310
256478
|
}
|
|
256311
256479
|
ensureUnifiedCacheDirs();
|
|
256312
256480
|
this.emitProgress({ stage: "load", message: `Starting image generation with ${args.model}` });
|
|
256481
|
+
const runnerEnv = { ...python.env };
|
|
256482
|
+
if (this._brokerGpuIndex !== null) {
|
|
256483
|
+
runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
|
|
256484
|
+
}
|
|
256313
256485
|
const result = await runProcess2(python.command, argv, {
|
|
256314
256486
|
cwd: this.cwd,
|
|
256315
256487
|
timeoutMs: 9e5,
|
|
256316
|
-
env:
|
|
256488
|
+
env: runnerEnv,
|
|
256317
256489
|
progressLabel: `Downloading/loading ${args.model}`,
|
|
256318
256490
|
onProgress: (event) => this.emitProgress(event)
|
|
256319
256491
|
});
|
|
@@ -257609,9 +257781,14 @@ var init_audio_generate = __esm({
|
|
|
257609
257781
|
DEFAULT_MUSIC_MODEL
|
|
257610
257782
|
];
|
|
257611
257783
|
DIFFUSERS_AUDIO_RUNNER = String.raw`#!/usr/bin/env python3
|
|
257612
|
-
import argparse, json, sys, time
|
|
257784
|
+
import argparse, json, os, sys, time
|
|
257613
257785
|
from pathlib import Path
|
|
257614
257786
|
|
|
257787
|
+
# Broker-picked GPU pinning — must run before importing torch.
|
|
257788
|
+
_omnius_gpu = os.environ.get("OMNIUS_GPU_INDEX", "").strip()
|
|
257789
|
+
if _omnius_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ:
|
|
257790
|
+
os.environ["CUDA_VISIBLE_DEVICES"] = _omnius_gpu
|
|
257791
|
+
|
|
257615
257792
|
def _format_bytes(value):
|
|
257616
257793
|
try:
|
|
257617
257794
|
n = float(value)
|
|
@@ -257805,9 +257982,14 @@ if __name__ == "__main__":
|
|
|
257805
257982
|
main()
|
|
257806
257983
|
`;
|
|
257807
257984
|
TRANSFORMERS_AUDIO_RUNNER = String.raw`#!/usr/bin/env python3
|
|
257808
|
-
import argparse, json, sys, time
|
|
257985
|
+
import argparse, json, os, sys, time
|
|
257809
257986
|
from pathlib import Path
|
|
257810
257987
|
|
|
257988
|
+
# Broker-picked GPU pinning — must run before importing torch.
|
|
257989
|
+
_omnius_gpu = os.environ.get("OMNIUS_GPU_INDEX", "").strip()
|
|
257990
|
+
if _omnius_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ:
|
|
257991
|
+
os.environ["CUDA_VISIBLE_DEVICES"] = _omnius_gpu
|
|
257992
|
+
|
|
257811
257993
|
def _format_bytes(value):
|
|
257812
257994
|
try:
|
|
257813
257995
|
n = float(value)
|
|
@@ -258033,6 +258215,8 @@ if __name__ == "__main__":
|
|
|
258033
258215
|
progressHandler = null;
|
|
258034
258216
|
lastProgressMessage = "";
|
|
258035
258217
|
lastProgressAt = 0;
|
|
258218
|
+
/** Broker-chosen GPU pinning for the in-flight generation. */
|
|
258219
|
+
_brokerGpuIndex = null;
|
|
258036
258220
|
constructor(cwd4, defaults3 = {}) {
|
|
258037
258221
|
this.cwd = cwd4;
|
|
258038
258222
|
this.defaults = defaults3;
|
|
@@ -258198,6 +258382,7 @@ if __name__ == "__main__":
|
|
|
258198
258382
|
const playback = playbackRequested(args);
|
|
258199
258383
|
const broker = getModelBroker();
|
|
258200
258384
|
const firstCandidate = candidates[0];
|
|
258385
|
+
let brokerGpuIndex = null;
|
|
258201
258386
|
if (firstCandidate) {
|
|
258202
258387
|
const decision2 = await broker.ensureModelLoadable({
|
|
258203
258388
|
name: firstCandidate.model,
|
|
@@ -258209,6 +258394,9 @@ if __name__ == "__main__":
|
|
|
258209
258394
|
for (const target of decision2.evictTargets) {
|
|
258210
258395
|
await broker.evict(target.host, target.name, `${kind}-gen-needs-room`);
|
|
258211
258396
|
}
|
|
258397
|
+
brokerGpuIndex = decision2.gpuIndex ?? null;
|
|
258398
|
+
} else if (decision2.kind === "ok") {
|
|
258399
|
+
brokerGpuIndex = decision2.gpuIndex ?? null;
|
|
258212
258400
|
} else if (decision2.kind === "reject") {
|
|
258213
258401
|
return {
|
|
258214
258402
|
success: false,
|
|
@@ -258218,6 +258406,7 @@ if __name__ == "__main__":
|
|
|
258218
258406
|
};
|
|
258219
258407
|
}
|
|
258220
258408
|
}
|
|
258409
|
+
this._brokerGpuIndex = brokerGpuIndex;
|
|
258221
258410
|
try {
|
|
258222
258411
|
return await this.generateCandidateLadder({ kind, candidates, prompt, args, seed, playback, start: start2 });
|
|
258223
258412
|
} catch (err) {
|
|
@@ -258384,10 +258573,14 @@ if __name__ == "__main__":
|
|
|
258384
258573
|
}
|
|
258385
258574
|
ensureUnifiedCacheDirs();
|
|
258386
258575
|
this.emitProgress({ stage: "load", message: `Starting ${args.kind} generation with ${args.model}` });
|
|
258576
|
+
const runnerEnv = { ...python.env };
|
|
258577
|
+
if (this._brokerGpuIndex !== null) {
|
|
258578
|
+
runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
|
|
258579
|
+
}
|
|
258387
258580
|
const result = await runProcess3(python.command, argv, {
|
|
258388
258581
|
cwd: this.cwd,
|
|
258389
258582
|
timeoutMs: 9e5,
|
|
258390
|
-
env:
|
|
258583
|
+
env: runnerEnv,
|
|
258391
258584
|
progressLabel: `Downloading/loading ${args.model}`,
|
|
258392
258585
|
onProgress: (event) => this.emitProgress(event)
|
|
258393
258586
|
});
|
|
@@ -259157,7 +259350,7 @@ function parseRunnerJson3(stdout) {
|
|
|
259157
259350
|
}
|
|
259158
259351
|
return null;
|
|
259159
259352
|
}
|
|
259160
|
-
var DEFAULT_DIFFUSERS_VIDEO_MODEL, SANA_VIDEO_480P_MODEL, SANA_VIDEO_720P_MODEL, WAN_TI2V_5B_MODEL, WAN_T2V_A14B_MODEL, WAN_I2V_A14B_MODEL, WAN_S2V_14B_MODEL, COGVIDEOX_5B_MODEL, COGVIDEOX_2B_MODEL, COGVIDEOX_5B_I2V_MODEL, MOCHI_PREVIEW_MODEL, LTX_VIDEO_MODEL, LTX_2_3_MODEL, HUNYUAN_VIDEO_MODEL, DIFFUSERS_VIDEO_PACKAGES, VIDEO_GENERATION_MODEL_PRESETS, VIDEO_GENERATION_QUALITY_LADDER, VIDEO_AUDIO_QUALITY_LADDER, DIFFUSERS_VIDEO_RUNNER, COMFY_BOOTSTRAP_SCRIPT, COMFY_DEFAULT_WORKFLOWS, VideoGenerateTool;
|
|
259353
|
+
var DEFAULT_DIFFUSERS_VIDEO_MODEL, SANA_VIDEO_480P_MODEL, SANA_VIDEO_720P_MODEL, SANA_WM_BIDIRECTIONAL_MODEL, WAN_TI2V_5B_MODEL, WAN_T2V_A14B_MODEL, WAN_I2V_A14B_MODEL, WAN_S2V_14B_MODEL, COGVIDEOX_5B_MODEL, COGVIDEOX_2B_MODEL, COGVIDEOX_5B_I2V_MODEL, MOCHI_PREVIEW_MODEL, LTX_VIDEO_MODEL, LTX_2_3_MODEL, HUNYUAN_VIDEO_MODEL, DIFFUSERS_VIDEO_PACKAGES, VIDEO_GENERATION_MODEL_PRESETS, VIDEO_GENERATION_QUALITY_LADDER, VIDEO_AUDIO_QUALITY_LADDER, DIFFUSERS_VIDEO_RUNNER, COMFY_BOOTSTRAP_SCRIPT, COMFY_DEFAULT_WORKFLOWS, VideoGenerateTool;
|
|
259161
259354
|
var init_video_generate = __esm({
|
|
259162
259355
|
"packages/execution/dist/tools/video-generate.js"() {
|
|
259163
259356
|
"use strict";
|
|
@@ -259167,6 +259360,7 @@ var init_video_generate = __esm({
|
|
|
259167
259360
|
DEFAULT_DIFFUSERS_VIDEO_MODEL = "Efficient-Large-Model/SANA-Video_2B_480p";
|
|
259168
259361
|
SANA_VIDEO_480P_MODEL = "Efficient-Large-Model/SANA-Video_2B_480p";
|
|
259169
259362
|
SANA_VIDEO_720P_MODEL = "Efficient-Large-Model/SANA-Video_2B_720p";
|
|
259363
|
+
SANA_WM_BIDIRECTIONAL_MODEL = "Efficient-Large-Model/SANA-WM_bidirectional";
|
|
259170
259364
|
WAN_TI2V_5B_MODEL = "Wan-AI/Wan2.2-TI2V-5B-Diffusers";
|
|
259171
259365
|
WAN_T2V_A14B_MODEL = "Wan-AI/Wan2.2-T2V-A14B-Diffusers";
|
|
259172
259366
|
WAN_I2V_A14B_MODEL = "Wan-AI/Wan2.2-I2V-A14B-Diffusers";
|
|
@@ -259460,6 +259654,41 @@ var init_video_generate = __esm({
|
|
|
259460
259654
|
licenseNote: "Apache 2.0",
|
|
259461
259655
|
note: "Premium Wan T2V; cloud GPU recommended."
|
|
259462
259656
|
},
|
|
259657
|
+
{
|
|
259658
|
+
id: SANA_WM_BIDIRECTIONAL_MODEL,
|
|
259659
|
+
label: "SANA-WM bidirectional (world-model i2v)",
|
|
259660
|
+
kinds: ["i2v"],
|
|
259661
|
+
backend: "diffusers",
|
|
259662
|
+
// SANA-WM declares its concrete class in model_index.json; loaded via
|
|
259663
|
+
// generic DiffusionPipeline.from_pretrained — the runner's auto path
|
|
259664
|
+
// already does this for unknown model names.
|
|
259665
|
+
pipelineClass: "DiffusionPipeline",
|
|
259666
|
+
install: 'python3 .omnius/video-gen/diffusers_text2video.py --model Efficient-Large-Model/SANA-WM_bidirectional --mode i2v --num-frames 121 --fps 24 --width 704 --height 1280 --steps 30 --guidance 5.0 --image <input.png> --prompt "..." --output .omnius/videos/out.mp4',
|
|
259667
|
+
category: "Premium quality",
|
|
259668
|
+
sizeClass: "2.6B DiT + LTX-2 refiner (Sana World Model)",
|
|
259669
|
+
quality: "Image-to-video world model with optional camera-trajectory control. Two-stage generation (Sana DiT + LTX-2 refiner); hybrid linear attention; 6-DoF camera support via .npy matrices or WASD/IJKL action DSL.",
|
|
259670
|
+
output: "Up to ~13s 704×1280 (portrait 720p) MP4 at 24 fps; max 321 frames.",
|
|
259671
|
+
bestUse: "World-model / camera-controlled video from a single first-frame image. Best on H100/A100-class hardware.",
|
|
259672
|
+
minVramGB: 80,
|
|
259673
|
+
recommendedVramGB: 100,
|
|
259674
|
+
deployment: "Diffusers DiffusionPipeline.from_pretrained; bfloat16; aggressive CPU offload mandatory below 100 GB. Bundled LTX-2 refiner runs as stage 2.",
|
|
259675
|
+
steps: 30,
|
|
259676
|
+
guidance: 5,
|
|
259677
|
+
numFrames: 121,
|
|
259678
|
+
fps: 24,
|
|
259679
|
+
width: 704,
|
|
259680
|
+
height: 1280,
|
|
259681
|
+
dtype: "bfloat16",
|
|
259682
|
+
needsCpuOffload: true,
|
|
259683
|
+
frameQuantum: 1,
|
|
259684
|
+
pixelQuantum: 16,
|
|
259685
|
+
// Apache 2.0 base; bundled LTX-2 refiner + VAE inherit the LTX-2
|
|
259686
|
+
// non-commercial license. Surface that explicitly.
|
|
259687
|
+
licenseNote: "Apache 2.0 (bundled LTX-2 refiner/VAE inherit LTX-2 non-commercial terms)",
|
|
259688
|
+
approxDownloadGB: 99,
|
|
259689
|
+
fallbackFor: [WAN_I2V_A14B_MODEL],
|
|
259690
|
+
note: "Sana World Model bidirectional i2v; portrait 704×1280 fixed; camera control via --camera <matrices.npy> or --action <DSL> when the runner supports it."
|
|
259691
|
+
},
|
|
259463
259692
|
{
|
|
259464
259693
|
id: WAN_I2V_A14B_MODEL,
|
|
259465
259694
|
label: "Wan2.2 I2V A14B",
|
|
@@ -259588,6 +259817,9 @@ var init_video_generate = __esm({
|
|
|
259588
259817
|
COGVIDEOX_5B_MODEL,
|
|
259589
259818
|
MOCHI_PREVIEW_MODEL,
|
|
259590
259819
|
COGVIDEOX_2B_MODEL,
|
|
259820
|
+
// Heavy i2v / world-model tier — only attempted when an explicit model
|
|
259821
|
+
// is requested or the consumer-VRAM tier above has failed for an i2v ask.
|
|
259822
|
+
SANA_WM_BIDIRECTIONAL_MODEL,
|
|
259591
259823
|
WAN_I2V_A14B_MODEL,
|
|
259592
259824
|
WAN_T2V_A14B_MODEL,
|
|
259593
259825
|
HUNYUAN_VIDEO_MODEL
|
|
@@ -259606,6 +259838,16 @@ import sys
|
|
|
259606
259838
|
import time
|
|
259607
259839
|
from pathlib import Path
|
|
259608
259840
|
|
|
259841
|
+
# ── GPU pinning ─────────────────────────────────────────────────────
|
|
259842
|
+
# The TS broker picks a GPU per generation via bin-packing across the
|
|
259843
|
+
# available CUDA devices. It passes the chosen index in OMNIUS_GPU_INDEX.
|
|
259844
|
+
# We MUST apply CUDA_VISIBLE_DEVICES BEFORE importing torch, otherwise
|
|
259845
|
+
# torch initializes the device list with all visible GPUs and the model
|
|
259846
|
+
# may land on a different device than the broker reserved capacity on.
|
|
259847
|
+
_omnius_gpu = os.environ.get("OMNIUS_GPU_INDEX", "").strip()
|
|
259848
|
+
if _omnius_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ:
|
|
259849
|
+
os.environ["CUDA_VISIBLE_DEVICES"] = _omnius_gpu
|
|
259850
|
+
|
|
259609
259851
|
def _progress(stage, message, percent=None):
|
|
259610
259852
|
payload = {"omnius_progress": True, "stage": stage, "message": message}
|
|
259611
259853
|
if percent is not None:
|
|
@@ -260412,6 +260654,9 @@ if __name__ == "__main__":
|
|
|
260412
260654
|
defaultBackend;
|
|
260413
260655
|
defaultKind;
|
|
260414
260656
|
promptExpander = null;
|
|
260657
|
+
/** GPU index chosen by the broker for the in-flight generation. Read
|
|
260658
|
+
* by the spawn path to set OMNIUS_GPU_INDEX in the subprocess env. */
|
|
260659
|
+
_brokerGpuIndex = null;
|
|
260415
260660
|
constructor(cwd4, defaults3 = {}) {
|
|
260416
260661
|
this.cwd = cwd4;
|
|
260417
260662
|
this.defaultModel = defaults3.model;
|
|
@@ -260501,17 +260746,23 @@ if __name__ == "__main__":
|
|
|
260501
260746
|
const candidates = videoGenerationFallbackCandidates(requestedModel, requestedBackend, inferredKind, generationFallbackEnabled3(args), { preferNativeAudioVideo: withAudio || Boolean(audioInput) });
|
|
260502
260747
|
const broker = getModelBroker();
|
|
260503
260748
|
const firstCandidate = candidates[0];
|
|
260749
|
+
let brokerGpuIndex = null;
|
|
260504
260750
|
if (firstCandidate) {
|
|
260751
|
+
const preset = firstCandidate.preset;
|
|
260505
260752
|
const decision2 = await broker.ensureModelLoadable({
|
|
260506
260753
|
name: firstCandidate.model,
|
|
260507
260754
|
domain: "video-gen",
|
|
260508
260755
|
host: firstCandidate.backend === "comfyui" ? "comfyui" : "diffusers-py",
|
|
260509
|
-
owner: "video-generate-tool"
|
|
260756
|
+
owner: "video-generate-tool",
|
|
260757
|
+
estimatedVramMB: preset ? preset.minVramGB * 1024 : void 0
|
|
260510
260758
|
});
|
|
260511
260759
|
if (decision2.kind === "evict") {
|
|
260512
260760
|
for (const target of decision2.evictTargets) {
|
|
260513
260761
|
await broker.evict(target.host, target.name, "video-gen-needs-room");
|
|
260514
260762
|
}
|
|
260763
|
+
brokerGpuIndex = decision2.gpuIndex ?? null;
|
|
260764
|
+
} else if (decision2.kind === "ok") {
|
|
260765
|
+
brokerGpuIndex = decision2.gpuIndex ?? null;
|
|
260515
260766
|
} else if (decision2.kind === "reject") {
|
|
260516
260767
|
return {
|
|
260517
260768
|
success: false,
|
|
@@ -260521,6 +260772,7 @@ if __name__ == "__main__":
|
|
|
260521
260772
|
};
|
|
260522
260773
|
}
|
|
260523
260774
|
}
|
|
260775
|
+
this._brokerGpuIndex = brokerGpuIndex;
|
|
260524
260776
|
if (candidates.length === 0) {
|
|
260525
260777
|
return {
|
|
260526
260778
|
success: false,
|
|
@@ -260942,6 +261194,9 @@ ${llmAnnotation}` : result.llmContent;
|
|
|
260942
261194
|
runnerEnv["HF_TOKEN"] = effectiveToken;
|
|
260943
261195
|
runnerEnv["HUGGING_FACE_HUB_TOKEN"] = effectiveToken;
|
|
260944
261196
|
}
|
|
261197
|
+
if (this._brokerGpuIndex !== null) {
|
|
261198
|
+
runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
|
|
261199
|
+
}
|
|
260945
261200
|
const argv = [
|
|
260946
261201
|
runner,
|
|
260947
261202
|
"--model",
|
|
@@ -570291,18 +570546,6 @@ function formatReflection(notes2, scenario) {
|
|
|
570291
570546
|
];
|
|
570292
570547
|
return lines.join("\n");
|
|
570293
570548
|
}
|
|
570294
|
-
function formatMemory(input, state) {
|
|
570295
|
-
const lines = [];
|
|
570296
|
-
if (input.memoryContext) lines.push(input.memoryContext);
|
|
570297
|
-
if (state.dynamicState && Object.keys(state.dynamicState).length > 0) {
|
|
570298
|
-
const entries = Object.entries(state.dynamicState).slice(0, 12).map(([key, value2]) => `- ${key}: ${compactText(JSON.stringify(value2) ?? String(value2), 220)}`);
|
|
570299
|
-
lines.push(`Dynamic state:
|
|
570300
|
-
${entries.join("\n")}`);
|
|
570301
|
-
}
|
|
570302
|
-
if (state.updatedAt) lines.push(`State updated: ${state.updatedAt}`);
|
|
570303
|
-
if (lines.length === 0) return "No additional retrieved voice-soul memory beyond scoped personality and runtime state.";
|
|
570304
|
-
return lines.join("\n\n");
|
|
570305
|
-
}
|
|
570306
570549
|
function formatFinalVoice(input) {
|
|
570307
570550
|
const voice = findProjectVoice(input.scope);
|
|
570308
570551
|
const lines = [
|
|
@@ -570329,23 +570572,23 @@ function buildSoulContext(input) {
|
|
|
570329
570572
|
const state = loadSoulRuntimeState(input);
|
|
570330
570573
|
const scenario = resolveSoulScenario(input, state);
|
|
570331
570574
|
const tree2 = resolveSoulDecisionTree(input, state, scenario);
|
|
570332
|
-
|
|
570333
|
-
|
|
570334
|
-
"### 1. Authority And Safety Scope",
|
|
570575
|
+
const sections = ["## Voice Soul Context"];
|
|
570576
|
+
const voiceAndScope = [
|
|
570335
570577
|
formatAuthorityScope(input),
|
|
570336
|
-
"### 2. Core Identity",
|
|
570337
570578
|
formatCoreIdentity(input),
|
|
570338
|
-
"### 3. Procedural Decision Tree",
|
|
570339
|
-
formatProceduralConstraints(input, scenario, tree2, state),
|
|
570340
|
-
"### 4. Relationship State",
|
|
570341
|
-
formatRelationshipState(input),
|
|
570342
|
-
"### 5. Current Reflection Notes",
|
|
570343
|
-
formatReflection(input.currentReflection, scenario),
|
|
570344
|
-
"### 6. Minimal Retrieved Memory",
|
|
570345
|
-
formatMemory(input, state),
|
|
570346
|
-
"### 7. Final Voice Guidance",
|
|
570347
570579
|
formatFinalVoice(input)
|
|
570348
|
-
].join("\n\n");
|
|
570580
|
+
].filter(Boolean).join("\n\n");
|
|
570581
|
+
sections.push("### Voice + Scope + Identity", voiceAndScope);
|
|
570582
|
+
const decisionSubstrate = [
|
|
570583
|
+
formatRelationshipState(input),
|
|
570584
|
+
formatProceduralConstraints(input, scenario, tree2, state)
|
|
570585
|
+
].filter(Boolean).join("\n\n");
|
|
570586
|
+
sections.push("### Active Relationship + Scenario", decisionSubstrate);
|
|
570587
|
+
const reflection = formatReflection(input.currentReflection, scenario);
|
|
570588
|
+
if (reflection && reflection.trim().length > 0) {
|
|
570589
|
+
sections.push("### Current Reflection Notes", reflection);
|
|
570590
|
+
}
|
|
570591
|
+
return sections.join("\n\n");
|
|
570349
570592
|
}
|
|
570350
570593
|
var MAX_SOUL_CHARS, MAX_VOICE_CHARS, MAX_SCOPED_PERSONALITY_CHARS, UNCLASSIFIED_SCENARIO;
|
|
570351
570594
|
var init_voice_soul = __esm({
|
|
@@ -577276,7 +577519,32 @@ var init_status_bar = __esm({
|
|
|
577276
577519
|
if (this.active) this.renderFooterPreserveCursor();
|
|
577277
577520
|
}, intervalMs);
|
|
577278
577521
|
try {
|
|
577279
|
-
|
|
577522
|
+
const broker = getModelBroker();
|
|
577523
|
+
try {
|
|
577524
|
+
Promise.resolve().then(() => (init_dist8(), dist_exports3)).then(({ getOllamaPool: getOllamaPool2, resolveDefaultPoolConfig: resolveDefaultPoolConfig2 }) => {
|
|
577525
|
+
try {
|
|
577526
|
+
const config = resolveDefaultPoolConfig2();
|
|
577527
|
+
const pool3 = getOllamaPool2({ baseInstanceUrl: config.baseInstanceUrl });
|
|
577528
|
+
broker.setOllamaAffinityProvider((modelName) => {
|
|
577529
|
+
try {
|
|
577530
|
+
const status = pool3.status?.();
|
|
577531
|
+
if (!status) return null;
|
|
577532
|
+
for (const inst of status.instances ?? []) {
|
|
577533
|
+
void modelName;
|
|
577534
|
+
return { gpuIndex: inst.gpuIndex, gpuUuid: inst.gpuUuid };
|
|
577535
|
+
}
|
|
577536
|
+
return null;
|
|
577537
|
+
} catch {
|
|
577538
|
+
return null;
|
|
577539
|
+
}
|
|
577540
|
+
});
|
|
577541
|
+
} catch {
|
|
577542
|
+
}
|
|
577543
|
+
}).catch(() => {
|
|
577544
|
+
});
|
|
577545
|
+
} catch {
|
|
577546
|
+
}
|
|
577547
|
+
broker.startPolling(Math.max(2e3, intervalMs * 2));
|
|
577280
577548
|
} catch {
|
|
577281
577549
|
}
|
|
577282
577550
|
}
|
|
@@ -604379,14 +604647,22 @@ async function handleBroker(arg, _ctx) {
|
|
|
604379
604647
|
safeLog(` ${c3.bold("Resource Broker")}`);
|
|
604380
604648
|
safeLog("");
|
|
604381
604649
|
safeLog(` ${c3.dim("RAM:")} ${snap.ramMB.used} / ${snap.ramMB.total} MB used (${snap.ramMB.free} MB free)`);
|
|
604382
|
-
if (snap.
|
|
604650
|
+
if (snap.vramPerDevice.length > 0) {
|
|
604651
|
+
safeLog(` ${c3.bold("GPUs:")}`);
|
|
604652
|
+
for (const d2 of snap.vramPerDevice) {
|
|
604653
|
+
const gpuSlots = snap.slots.byGpu[d2.index];
|
|
604654
|
+
const slotInfo = gpuSlots ? ` slots=${gpuSlots.inUse}/${gpuSlots.capacity}, loaded=${gpuSlots.loadedMB}MB` : "";
|
|
604655
|
+
safeLog(` gpu${d2.index} (${d2.uuid.slice(0, 12)}…) ${d2.used} / ${d2.total} MB used (${d2.free} MB free)${slotInfo}`);
|
|
604656
|
+
}
|
|
604657
|
+
} else if (snap.vramMB) {
|
|
604383
604658
|
safeLog(` ${c3.dim("VRAM:")} ${snap.vramMB.used} / ${snap.vramMB.total} MB used (${snap.vramMB.free} MB free)`);
|
|
604384
604659
|
} else {
|
|
604385
604660
|
safeLog(` ${c3.dim("VRAM:")} ${c3.dim("(no GPU detected)")}`);
|
|
604386
604661
|
}
|
|
604387
604662
|
safeLog(` ${c3.dim("RAM headroom threshold:")} ${broker.ramHeadroomMB} MB`);
|
|
604388
|
-
safeLog(` ${c3.dim("VRAM headroom threshold:")} ${broker.vramHeadroomMB} MB`);
|
|
604663
|
+
safeLog(` ${c3.dim("VRAM headroom threshold:")} ${broker.vramHeadroomMB} MB (per-device)`);
|
|
604389
604664
|
safeLog(` ${c3.dim("Idle-evict threshold:")} ${Math.round(broker.idleEvictMs / 1e3)}s`);
|
|
604665
|
+
safeLog(` ${c3.dim("Slot capacity:")} ${snap.slots.inUse}/${snap.slots.capacity} active, queue ${snap.slots.queueDepth}/${snap.slots.queueCapacity}`);
|
|
604390
604666
|
safeLog("");
|
|
604391
604667
|
if (snap.loaded.length === 0) {
|
|
604392
604668
|
safeLog(` ${c3.dim("No loaded models tracked.")}`);
|
|
@@ -604397,7 +604673,8 @@ async function handleBroker(arg, _ctx) {
|
|
|
604397
604673
|
const idle = Math.round((now - m2.lastUsedAt) / 1e3);
|
|
604398
604674
|
const owner = m2.owner ? c3.dim(` [owner=${m2.owner}]`) : "";
|
|
604399
604675
|
const ctx3 = m2.numCtx ? c3.dim(` n_ctx=${m2.numCtx}`) : "";
|
|
604400
|
-
|
|
604676
|
+
const gpu = m2.gpuIndex !== null && m2.gpuIndex !== void 0 ? c3.dim(` gpu=${m2.gpuIndex}`) : "";
|
|
604677
|
+
safeLog(` ${c3.cyan(m2.name)} (${m2.host}/${m2.domain}) vram=${m2.vramMB}MB ram=${m2.ramMB}MB${gpu} idle=${idle}s${ctx3}${owner}`);
|
|
604401
604678
|
}
|
|
604402
604679
|
}
|
|
604403
604680
|
if (snap.inflight.length > 0) {
|
|
@@ -618149,6 +618426,95 @@ function parseTelegramSilentReflectionNotes(text) {
|
|
|
618149
618426
|
}
|
|
618150
618427
|
return null;
|
|
618151
618428
|
}
|
|
618429
|
+
/**
 * Best-effort extraction of the `reply` string from a model response that may
 * still be streaming (i.e. the JSON object may be truncated at any point).
 *
 * Behaviour:
 *  - Hidden-thinking markup is removed first via `stripTelegramHiddenThinking`.
 *  - If the visible text does not start with `{`, it is not treated as JSON and
 *    is returned as-is (or `null` when empty).
 *  - Otherwise the first `"reply"` key is located and its string value is
 *    decoded character by character, so a value that has not been closed yet
 *    still yields the text received so far.
 *
 * @param {string} buffer2 Raw (possibly partial) model output.
 * @returns {string|null} The decoded reply text so far; `""` when the value is
 *   a complete empty string; `null` when nothing usable has arrived yet.
 */
function extractPartialTelegramReplyJson(buffer2) {
  const stripped = stripTelegramHiddenThinking(buffer2).trimStart();
  if (!stripped.startsWith("{")) {
    return stripped || null;
  }
  // NOTE: this matches the first occurrence of the key text, wherever it is.
  const keyMatch = stripped.indexOf('"reply"');
  if (keyMatch < 0) return null;
  let i2 = keyMatch + '"reply"'.length;
  // Advance to the colon that separates key and value.
  while (i2 < stripped.length && stripped[i2] !== ":") i2++;
  if (i2 >= stripped.length) return null;
  i2++;
  while (i2 < stripped.length && /\s/.test(stripped[i2])) i2++;
  // The value must be a string; anything else (or nothing yet) is unusable.
  if (i2 >= stripped.length || stripped[i2] !== '"') return null;
  i2++;
  let out = "";
  while (i2 < stripped.length) {
    const ch = stripped[i2];
    if (ch === '"') {
      // Unescaped closing quote: the value is complete.
      return out;
    }
    if (ch !== "\\") {
      out += ch;
      i2++;
      continue;
    }
    const next = stripped[i2 + 1];
    // Buffer ends in the middle of an escape: wait for more input.
    if (next === void 0) break;
    if (next === "u") {
      // Need all of `\uXXXX` (6 chars) before decoding.
      if (i2 + 5 >= stripped.length) break;
      const hex = stripped.slice(i2 + 2, i2 + 6);
      // Only decode well-formed escapes; parseInt alone would accept a valid
      // prefix of garbage (e.g. "12zz" -> 0x12) and emit a control character.
      if (/^[0-9a-fA-F]{4}$/.test(hex)) {
        out += String.fromCharCode(Number.parseInt(hex, 16));
      }
      i2 += 6;
      continue;
    }
    switch (next) {
      case "n":
        out += "\n";
        break;
      case "t":
        out += "\t";
        break;
      case "r":
        out += "\r";
        break;
      case "b":
        out += "\b";
        break;
      case "f":
        out += "\f";
        break;
      default:
        // Covers `\"`, `\\`, `\/` and, leniently, any unknown escape.
        out += next;
    }
    i2 += 2;
  }
  // Value not terminated yet: return what has been decoded so far.
  return out.length > 0 ? out : null;
}
|
|
618475
|
+
/**
 * Extract the `reply` string from a finished model response that is expected
 * to be a JSON object of the form `{"reply": "..."}`.
 *
 * Three strategies are tried in order:
 *  1. Parse the whole (thinking-stripped, trimmed) text as JSON.
 *  2. Parse only the first balanced top-level `{...}` object, ignoring any
 *     text after it.
 *  3. Fall back to `extractPartialTelegramReplyJson` to salvage a value from
 *     truncated or otherwise malformed JSON.
 *
 * @param {string} buffer2 Raw model output.
 * @returns {string|null} The trimmed reply, or `null` when the text is not
 *   JSON-shaped or no reply could be recovered.
 */
function extractFinalTelegramReplyJson(buffer2) {
  const text = stripTelegramHiddenThinking(buffer2).trim();
  if (!text.startsWith("{")) return null;

  // Trimmed `reply` of a JSON document, or undefined when the document does
  // not parse or its `reply` is not a string.
  const replyOf = (json) => {
    try {
      const value = JSON.parse(json);
      return typeof value.reply === "string" ? value.reply.trim() : void 0;
    } catch {
      return void 0;
    }
  };

  const whole = replyOf(text);
  if (whole !== void 0) return whole;

  // Find the index of the brace that closes the first top-level object,
  // skipping over braces that appear inside string literals.
  let closeAt = -1;
  let nesting = 0;
  let quoted = false;
  let escaped = false;
  for (let pos = 0; pos < text.length && closeAt < 0; pos++) {
    const c = text[pos];
    if (escaped) {
      escaped = false;
    } else if (quoted) {
      if (c === "\\") escaped = true;
      else if (c === '"') quoted = false;
    } else if (c === '"') {
      quoted = true;
    } else if (c === "{") {
      nesting += 1;
    } else if (c === "}" && --nesting === 0) {
      closeAt = pos;
    }
  }

  if (closeAt > 0) {
    const balanced = replyOf(text.slice(0, closeAt + 1));
    if (balanced !== void 0) return balanced;
  }

  // Last resort: decode whatever part of the reply value is recoverable.
  const salvaged = extractPartialTelegramReplyJson(text);
  if (!salvaged) return null;
  const trimmed = salvaged.trim();
  return trimmed.length > 0 ? trimmed : null;
}
|
|
618152
618518
|
function estimatePromptTokensFromRequest(request) {
|
|
618153
618519
|
let chars = 0;
|
|
618154
618520
|
for (const m2 of request.messages ?? []) {
|
|
@@ -619636,7 +620002,7 @@ function renderTelegramSubAgentError(username, error) {
|
|
|
619636
620002
|
process.stdout.write(` ${c3.dim("│")} ${c3.magenta("✘")} @${username}: ${c3.dim(preview)}
|
|
619637
620003
|
`);
|
|
619638
620004
|
}
|
|
619639
|
-
var TELEGRAM_TOOL_ACTION_GROUPS, TELEGRAM_TOOL_ACTION_GROUP, TELEGRAM_TOOL_MUTATING_GROUPS, DEFAULT_TELEGRAM_TOOL_GROUP_POLICY, TELEGRAM_TOOL_BUTTON_LABELS, TELEGRAM_SAFETY_PROMPT, ADMIN_DM_PROMPT, ADMIN_GROUP_PROMPT, TELEGRAM_PUBLIC_SOUL_PROFILE, TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT, TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT, TELEGRAM_PUBLIC_VISION_STACK_CONTRACT, GROUP_REPLY_DISCRETION_PROMPT, TELEGRAM_CHAT_MODE_PROMPT, ADMIN_CHAT_PROFILE_PROMPT, TELEGRAM_ACTION_RESPONSE_CONTRACT, TELEGRAM_EXTERNAL_ACQUISITION_CONTRACT, TELEGRAM_INTERACTION_DECISION_RESPONSE_FORMAT, TELEGRAM_STUCK_SELF_TALK_PREFIXES, TELEGRAM_CHAT_HISTORY_LIMIT, TELEGRAM_CONTEXT_RECENT_DEFAULT, TELEGRAM_CONTEXT_LINE_LIMIT, TELEGRAM_CONTEXT_SAMPLE_LIMIT, TELEGRAM_MEMORY_CARD_LIMIT, TELEGRAM_MEMORY_NOTE_LIMIT, TELEGRAM_ASSOCIATIVE_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_USER_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_ACTION_LIMIT, TELEGRAM_ASSOCIATIVE_RELATION_LIMIT, TELEGRAM_MEMORY_STOPWORDS, TELEGRAM_MEMORY_GENERIC_QUERY_TOKENS, TELEGRAM_SUB_AGENT_BOUNDED_OPTIONS, TELEGRAM_SUB_AGENT_DEFAULT_LIMIT, TELEGRAM_SUB_AGENT_MAX_LIMIT, TELEGRAM_SUB_AGENT_BURST_CONTEXT_LIMIT, TELEGRAM_PUBLIC_HELP_COMMANDS2, TELEGRAM_REMINDER_SLASH_COMMANDS, TELEGRAM_REFLECTION_SLASH_COMMANDS, TELEGRAM_PUBLIC_BOT_COMMAND_NAMES, TELEGRAM_IMAGE_EXTENSIONS, MEDIA_CACHE_TTL_MS, TELEGRAM_CHANNEL_DMN_SWEEP_MS, TELEGRAM_CHANNEL_DMN_IDLE_AFTER_MS, TELEGRAM_CHANNEL_DMN_MIN_INTERVAL_MS, TELEGRAM_CHANNEL_DMN_MIN_MESSAGES, TELEGRAM_ALLOWED_UPDATES, TELEGRAM_PUBLIC_TOOL_QUOTAS, TelegramBridge;
|
|
620005
|
+
var TELEGRAM_TOOL_ACTION_GROUPS, TELEGRAM_TOOL_ACTION_GROUP, TELEGRAM_TOOL_MUTATING_GROUPS, DEFAULT_TELEGRAM_TOOL_GROUP_POLICY, TELEGRAM_TOOL_BUTTON_LABELS, TELEGRAM_SAFETY_PROMPT, ADMIN_DM_PROMPT, ADMIN_GROUP_PROMPT, TELEGRAM_PUBLIC_SOUL_PROFILE, TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT, TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT, TELEGRAM_PUBLIC_VISION_STACK_CONTRACT, GROUP_REPLY_DISCRETION_PROMPT, TELEGRAM_CHAT_MODE_PROMPT, ADMIN_CHAT_PROFILE_PROMPT, TELEGRAM_ACTION_RESPONSE_CONTRACT, TELEGRAM_EXTERNAL_ACQUISITION_CONTRACT, TELEGRAM_INTERACTION_DECISION_RESPONSE_FORMAT, TELEGRAM_CHAT_REPLY_RESPONSE_FORMAT, TELEGRAM_STUCK_SELF_TALK_PREFIXES, TELEGRAM_CHAT_HISTORY_LIMIT, TELEGRAM_CONTEXT_RECENT_DEFAULT, TELEGRAM_CONTEXT_LINE_LIMIT, TELEGRAM_CONTEXT_SAMPLE_LIMIT, TELEGRAM_MEMORY_CARD_LIMIT, TELEGRAM_MEMORY_NOTE_LIMIT, TELEGRAM_ASSOCIATIVE_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_USER_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_ACTION_LIMIT, TELEGRAM_ASSOCIATIVE_RELATION_LIMIT, TELEGRAM_MEMORY_STOPWORDS, TELEGRAM_MEMORY_GENERIC_QUERY_TOKENS, TELEGRAM_SUB_AGENT_BOUNDED_OPTIONS, TELEGRAM_SUB_AGENT_DEFAULT_LIMIT, TELEGRAM_SUB_AGENT_MAX_LIMIT, TELEGRAM_SUB_AGENT_BURST_CONTEXT_LIMIT, TELEGRAM_PUBLIC_HELP_COMMANDS2, TELEGRAM_REMINDER_SLASH_COMMANDS, TELEGRAM_REFLECTION_SLASH_COMMANDS, TELEGRAM_PUBLIC_BOT_COMMAND_NAMES, TELEGRAM_IMAGE_EXTENSIONS, MEDIA_CACHE_TTL_MS, TELEGRAM_CHANNEL_DMN_SWEEP_MS, TELEGRAM_CHANNEL_DMN_IDLE_AFTER_MS, TELEGRAM_CHANNEL_DMN_MIN_INTERVAL_MS, TELEGRAM_CHANNEL_DMN_MIN_MESSAGES, TELEGRAM_ALLOWED_UPDATES, TELEGRAM_PUBLIC_TOOL_QUOTAS, TelegramBridge;
|
|
619640
620006
|
var init_telegram_bridge = __esm({
|
|
619641
620007
|
"packages/cli/src/tui/telegram-bridge.ts"() {
|
|
619642
620008
|
"use strict";
|
|
@@ -619870,6 +620236,12 @@ Rules:
|
|
|
619870
620236
|
7. Do not claim older chat is unavailable when the context stream contains it. If asked what you see, summarize the supplied transcript, speakers, and relationship/tone signals.
|
|
619871
620237
|
8. Mirror the current sender's tone and directness while staying safe and clear.
|
|
619872
620238
|
9. Never send router decisions, skip explanations, memory-stage notes, task-complete summaries, or "no_reply" as chat text.
|
|
620239
|
+
|
|
620240
|
+
Output discipline (your assistant message is sent verbatim to Telegram, ALL of it):
|
|
620241
|
+
- Emit ONLY the final reply text. Do not narrate your reasoning, summarize what you found, organize bullet-point notes, or write phrases like "Let me summarize", "Let me send the reply", "Now I have enough", "Based on the research", "Here's my response:" before the actual reply. Those are scratch-pad phrases that leak when emitted as visible text.
|
|
620242
|
+
- Do not produce a draft followed by the final answer. The first character of your output should be the first character of the message the user will receive.
|
|
620243
|
+
- If you need to think, do it silently. Do not write your reasoning steps as visible prose. If you have an internal scratchpad, keep it internal.
|
|
620244
|
+
- A reply that begins by restating what you found, then says something like "Let me write the response" or "Here's the breakdown", then gives the answer, is wrong twice over: the user sees the restatement AND the answer, doubling the message. Skip the restatement.
|
|
619873
620245
|
`.trim();
|
|
619874
620246
|
ADMIN_CHAT_PROFILE_PROMPT = `
|
|
619875
620247
|
You are replying to the authenticated Telegram admin in a private DM.
|
|
@@ -619902,6 +620274,24 @@ External acquisition contract:
|
|
|
619902
620274
|
TELEGRAM_INTERACTION_DECISION_RESPONSE_FORMAT = {
|
|
619903
620275
|
type: "json_object"
|
|
619904
620276
|
};
|
|
620277
|
+
TELEGRAM_CHAT_REPLY_RESPONSE_FORMAT = {
|
|
620278
|
+
type: "json_schema",
|
|
620279
|
+
json_schema: {
|
|
620280
|
+
name: "telegram_chat_reply",
|
|
620281
|
+
strict: true,
|
|
620282
|
+
schema: {
|
|
620283
|
+
type: "object",
|
|
620284
|
+
additionalProperties: false,
|
|
620285
|
+
required: ["reply"],
|
|
620286
|
+
properties: {
|
|
620287
|
+
reply: {
|
|
620288
|
+
type: "string",
|
|
620289
|
+
description: "The exact text to send to Telegram. No prefixes, no narration, no scratch reasoning, no bullet-point notes preceding the reply."
|
|
620290
|
+
}
|
|
620291
|
+
}
|
|
620292
|
+
}
|
|
620293
|
+
}
|
|
620294
|
+
};
|
|
619905
620295
|
TELEGRAM_STUCK_SELF_TALK_PREFIXES = [
|
|
619906
620296
|
/^i'?ve been stuck for\b/i,
|
|
619907
620297
|
/^i am (still |currently )?stuck\b/i,
|
|
@@ -622128,6 +622518,14 @@ ${mediaContext}` : ""
|
|
|
622128
622518
|
if (state.lastFollowupAt && now - state.lastFollowupAt < 60 * 6e4) {
|
|
622129
622519
|
return { sent: false, reason: "rate limit held public follow-up" };
|
|
622130
622520
|
}
|
|
622521
|
+
const cooldownEnv = Number.parseInt(process.env["OMNIUS_TG_FOLLOWUP_COOLDOWN_MS"] ?? "", 10);
|
|
622522
|
+
const cooldownMs = Number.isFinite(cooldownEnv) && cooldownEnv >= 6e4 ? cooldownEnv : 10 * 6e4;
|
|
622523
|
+
if (state.lastAssistantMessageAt && now - state.lastAssistantMessageAt < cooldownMs) {
|
|
622524
|
+
return {
|
|
622525
|
+
sent: false,
|
|
622526
|
+
reason: `recent assistant reply suppresses follow-up (${Math.round((now - state.lastAssistantMessageAt) / 1e3)}s ago, cooldown ${Math.round(cooldownMs / 1e3)}s)`
|
|
622527
|
+
};
|
|
622528
|
+
}
|
|
622131
622529
|
const candidateMessageIds = Array.from(new Set([
|
|
622132
622530
|
...artifact.curiosityThreads.flatMap((thread) => thread.sourceMessages ?? []),
|
|
622133
622531
|
...artifact.memoryProposals.flatMap((proposal) => proposal.sourceMessages ?? []),
|
|
@@ -622458,6 +622856,10 @@ ${mediaContext}` : ""
|
|
|
622458
622856
|
chatTitle: msg.chatTitle
|
|
622459
622857
|
};
|
|
622460
622858
|
this.recordChatHistory(sessionKey, entry);
|
|
622859
|
+
try {
|
|
622860
|
+
this.reflectionStateForSession(sessionKey).lastAssistantMessageAt = Date.now();
|
|
622861
|
+
} catch {
|
|
622862
|
+
}
|
|
622461
622863
|
this.persistTelegramAssistantMessage(
|
|
622462
622864
|
msg,
|
|
622463
622865
|
clean5,
|
|
@@ -623409,214 +623811,29 @@ ${lines.join("\n")}`);
|
|
|
623409
623811
|
const cardCount = (this.chatMemoryCards.get(sessionKey) ?? []).length;
|
|
623410
623812
|
const memory = this.chatAssociativeMemory.get(sessionKey);
|
|
623411
623813
|
const factCount = memory?.facts.length ?? 0;
|
|
623412
|
-
const relationshipCount = memory?.relationships.length ?? 0;
|
|
623413
623814
|
const userMemoryCount = memory ? Object.keys(memory.users).length : 0;
|
|
623414
623815
|
const historyCount = (this.chatHistory.get(sessionKey) ?? []).length;
|
|
623415
|
-
let sqliteCount = 0;
|
|
623416
|
-
let episodeCount2 = 0;
|
|
623417
|
-
try {
|
|
623418
|
-
const db = this.telegramDb();
|
|
623419
|
-
if (db) {
|
|
623420
|
-
const row = db.prepare("SELECT COUNT(*) AS n FROM telegram_messages WHERE session_key = ?").get(sessionKey);
|
|
623421
|
-
sqliteCount = Number(row?.n) || 0;
|
|
623422
|
-
}
|
|
623423
|
-
} catch {
|
|
623424
|
-
}
|
|
623425
|
-
if (this.repoRoot) {
|
|
623426
|
-
try {
|
|
623427
|
-
const paths = omniusMemoryDbPaths(this.repoRoot);
|
|
623428
|
-
if (existsSync117(paths.episodes)) {
|
|
623429
|
-
const graph = new TemporalGraph(paths.knowledge);
|
|
623430
|
-
const store2 = new EpisodeStore(paths.episodes, graph);
|
|
623431
|
-
try {
|
|
623432
|
-
const sample = store2.search({ sessionId: sessionKey, limit: 1 }) ?? [];
|
|
623433
|
-
if (sample.length > 0) {
|
|
623434
|
-
const dbAny = store2.db;
|
|
623435
|
-
if (dbAny && typeof dbAny.prepare === "function") {
|
|
623436
|
-
const row = dbAny.prepare("SELECT COUNT(*) AS n FROM episodes WHERE session_id = ?").get(sessionKey);
|
|
623437
|
-
episodeCount2 = Number(row?.n) || 0;
|
|
623438
|
-
} else {
|
|
623439
|
-
episodeCount2 = 1;
|
|
623440
|
-
}
|
|
623441
|
-
}
|
|
623442
|
-
} finally {
|
|
623443
|
-
try {
|
|
623444
|
-
store2.close();
|
|
623445
|
-
} catch {
|
|
623446
|
-
}
|
|
623447
|
-
try {
|
|
623448
|
-
graph.close();
|
|
623449
|
-
} catch {
|
|
623450
|
-
}
|
|
623451
|
-
}
|
|
623452
|
-
}
|
|
623453
|
-
} catch {
|
|
623454
|
-
}
|
|
623455
|
-
}
|
|
623456
623816
|
const chatId = msg.chatId;
|
|
623457
|
-
|
|
623817
|
+
let topicCount = 0;
|
|
623458
623818
|
if (this.repoRoot && chatId !== void 0) {
|
|
623459
623819
|
try {
|
|
623460
623820
|
const memDir = resolve46(this.repoRoot, ".omnius", "memory");
|
|
623461
623821
|
if (existsSync117(memDir)) {
|
|
623462
623822
|
const prefix = this.telegramScopedMemoryPrefix(chatId);
|
|
623463
623823
|
for (const file of readdirSync41(memDir)) {
|
|
623464
|
-
if (
|
|
623465
|
-
const topic = file.slice(prefix.length, -".json".length);
|
|
623466
|
-
if (topic) topicFiles.push(topic);
|
|
623467
|
-
}
|
|
623468
|
-
}
|
|
623469
|
-
} catch {
|
|
623470
|
-
}
|
|
623471
|
-
}
|
|
623472
|
-
const anyMemory = cardCount + factCount + relationshipCount + userMemoryCount + sqliteCount + episodeCount2 + topicFiles.length > 0;
|
|
623473
|
-
if (!anyMemory && historyCount === 0) return "";
|
|
623474
|
-
const lines = [
|
|
623475
|
-
"### Scoped Memory Substrate (this chat - always present)",
|
|
623476
|
-
"Persistent memory is available for this chat. The current turn's lexical scorers may",
|
|
623477
|
-
"have surfaced 0 matches above - that does not mean the substrate is empty. Counts:",
|
|
623478
|
-
`- Memory cards: ${cardCount}`,
|
|
623479
|
-
`- Associative facts: ${factCount}`,
|
|
623480
|
-
`- Associative relationships: ${relationshipCount}`,
|
|
623481
|
-
`- Per-user memories: ${userMemoryCount}`,
|
|
623482
|
-
`- Rolling history entries retained: ${historyCount}`,
|
|
623483
|
-
`- Addressable conversation history rows: ${Math.max(historyCount, sqliteCount)}`,
|
|
623484
|
-
`- SQLite mirror rows: ${sqliteCount}`,
|
|
623485
|
-
`- Episodes (durable, day+ scope): ${episodeCount2}`
|
|
623486
|
-
];
|
|
623487
|
-
if (topicFiles.length > 0) {
|
|
623488
|
-
lines.push("");
|
|
623489
|
-
lines.push("Per-topic memory files (call memory_read with one of these `topic` values):");
|
|
623490
|
-
for (const topic of topicFiles.slice(0, 80)) {
|
|
623491
|
-
lines.push(` - ${topic}`);
|
|
623492
|
-
}
|
|
623493
|
-
}
|
|
623494
|
-
const anchors = this.telegramHistoryAnchorsForSession(sessionKey, 3);
|
|
623495
|
-
const fmtHistoryAnchor = (entry) => {
|
|
623496
|
-
const when = entry.ts ? new Date(entry.ts).toISOString() : "(unknown ts)";
|
|
623497
|
-
const speaker = telegramHistorySpeaker(entry);
|
|
623498
|
-
const kind = entry.role === "assistant" || entry.isBot ? "bot" : "human";
|
|
623499
|
-
const mode = entry.mode ? `/${entry.mode}` : "";
|
|
623500
|
-
const messageId = entry.messageId ? ` msg:${entry.messageId}` : "";
|
|
623501
|
-
return `${when} ${speaker} [${kind}]${mode}${messageId}: ${telegramContextJsonString(String(entry.text || ""), 320)}`;
|
|
623502
|
-
};
|
|
623503
|
-
const sameHistoryAnchor = (a2, b) => {
|
|
623504
|
-
if (!a2 || !b) return false;
|
|
623505
|
-
if (a2.messageId !== void 0 && b.messageId !== void 0) return a2.messageId === b.messageId && a2.role === b.role;
|
|
623506
|
-
return a2.ts === b.ts && a2.role === b.role && a2.text === b.text;
|
|
623507
|
-
};
|
|
623508
|
-
if (anchors.earliest.length > 0 || anchors.latest) {
|
|
623509
|
-
lines.push("");
|
|
623510
|
-
lines.push("Chronological anchors - Telegram conversation history (SQLite mirror preferred; ground truth for 'oldest/newest memory' questions):");
|
|
623511
|
-
if (anchors.earliest[0]) lines.push(` Earliest turn: ${fmtHistoryAnchor(anchors.earliest[0])}`);
|
|
623512
|
-
if (anchors.latest && !sameHistoryAnchor(anchors.earliest[0], anchors.latest)) {
|
|
623513
|
-
lines.push(` Latest turn: ${fmtHistoryAnchor(anchors.latest)}`);
|
|
623514
|
-
}
|
|
623515
|
-
if (anchors.earliest[1]) {
|
|
623516
|
-
lines.push(` 2nd earliest: ${fmtHistoryAnchor(anchors.earliest[1])}`);
|
|
623517
|
-
}
|
|
623518
|
-
if (anchors.earliest[2]) {
|
|
623519
|
-
lines.push(` 3rd earliest: ${fmtHistoryAnchor(anchors.earliest[2])}`);
|
|
623520
|
-
}
|
|
623521
|
-
}
|
|
623522
|
-
const activityStats = this.telegramParticipantActivityStats(sessionKey, { limit: 8 });
|
|
623523
|
-
if (activityStats.length > 0) {
|
|
623524
|
-
lines.push("");
|
|
623525
|
-
lines.push("Activity anchors - participant message counts from the durable mirror/merged history:");
|
|
623526
|
-
for (const stat7 of activityStats) {
|
|
623527
|
-
const first2 = stat7.firstTs ? new Date(stat7.firstTs).toISOString() : "?";
|
|
623528
|
-
const last2 = stat7.lastTs ? new Date(stat7.lastTs).toISOString() : "?";
|
|
623529
|
-
lines.push(` - ${stat7.speaker}: ${stat7.count} message${stat7.count === 1 ? "" : "s"} (first:${first2}, last:${last2})`);
|
|
623530
|
-
}
|
|
623531
|
-
}
|
|
623532
|
-
if (this.repoRoot) {
|
|
623533
|
-
try {
|
|
623534
|
-
const paths = omniusMemoryDbPaths(this.repoRoot);
|
|
623535
|
-
if (existsSync117(paths.episodes)) {
|
|
623536
|
-
const graph = new TemporalGraph(paths.knowledge);
|
|
623537
|
-
const store2 = new EpisodeStore(paths.episodes, graph);
|
|
623538
|
-
try {
|
|
623539
|
-
const dbAny = store2.db;
|
|
623540
|
-
if (dbAny && typeof dbAny.prepare === "function") {
|
|
623541
|
-
const earliest = dbAny.prepare(
|
|
623542
|
-
"SELECT timestamp, modality, tool_name, content, gist FROM episodes WHERE session_id = ? ORDER BY timestamp ASC LIMIT 1"
|
|
623543
|
-
).get(sessionKey);
|
|
623544
|
-
const latest = dbAny.prepare(
|
|
623545
|
-
"SELECT timestamp, modality, tool_name, content, gist FROM episodes WHERE session_id = ? ORDER BY timestamp DESC LIMIT 1"
|
|
623546
|
-
).get(sessionKey);
|
|
623547
|
-
if (earliest || latest) {
|
|
623548
|
-
lines.push("");
|
|
623549
|
-
lines.push("Chronological anchors - episodes.db (durable, may reach further back than rolling history):");
|
|
623550
|
-
const fmtEp = (row) => {
|
|
623551
|
-
const when = row.timestamp ? new Date(row.timestamp).toISOString() : "(unknown ts)";
|
|
623552
|
-
const tag = `[${row.modality || "?"}${row.tool_name ? ":" + row.tool_name : ""}]`;
|
|
623553
|
-
const text = (row.gist || row.content || "").split("\n").filter((ln) => !/^(Telegram|session:|chat:|message_id:|thread_id:|speaker:|mode:)/i.test(ln.trim())).join(" ").replace(/\s+/g, " ").trim();
|
|
623554
|
-
return `${when} ${tag} ${telegramContextJsonString(text, 320)}`;
|
|
623555
|
-
};
|
|
623556
|
-
if (earliest) lines.push(` Earliest episode: ${fmtEp(earliest)}`);
|
|
623557
|
-
if (latest && (!earliest || earliest.timestamp !== latest.timestamp)) lines.push(` Latest episode: ${fmtEp(latest)}`);
|
|
623558
|
-
}
|
|
623559
|
-
}
|
|
623560
|
-
} finally {
|
|
623561
|
-
try {
|
|
623562
|
-
store2.close();
|
|
623563
|
-
} catch {
|
|
623564
|
-
}
|
|
623565
|
-
try {
|
|
623566
|
-
graph.close();
|
|
623567
|
-
} catch {
|
|
623568
|
-
}
|
|
623824
|
+
if (file.endsWith(".json") && file.startsWith(prefix)) topicCount++;
|
|
623569
623825
|
}
|
|
623570
623826
|
}
|
|
623571
623827
|
} catch {
|
|
623572
623828
|
}
|
|
623573
623829
|
}
|
|
623574
|
-
|
|
623575
|
-
|
|
623576
|
-
|
|
623577
|
-
|
|
623578
|
-
|
|
623579
|
-
|
|
623580
|
-
|
|
623581
|
-
const file = join131(memDir, `${prefix}${topic}.json`);
|
|
623582
|
-
if (!existsSync117(file)) continue;
|
|
623583
|
-
let parsed;
|
|
623584
|
-
try {
|
|
623585
|
-
parsed = JSON.parse(readFileSync96(file, "utf8"));
|
|
623586
|
-
} catch {
|
|
623587
|
-
continue;
|
|
623588
|
-
}
|
|
623589
|
-
for (const [key, entry] of Object.entries(parsed)) {
|
|
623590
|
-
if (!entry || typeof entry !== "object") continue;
|
|
623591
|
-
const rawTs = entry.timestamp;
|
|
623592
|
-
const ts = typeof rawTs === "string" ? Date.parse(rawTs) : typeof rawTs === "number" ? rawTs : NaN;
|
|
623593
|
-
if (!Number.isFinite(ts)) continue;
|
|
623594
|
-
const value2 = String(entry.value ?? "");
|
|
623595
|
-
if (!earliestEntry || ts < earliestEntry.ts) earliestEntry = { topic, key, value: value2, ts };
|
|
623596
|
-
if (!latestEntry || ts > latestEntry.ts) latestEntry = { topic, key, value: value2, ts };
|
|
623597
|
-
}
|
|
623598
|
-
}
|
|
623599
|
-
if (earliestEntry || latestEntry) {
|
|
623600
|
-
lines.push("");
|
|
623601
|
-
lines.push("Chronological anchors - memory_write entries (most-trusted, agent-asserted):");
|
|
623602
|
-
const fmtMem = (e2) => {
|
|
623603
|
-
const when = new Date(e2.ts).toISOString();
|
|
623604
|
-
return `${when} topic="${e2.topic}" key="${e2.key}" → ${telegramContextJsonString(e2.value, 320)}`;
|
|
623605
|
-
};
|
|
623606
|
-
if (earliestEntry) lines.push(` Earliest memory_write: ${fmtMem(earliestEntry)}`);
|
|
623607
|
-
if (latestEntry && (!earliestEntry || earliestEntry.ts !== latestEntry.ts)) lines.push(` Latest memory_write: ${fmtMem(latestEntry)}`);
|
|
623608
|
-
}
|
|
623609
|
-
} catch {
|
|
623610
|
-
}
|
|
623611
|
-
}
|
|
623612
|
-
lines.push("");
|
|
623613
|
-
lines.push("Rules:");
|
|
623614
|
-
lines.push(" 1. Do not tell the user 'memory is empty' or 'nothing has been stored' for this chat without first calling memory_search and memory_read on a relevant topic from the list above.");
|
|
623615
|
-
lines.push(" 2. If the structured sections (cards/facts/sqlite/episodes) above did not surface what the user asked about, that is a scoring miss, not absence. Call memory_search with broader tokens or pick a topic above with memory_read.");
|
|
623616
|
-
lines.push(" 3. The rolling-history block is base context; the cards/facts/episodes are retrieval-augmented. Treat them as the same memory, surfaced different ways.");
|
|
623617
|
-
lines.push(" 4. For 'what is your oldest/earliest memory' or 'most recent memory' questions: answer directly from the 'Chronological anchors' lines above. Quote the timestamp and content. Do not call tools first and do not report 'empty'.");
|
|
623618
|
-
lines.push(" 5. memory_search accepts natural-language time phrases inside the `query` argument or explicit `since`/`until`/`bucket` args. Examples: query='what did manitcor say yesterday', query='last 3 hours', query='earlier today', query='2 days ago', query='since 2026-05-15', query='between 2026-05-15 and 2026-05-16', query='oldest memory about github', query='most recent flux discussion'. Use these for chronological/'how far back' style queries instead of guessing - the tool parses the phrase, filters by time, and returns the right window.");
|
|
623619
|
-
return lines.join("\n");
|
|
623830
|
+
const hasMemory = cardCount + factCount + userMemoryCount + topicCount > 0 || historyCount > 0;
|
|
623831
|
+
if (!hasMemory) return "";
|
|
623832
|
+
return [
|
|
623833
|
+
"### Memory Substrate (this chat)",
|
|
623834
|
+
`Available: ${cardCount} cards, ${factCount} facts, ${userMemoryCount} per-user memories, ${historyCount} rolling-history msgs, ${topicCount} memory_read topics.`,
|
|
623835
|
+
"Recall via memory_search (semantic similarity / graph traversal / episodes). For exact reads: memory_read(topic, key). The substrate is associative — recall is by cue, not by date."
|
|
623836
|
+
].join("\n");
|
|
623620
623837
|
}
|
|
623621
623838
|
buildTelegramConversationContextStream(sessionKey, msg, maxRecent = TELEGRAM_CONTEXT_RECENT_DEFAULT, salienceSignals = []) {
|
|
623622
623839
|
this.ensureTelegramConversationLoaded(sessionKey);
|
|
@@ -623685,32 +623902,16 @@ ${lines.join("\n")}`);
|
|
|
623685
623902
|
sections.push(`### Participants And Relationship Signals${tierNote}
|
|
623686
623903
|
${participantLines.join("\n")}`);
|
|
623687
623904
|
}
|
|
623688
|
-
const
|
|
623689
|
-
|
|
623690
|
-
|
|
623691
|
-
isGroup ? 14 : 8
|
|
623692
|
-
);
|
|
623693
|
-
if (associativeContext) {
|
|
623694
|
-
sections.push(associativeContext);
|
|
623695
|
-
}
|
|
623696
|
-
const sqliteMirrorContext = this.relevantTelegramSqliteMirrorContext(
|
|
623697
|
-
sessionKey,
|
|
623698
|
-
msg,
|
|
623699
|
-
isGroup ? 14 : 8
|
|
623700
|
-
);
|
|
623701
|
-
if (sqliteMirrorContext) {
|
|
623702
|
-
sections.push(sqliteMirrorContext);
|
|
623703
|
-
}
|
|
623704
|
-
try {
|
|
623705
|
-
const episodicContext = this.relevantTelegramEpisodicMemoryContext(
|
|
623905
|
+
const ASSOCIATIVE_MIN_TURNS = isGroup ? 8 : 4;
|
|
623906
|
+
if (retainedCount >= ASSOCIATIVE_MIN_TURNS) {
|
|
623907
|
+
const associativeContext = this.relevantTelegramAssociativeMemoryContext(
|
|
623706
623908
|
sessionKey,
|
|
623707
623909
|
msg,
|
|
623708
|
-
isGroup ?
|
|
623910
|
+
isGroup ? 14 : 8
|
|
623709
623911
|
);
|
|
623710
|
-
if (
|
|
623711
|
-
sections.push(
|
|
623912
|
+
if (associativeContext) {
|
|
623913
|
+
sections.push(associativeContext);
|
|
623712
623914
|
}
|
|
623713
|
-
} catch {
|
|
623714
623915
|
}
|
|
623715
623916
|
const memoryCards = this.relevantTelegramMemoryCards(sessionKey, msg, isGroup ? 10 : 6);
|
|
623716
623917
|
if (memoryCards.length > 0) {
|
|
@@ -623741,10 +623942,6 @@ ${notes2}`;
|
|
|
623741
623942
|
${cardLines.join("\n")}`);
|
|
623742
623943
|
}
|
|
623743
623944
|
}
|
|
623744
|
-
const channelDaydream = this.formatLatestTelegramChannelDaydreamContext(sessionKey);
|
|
623745
|
-
if (channelDaydream) {
|
|
623746
|
-
sections.push(channelDaydream);
|
|
623747
|
-
}
|
|
623748
623945
|
const recentMedia = this.recentTelegramMediaEntries(msg.chatId, 10);
|
|
623749
623946
|
if (recentMedia.length > 0) {
|
|
623750
623947
|
const mediaLines = recentMedia.map((entry) => {
|
|
@@ -623763,26 +623960,33 @@ ${cardLines.join("\n")}`);
|
|
|
623763
623960
|
].join("\n"));
|
|
623764
623961
|
}
|
|
623765
623962
|
if (olderCount > 0) {
|
|
623963
|
+
const halfLifeMs = (isGroup ? 24 : 48) * 60 * 60 * 1e3;
|
|
623964
|
+
const now = Date.now();
|
|
623766
623965
|
const older = history.slice(0, olderCount);
|
|
623767
623966
|
const bySpeaker = /* @__PURE__ */ new Map();
|
|
623768
623967
|
for (const entry of older) {
|
|
623769
623968
|
if (!entry.text.trim()) continue;
|
|
623770
623969
|
const speaker = telegramHistorySpeaker(entry);
|
|
623970
|
+
const ageMs = Math.max(0, now - (entry.ts ?? 0));
|
|
623971
|
+
const weight = Math.exp(-ageMs / halfLifeMs);
|
|
623771
623972
|
const existing = bySpeaker.get(speaker);
|
|
623772
623973
|
const text = truncateTelegramContextLine(entry.text, 180);
|
|
623773
623974
|
if (existing) {
|
|
623774
623975
|
existing.count += 1;
|
|
623775
623976
|
existing.last = text;
|
|
623977
|
+
existing.weightSum += weight;
|
|
623978
|
+
existing.maxWeight = Math.max(existing.maxWeight, weight);
|
|
623776
623979
|
} else {
|
|
623777
|
-
bySpeaker.set(speaker, { count: 1, first: text, last: text });
|
|
623980
|
+
bySpeaker.set(speaker, { count: 1, first: text, last: text, weightSum: weight, maxWeight: weight });
|
|
623778
623981
|
}
|
|
623779
623982
|
}
|
|
623780
|
-
const olderLines = [...bySpeaker.entries()].slice(0,
|
|
623983
|
+
const olderLines = [...bySpeaker.entries()].sort(([, a2], [, b]) => b.maxWeight - a2.maxWeight).slice(0, 5).map(([speaker, info]) => {
|
|
623781
623984
|
const range = info.first === info.last ? info.first : `${info.first} -> ${info.last}`;
|
|
623782
|
-
|
|
623985
|
+
const decayLabel = info.maxWeight >= 0.5 ? "fresh" : info.maxWeight >= 0.1 ? "decayed" : "stale";
|
|
623986
|
+
return `- ${speaker}: ${info.count} earlier msg(s) [${decayLabel}]; digest=${telegramContextJsonString(range, 200)}`;
|
|
623783
623987
|
});
|
|
623784
623988
|
if (olderLines.length > 0) {
|
|
623785
|
-
sections.push(`### Earlier Retained Thread Digest
|
|
623989
|
+
sections.push(`### Earlier Retained Thread Digest (recency-weighted)
|
|
623786
623990
|
${olderLines.join("\n")}`);
|
|
623787
623991
|
}
|
|
623788
623992
|
}
|
|
@@ -624096,7 +624300,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
|
|
|
624096
624300
|
const completionHeadroom = 4096;
|
|
624097
624301
|
const targetCtx = trainCtx && trainCtx > 0 ? Math.min(trainCtx, Math.max(2048, promptTokens + completionHeadroom)) : Math.max(2048, promptTokens + completionHeadroom);
|
|
624098
624302
|
const requestWithCtx = { ...request, numCtx: targetCtx };
|
|
624099
|
-
const
|
|
624303
|
+
const brokerBypass = process.env["OMNIUS_DISABLE_BROKER_ADMISSION"] === "1";
|
|
624304
|
+
const slot = brokerBypass ? null : await broker.acquireInferenceSlot({
|
|
624100
624305
|
model,
|
|
624101
624306
|
domain: "chat",
|
|
624102
624307
|
owner: `telegram-bridge/${kind}`,
|
|
@@ -624107,7 +624312,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
|
|
|
624107
624312
|
if (process.env["OMNIUS_BROKER_TRACE"] === "1") {
|
|
624108
624313
|
this.tuiWrite(() => renderTelegramSubAgentEvent(
|
|
624109
624314
|
sessionKey,
|
|
624110
|
-
`inference admitted [${kind}] model=${model} prompt~${promptTokens}t num_ctx=${targetCtx} slot=${slot.info.id}${slot
|
|
624315
|
+
`inference admitted [${kind}] model=${model} prompt~${promptTokens}t num_ctx=${targetCtx} slot=${slot ? slot.info.id : "bypass"}${slot?.info.reserved ? " reserved" : ""}`
|
|
624111
624316
|
));
|
|
624112
624317
|
}
|
|
624113
624318
|
const streamFn = backend.chatCompletionStream;
|
|
@@ -624136,10 +624341,10 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
|
|
|
624136
624341
|
}
|
|
624137
624342
|
const usage = result.usage;
|
|
624138
624343
|
completionTokens = usage?.completion_tokens ?? 0;
|
|
624139
|
-
slot
|
|
624344
|
+
slot?.release({ ok: true, completionTokens });
|
|
624140
624345
|
return result;
|
|
624141
624346
|
} catch (err) {
|
|
624142
|
-
slot
|
|
624347
|
+
slot?.release({ ok: false, error: err instanceof Error ? err.message : String(err) });
|
|
624143
624348
|
throw err;
|
|
624144
624349
|
} finally {
|
|
624145
624350
|
this.deregisterTelegramInference(id);
|
|
@@ -625140,34 +625345,25 @@ ${list}` : "No shared group target is currently known for this sender. Ask in th
|
|
|
625140
625345
|
return join131(this.repoRoot, ".omnius", "telegram-runner-state", safe);
|
|
625141
625346
|
}
|
|
625142
625347
|
buildTelegramAdminOverviewContext(currentSessionKey) {
|
|
625143
|
-
const sections = [];
|
|
625144
625348
|
this.ensureAllTelegramConversationsLoaded();
|
|
625145
625349
|
const chatEntries = [...this.chatHistory.entries()].filter(([sessionKey, history]) => sessionKey !== currentSessionKey && history.length > 0).sort(([, a2], [, b]) => (b[b.length - 1]?.ts ?? 0) - (a2[a2.length - 1]?.ts ?? 0)).slice(0, 18);
|
|
625350
|
+
if (chatEntries.length === 0) return "";
|
|
625351
|
+
const indexLines = [];
|
|
625146
625352
|
for (const [sessionKey, history] of chatEntries) {
|
|
625147
625353
|
const latest = history[history.length - 1];
|
|
625148
|
-
const
|
|
625149
|
-
|
|
625150
|
-
|
|
625151
|
-
}).
|
|
625152
|
-
const
|
|
625153
|
-
|
|
625154
|
-
|
|
625155
|
-
const cards = (this.chatMemoryCards.get(sessionKey) ?? []).slice(0, 4).map((card) => ` - ${card.title}: ${card.notes.slice(-1)[0] ?? ""}`).join("\n");
|
|
625156
|
-
sections.push([
|
|
625157
|
-
`- ${sessionKey} (chat_id ${String(latest.chatId ?? "unknown")}; ${latest.chatType || "chat"}${latest.chatTitle ? `: ${latest.chatTitle}` : ""})`,
|
|
625158
|
-
participants ? ` Participants: ${participants}` : "",
|
|
625159
|
-
` Latest: ${telegramHistorySpeaker(latest)}: ${truncateTelegramContextLine(latest.text, 180)}`,
|
|
625160
|
-
recent ? ` Recent:
|
|
625161
|
-
${recent}` : "",
|
|
625162
|
-
cards ? ` Memory cards:
|
|
625163
|
-
${cards}` : ""
|
|
625164
|
-
].filter(Boolean).join("\n"));
|
|
625165
|
-
}
|
|
625166
|
-
if (sections.length === 0) return "";
|
|
625354
|
+
const participantCount = this.chatParticipants.get(sessionKey)?.size ?? 0;
|
|
625355
|
+
const ageMs = Date.now() - (latest.ts ?? 0);
|
|
625356
|
+
const ageMin = Math.round(ageMs / 6e4);
|
|
625357
|
+
const ageStr = ageMin < 60 ? `${ageMin}m ago` : ageMin < 24 * 60 ? `${Math.round(ageMin / 60)}h ago` : `${Math.round(ageMin / (24 * 60))}d ago`;
|
|
625358
|
+
const label = latest.chatTitle ? `"${latest.chatTitle}"` : sessionKey;
|
|
625359
|
+
indexLines.push(`- ${label} (chat_id ${String(latest.chatId ?? "?")}; ${latest.chatType || "chat"}): ${participantCount} participants; last ${ageStr}; ${history.length} retained msgs`);
|
|
625360
|
+
}
|
|
625167
625361
|
return [
|
|
625168
|
-
"## Admin Telegram Omniscience",
|
|
625169
|
-
"
|
|
625170
|
-
|
|
625362
|
+
"## Admin Telegram Omniscience (index only)",
|
|
625363
|
+
"One-way context for the authenticated admin private DM. Other Telegram sessions the bot has observed are listed below with one line each.",
|
|
625364
|
+
"For details on a specific chat, use memory_search (mode=episodes) with a topic keyword or chat_id — the always-loaded view is intentionally compact.",
|
|
625365
|
+
"Never inject admin/private DM content into public groups.",
|
|
625366
|
+
indexLines.join("\n")
|
|
625171
625367
|
].join("\n\n");
|
|
625172
625368
|
}
|
|
625173
625369
|
buildTelegramSessionContext(msg, toolContext, profile, modelTier) {
|
|
@@ -626287,8 +626483,9 @@ ${conversationStream}`
|
|
|
626287
626483
|
messages: this.buildTelegramChatMessages(msg, toolContext, mediaContext),
|
|
626288
626484
|
tools: [],
|
|
626289
626485
|
temperature: 0.4,
|
|
626290
|
-
maxTokens:
|
|
626291
|
-
timeoutMs: Math.max(config.timeoutMs ?? 3e5, 12e4)
|
|
626486
|
+
maxTokens: 1500,
|
|
626487
|
+
timeoutMs: Math.max(config.timeoutMs ?? 3e5, 12e4),
|
|
626488
|
+
responseFormat: TELEGRAM_CHAT_REPLY_RESPONSE_FORMAT
|
|
626292
626489
|
});
|
|
626293
626490
|
let accumulated = "";
|
|
626294
626491
|
let streamError;
|
|
@@ -626315,7 +626512,8 @@ ${conversationStream}`
|
|
|
626315
626512
|
} else {
|
|
626316
626513
|
this.bumpTelegramInferenceTokens(inferenceId, 1, 0);
|
|
626317
626514
|
accumulated += piece;
|
|
626318
|
-
|
|
626515
|
+
const partial = extractPartialTelegramReplyJson(accumulated);
|
|
626516
|
+
if (partial !== null) await onToken(partial);
|
|
626319
626517
|
}
|
|
626320
626518
|
}
|
|
626321
626519
|
} catch (err) {
|
|
@@ -626337,11 +626535,14 @@ ${conversationStream}`
|
|
|
626337
626535
|
}
|
|
626338
626536
|
this.updateTelegramInferenceFinal(inferenceId, result);
|
|
626339
626537
|
accumulated = result.choices[0]?.message?.content ?? "";
|
|
626340
|
-
|
|
626538
|
+
const fullExtracted = extractPartialTelegramReplyJson(accumulated);
|
|
626539
|
+
if (fullExtracted) await onToken(fullExtracted);
|
|
626341
626540
|
}
|
|
626342
626541
|
} finally {
|
|
626343
626542
|
this.deregisterTelegramInference(inferenceId);
|
|
626344
626543
|
}
|
|
626544
|
+
const extracted = extractFinalTelegramReplyJson(accumulated);
|
|
626545
|
+
if (extracted) return extracted;
|
|
626345
626546
|
return stripTelegramHiddenThinking(accumulated).trim();
|
|
626346
626547
|
}
|
|
626347
626548
|
retainTelegramVisibleReplyDraft(subAgent, draft, streamToolNames = subAgent.currentStreamToolNames) {
|