omnius 1.0.135 → 1.0.137
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +644 -168
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -1337,29 +1337,36 @@ function ramSnapshotMB() {
|
|
|
1337
1337
|
const free = Math.round(freemem() / (1024 * 1024));
|
|
1338
1338
|
return { total, free, used: total - free };
|
|
1339
1339
|
}
|
|
1340
|
-
async function
|
|
1340
|
+
/**
 * Query `nvidia-smi` for a per-device VRAM snapshot.
 *
 * Short-circuits to an empty list once an earlier call has flagged
 * `nvidia-smi` as unavailable. Any failure of the command (missing binary,
 * non-zero exit, or the 3 s timeout) sets that flag, so later calls do not
 * spawn the process again.
 * NOTE(review): a single transient timeout therefore disables GPU detection
 * for the rest of the process lifetime — confirm that this is intended.
 *
 * stderr is deliberately NOT redirected inside the command string: `exec`
 * buffers the child's stderr instead of printing it, and a POSIX-only
 * `2>/dev/null` makes the whole command fail under Windows `cmd.exe`, which
 * would wrongly flag a working GPU host as having no `nvidia-smi`.
 *
 * @returns {Promise<Array<{index: number, uuid: string, total: number, used: number, free: number}>>}
 *   One entry per parseable CSV row. Memory figures are the integers printed
 *   by nvidia-smi with `nounits` (presumably MiB — confirm against the
 *   nvidia-smi documentation). Empty when the query fails or yields no rows.
 */
async function vramSnapshotPerDevice() {
  if (_nvSmiAvailable === false)
    return [];
  try {
    const out = await new Promise((resolve55, reject) => {
      exec(
        "nvidia-smi --query-gpu=index,uuid,memory.total,memory.used,memory.free --format=csv,noheader,nounits",
        { encoding: "utf8", timeout: 3e3 },
        (err, stdout) => err ? reject(err) : resolve55(stdout)
      );
    });
    _nvSmiAvailable = true;
    const devices = [];
    for (const line of out.trim().split("\n")) {
      // Tolerate blank lines (and stray "\r" from CRLF output).
      if (!line.trim())
        continue;
      const parts = line.split(",").map((s2) => s2.trim());
      // Expected columns: index, uuid, total, used, free.
      if (parts.length < 5)
        continue;
      const index = parseInt(parts[0] ?? "-1", 10);
      // Skip rows whose first column is not a non-negative integer.
      if (!Number.isFinite(index) || index < 0)
        continue;
      devices.push({
        index,
        uuid: parts[1] ?? "",
        // Non-numeric cells (e.g. "[N/A]") parse to NaN and fall back to 0.
        total: parseInt(parts[2] ?? "0", 10) || 0,
        used: parseInt(parts[3] ?? "0", 10) || 0,
        free: parseInt(parts[4] ?? "0", 10) || 0
      });
    }
    return devices;
  } catch {
    _nvSmiAvailable = false;
    return [];
  }
}
|
|
1365
1372
|
function getModelBroker() {
|
|
@@ -1374,7 +1381,7 @@ var init_model_broker = __esm({
|
|
|
1374
1381
|
DEFAULT_IDLE_EVICT_MS = 5 * 60 * 1e3;
|
|
1375
1382
|
DEFAULT_POLL_MS = 4e3;
|
|
1376
1383
|
DEFAULT_INFLIGHT_WAIT_MS = 6e4;
|
|
1377
|
-
DEFAULT_SLOT_CAPACITY =
|
|
1384
|
+
DEFAULT_SLOT_CAPACITY = 8;
|
|
1378
1385
|
DEFAULT_QUEUE_CAPACITY = 50;
|
|
1379
1386
|
THROUGHPUT_EMA_ALPHA = 0.2;
|
|
1380
1387
|
THROUGHPUT_INITIAL_TPS = 25;
|
|
@@ -1403,7 +1410,9 @@ var init_model_broker = __esm({
|
|
|
1403
1410
|
ramHeadroomMB = DEFAULT_RAM_HEADROOM_MB;
|
|
1404
1411
|
vramHeadroomMB = DEFAULT_VRAM_HEADROOM_MB;
|
|
1405
1412
|
idleEvictMs = DEFAULT_IDLE_EVICT_MS;
|
|
1406
|
-
/** Inference slot capacity (auto-tunes from Ollama
|
|
1413
|
+
/** Inference slot capacity (shared pool aggregate; auto-tunes from Ollama
|
|
1414
|
+
* pool size when known). Per-device cap defaults to ceil(slotCapacity/N)
|
|
1415
|
+
* unless overridden via setPerGpuSlotCapacity. */
|
|
1407
1416
|
slotCapacity = DEFAULT_SLOT_CAPACITY;
|
|
1408
1417
|
/** Maximum queue depth before queue pressure is emitted. */
|
|
1409
1418
|
queueCapacity = DEFAULT_QUEUE_CAPACITY;
|
|
@@ -1419,6 +1428,15 @@ var init_model_broker = __esm({
|
|
|
1419
1428
|
_throughput = /* @__PURE__ */ new Map();
|
|
1420
1429
|
/** Monotonic counter for slot ids. */
|
|
1421
1430
|
_slotIdSeq = 0;
|
|
1431
|
+
/** Per-GPU slot capacity override. When unset, broker derives a per-GPU
|
|
1432
|
+
* cap from slotCapacity / detected device count. */
|
|
1433
|
+
_perGpuSlotCapacity = /* @__PURE__ */ new Map();
|
|
1434
|
+
/** Cached per-device VRAM (refreshed by pollOnce). */
|
|
1435
|
+
_vramByDevice = [];
|
|
1436
|
+
/** Optional provider that maps an Ollama model name to its current GPU.
|
|
1437
|
+
* Wired by the CLI/orchestrator at startup so the broker can copy pool
|
|
1438
|
+
* affinity onto LoadedModel records without importing the pool directly. */
|
|
1439
|
+
_ollamaAffinityProvider = null;
|
|
1422
1440
|
static getInstance() {
|
|
1423
1441
|
if (!_ModelBroker._instance)
|
|
1424
1442
|
_ModelBroker._instance = new _ModelBroker();
|
|
@@ -1471,6 +1489,18 @@ var init_model_broker = __esm({
|
|
|
1471
1489
|
setOllamaBaseUrl(url) {
|
|
1472
1490
|
this._ollamaBaseUrl = url;
|
|
1473
1491
|
}
|
|
1492
|
+
/**
|
|
1493
|
+
* Wire a function that resolves an Ollama model name to its current GPU
|
|
1494
|
+
* affinity (from the Ollama pool's per-instance state). The CLI calls
|
|
1495
|
+
* this at startup with a closure over `getOllamaPool().status()` so the
|
|
1496
|
+
* broker can copy gpuIndex/gpuUuid onto LoadedModel records without
|
|
1497
|
+
* importing from @omnius/orchestrator (which would create a circular dep).
|
|
1498
|
+
*
|
|
1499
|
+
* Pass null to clear.
|
|
1500
|
+
*/
|
|
1501
|
+
setOllamaAffinityProvider(provider) {
|
|
1502
|
+
this._ollamaAffinityProvider = provider;
|
|
1503
|
+
}
|
|
1474
1504
|
/** One poll cycle — refreshes /api/ps and emits snapshot. */
|
|
1475
1505
|
async pollOnce() {
|
|
1476
1506
|
await Promise.all([
|
|
@@ -1539,30 +1569,44 @@ var init_model_broker = __esm({
|
|
|
1539
1569
|
const estVram = spec.estimatedVramMB ?? this.estimateFootprintVramMB(spec);
|
|
1540
1570
|
const estRam = spec.estimatedRamMB ?? this.estimateFootprintRamMB(spec);
|
|
1541
1571
|
const ram = ramSnapshotMB();
|
|
1542
|
-
const vram = await vramSnapshotMB();
|
|
1543
1572
|
const ramFitsAfter = ram.free - estRam >= this.ramHeadroomMB;
|
|
1544
|
-
const
|
|
1573
|
+
const devices = await vramSnapshotPerDevice();
|
|
1574
|
+
this._vramByDevice = devices;
|
|
1575
|
+
let chosenGpu = null;
|
|
1576
|
+
let vramFitsAfter = devices.length === 0;
|
|
1577
|
+
if (devices.length > 0) {
|
|
1578
|
+
const candidates = devices.filter((d2) => spec.preferredGpuIndex === void 0 || d2.index === spec.preferredGpuIndex).filter((d2) => d2.free - estVram >= this.vramHeadroomMB).sort((a2, b) => b.free - a2.free);
|
|
1579
|
+
if (candidates.length > 0) {
|
|
1580
|
+
chosenGpu = candidates[0].index;
|
|
1581
|
+
vramFitsAfter = true;
|
|
1582
|
+
}
|
|
1583
|
+
}
|
|
1545
1584
|
if (ramFitsAfter && vramFitsAfter) {
|
|
1546
|
-
const
|
|
1585
|
+
const decision2 = { kind: "ok", effectiveNumCtx, gpuIndex: chosenGpu };
|
|
1586
|
+
const promise = Promise.resolve(decision2);
|
|
1547
1587
|
this._inflight.set(key, { startedMs: Date.now(), owner: spec.owner, promise });
|
|
1548
1588
|
setTimeout(() => this._inflight.delete(key), spec.loadTimeoutMs ?? DEFAULT_INFLIGHT_WAIT_MS).unref?.();
|
|
1549
|
-
return
|
|
1589
|
+
return decision2;
|
|
1550
1590
|
}
|
|
1591
|
+
const targetGpu = chosenGpu ?? this.deviceWithMostPressureRelativeTo(devices, estVram);
|
|
1592
|
+
const needVramMB = vramFitsAfter ? 0 : targetGpu !== null ? estVram + this.vramHeadroomMB - (devices.find((d2) => d2.index === targetGpu)?.free ?? 0) : estVram + this.vramHeadroomMB;
|
|
1551
1593
|
const evictTargets = this.pickEvictionCandidates({
|
|
1552
|
-
needVramMB
|
|
1594
|
+
needVramMB,
|
|
1553
1595
|
needRamMB: ramFitsAfter ? 0 : estRam + this.ramHeadroomMB - ram.free,
|
|
1554
1596
|
requestingPriority: spec.priority ?? 0,
|
|
1555
|
-
requestingDomain: spec.domain
|
|
1597
|
+
requestingDomain: spec.domain,
|
|
1598
|
+
targetGpu
|
|
1556
1599
|
});
|
|
1557
1600
|
if (evictTargets.length > 0) {
|
|
1558
|
-
return { kind: "evict", evictTargets, effectiveNumCtx };
|
|
1601
|
+
return { kind: "evict", evictTargets, effectiveNumCtx, gpuIndex: targetGpu };
|
|
1559
1602
|
}
|
|
1560
1603
|
const fallback = await this.findRunnableFallback(spec);
|
|
1561
1604
|
if (fallback) {
|
|
1562
1605
|
this.emit("degraded", spec, fallback, "insufficient-memory-no-evictable");
|
|
1563
1606
|
return { kind: "degrade", fallback, reason: "insufficient-memory-no-evictable" };
|
|
1564
1607
|
}
|
|
1565
|
-
const
|
|
1608
|
+
const perDeviceSummary = devices.length === 0 ? "no GPU" : devices.map((d2) => `gpu${d2.index}=${d2.free}MB`).join(", ");
|
|
1609
|
+
const reason = `insufficient resources (need ~${estRam}MB RAM, ~${estVram}MB VRAM; free ${ram.free}MB RAM; VRAM ${perDeviceSummary}) and no evictable / fallback models`;
|
|
1566
1610
|
this.emit("rejected", spec, reason);
|
|
1567
1611
|
return { kind: "reject", reason };
|
|
1568
1612
|
}
|
|
@@ -1638,10 +1682,22 @@ var init_model_broker = __esm({
|
|
|
1638
1682
|
seen.add(key);
|
|
1639
1683
|
const vramMB = Math.round((m2.size_vram ?? 0) / (1024 * 1024));
|
|
1640
1684
|
const ramMB = Math.round(((m2.size ?? 0) - (m2.size_vram ?? 0)) / (1024 * 1024));
|
|
1685
|
+
let affinity = null;
|
|
1686
|
+
try {
|
|
1687
|
+
affinity = this._ollamaAffinityProvider ? this._ollamaAffinityProvider(m2.name) : null;
|
|
1688
|
+
} catch {
|
|
1689
|
+
affinity = null;
|
|
1690
|
+
}
|
|
1641
1691
|
const existing = this._loaded.get(key);
|
|
1642
1692
|
if (existing) {
|
|
1643
1693
|
existing.vramMB = vramMB || existing.vramMB;
|
|
1644
1694
|
existing.ramMB = ramMB || existing.ramMB;
|
|
1695
|
+
if (affinity) {
|
|
1696
|
+
if (affinity.gpuIndex !== null)
|
|
1697
|
+
existing.gpuIndex = affinity.gpuIndex;
|
|
1698
|
+
if (affinity.gpuUuid !== null)
|
|
1699
|
+
existing.gpuUuid = affinity.gpuUuid;
|
|
1700
|
+
}
|
|
1645
1701
|
} else {
|
|
1646
1702
|
const tracked = this.registerLoaded({
|
|
1647
1703
|
key,
|
|
@@ -1653,7 +1709,9 @@ var init_model_broker = __esm({
|
|
|
1653
1709
|
ramMB,
|
|
1654
1710
|
priority: 0,
|
|
1655
1711
|
loadedAt: now,
|
|
1656
|
-
lastUsedAt: now
|
|
1712
|
+
lastUsedAt: now,
|
|
1713
|
+
gpuIndex: affinity?.gpuIndex ?? null,
|
|
1714
|
+
gpuUuid: affinity?.gpuUuid ?? null
|
|
1657
1715
|
});
|
|
1658
1716
|
void tracked;
|
|
1659
1717
|
}
|
|
@@ -1746,7 +1804,8 @@ var init_model_broker = __esm({
|
|
|
1746
1804
|
m2.domain !== req2.requestingDomain || this.countByDomain(req2.requestingDomain) > 1
|
|
1747
1805
|
);
|
|
1748
1806
|
const idle = (m2) => now - m2.lastUsedAt > this.idleEvictMs;
|
|
1749
|
-
const
|
|
1807
|
+
const onTargetGpu = (m2) => req2.targetGpu === void 0 || req2.targetGpu === null ? true : m2.gpuIndex === req2.targetGpu;
|
|
1808
|
+
const evictable = [...this._loaded.values()].filter((m2) => m2.priority <= req2.requestingPriority).filter(sameDomainOk).filter(onTargetGpu).sort((a2, b) => {
|
|
1750
1809
|
const aIdle = idle(a2) ? 0 : 1;
|
|
1751
1810
|
const bIdle = idle(b) ? 0 : 1;
|
|
1752
1811
|
if (aIdle !== bIdle)
|
|
@@ -1767,6 +1826,24 @@ var init_model_broker = __esm({
|
|
|
1767
1826
|
return targets;
|
|
1768
1827
|
return [];
|
|
1769
1828
|
}
|
|
1829
|
+
/** Pick the GPU whose free-VRAM gap to the requested footprint is smallest
|
|
1830
|
+
* (i.e. closest to fitting). Used when no device cleanly fits — eviction
|
|
1831
|
+
* on this device has the best chance of opening room. Returns null when
|
|
1832
|
+
* no GPUs are detected. */
|
|
1833
|
+
deviceWithMostPressureRelativeTo(devices, needMB) {
|
|
1834
|
+
if (devices.length === 0)
|
|
1835
|
+
return null;
|
|
1836
|
+
let best = null;
|
|
1837
|
+
let bestGap = Infinity;
|
|
1838
|
+
for (const d2 of devices) {
|
|
1839
|
+
const gap = needMB - d2.free;
|
|
1840
|
+
if (gap < bestGap) {
|
|
1841
|
+
bestGap = gap;
|
|
1842
|
+
best = d2;
|
|
1843
|
+
}
|
|
1844
|
+
}
|
|
1845
|
+
return best?.index ?? null;
|
|
1846
|
+
}
|
|
1770
1847
|
countByDomain(domain) {
|
|
1771
1848
|
let n2 = 0;
|
|
1772
1849
|
for (const m2 of this._loaded.values())
|
|
@@ -1897,17 +1974,31 @@ var init_model_broker = __esm({
|
|
|
1897
1974
|
inflight: [...this._inflight.entries()].map(([key, v]) => ({ key, owner: v.owner, startedMs: v.startedMs })),
|
|
1898
1975
|
ramMB: ram,
|
|
1899
1976
|
vramMB: vram,
|
|
1977
|
+
vramPerDevice: [...this._vramByDevice],
|
|
1900
1978
|
lastPollAt: Date.now(),
|
|
1901
1979
|
slots: this.buildSlotsSnapshot()
|
|
1902
1980
|
};
|
|
1903
1981
|
}
|
|
1904
1982
|
buildSlotsSnapshot() {
|
|
1905
1983
|
const byModel = {};
|
|
1984
|
+
const byGpu = {};
|
|
1906
1985
|
for (const slot of this._activeSlots.values()) {
|
|
1907
1986
|
const k = slot.model;
|
|
1908
1987
|
if (!byModel[k])
|
|
1909
1988
|
byModel[k] = { inUse: 0, tokensPerSec: 0, samples: 0 };
|
|
1910
1989
|
byModel[k].inUse += 1;
|
|
1990
|
+
if (slot.gpuIndex !== null && slot.gpuIndex !== void 0) {
|
|
1991
|
+
if (!byGpu[slot.gpuIndex])
|
|
1992
|
+
byGpu[slot.gpuIndex] = { inUse: 0, capacity: this.perGpuSlotCapacity(slot.gpuIndex), loadedMB: 0 };
|
|
1993
|
+
byGpu[slot.gpuIndex].inUse += 1;
|
|
1994
|
+
}
|
|
1995
|
+
}
|
|
1996
|
+
for (const m2 of this._loaded.values()) {
|
|
1997
|
+
if (m2.gpuIndex !== null && m2.gpuIndex !== void 0) {
|
|
1998
|
+
if (!byGpu[m2.gpuIndex])
|
|
1999
|
+
byGpu[m2.gpuIndex] = { inUse: 0, capacity: this.perGpuSlotCapacity(m2.gpuIndex), loadedMB: 0 };
|
|
2000
|
+
byGpu[m2.gpuIndex].loadedMB += m2.vramMB;
|
|
2001
|
+
}
|
|
1911
2002
|
}
|
|
1912
2003
|
for (const [model, tp] of this._throughput) {
|
|
1913
2004
|
if (!byModel[model])
|
|
@@ -1915,23 +2006,46 @@ var init_model_broker = __esm({
|
|
|
1915
2006
|
byModel[model].tokensPerSec = tp.tokensPerSec;
|
|
1916
2007
|
byModel[model].samples = tp.samples;
|
|
1917
2008
|
}
|
|
2009
|
+
for (const d2 of this._vramByDevice) {
|
|
2010
|
+
if (!byGpu[d2.index])
|
|
2011
|
+
byGpu[d2.index] = { inUse: 0, capacity: this.perGpuSlotCapacity(d2.index), loadedMB: 0 };
|
|
2012
|
+
}
|
|
1918
2013
|
return {
|
|
1919
2014
|
inUse: this._activeSlots.size,
|
|
1920
2015
|
capacity: this.slotCapacity,
|
|
1921
2016
|
queueDepth: this._slotQueue.length,
|
|
1922
2017
|
queueCapacity: this.queueCapacity,
|
|
1923
|
-
byModel
|
|
2018
|
+
byModel,
|
|
2019
|
+
byGpu
|
|
1924
2020
|
};
|
|
1925
2021
|
}
|
|
2022
|
+
/** Per-GPU slot capacity. Returns the override when set, else ceil(slotCapacity / deviceCount). */
|
|
2023
|
+
perGpuSlotCapacity(gpuIndex) {
|
|
2024
|
+
const override = this._perGpuSlotCapacity.get(gpuIndex);
|
|
2025
|
+
if (override !== void 0)
|
|
2026
|
+
return override;
|
|
2027
|
+
const n2 = Math.max(1, this._vramByDevice.length);
|
|
2028
|
+
return Math.max(1, Math.ceil(this.slotCapacity / n2));
|
|
2029
|
+
}
|
|
1926
2030
|
async checkPressure(snap) {
|
|
1927
2031
|
if (snap.ramMB.free < this.ramHeadroomMB) {
|
|
1928
2032
|
this.emit("pressure", "ram", snap.ramMB.free, this.ramHeadroomMB);
|
|
1929
2033
|
}
|
|
1930
|
-
const
|
|
1931
|
-
|
|
1932
|
-
|
|
1933
|
-
|
|
1934
|
-
|
|
2034
|
+
const devices = await vramSnapshotPerDevice();
|
|
2035
|
+
this._vramByDevice = devices;
|
|
2036
|
+
if (devices.length > 0) {
|
|
2037
|
+
let total = 0, used = 0, free = 0;
|
|
2038
|
+
for (const d2 of devices) {
|
|
2039
|
+
total += d2.total;
|
|
2040
|
+
used += d2.used;
|
|
2041
|
+
free += d2.free;
|
|
2042
|
+
}
|
|
2043
|
+
snap.vramMB = { total, used, free };
|
|
2044
|
+
snap.vramPerDevice = devices;
|
|
2045
|
+
for (const d2 of devices) {
|
|
2046
|
+
if (d2.free < this.vramHeadroomMB) {
|
|
2047
|
+
this.emit("pressure", "vram", d2.free, this.vramHeadroomMB);
|
|
2048
|
+
}
|
|
1935
2049
|
}
|
|
1936
2050
|
}
|
|
1937
2051
|
const queueThreshold = Math.floor(this.queueCapacity * 0.8);
|
|
@@ -1960,23 +2074,54 @@ var init_model_broker = __esm({
|
|
|
1960
2074
|
* upstream callers (e.g. Telegram poll loop) should slow ingress.
|
|
1961
2075
|
*/
|
|
1962
2076
|
acquireInferenceSlot(spec) {
|
|
1963
|
-
|
|
2077
|
+
const chosenGpu = this.pickGpuForSlot(spec);
|
|
2078
|
+
const gpuOk = chosenGpu === null || this.activeSlotsOnGpu(chosenGpu) < this.perGpuSlotCapacity(chosenGpu);
|
|
2079
|
+
if (gpuOk && this._activeSlots.size < this.slotCapacity) {
|
|
1964
2080
|
return Promise.resolve(this.admitSlot(
|
|
1965
2081
|
spec,
|
|
1966
2082
|
/*reserved*/
|
|
1967
|
-
false
|
|
2083
|
+
false,
|
|
2084
|
+
chosenGpu
|
|
1968
2085
|
));
|
|
1969
2086
|
}
|
|
1970
2087
|
if (spec.sessionKey && !this._reservedBySession.has(spec.sessionKey) && this._activeSlots.size < this.slotCapacity + 1) {
|
|
1971
2088
|
const slot = this.admitSlot(
|
|
1972
2089
|
spec,
|
|
1973
2090
|
/*reserved*/
|
|
1974
|
-
true
|
|
2091
|
+
true,
|
|
2092
|
+
chosenGpu
|
|
1975
2093
|
);
|
|
1976
2094
|
this._reservedBySession.set(spec.sessionKey, slot.info.id);
|
|
1977
2095
|
return Promise.resolve(slot);
|
|
1978
2096
|
}
|
|
1979
2097
|
return new Promise((resolve55, reject) => {
|
|
2098
|
+
if (this._slotQueue.length >= this.queueCapacity) {
|
|
2099
|
+
const newPrio = spec.priority ?? 0;
|
|
2100
|
+
let victim = -1;
|
|
2101
|
+
let victimPrio = Infinity;
|
|
2102
|
+
for (let i2 = this._slotQueue.length - 1; i2 >= 0; i2--) {
|
|
2103
|
+
const p2 = this._slotQueue[i2].spec.priority ?? 0;
|
|
2104
|
+
if (p2 < victimPrio) {
|
|
2105
|
+
victimPrio = p2;
|
|
2106
|
+
victim = i2;
|
|
2107
|
+
}
|
|
2108
|
+
if (victimPrio === 0)
|
|
2109
|
+
break;
|
|
2110
|
+
}
|
|
2111
|
+
if (victim >= 0 && victimPrio < newPrio) {
|
|
2112
|
+
const dropped = this._slotQueue.splice(victim, 1)[0];
|
|
2113
|
+
if (dropped.onSignalAbort && dropped.spec.signal) {
|
|
2114
|
+
dropped.spec.signal.removeEventListener("abort", dropped.onSignalAbort);
|
|
2115
|
+
}
|
|
2116
|
+
try {
|
|
2117
|
+
dropped.reject(new Error("broker queue shed: capacity reached, lower-priority entry displaced"));
|
|
2118
|
+
} catch {
|
|
2119
|
+
}
|
|
2120
|
+
} else {
|
|
2121
|
+
reject(new Error(`broker queue full (capacity=${this.queueCapacity}); caller priority ${newPrio} insufficient to displace`));
|
|
2122
|
+
return;
|
|
2123
|
+
}
|
|
2124
|
+
}
|
|
1980
2125
|
const entry = { spec, resolve: resolve55, reject, enqueuedAt: Date.now() };
|
|
1981
2126
|
if (spec.signal) {
|
|
1982
2127
|
const onAbort = () => {
|
|
@@ -2011,7 +2156,7 @@ var init_model_broker = __esm({
|
|
|
2011
2156
|
});
|
|
2012
2157
|
}
|
|
2013
2158
|
/** Admit a slot — internal, called from acquire fast path and from drainQueue. */
|
|
2014
|
-
admitSlot(spec, reserved) {
|
|
2159
|
+
admitSlot(spec, reserved, gpuIndex = null) {
|
|
2015
2160
|
const id = `slot-${++this._slotIdSeq}-${Date.now().toString(36)}`;
|
|
2016
2161
|
const info = {
|
|
2017
2162
|
id,
|
|
@@ -2021,7 +2166,8 @@ var init_model_broker = __esm({
|
|
|
2021
2166
|
sessionKey: spec.sessionKey,
|
|
2022
2167
|
acquiredAt: Date.now(),
|
|
2023
2168
|
promptTokens: spec.promptTokens ?? 0,
|
|
2024
|
-
reserved
|
|
2169
|
+
reserved,
|
|
2170
|
+
gpuIndex
|
|
2025
2171
|
};
|
|
2026
2172
|
this._activeSlots.set(id, info);
|
|
2027
2173
|
this.emit("slotAcquired", info);
|
|
@@ -2037,6 +2183,35 @@ var init_model_broker = __esm({
|
|
|
2037
2183
|
}
|
|
2038
2184
|
};
|
|
2039
2185
|
}
|
|
2186
|
+
/** Count of active slots pinned to a given GPU. */
|
|
2187
|
+
activeSlotsOnGpu(gpuIndex) {
|
|
2188
|
+
let n2 = 0;
|
|
2189
|
+
for (const s2 of this._activeSlots.values()) {
|
|
2190
|
+
if (s2.gpuIndex === gpuIndex)
|
|
2191
|
+
n2++;
|
|
2192
|
+
}
|
|
2193
|
+
return n2;
|
|
2194
|
+
}
|
|
2195
|
+
/**
|
|
2196
|
+
* Pick a GPU for a new inference slot. Honors caller's preferredGpuIndex
|
|
2197
|
+
* when set; otherwise picks the GPU with the highest free VRAM that has
|
|
2198
|
+
* room for the estimated footprint and an open per-device slot.
|
|
2199
|
+
*
|
|
2200
|
+
* Returns null when no GPU is detected (CPU-only) or no device fits — in
|
|
2201
|
+
* the latter case the slot is admitted unpinned and the underlying
|
|
2202
|
+
* subprocess will pick whatever CUDA exposes by default.
|
|
2203
|
+
*/
|
|
2204
|
+
pickGpuForSlot(spec) {
|
|
2205
|
+
if (this._vramByDevice.length === 0)
|
|
2206
|
+
return null;
|
|
2207
|
+
const candidates = this._vramByDevice.filter((d2) => spec.preferredGpuIndex === void 0 || d2.index === spec.preferredGpuIndex).filter((d2) => this.activeSlotsOnGpu(d2.index) < this.perGpuSlotCapacity(d2.index)).filter((d2) => spec.estimatedVramMB === void 0 || d2.free >= spec.estimatedVramMB).sort((a2, b) => b.free - a2.free);
|
|
2208
|
+
return candidates[0]?.index ?? null;
|
|
2209
|
+
}
|
|
2210
|
+
/** Configure per-GPU slot capacity. Overrides the slotCapacity-derived default. */
|
|
2211
|
+
setPerGpuSlotCapacity(gpuIndex, capacity) {
|
|
2212
|
+
this._perGpuSlotCapacity.set(gpuIndex, Math.max(1, Math.floor(capacity)));
|
|
2213
|
+
this.drainSlotQueue();
|
|
2214
|
+
}
|
|
2040
2215
|
releaseSlot(info, outcome) {
|
|
2041
2216
|
this._activeSlots.delete(info.id);
|
|
2042
2217
|
if (info.sessionKey && this._reservedBySession.get(info.sessionKey) === info.id) {
|
|
@@ -2062,8 +2237,18 @@ var init_model_broker = __esm({
|
|
|
2062
2237
|
this.drainSlotQueue();
|
|
2063
2238
|
}
|
|
2064
2239
|
drainSlotQueue() {
|
|
2065
|
-
|
|
2066
|
-
|
|
2240
|
+
const queueCopy = [...this._slotQueue];
|
|
2241
|
+
for (const entry of queueCopy) {
|
|
2242
|
+
if (this._activeSlots.size >= this.slotCapacity)
|
|
2243
|
+
break;
|
|
2244
|
+
const chosenGpu = this.pickGpuForSlot(entry.spec);
|
|
2245
|
+
const gpuOk = chosenGpu === null || this.activeSlotsOnGpu(chosenGpu) < this.perGpuSlotCapacity(chosenGpu);
|
|
2246
|
+
if (!gpuOk)
|
|
2247
|
+
continue;
|
|
2248
|
+
const idx = this._slotQueue.indexOf(entry);
|
|
2249
|
+
if (idx < 0)
|
|
2250
|
+
continue;
|
|
2251
|
+
this._slotQueue.splice(idx, 1);
|
|
2067
2252
|
if (entry.onSignalAbort && entry.spec.signal) {
|
|
2068
2253
|
entry.spec.signal.removeEventListener("abort", entry.onSignalAbort);
|
|
2069
2254
|
}
|
|
@@ -2077,7 +2262,8 @@ var init_model_broker = __esm({
|
|
|
2077
2262
|
const slot = this.admitSlot(
|
|
2078
2263
|
entry.spec,
|
|
2079
2264
|
/*reserved*/
|
|
2080
|
-
false
|
|
2265
|
+
false,
|
|
2266
|
+
chosenGpu
|
|
2081
2267
|
);
|
|
2082
2268
|
try {
|
|
2083
2269
|
entry.resolve(slot);
|
|
@@ -19581,26 +19767,16 @@ function extractSkillForQuery(skill, content, query, budgetTokens = 900) {
|
|
|
19581
19767
|
/**
 * Build the compact "Skills Index" text block.
 *
 * Only aggregate counts are emitted: the total number of skills and a
 * per-source tally ordered by descending count (sources with equal counts
 * keep first-seen order). Individual skill names are not listed.
 *
 * Nouns are pluralised according to the counts, so a single skill reads
 * "1 skill available across 1 source" rather than "1 skills ... 1 sources".
 *
 * @param {Array<{source: string}>} skills Skill records; only `source` is read.
 * @returns {string} Three newline-joined lines, or "" when `skills` is empty.
 */
function buildSkillsSummary(skills) {
  if (skills.length === 0)
    return "";
  const bySource = /* @__PURE__ */ new Map();
  for (const s2 of skills) {
    bySource.set(s2.source, (bySource.get(s2.source) ?? 0) + 1);
  }
  const sourcesSummary = [...bySource.entries()]
    .sort((a2, b) => b[1] - a2[1])
    .map(([source, count]) => `${source}=${count}`)
    .join(", ");
  const skillNoun = skills.length === 1 ? "skill" : "skills";
  const sourceNoun = bySource.size === 1 ? "source" : "sources";
  return [
    "## Skills Index",
    `${skills.length} ${skillNoun} available across ${bySource.size} ${sourceNoun} (${sourcesSummary}).`,
    "Use `skill_list` (with optional `filter` or `source`) to search; `skill_execute <name>` to load full instructions."
  ].join("\n");
}
|
|
19605
19781
|
function safeReaddir2(dir, dirsOnly = false) {
|
|
19606
19782
|
try {
|
|
@@ -255412,6 +255588,11 @@ import sys
|
|
|
255412
255588
|
import time
|
|
255413
255589
|
from pathlib import Path
|
|
255414
255590
|
|
|
255591
|
+
# Broker-picked GPU pinning — MUST run before importing torch.
|
|
255592
|
+
_omnius_gpu = os.environ.get("OMNIUS_GPU_INDEX", "").strip()
|
|
255593
|
+
if _omnius_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ:
|
|
255594
|
+
os.environ["CUDA_VISIBLE_DEVICES"] = _omnius_gpu
|
|
255595
|
+
|
|
255415
255596
|
def _progress(stage, message, percent=None):
|
|
255416
255597
|
payload = {"omnius_progress": True, "stage": stage, "message": message}
|
|
255417
255598
|
if percent is not None:
|
|
@@ -255570,9 +255751,15 @@ if __name__ == "__main__":
|
|
|
255570
255751
|
SDCPP_RUNNER = String.raw`#!/usr/bin/env python3
|
|
255571
255752
|
import argparse
|
|
255572
255753
|
import json
|
|
255754
|
+
import os
|
|
255573
255755
|
import time
|
|
255574
255756
|
from pathlib import Path
|
|
255575
255757
|
|
|
255758
|
+
# Broker-picked GPU pinning — sd-cpp's CUDA backend honors CUDA_VISIBLE_DEVICES.
|
|
255759
|
+
_omnius_gpu = os.environ.get("OMNIUS_GPU_INDEX", "").strip()
|
|
255760
|
+
if _omnius_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ:
|
|
255761
|
+
os.environ["CUDA_VISIBLE_DEVICES"] = _omnius_gpu
|
|
255762
|
+
|
|
255576
255763
|
def main():
|
|
255577
255764
|
parser = argparse.ArgumentParser()
|
|
255578
255765
|
parser.add_argument("--model-path", required=True)
|
|
@@ -255686,6 +255873,9 @@ if __name__ == "__main__":
|
|
|
255686
255873
|
defaultModel;
|
|
255687
255874
|
defaultBackend;
|
|
255688
255875
|
promptExpander = null;
|
|
255876
|
+
/** Broker-chosen GPU pinning for the in-flight generation. Read by the
|
|
255877
|
+
* spawn path to set OMNIUS_GPU_INDEX in the subprocess env. */
|
|
255878
|
+
_brokerGpuIndex = null;
|
|
255689
255879
|
constructor(cwd4, ollamaUrl = "http://localhost:11434", defaults3 = {}) {
|
|
255690
255880
|
this.cwd = cwd4;
|
|
255691
255881
|
this.ollamaUrl = ollamaUrl.replace(/\/v1\/?$/, "").replace(/\/$/, "");
|
|
@@ -255761,6 +255951,7 @@ if __name__ == "__main__":
|
|
|
255761
255951
|
const candidates = imageGenerationFallbackCandidates(requestedModel, requestedBackend, generationFallbackEnabled(args));
|
|
255762
255952
|
const broker = getModelBroker();
|
|
255763
255953
|
const firstCandidate = candidates[0];
|
|
255954
|
+
let brokerGpuIndex = null;
|
|
255764
255955
|
if (firstCandidate) {
|
|
255765
255956
|
const decision2 = await broker.ensureModelLoadable({
|
|
255766
255957
|
name: firstCandidate.model,
|
|
@@ -255772,6 +255963,9 @@ if __name__ == "__main__":
|
|
|
255772
255963
|
for (const target of decision2.evictTargets) {
|
|
255773
255964
|
await broker.evict(target.host, target.name, "image-gen-needs-room");
|
|
255774
255965
|
}
|
|
255966
|
+
brokerGpuIndex = decision2.gpuIndex ?? null;
|
|
255967
|
+
} else if (decision2.kind === "ok") {
|
|
255968
|
+
brokerGpuIndex = decision2.gpuIndex ?? null;
|
|
255775
255969
|
} else if (decision2.kind === "reject") {
|
|
255776
255970
|
return {
|
|
255777
255971
|
success: false,
|
|
@@ -255781,6 +255975,7 @@ if __name__ == "__main__":
|
|
|
255781
255975
|
};
|
|
255782
255976
|
}
|
|
255783
255977
|
}
|
|
255978
|
+
this._brokerGpuIndex = brokerGpuIndex;
|
|
255784
255979
|
try {
|
|
255785
255980
|
return await this.generateCandidateLadder({ candidates, prompt, args, seed, start: start2 });
|
|
255786
255981
|
} catch (err) {
|
|
@@ -256283,10 +256478,14 @@ ${errText.slice(0, 800)}`,
|
|
|
256283
256478
|
}
|
|
256284
256479
|
ensureUnifiedCacheDirs();
|
|
256285
256480
|
this.emitProgress({ stage: "load", message: `Starting image generation with ${args.model}` });
|
|
256481
|
+
const runnerEnv = { ...python.env };
|
|
256482
|
+
if (this._brokerGpuIndex !== null) {
|
|
256483
|
+
runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
|
|
256484
|
+
}
|
|
256286
256485
|
const result = await runProcess2(python.command, argv, {
|
|
256287
256486
|
cwd: this.cwd,
|
|
256288
256487
|
timeoutMs: 9e5,
|
|
256289
|
-
env:
|
|
256488
|
+
env: runnerEnv,
|
|
256290
256489
|
progressLabel: `Downloading/loading ${args.model}`,
|
|
256291
256490
|
onProgress: (event) => this.emitProgress(event)
|
|
256292
256491
|
});
|
|
@@ -257582,9 +257781,14 @@ var init_audio_generate = __esm({
|
|
|
257582
257781
|
DEFAULT_MUSIC_MODEL
|
|
257583
257782
|
];
|
|
257584
257783
|
DIFFUSERS_AUDIO_RUNNER = String.raw`#!/usr/bin/env python3
|
|
257585
|
-
import argparse, json, sys, time
|
|
257784
|
+
import argparse, json, os, sys, time
|
|
257586
257785
|
from pathlib import Path
|
|
257587
257786
|
|
|
257787
|
+
# Broker-picked GPU pinning — must run before importing torch.
|
|
257788
|
+
_omnius_gpu = os.environ.get("OMNIUS_GPU_INDEX", "").strip()
|
|
257789
|
+
if _omnius_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ:
|
|
257790
|
+
os.environ["CUDA_VISIBLE_DEVICES"] = _omnius_gpu
|
|
257791
|
+
|
|
257588
257792
|
def _format_bytes(value):
|
|
257589
257793
|
try:
|
|
257590
257794
|
n = float(value)
|
|
@@ -257778,9 +257982,14 @@ if __name__ == "__main__":
|
|
|
257778
257982
|
main()
|
|
257779
257983
|
`;
|
|
257780
257984
|
TRANSFORMERS_AUDIO_RUNNER = String.raw`#!/usr/bin/env python3
|
|
257781
|
-
import argparse, json, sys, time
|
|
257985
|
+
import argparse, json, os, sys, time
|
|
257782
257986
|
from pathlib import Path
|
|
257783
257987
|
|
|
257988
|
+
# Broker-picked GPU pinning — must run before importing torch.
|
|
257989
|
+
_omnius_gpu = os.environ.get("OMNIUS_GPU_INDEX", "").strip()
|
|
257990
|
+
if _omnius_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ:
|
|
257991
|
+
os.environ["CUDA_VISIBLE_DEVICES"] = _omnius_gpu
|
|
257992
|
+
|
|
257784
257993
|
def _format_bytes(value):
|
|
257785
257994
|
try:
|
|
257786
257995
|
n = float(value)
|
|
@@ -258006,6 +258215,8 @@ if __name__ == "__main__":
|
|
|
258006
258215
|
progressHandler = null;
|
|
258007
258216
|
lastProgressMessage = "";
|
|
258008
258217
|
lastProgressAt = 0;
|
|
258218
|
+
/** Broker-chosen GPU pinning for the in-flight generation. */
|
|
258219
|
+
_brokerGpuIndex = null;
|
|
258009
258220
|
constructor(cwd4, defaults3 = {}) {
|
|
258010
258221
|
this.cwd = cwd4;
|
|
258011
258222
|
this.defaults = defaults3;
|
|
@@ -258171,6 +258382,7 @@ if __name__ == "__main__":
|
|
|
258171
258382
|
const playback = playbackRequested(args);
|
|
258172
258383
|
const broker = getModelBroker();
|
|
258173
258384
|
const firstCandidate = candidates[0];
|
|
258385
|
+
let brokerGpuIndex = null;
|
|
258174
258386
|
if (firstCandidate) {
|
|
258175
258387
|
const decision2 = await broker.ensureModelLoadable({
|
|
258176
258388
|
name: firstCandidate.model,
|
|
@@ -258182,6 +258394,9 @@ if __name__ == "__main__":
|
|
|
258182
258394
|
for (const target of decision2.evictTargets) {
|
|
258183
258395
|
await broker.evict(target.host, target.name, `${kind}-gen-needs-room`);
|
|
258184
258396
|
}
|
|
258397
|
+
brokerGpuIndex = decision2.gpuIndex ?? null;
|
|
258398
|
+
} else if (decision2.kind === "ok") {
|
|
258399
|
+
brokerGpuIndex = decision2.gpuIndex ?? null;
|
|
258185
258400
|
} else if (decision2.kind === "reject") {
|
|
258186
258401
|
return {
|
|
258187
258402
|
success: false,
|
|
@@ -258191,6 +258406,7 @@ if __name__ == "__main__":
|
|
|
258191
258406
|
};
|
|
258192
258407
|
}
|
|
258193
258408
|
}
|
|
258409
|
+
this._brokerGpuIndex = brokerGpuIndex;
|
|
258194
258410
|
try {
|
|
258195
258411
|
return await this.generateCandidateLadder({ kind, candidates, prompt, args, seed, playback, start: start2 });
|
|
258196
258412
|
} catch (err) {
|
|
@@ -258357,10 +258573,14 @@ if __name__ == "__main__":
|
|
|
258357
258573
|
}
|
|
258358
258574
|
ensureUnifiedCacheDirs();
|
|
258359
258575
|
this.emitProgress({ stage: "load", message: `Starting ${args.kind} generation with ${args.model}` });
|
|
258576
|
+
const runnerEnv = { ...python.env };
|
|
258577
|
+
if (this._brokerGpuIndex !== null) {
|
|
258578
|
+
runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
|
|
258579
|
+
}
|
|
258360
258580
|
const result = await runProcess3(python.command, argv, {
|
|
258361
258581
|
cwd: this.cwd,
|
|
258362
258582
|
timeoutMs: 9e5,
|
|
258363
|
-
env:
|
|
258583
|
+
env: runnerEnv,
|
|
258364
258584
|
progressLabel: `Downloading/loading ${args.model}`,
|
|
258365
258585
|
onProgress: (event) => this.emitProgress(event)
|
|
258366
258586
|
});
|
|
@@ -259130,7 +259350,7 @@ function parseRunnerJson3(stdout) {
|
|
|
259130
259350
|
}
|
|
259131
259351
|
return null;
|
|
259132
259352
|
}
|
|
259133
|
-
var DEFAULT_DIFFUSERS_VIDEO_MODEL, SANA_VIDEO_480P_MODEL, SANA_VIDEO_720P_MODEL, WAN_TI2V_5B_MODEL, WAN_T2V_A14B_MODEL, WAN_I2V_A14B_MODEL, WAN_S2V_14B_MODEL, COGVIDEOX_5B_MODEL, COGVIDEOX_2B_MODEL, COGVIDEOX_5B_I2V_MODEL, MOCHI_PREVIEW_MODEL, LTX_VIDEO_MODEL, LTX_2_3_MODEL, HUNYUAN_VIDEO_MODEL, DIFFUSERS_VIDEO_PACKAGES, VIDEO_GENERATION_MODEL_PRESETS, VIDEO_GENERATION_QUALITY_LADDER, VIDEO_AUDIO_QUALITY_LADDER, DIFFUSERS_VIDEO_RUNNER, COMFY_BOOTSTRAP_SCRIPT, COMFY_DEFAULT_WORKFLOWS, VideoGenerateTool;
|
|
259353
|
+
var DEFAULT_DIFFUSERS_VIDEO_MODEL, SANA_VIDEO_480P_MODEL, SANA_VIDEO_720P_MODEL, SANA_WM_BIDIRECTIONAL_MODEL, WAN_TI2V_5B_MODEL, WAN_T2V_A14B_MODEL, WAN_I2V_A14B_MODEL, WAN_S2V_14B_MODEL, COGVIDEOX_5B_MODEL, COGVIDEOX_2B_MODEL, COGVIDEOX_5B_I2V_MODEL, MOCHI_PREVIEW_MODEL, LTX_VIDEO_MODEL, LTX_2_3_MODEL, HUNYUAN_VIDEO_MODEL, DIFFUSERS_VIDEO_PACKAGES, VIDEO_GENERATION_MODEL_PRESETS, VIDEO_GENERATION_QUALITY_LADDER, VIDEO_AUDIO_QUALITY_LADDER, DIFFUSERS_VIDEO_RUNNER, COMFY_BOOTSTRAP_SCRIPT, COMFY_DEFAULT_WORKFLOWS, VideoGenerateTool;
|
|
259134
259354
|
var init_video_generate = __esm({
|
|
259135
259355
|
"packages/execution/dist/tools/video-generate.js"() {
|
|
259136
259356
|
"use strict";
|
|
@@ -259140,6 +259360,7 @@ var init_video_generate = __esm({
|
|
|
259140
259360
|
DEFAULT_DIFFUSERS_VIDEO_MODEL = "Efficient-Large-Model/SANA-Video_2B_480p";
|
|
259141
259361
|
SANA_VIDEO_480P_MODEL = "Efficient-Large-Model/SANA-Video_2B_480p";
|
|
259142
259362
|
SANA_VIDEO_720P_MODEL = "Efficient-Large-Model/SANA-Video_2B_720p";
|
|
259363
|
+
SANA_WM_BIDIRECTIONAL_MODEL = "Efficient-Large-Model/SANA-WM_bidirectional";
|
|
259143
259364
|
WAN_TI2V_5B_MODEL = "Wan-AI/Wan2.2-TI2V-5B-Diffusers";
|
|
259144
259365
|
WAN_T2V_A14B_MODEL = "Wan-AI/Wan2.2-T2V-A14B-Diffusers";
|
|
259145
259366
|
WAN_I2V_A14B_MODEL = "Wan-AI/Wan2.2-I2V-A14B-Diffusers";
|
|
@@ -259433,6 +259654,41 @@ var init_video_generate = __esm({
|
|
|
259433
259654
|
licenseNote: "Apache 2.0",
|
|
259434
259655
|
note: "Premium Wan T2V; cloud GPU recommended."
|
|
259435
259656
|
},
|
|
259657
|
+
{
|
|
259658
|
+
id: SANA_WM_BIDIRECTIONAL_MODEL,
|
|
259659
|
+
label: "SANA-WM bidirectional (world-model i2v)",
|
|
259660
|
+
kinds: ["i2v"],
|
|
259661
|
+
backend: "diffusers",
|
|
259662
|
+
// SANA-WM declares its concrete class in model_index.json; loaded via
|
|
259663
|
+
// generic DiffusionPipeline.from_pretrained — the runner's auto path
|
|
259664
|
+
// already does this for unknown model names.
|
|
259665
|
+
pipelineClass: "DiffusionPipeline",
|
|
259666
|
+
install: 'python3 .omnius/video-gen/diffusers_text2video.py --model Efficient-Large-Model/SANA-WM_bidirectional --mode i2v --num-frames 121 --fps 24 --width 704 --height 1280 --steps 30 --guidance 5.0 --image <input.png> --prompt "..." --output .omnius/videos/out.mp4',
|
|
259667
|
+
category: "Premium quality",
|
|
259668
|
+
sizeClass: "2.6B DiT + LTX-2 refiner (Sana World Model)",
|
|
259669
|
+
quality: "Image-to-video world model with optional camera-trajectory control. Two-stage generation (Sana DiT + LTX-2 refiner); hybrid linear attention; 6-DoF camera support via .npy matrices or WASD/IJKL action DSL.",
|
|
259670
|
+
output: "Up to ~13s 704×1280 (portrait 720p) MP4 at 24 fps; max 321 frames.",
|
|
259671
|
+
bestUse: "World-model / camera-controlled video from a single first-frame image. Best on H100/A100-class hardware.",
|
|
259672
|
+
minVramGB: 80,
|
|
259673
|
+
recommendedVramGB: 100,
|
|
259674
|
+
deployment: "Diffusers DiffusionPipeline.from_pretrained; bfloat16; aggressive CPU offload mandatory below 100 GB. Bundled LTX-2 refiner runs as stage 2.",
|
|
259675
|
+
steps: 30,
|
|
259676
|
+
guidance: 5,
|
|
259677
|
+
numFrames: 121,
|
|
259678
|
+
fps: 24,
|
|
259679
|
+
width: 704,
|
|
259680
|
+
height: 1280,
|
|
259681
|
+
dtype: "bfloat16",
|
|
259682
|
+
needsCpuOffload: true,
|
|
259683
|
+
frameQuantum: 1,
|
|
259684
|
+
pixelQuantum: 16,
|
|
259685
|
+
// Apache 2.0 base; bundled LTX-2 refiner + VAE inherit the LTX-2
|
|
259686
|
+
// non-commercial license. Surface that explicitly.
|
|
259687
|
+
licenseNote: "Apache 2.0 (bundled LTX-2 refiner/VAE inherit LTX-2 non-commercial terms)",
|
|
259688
|
+
approxDownloadGB: 99,
|
|
259689
|
+
fallbackFor: [WAN_I2V_A14B_MODEL],
|
|
259690
|
+
note: "Sana World Model bidirectional i2v; portrait 704×1280 fixed; camera control via --camera <matrices.npy> or --action <DSL> when the runner supports it."
|
|
259691
|
+
},
|
|
259436
259692
|
{
|
|
259437
259693
|
id: WAN_I2V_A14B_MODEL,
|
|
259438
259694
|
label: "Wan2.2 I2V A14B",
|
|
@@ -259561,6 +259817,9 @@ var init_video_generate = __esm({
|
|
|
259561
259817
|
COGVIDEOX_5B_MODEL,
|
|
259562
259818
|
MOCHI_PREVIEW_MODEL,
|
|
259563
259819
|
COGVIDEOX_2B_MODEL,
|
|
259820
|
+
// Heavy i2v / world-model tier — only attempted when an explicit model
|
|
259821
|
+
// is requested or the consumer-VRAM tier above has failed for an i2v ask.
|
|
259822
|
+
SANA_WM_BIDIRECTIONAL_MODEL,
|
|
259564
259823
|
WAN_I2V_A14B_MODEL,
|
|
259565
259824
|
WAN_T2V_A14B_MODEL,
|
|
259566
259825
|
HUNYUAN_VIDEO_MODEL
|
|
@@ -259579,6 +259838,16 @@ import sys
|
|
|
259579
259838
|
import time
|
|
259580
259839
|
from pathlib import Path
|
|
259581
259840
|
|
|
259841
|
+
# ── GPU pinning ─────────────────────────────────────────────────────
|
|
259842
|
+
# The TS broker picks a GPU per generation via bin-packing across the
|
|
259843
|
+
# available CUDA devices. It passes the chosen index in OMNIUS_GPU_INDEX.
|
|
259844
|
+
# We MUST apply CUDA_VISIBLE_DEVICES BEFORE importing torch, otherwise
|
|
259845
|
+
# torch initializes the device list with all visible GPUs and the model
|
|
259846
|
+
# may land on a different device than the broker reserved capacity on.
|
|
259847
|
+
_omnius_gpu = os.environ.get("OMNIUS_GPU_INDEX", "").strip()
|
|
259848
|
+
if _omnius_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ:
|
|
259849
|
+
os.environ["CUDA_VISIBLE_DEVICES"] = _omnius_gpu
|
|
259850
|
+
|
|
259582
259851
|
def _progress(stage, message, percent=None):
|
|
259583
259852
|
payload = {"omnius_progress": True, "stage": stage, "message": message}
|
|
259584
259853
|
if percent is not None:
|
|
@@ -260385,6 +260654,9 @@ if __name__ == "__main__":
|
|
|
260385
260654
|
defaultBackend;
|
|
260386
260655
|
defaultKind;
|
|
260387
260656
|
promptExpander = null;
|
|
260657
|
+
/** GPU index chosen by the broker for the in-flight generation. Read
|
|
260658
|
+
* by the spawn path to set OMNIUS_GPU_INDEX in the subprocess env. */
|
|
260659
|
+
_brokerGpuIndex = null;
|
|
260388
260660
|
constructor(cwd4, defaults3 = {}) {
|
|
260389
260661
|
this.cwd = cwd4;
|
|
260390
260662
|
this.defaultModel = defaults3.model;
|
|
@@ -260474,17 +260746,23 @@ if __name__ == "__main__":
|
|
|
260474
260746
|
const candidates = videoGenerationFallbackCandidates(requestedModel, requestedBackend, inferredKind, generationFallbackEnabled3(args), { preferNativeAudioVideo: withAudio || Boolean(audioInput) });
|
|
260475
260747
|
const broker = getModelBroker();
|
|
260476
260748
|
const firstCandidate = candidates[0];
|
|
260749
|
+
let brokerGpuIndex = null;
|
|
260477
260750
|
if (firstCandidate) {
|
|
260751
|
+
const preset = firstCandidate.preset;
|
|
260478
260752
|
const decision2 = await broker.ensureModelLoadable({
|
|
260479
260753
|
name: firstCandidate.model,
|
|
260480
260754
|
domain: "video-gen",
|
|
260481
260755
|
host: firstCandidate.backend === "comfyui" ? "comfyui" : "diffusers-py",
|
|
260482
|
-
owner: "video-generate-tool"
|
|
260756
|
+
owner: "video-generate-tool",
|
|
260757
|
+
estimatedVramMB: preset ? preset.minVramGB * 1024 : void 0
|
|
260483
260758
|
});
|
|
260484
260759
|
if (decision2.kind === "evict") {
|
|
260485
260760
|
for (const target of decision2.evictTargets) {
|
|
260486
260761
|
await broker.evict(target.host, target.name, "video-gen-needs-room");
|
|
260487
260762
|
}
|
|
260763
|
+
brokerGpuIndex = decision2.gpuIndex ?? null;
|
|
260764
|
+
} else if (decision2.kind === "ok") {
|
|
260765
|
+
brokerGpuIndex = decision2.gpuIndex ?? null;
|
|
260488
260766
|
} else if (decision2.kind === "reject") {
|
|
260489
260767
|
return {
|
|
260490
260768
|
success: false,
|
|
@@ -260494,6 +260772,7 @@ if __name__ == "__main__":
|
|
|
260494
260772
|
};
|
|
260495
260773
|
}
|
|
260496
260774
|
}
|
|
260775
|
+
this._brokerGpuIndex = brokerGpuIndex;
|
|
260497
260776
|
if (candidates.length === 0) {
|
|
260498
260777
|
return {
|
|
260499
260778
|
success: false,
|
|
@@ -260915,6 +261194,9 @@ ${llmAnnotation}` : result.llmContent;
|
|
|
260915
261194
|
runnerEnv["HF_TOKEN"] = effectiveToken;
|
|
260916
261195
|
runnerEnv["HUGGING_FACE_HUB_TOKEN"] = effectiveToken;
|
|
260917
261196
|
}
|
|
261197
|
+
if (this._brokerGpuIndex !== null) {
|
|
261198
|
+
runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
|
|
261199
|
+
}
|
|
260918
261200
|
const argv = [
|
|
260919
261201
|
runner,
|
|
260920
261202
|
"--model",
|
|
@@ -570264,18 +570546,6 @@ function formatReflection(notes2, scenario) {
|
|
|
570264
570546
|
];
|
|
570265
570547
|
return lines.join("\n");
|
|
570266
570548
|
}
|
|
570267
|
-
function formatMemory(input, state) {
|
|
570268
|
-
const lines = [];
|
|
570269
|
-
if (input.memoryContext) lines.push(input.memoryContext);
|
|
570270
|
-
if (state.dynamicState && Object.keys(state.dynamicState).length > 0) {
|
|
570271
|
-
const entries = Object.entries(state.dynamicState).slice(0, 12).map(([key, value2]) => `- ${key}: ${compactText(JSON.stringify(value2) ?? String(value2), 220)}`);
|
|
570272
|
-
lines.push(`Dynamic state:
|
|
570273
|
-
${entries.join("\n")}`);
|
|
570274
|
-
}
|
|
570275
|
-
if (state.updatedAt) lines.push(`State updated: ${state.updatedAt}`);
|
|
570276
|
-
if (lines.length === 0) return "No additional retrieved voice-soul memory beyond scoped personality and runtime state.";
|
|
570277
|
-
return lines.join("\n\n");
|
|
570278
|
-
}
|
|
570279
570549
|
function formatFinalVoice(input) {
|
|
570280
570550
|
const voice = findProjectVoice(input.scope);
|
|
570281
570551
|
const lines = [
|
|
@@ -570302,23 +570572,23 @@ function buildSoulContext(input) {
|
|
|
570302
570572
|
const state = loadSoulRuntimeState(input);
|
|
570303
570573
|
const scenario = resolveSoulScenario(input, state);
|
|
570304
570574
|
const tree2 = resolveSoulDecisionTree(input, state, scenario);
|
|
570305
|
-
|
|
570306
|
-
|
|
570307
|
-
"### 1. Authority And Safety Scope",
|
|
570575
|
+
const sections = ["## Voice Soul Context"];
|
|
570576
|
+
const voiceAndScope = [
|
|
570308
570577
|
formatAuthorityScope(input),
|
|
570309
|
-
"### 2. Core Identity",
|
|
570310
570578
|
formatCoreIdentity(input),
|
|
570311
|
-
"### 3. Procedural Decision Tree",
|
|
570312
|
-
formatProceduralConstraints(input, scenario, tree2, state),
|
|
570313
|
-
"### 4. Relationship State",
|
|
570314
|
-
formatRelationshipState(input),
|
|
570315
|
-
"### 5. Current Reflection Notes",
|
|
570316
|
-
formatReflection(input.currentReflection, scenario),
|
|
570317
|
-
"### 6. Minimal Retrieved Memory",
|
|
570318
|
-
formatMemory(input, state),
|
|
570319
|
-
"### 7. Final Voice Guidance",
|
|
570320
570579
|
formatFinalVoice(input)
|
|
570321
|
-
].join("\n\n");
|
|
570580
|
+
].filter(Boolean).join("\n\n");
|
|
570581
|
+
sections.push("### Voice + Scope + Identity", voiceAndScope);
|
|
570582
|
+
const decisionSubstrate = [
|
|
570583
|
+
formatRelationshipState(input),
|
|
570584
|
+
formatProceduralConstraints(input, scenario, tree2, state)
|
|
570585
|
+
].filter(Boolean).join("\n\n");
|
|
570586
|
+
sections.push("### Active Relationship + Scenario", decisionSubstrate);
|
|
570587
|
+
const reflection = formatReflection(input.currentReflection, scenario);
|
|
570588
|
+
if (reflection && reflection.trim().length > 0) {
|
|
570589
|
+
sections.push("### Current Reflection Notes", reflection);
|
|
570590
|
+
}
|
|
570591
|
+
return sections.join("\n\n");
|
|
570322
570592
|
}
|
|
570323
570593
|
var MAX_SOUL_CHARS, MAX_VOICE_CHARS, MAX_SCOPED_PERSONALITY_CHARS, UNCLASSIFIED_SCENARIO;
|
|
570324
570594
|
var init_voice_soul = __esm({
|
|
@@ -577249,7 +577519,32 @@ var init_status_bar = __esm({
|
|
|
577249
577519
|
if (this.active) this.renderFooterPreserveCursor();
|
|
577250
577520
|
}, intervalMs);
|
|
577251
577521
|
try {
|
|
577252
|
-
|
|
577522
|
+
const broker = getModelBroker();
|
|
577523
|
+
try {
|
|
577524
|
+
Promise.resolve().then(() => (init_dist8(), dist_exports3)).then(({ getOllamaPool: getOllamaPool2, resolveDefaultPoolConfig: resolveDefaultPoolConfig2 }) => {
|
|
577525
|
+
try {
|
|
577526
|
+
const config = resolveDefaultPoolConfig2();
|
|
577527
|
+
const pool3 = getOllamaPool2({ baseInstanceUrl: config.baseInstanceUrl });
|
|
577528
|
+
broker.setOllamaAffinityProvider((modelName) => {
|
|
577529
|
+
try {
|
|
577530
|
+
const status = pool3.status?.();
|
|
577531
|
+
if (!status) return null;
|
|
577532
|
+
for (const inst of status.instances ?? []) {
|
|
577533
|
+
void modelName;
|
|
577534
|
+
return { gpuIndex: inst.gpuIndex, gpuUuid: inst.gpuUuid };
|
|
577535
|
+
}
|
|
577536
|
+
return null;
|
|
577537
|
+
} catch {
|
|
577538
|
+
return null;
|
|
577539
|
+
}
|
|
577540
|
+
});
|
|
577541
|
+
} catch {
|
|
577542
|
+
}
|
|
577543
|
+
}).catch(() => {
|
|
577544
|
+
});
|
|
577545
|
+
} catch {
|
|
577546
|
+
}
|
|
577547
|
+
broker.startPolling(Math.max(2e3, intervalMs * 2));
|
|
577253
577548
|
} catch {
|
|
577254
577549
|
}
|
|
577255
577550
|
}
|
|
@@ -604352,14 +604647,22 @@ async function handleBroker(arg, _ctx) {
|
|
|
604352
604647
|
safeLog(` ${c3.bold("Resource Broker")}`);
|
|
604353
604648
|
safeLog("");
|
|
604354
604649
|
safeLog(` ${c3.dim("RAM:")} ${snap.ramMB.used} / ${snap.ramMB.total} MB used (${snap.ramMB.free} MB free)`);
|
|
604355
|
-
if (snap.
|
|
604650
|
+
if (snap.vramPerDevice.length > 0) {
|
|
604651
|
+
safeLog(` ${c3.bold("GPUs:")}`);
|
|
604652
|
+
for (const d2 of snap.vramPerDevice) {
|
|
604653
|
+
const gpuSlots = snap.slots.byGpu[d2.index];
|
|
604654
|
+
const slotInfo = gpuSlots ? ` slots=${gpuSlots.inUse}/${gpuSlots.capacity}, loaded=${gpuSlots.loadedMB}MB` : "";
|
|
604655
|
+
safeLog(` gpu${d2.index} (${d2.uuid.slice(0, 12)}…) ${d2.used} / ${d2.total} MB used (${d2.free} MB free)${slotInfo}`);
|
|
604656
|
+
}
|
|
604657
|
+
} else if (snap.vramMB) {
|
|
604356
604658
|
safeLog(` ${c3.dim("VRAM:")} ${snap.vramMB.used} / ${snap.vramMB.total} MB used (${snap.vramMB.free} MB free)`);
|
|
604357
604659
|
} else {
|
|
604358
604660
|
safeLog(` ${c3.dim("VRAM:")} ${c3.dim("(no GPU detected)")}`);
|
|
604359
604661
|
}
|
|
604360
604662
|
safeLog(` ${c3.dim("RAM headroom threshold:")} ${broker.ramHeadroomMB} MB`);
|
|
604361
|
-
safeLog(` ${c3.dim("VRAM headroom threshold:")} ${broker.vramHeadroomMB} MB`);
|
|
604663
|
+
safeLog(` ${c3.dim("VRAM headroom threshold:")} ${broker.vramHeadroomMB} MB (per-device)`);
|
|
604362
604664
|
safeLog(` ${c3.dim("Idle-evict threshold:")} ${Math.round(broker.idleEvictMs / 1e3)}s`);
|
|
604665
|
+
safeLog(` ${c3.dim("Slot capacity:")} ${snap.slots.inUse}/${snap.slots.capacity} active, queue ${snap.slots.queueDepth}/${snap.slots.queueCapacity}`);
|
|
604363
604666
|
safeLog("");
|
|
604364
604667
|
if (snap.loaded.length === 0) {
|
|
604365
604668
|
safeLog(` ${c3.dim("No loaded models tracked.")}`);
|
|
@@ -604370,7 +604673,8 @@ async function handleBroker(arg, _ctx) {
|
|
|
604370
604673
|
const idle = Math.round((now - m2.lastUsedAt) / 1e3);
|
|
604371
604674
|
const owner = m2.owner ? c3.dim(` [owner=${m2.owner}]`) : "";
|
|
604372
604675
|
const ctx3 = m2.numCtx ? c3.dim(` n_ctx=${m2.numCtx}`) : "";
|
|
604373
|
-
|
|
604676
|
+
const gpu = m2.gpuIndex !== null && m2.gpuIndex !== void 0 ? c3.dim(` gpu=${m2.gpuIndex}`) : "";
|
|
604677
|
+
safeLog(` ${c3.cyan(m2.name)} (${m2.host}/${m2.domain}) vram=${m2.vramMB}MB ram=${m2.ramMB}MB${gpu} idle=${idle}s${ctx3}${owner}`);
|
|
604374
604678
|
}
|
|
604375
604679
|
}
|
|
604376
604680
|
if (snap.inflight.length > 0) {
|
|
@@ -618122,6 +618426,95 @@ function parseTelegramSilentReflectionNotes(text) {
|
|
|
618122
618426
|
}
|
|
618123
618427
|
return null;
|
|
618124
618428
|
}
|
|
618429
|
+
/**
 * Best-effort extraction of the `reply` string from a buffer that may hold an
 * incomplete `{"reply": "..."}` JSON document.
 *
 * The buffer is first passed through `stripTelegramHiddenThinking` (defined
 * elsewhere in this file) and left-trimmed.
 *
 * @param {string} buffer2 - Raw text accumulated so far.
 * @returns {string|null}
 *   - If the stripped text does not start with `{`, the stripped text itself
 *     (or `null` when it is empty).
 *   - Otherwise the decoded contents of the `"reply"` string value, up to the
 *     closing quote or to the end of the buffer, whichever comes first.
 *   - `null` when the `"reply"` key, its colon, or its opening quote has not
 *     been seen yet, or when nothing has been decoded so far.
 */
function extractPartialTelegramReplyJson(buffer2) {
  const stripped = stripTelegramHiddenThinking(buffer2).trimStart();
  if (!stripped.startsWith("{")) {
    return stripped || null;
  }
  const keyMatch = stripped.indexOf('"reply"');
  if (keyMatch < 0) return null;

  // Advance past the key, the colon, and any whitespace to the opening quote.
  let i2 = keyMatch + '"reply"'.length;
  while (i2 < stripped.length && stripped[i2] !== ":") i2++;
  if (i2 >= stripped.length) return null;
  i2++;
  while (i2 < stripped.length && /\s/.test(stripped[i2])) i2++;
  if (i2 >= stripped.length || stripped[i2] !== '"') return null;
  i2++;

  let out = "";
  while (i2 < stripped.length) {
    const ch = stripped[i2];
    if (ch === '"') {
      // Unescaped quote: the string value is complete.
      return out;
    }
    if (ch !== "\\") {
      out += ch;
      i2++;
      continue;
    }
    const next = stripped[i2 + 1];
    // A trailing lone backslash means the escape is not complete yet.
    if (next === void 0) break;
    if (next === "u") {
      // Need all four hex digits before the escape can be decoded.
      if (i2 + 5 >= stripped.length) break;
      const hex = stripped.slice(i2 + 2, i2 + 6);
      // Validate strictly: parseInt("00zz", 16) would yield 0 and emit a NUL.
      if (/^[0-9a-fA-F]{4}$/.test(hex)) {
        out += String.fromCharCode(parseInt(hex, 16));
      }
      i2 += 6;
      continue;
    }
    switch (next) {
      case "n":
        out += "\n";
        break;
      case "t":
        out += "\t";
        break;
      case "r":
        out += "\r";
        break;
      case "b":
        out += "\b";
        break;
      case "f":
        out += "\f";
        break;
      default:
        // Covers \" \\ \/ and passes any unknown escape through literally.
        out += next;
    }
    i2 += 2;
  }
  return out.length > 0 ? out : null;
}
|
|
618475
|
+
/**
 * Extracts the final `reply` string from a completed model response that is
 * expected to be a `{"reply": "..."}` JSON object.
 *
 * Strategies are tried in order:
 *   1. Parse the whole stripped text as JSON.
 *   2. Parse only the first balanced top-level `{...}` prefix, which tolerates
 *      trailing text after the object.
 *   3. Fall back to `extractPartialTelegramReplyJson` for output that never
 *      closed.
 *
 * @param {string} buffer2 - Full response text.
 * @returns {string|null} The trimmed reply, or `null` when the text does not
 *   start with `{` or no reply string could be recovered.
 */
function extractFinalTelegramReplyJson(buffer2) {
  const text = stripTelegramHiddenThinking(buffer2).trim();
  if (!text.startsWith("{")) return null;

  // Returns the trimmed `reply` when `json` parses and carries a string reply;
  // otherwise null (parse failure or non-string reply).
  const replyFrom = (json) => {
    try {
      const parsed = JSON.parse(json);
      return typeof parsed.reply === "string" ? parsed.reply.trim() : null;
    } catch {
      return null;
    }
  };

  const whole = replyFrom(text);
  if (whole !== null) return whole;

  // Locate the closing brace of the first balanced top-level object, ignoring
  // braces that appear inside string literals.
  let closeAt = -1;
  let nesting = 0;
  let quoted = false;
  let skipNext = false;
  for (let pos = 0; pos < text.length && closeAt < 0; pos++) {
    const c = text[pos];
    if (skipNext) {
      skipNext = false;
    } else if (quoted) {
      if (c === "\\") skipNext = true;
      else if (c === '"') quoted = false;
    } else if (c === '"') {
      quoted = true;
    } else if (c === "{") {
      nesting++;
    } else if (c === "}") {
      nesting--;
      if (nesting === 0) closeAt = pos;
    }
  }

  if (closeAt > 0) {
    const fromPrefix = replyFrom(text.slice(0, closeAt + 1));
    if (fromPrefix !== null) return fromPrefix;
  }

  const partial = extractPartialTelegramReplyJson(text);
  const trimmed = partial ? partial.trim() : "";
  return trimmed.length > 0 ? trimmed : null;
}
|
|
618125
618518
|
function estimatePromptTokensFromRequest(request) {
|
|
618126
618519
|
let chars = 0;
|
|
618127
618520
|
for (const m2 of request.messages ?? []) {
|
|
@@ -618138,6 +618531,32 @@ function estimatePromptTokensFromRequest(request) {
|
|
|
618138
618531
|
}
|
|
618139
618532
|
return Math.ceil(chars / 4);
|
|
618140
618533
|
}
|
|
618534
|
+
/**
 * Heuristically decides whether `text` is a router-decision JSON object that
 * was cut off before its top-level `}`.
 *
 * A single leading `<think>...</think>` block is removed before inspection.
 * The result is `true` only when both conditions hold:
 *   - the text starts with `{` and the top-level object never closes, with
 *     braces inside string literals ignored;
 *   - at least three of the known router keys appear in the text.
 *
 * Scanning stops as soon as the top-level object closes, so a complete object
 * followed by trailing text is never reported as truncated, even if that
 * trailing text contains `{`.
 *
 * @param {unknown} text - Candidate model output.
 * @returns {boolean} `true` if the text looks like truncated router JSON.
 */
function isLikelyTruncatedRouterJson(text) {
  if (typeof text !== "string") return false;
  const stripped = text.replace(/^\s*<think>[\s\S]*?<\/think>\s*/i, "").trim();
  if (!stripped.startsWith("{")) return false;

  let depth = 0;
  let inString = false;
  let escape2 = false;
  for (let i2 = 0; i2 < stripped.length; i2++) {
    const ch = stripped[i2];
    if (escape2) {
      escape2 = false;
      continue;
    }
    if (inString) {
      if (ch === "\\") escape2 = true;
      else if (ch === '"') inString = false;
      continue;
    }
    if (ch === '"') inString = true;
    else if (ch === "{") depth++;
    else if (ch === "}") {
      depth--;
      // The top-level object is balanced: the JSON is complete, whatever follows.
      if (depth <= 0) return false;
    }
  }
  if (depth <= 0) return false;

  const routerKeys = [
    '"route"',
    '"should_reply"',
    '"confidence"',
    '"reason"',
    '"silent_disposition"',
    '"mental_note"'
  ];
  const hits = routerKeys.filter((key) => stripped.includes(key)).length;
  return hits >= 3;
}
|
|
618141
618560
|
function telegramRouterTimeoutMs(configTimeoutMs, _minMs, _legacyMaxMs) {
|
|
618142
618561
|
void _minMs;
|
|
618143
618562
|
void _legacyMaxMs;
|
|
@@ -619583,7 +620002,7 @@ function renderTelegramSubAgentError(username, error) {
|
|
|
619583
620002
|
process.stdout.write(` ${c3.dim("│")} ${c3.magenta("✘")} @${username}: ${c3.dim(preview)}
|
|
619584
620003
|
`);
|
|
619585
620004
|
}
|
|
619586
|
-
var TELEGRAM_TOOL_ACTION_GROUPS, TELEGRAM_TOOL_ACTION_GROUP, TELEGRAM_TOOL_MUTATING_GROUPS, DEFAULT_TELEGRAM_TOOL_GROUP_POLICY, TELEGRAM_TOOL_BUTTON_LABELS, TELEGRAM_SAFETY_PROMPT, ADMIN_DM_PROMPT, ADMIN_GROUP_PROMPT, TELEGRAM_PUBLIC_SOUL_PROFILE, TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT, TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT, TELEGRAM_PUBLIC_VISION_STACK_CONTRACT, GROUP_REPLY_DISCRETION_PROMPT, TELEGRAM_CHAT_MODE_PROMPT, ADMIN_CHAT_PROFILE_PROMPT, TELEGRAM_ACTION_RESPONSE_CONTRACT, TELEGRAM_EXTERNAL_ACQUISITION_CONTRACT, TELEGRAM_INTERACTION_DECISION_RESPONSE_FORMAT, TELEGRAM_STUCK_SELF_TALK_PREFIXES, TELEGRAM_CHAT_HISTORY_LIMIT, TELEGRAM_CONTEXT_RECENT_DEFAULT, TELEGRAM_CONTEXT_LINE_LIMIT, TELEGRAM_CONTEXT_SAMPLE_LIMIT, TELEGRAM_MEMORY_CARD_LIMIT, TELEGRAM_MEMORY_NOTE_LIMIT, TELEGRAM_ASSOCIATIVE_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_USER_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_ACTION_LIMIT, TELEGRAM_ASSOCIATIVE_RELATION_LIMIT, TELEGRAM_MEMORY_STOPWORDS, TELEGRAM_MEMORY_GENERIC_QUERY_TOKENS, TELEGRAM_SUB_AGENT_BOUNDED_OPTIONS, TELEGRAM_SUB_AGENT_DEFAULT_LIMIT, TELEGRAM_SUB_AGENT_MAX_LIMIT, TELEGRAM_SUB_AGENT_BURST_CONTEXT_LIMIT, TELEGRAM_PUBLIC_HELP_COMMANDS2, TELEGRAM_REMINDER_SLASH_COMMANDS, TELEGRAM_REFLECTION_SLASH_COMMANDS, TELEGRAM_PUBLIC_BOT_COMMAND_NAMES, TELEGRAM_IMAGE_EXTENSIONS, MEDIA_CACHE_TTL_MS, TELEGRAM_CHANNEL_DMN_SWEEP_MS, TELEGRAM_CHANNEL_DMN_IDLE_AFTER_MS, TELEGRAM_CHANNEL_DMN_MIN_INTERVAL_MS, TELEGRAM_CHANNEL_DMN_MIN_MESSAGES, TELEGRAM_ALLOWED_UPDATES, TELEGRAM_PUBLIC_TOOL_QUOTAS, TelegramBridge;
|
|
620005
|
+
var TELEGRAM_TOOL_ACTION_GROUPS, TELEGRAM_TOOL_ACTION_GROUP, TELEGRAM_TOOL_MUTATING_GROUPS, DEFAULT_TELEGRAM_TOOL_GROUP_POLICY, TELEGRAM_TOOL_BUTTON_LABELS, TELEGRAM_SAFETY_PROMPT, ADMIN_DM_PROMPT, ADMIN_GROUP_PROMPT, TELEGRAM_PUBLIC_SOUL_PROFILE, TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT, TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT, TELEGRAM_PUBLIC_VISION_STACK_CONTRACT, GROUP_REPLY_DISCRETION_PROMPT, TELEGRAM_CHAT_MODE_PROMPT, ADMIN_CHAT_PROFILE_PROMPT, TELEGRAM_ACTION_RESPONSE_CONTRACT, TELEGRAM_EXTERNAL_ACQUISITION_CONTRACT, TELEGRAM_INTERACTION_DECISION_RESPONSE_FORMAT, TELEGRAM_CHAT_REPLY_RESPONSE_FORMAT, TELEGRAM_STUCK_SELF_TALK_PREFIXES, TELEGRAM_CHAT_HISTORY_LIMIT, TELEGRAM_CONTEXT_RECENT_DEFAULT, TELEGRAM_CONTEXT_LINE_LIMIT, TELEGRAM_CONTEXT_SAMPLE_LIMIT, TELEGRAM_MEMORY_CARD_LIMIT, TELEGRAM_MEMORY_NOTE_LIMIT, TELEGRAM_ASSOCIATIVE_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_USER_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_ACTION_LIMIT, TELEGRAM_ASSOCIATIVE_RELATION_LIMIT, TELEGRAM_MEMORY_STOPWORDS, TELEGRAM_MEMORY_GENERIC_QUERY_TOKENS, TELEGRAM_SUB_AGENT_BOUNDED_OPTIONS, TELEGRAM_SUB_AGENT_DEFAULT_LIMIT, TELEGRAM_SUB_AGENT_MAX_LIMIT, TELEGRAM_SUB_AGENT_BURST_CONTEXT_LIMIT, TELEGRAM_PUBLIC_HELP_COMMANDS2, TELEGRAM_REMINDER_SLASH_COMMANDS, TELEGRAM_REFLECTION_SLASH_COMMANDS, TELEGRAM_PUBLIC_BOT_COMMAND_NAMES, TELEGRAM_IMAGE_EXTENSIONS, MEDIA_CACHE_TTL_MS, TELEGRAM_CHANNEL_DMN_SWEEP_MS, TELEGRAM_CHANNEL_DMN_IDLE_AFTER_MS, TELEGRAM_CHANNEL_DMN_MIN_INTERVAL_MS, TELEGRAM_CHANNEL_DMN_MIN_MESSAGES, TELEGRAM_ALLOWED_UPDATES, TELEGRAM_PUBLIC_TOOL_QUOTAS, TelegramBridge;
|
|
619587
620006
|
var init_telegram_bridge = __esm({
|
|
619588
620007
|
"packages/cli/src/tui/telegram-bridge.ts"() {
|
|
619589
620008
|
"use strict";
|
|
@@ -619817,6 +620236,12 @@ Rules:
|
|
|
619817
620236
|
7. Do not claim older chat is unavailable when the context stream contains it. If asked what you see, summarize the supplied transcript, speakers, and relationship/tone signals.
|
|
619818
620237
|
8. Mirror the current sender's tone and directness while staying safe and clear.
|
|
619819
620238
|
9. Never send router decisions, skip explanations, memory-stage notes, task-complete summaries, or "no_reply" as chat text.
|
|
620239
|
+
|
|
620240
|
+
Output discipline (your assistant message is sent verbatim to Telegram, ALL of it):
|
|
620241
|
+
- Emit ONLY the final reply text. Do not narrate your reasoning, summarize what you found, organize bullet-point notes, or write phrases like "Let me summarize", "Let me send the reply", "Now I have enough", "Based on the research", "Here's my response:" before the actual reply. Those are scratch-pad phrases that leak when emitted as visible text.
|
|
620242
|
+
- Do not produce a draft followed by the final answer. The first character of your output should be the first character of the message the user will receive.
|
|
620243
|
+
- If you need to think, do it silently. Do not write your reasoning steps as visible prose. If you have an internal scratchpad, keep it internal.
|
|
620244
|
+
- A reply that begins by restating what you found, then says something like "Let me write the response" or "Here's the breakdown", then gives the answer, is wrong twice over: the user sees the restatement AND the answer, doubling the message. Skip the restatement.
|
|
619820
620245
|
`.trim();
|
|
619821
620246
|
ADMIN_CHAT_PROFILE_PROMPT = `
|
|
619822
620247
|
You are replying to the authenticated Telegram admin in a private DM.
|
|
@@ -619849,6 +620274,24 @@ External acquisition contract:
|
|
|
619849
620274
|
TELEGRAM_INTERACTION_DECISION_RESPONSE_FORMAT = {
|
|
619850
620275
|
type: "json_object"
|
|
619851
620276
|
};
|
|
620277
|
+
TELEGRAM_CHAT_REPLY_RESPONSE_FORMAT = {
|
|
620278
|
+
type: "json_schema",
|
|
620279
|
+
json_schema: {
|
|
620280
|
+
name: "telegram_chat_reply",
|
|
620281
|
+
strict: true,
|
|
620282
|
+
schema: {
|
|
620283
|
+
type: "object",
|
|
620284
|
+
additionalProperties: false,
|
|
620285
|
+
required: ["reply"],
|
|
620286
|
+
properties: {
|
|
620287
|
+
reply: {
|
|
620288
|
+
type: "string",
|
|
620289
|
+
description: "The exact text to send to Telegram. No prefixes, no narration, no scratch reasoning, no bullet-point notes preceding the reply."
|
|
620290
|
+
}
|
|
620291
|
+
}
|
|
620292
|
+
}
|
|
620293
|
+
}
|
|
620294
|
+
};
|
|
619852
620295
|
TELEGRAM_STUCK_SELF_TALK_PREFIXES = [
|
|
619853
620296
|
/^i'?ve been stuck for\b/i,
|
|
619854
620297
|
/^i am (still |currently )?stuck\b/i,
|
|
@@ -622075,6 +622518,14 @@ ${mediaContext}` : ""
|
|
|
622075
622518
|
if (state.lastFollowupAt && now - state.lastFollowupAt < 60 * 6e4) {
|
|
622076
622519
|
return { sent: false, reason: "rate limit held public follow-up" };
|
|
622077
622520
|
}
|
|
622521
|
+
const cooldownEnv = Number.parseInt(process.env["OMNIUS_TG_FOLLOWUP_COOLDOWN_MS"] ?? "", 10);
|
|
622522
|
+
const cooldownMs = Number.isFinite(cooldownEnv) && cooldownEnv >= 6e4 ? cooldownEnv : 10 * 6e4;
|
|
622523
|
+
if (state.lastAssistantMessageAt && now - state.lastAssistantMessageAt < cooldownMs) {
|
|
622524
|
+
return {
|
|
622525
|
+
sent: false,
|
|
622526
|
+
reason: `recent assistant reply suppresses follow-up (${Math.round((now - state.lastAssistantMessageAt) / 1e3)}s ago, cooldown ${Math.round(cooldownMs / 1e3)}s)`
|
|
622527
|
+
};
|
|
622528
|
+
}
|
|
622078
622529
|
const candidateMessageIds = Array.from(new Set([
|
|
622079
622530
|
...artifact.curiosityThreads.flatMap((thread) => thread.sourceMessages ?? []),
|
|
622080
622531
|
...artifact.memoryProposals.flatMap((proposal) => proposal.sourceMessages ?? []),
|
|
@@ -622405,6 +622856,10 @@ ${mediaContext}` : ""
|
|
|
622405
622856
|
chatTitle: msg.chatTitle
|
|
622406
622857
|
};
|
|
622407
622858
|
this.recordChatHistory(sessionKey, entry);
|
|
622859
|
+
try {
|
|
622860
|
+
this.reflectionStateForSession(sessionKey).lastAssistantMessageAt = Date.now();
|
|
622861
|
+
} catch {
|
|
622862
|
+
}
|
|
622408
622863
|
this.persistTelegramAssistantMessage(
|
|
622409
622864
|
msg,
|
|
622410
622865
|
clean5,
|
|
@@ -623632,32 +624087,16 @@ ${lines.join("\n")}`);
|
|
|
623632
624087
|
sections.push(`### Participants And Relationship Signals${tierNote}
|
|
623633
624088
|
${participantLines.join("\n")}`);
|
|
623634
624089
|
}
|
|
623635
|
-
const
|
|
623636
|
-
|
|
623637
|
-
|
|
623638
|
-
isGroup ? 14 : 8
|
|
623639
|
-
);
|
|
623640
|
-
if (associativeContext) {
|
|
623641
|
-
sections.push(associativeContext);
|
|
623642
|
-
}
|
|
623643
|
-
const sqliteMirrorContext = this.relevantTelegramSqliteMirrorContext(
|
|
623644
|
-
sessionKey,
|
|
623645
|
-
msg,
|
|
623646
|
-
isGroup ? 14 : 8
|
|
623647
|
-
);
|
|
623648
|
-
if (sqliteMirrorContext) {
|
|
623649
|
-
sections.push(sqliteMirrorContext);
|
|
623650
|
-
}
|
|
623651
|
-
try {
|
|
623652
|
-
const episodicContext = this.relevantTelegramEpisodicMemoryContext(
|
|
624090
|
+
const ASSOCIATIVE_MIN_TURNS = isGroup ? 8 : 4;
|
|
624091
|
+
if (retainedCount >= ASSOCIATIVE_MIN_TURNS) {
|
|
624092
|
+
const associativeContext = this.relevantTelegramAssociativeMemoryContext(
|
|
623653
624093
|
sessionKey,
|
|
623654
624094
|
msg,
|
|
623655
|
-
isGroup ?
|
|
624095
|
+
isGroup ? 14 : 8
|
|
623656
624096
|
);
|
|
623657
|
-
if (
|
|
623658
|
-
sections.push(
|
|
624097
|
+
if (associativeContext) {
|
|
624098
|
+
sections.push(associativeContext);
|
|
623659
624099
|
}
|
|
623660
|
-
} catch {
|
|
623661
624100
|
}
|
|
623662
624101
|
const memoryCards = this.relevantTelegramMemoryCards(sessionKey, msg, isGroup ? 10 : 6);
|
|
623663
624102
|
if (memoryCards.length > 0) {
|
|
@@ -623688,10 +624127,6 @@ ${notes2}`;
|
|
|
623688
624127
|
${cardLines.join("\n")}`);
|
|
623689
624128
|
}
|
|
623690
624129
|
}
|
|
623691
|
-
const channelDaydream = this.formatLatestTelegramChannelDaydreamContext(sessionKey);
|
|
623692
|
-
if (channelDaydream) {
|
|
623693
|
-
sections.push(channelDaydream);
|
|
623694
|
-
}
|
|
623695
624130
|
const recentMedia = this.recentTelegramMediaEntries(msg.chatId, 10);
|
|
623696
624131
|
if (recentMedia.length > 0) {
|
|
623697
624132
|
const mediaLines = recentMedia.map((entry) => {
|
|
@@ -623710,26 +624145,33 @@ ${cardLines.join("\n")}`);
|
|
|
623710
624145
|
].join("\n"));
|
|
623711
624146
|
}
|
|
623712
624147
|
if (olderCount > 0) {
|
|
624148
|
+
const halfLifeMs = (isGroup ? 24 : 48) * 60 * 60 * 1e3;
|
|
624149
|
+
const now = Date.now();
|
|
623713
624150
|
const older = history.slice(0, olderCount);
|
|
623714
624151
|
const bySpeaker = /* @__PURE__ */ new Map();
|
|
623715
624152
|
for (const entry of older) {
|
|
623716
624153
|
if (!entry.text.trim()) continue;
|
|
623717
624154
|
const speaker = telegramHistorySpeaker(entry);
|
|
624155
|
+
const ageMs = Math.max(0, now - (entry.ts ?? 0));
|
|
624156
|
+
const weight = Math.exp(-ageMs / halfLifeMs);
|
|
623718
624157
|
const existing = bySpeaker.get(speaker);
|
|
623719
624158
|
const text = truncateTelegramContextLine(entry.text, 180);
|
|
623720
624159
|
if (existing) {
|
|
623721
624160
|
existing.count += 1;
|
|
623722
624161
|
existing.last = text;
|
|
624162
|
+
existing.weightSum += weight;
|
|
624163
|
+
existing.maxWeight = Math.max(existing.maxWeight, weight);
|
|
623723
624164
|
} else {
|
|
623724
|
-
bySpeaker.set(speaker, { count: 1, first: text, last: text });
|
|
624165
|
+
bySpeaker.set(speaker, { count: 1, first: text, last: text, weightSum: weight, maxWeight: weight });
|
|
623725
624166
|
}
|
|
623726
624167
|
}
|
|
623727
|
-
const olderLines = [...bySpeaker.entries()].slice(0,
|
|
624168
|
+
const olderLines = [...bySpeaker.entries()].sort(([, a2], [, b]) => b.maxWeight - a2.maxWeight).slice(0, 5).map(([speaker, info]) => {
|
|
623728
624169
|
const range = info.first === info.last ? info.first : `${info.first} -> ${info.last}`;
|
|
623729
|
-
|
|
624170
|
+
const decayLabel = info.maxWeight >= 0.5 ? "fresh" : info.maxWeight >= 0.1 ? "decayed" : "stale";
|
|
624171
|
+
return `- ${speaker}: ${info.count} earlier msg(s) [${decayLabel}]; digest=${telegramContextJsonString(range, 200)}`;
|
|
623730
624172
|
});
|
|
623731
624173
|
if (olderLines.length > 0) {
|
|
623732
|
-
sections.push(`### Earlier Retained Thread Digest
|
|
624174
|
+
sections.push(`### Earlier Retained Thread Digest (recency-weighted)
|
|
623733
624175
|
${olderLines.join("\n")}`);
|
|
623734
624176
|
}
|
|
623735
624177
|
}
|
|
@@ -623949,7 +624391,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
|
|
|
623949
624391
|
],
|
|
623950
624392
|
tools: [],
|
|
623951
624393
|
temperature: 0,
|
|
623952
|
-
|
|
624394
|
+
// Reflection has 12 string fields; 650 was tight enough to truncate.
|
|
624395
|
+
maxTokens: 1500,
|
|
623953
624396
|
timeoutMs: telegramRouterTimeoutMs(timeoutMs),
|
|
623954
624397
|
think: false
|
|
623955
624398
|
},
|
|
@@ -624039,9 +624482,11 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
|
|
|
624039
624482
|
const promptTokens = estimatePromptTokensFromRequest(request);
|
|
624040
624483
|
const broker = getModelBroker();
|
|
624041
624484
|
const trainCtx = await broker.getNctxTrain(model).catch(() => null);
|
|
624042
|
-
const
|
|
624485
|
+
const completionHeadroom = 4096;
|
|
624486
|
+
const targetCtx = trainCtx && trainCtx > 0 ? Math.min(trainCtx, Math.max(2048, promptTokens + completionHeadroom)) : Math.max(2048, promptTokens + completionHeadroom);
|
|
624043
624487
|
const requestWithCtx = { ...request, numCtx: targetCtx };
|
|
624044
|
-
const
|
|
624488
|
+
const brokerBypass = process.env["OMNIUS_DISABLE_BROKER_ADMISSION"] === "1";
|
|
624489
|
+
const slot = brokerBypass ? null : await broker.acquireInferenceSlot({
|
|
624045
624490
|
model,
|
|
624046
624491
|
domain: "chat",
|
|
624047
624492
|
owner: `telegram-bridge/${kind}`,
|
|
@@ -624049,10 +624494,12 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
|
|
|
624049
624494
|
promptTokens,
|
|
624050
624495
|
priority: kind === "router" || kind === "router-repair" || kind === "router-strict-retry" ? 1 : 0
|
|
624051
624496
|
});
|
|
624052
|
-
|
|
624053
|
-
|
|
624054
|
-
|
|
624055
|
-
|
|
624497
|
+
if (process.env["OMNIUS_BROKER_TRACE"] === "1") {
|
|
624498
|
+
this.tuiWrite(() => renderTelegramSubAgentEvent(
|
|
624499
|
+
sessionKey,
|
|
624500
|
+
`inference admitted [${kind}] model=${model} prompt~${promptTokens}t num_ctx=${targetCtx} slot=${slot ? slot.info.id : "bypass"}${slot?.info.reserved ? " reserved" : ""}`
|
|
624501
|
+
));
|
|
624502
|
+
}
|
|
624056
624503
|
const streamFn = backend.chatCompletionStream;
|
|
624057
624504
|
const id = this.registerTelegramInference(kind, sessionKey, model);
|
|
624058
624505
|
let completionTokens = 0;
|
|
@@ -624079,10 +624526,10 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
|
|
|
624079
624526
|
}
|
|
624080
624527
|
const usage = result.usage;
|
|
624081
624528
|
completionTokens = usage?.completion_tokens ?? 0;
|
|
624082
|
-
slot
|
|
624529
|
+
slot?.release({ ok: true, completionTokens });
|
|
624083
624530
|
return result;
|
|
624084
624531
|
} catch (err) {
|
|
624085
|
-
slot
|
|
624532
|
+
slot?.release({ ok: false, error: err instanceof Error ? err.message : String(err) });
|
|
624086
624533
|
throw err;
|
|
624087
624534
|
} finally {
|
|
624088
624535
|
this.deregisterTelegramInference(id);
|
|
@@ -624274,7 +624721,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
|
|
|
624274
624721
|
getTelegramThinkingVisible() {
|
|
624275
624722
|
return this.telegramThinkingVisible;
|
|
624276
624723
|
}
|
|
624277
|
-
async repairTelegramInteractionDecision(backend, rawOutput, forcedRoute, timeoutMs, diagnostics) {
|
|
624724
|
+
async repairTelegramInteractionDecision(backend, rawOutput, forcedRoute, timeoutMs, diagnostics, sessionKey = "__router__") {
|
|
624278
624725
|
const rawPreview = telegramRouterRawPreview(rawOutput, 4e3);
|
|
624279
624726
|
if (!rawPreview || telegramDecisionOutputHasDanglingJson(rawOutput)) {
|
|
624280
624727
|
if (diagnostics) {
|
|
@@ -624309,10 +624756,10 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
|
|
|
624309
624756
|
],
|
|
624310
624757
|
tools: [],
|
|
624311
624758
|
temperature: 0,
|
|
624312
|
-
maxTokens:
|
|
624759
|
+
maxTokens: 1500,
|
|
624313
624760
|
timeoutMs: telegramRouterTimeoutMs(timeoutMs, 8e3, 2e4),
|
|
624314
624761
|
think: false
|
|
624315
|
-
});
|
|
624762
|
+
}, diagnostics, "router-repair", sessionKey);
|
|
624316
624763
|
const repairedText = result.choices[0]?.message?.content ?? "";
|
|
624317
624764
|
if (telegramDecisionRecoverableFlag(repairedText) === false) {
|
|
624318
624765
|
if (diagnostics) diagnostics.repairStatus = "no-recoverable-output";
|
|
@@ -624344,7 +624791,7 @@ ${repairedText}`,
|
|
|
624344
624791
|
return null;
|
|
624345
624792
|
}
|
|
624346
624793
|
}
|
|
624347
|
-
async retryTelegramInteractionDecisionStrict(backend, userPrompt, rawOutput, forcedRoute, timeoutMs, diagnostics) {
|
|
624794
|
+
async retryTelegramInteractionDecisionStrict(backend, userPrompt, rawOutput, forcedRoute, timeoutMs, diagnostics, sessionKey = "__router__") {
|
|
624348
624795
|
const invalidPreview = telegramRouterRawPreview(rawOutput, 1200) ?? "(empty assistant content)";
|
|
624349
624796
|
const routeInstruction = forcedRoute ? `The operator selected Telegram mode "${forcedRoute}". The route field must be "${forcedRoute}", but should_reply must still be inferred from context.` : `Infer route live from context.`;
|
|
624350
624797
|
const trimmedUserPrompt = userPrompt.length > 4e3 ? `…
|
|
@@ -624376,10 +624823,10 @@ ${userPrompt.slice(-4e3)}` : userPrompt;
|
|
|
624376
624823
|
],
|
|
624377
624824
|
tools: [],
|
|
624378
624825
|
temperature: 0,
|
|
624379
|
-
maxTokens:
|
|
624826
|
+
maxTokens: 2400,
|
|
624380
624827
|
timeoutMs: telegramRouterTimeoutMs(timeoutMs, 1e4, 3e4),
|
|
624381
624828
|
think: false
|
|
624382
|
-
});
|
|
624829
|
+
}, diagnostics, "router-strict-retry", sessionKey);
|
|
624383
624830
|
const retryText = result.choices[0]?.message?.content ?? "";
|
|
624384
624831
|
if (diagnostics) diagnostics.strictRetryPreview = telegramRouterRawPreview(retryText, 320);
|
|
624385
624832
|
const parsed = parseTelegramInteractionDecision(retryText, forcedRoute, {
|
|
@@ -624762,10 +625209,14 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
|
|
|
624762
625209
|
],
|
|
624763
625210
|
tools: [],
|
|
624764
625211
|
temperature: 0,
|
|
624765
|
-
|
|
625212
|
+
// Router JSON schema has ~18 string-valued fields when reflection is
|
|
625213
|
+
// embedded (consolidated mode). 1000 tokens was the documented cause
|
|
625214
|
+
// of truncated JSON → repair → strict-retry cascade. 2400 is enough
|
|
625215
|
+
// for normal verbose values without slowing the call appreciably.
|
|
625216
|
+
maxTokens: 2400,
|
|
624766
625217
|
timeoutMs: telegramRouterTimeoutMs(config.timeoutMs),
|
|
624767
625218
|
think: false
|
|
624768
|
-
}, diagnostics);
|
|
625219
|
+
}, diagnostics, "router", sessionKey);
|
|
624769
625220
|
const text = result.choices[0]?.message?.content ?? "";
|
|
624770
625221
|
const routerLatencyMs = Date.now() - routerStartMs;
|
|
624771
625222
|
try {
|
|
@@ -624788,12 +625239,40 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
|
|
|
624788
625239
|
if (parsed) {
|
|
624789
625240
|
return this.applyTelegramSilentReflectionNotes(parsed, reflectionNotes);
|
|
624790
625241
|
}
|
|
625242
|
+
if (isLikelyTruncatedRouterJson(text)) {
|
|
625243
|
+
if (diagnostics) diagnostics.repairStatus = "skipped-truncation-rerun";
|
|
625244
|
+
try {
|
|
625245
|
+
const reissued = await this.telegramRouterJsonCompletion(backend, {
|
|
625246
|
+
messages: [
|
|
625247
|
+
{
|
|
625248
|
+
role: "system",
|
|
625249
|
+
content: "You perform live Telegram route and stimulation inference. Output strict JSON only."
|
|
625250
|
+
},
|
|
625251
|
+
{ role: "user", content: userPrompt }
|
|
625252
|
+
],
|
|
625253
|
+
tools: [],
|
|
625254
|
+
temperature: 0,
|
|
625255
|
+
maxTokens: 4096,
|
|
625256
|
+
timeoutMs: telegramRouterTimeoutMs(config.timeoutMs),
|
|
625257
|
+
think: false
|
|
625258
|
+
}, diagnostics, "router", sessionKey);
|
|
625259
|
+
const reissuedText = reissued.choices[0]?.message?.content ?? "";
|
|
625260
|
+
const reparsed = parseTelegramInteractionDecision(reissuedText, forcedRoute, {
|
|
625261
|
+
defaultShouldReply: false
|
|
625262
|
+
});
|
|
625263
|
+
if (reparsed) {
|
|
625264
|
+
return this.applyTelegramSilentReflectionNotes(reparsed, reflectionNotes);
|
|
625265
|
+
}
|
|
625266
|
+
} catch {
|
|
625267
|
+
}
|
|
625268
|
+
}
|
|
624791
625269
|
const repaired = await this.repairTelegramInteractionDecision(
|
|
624792
625270
|
backend,
|
|
624793
625271
|
text,
|
|
624794
625272
|
forcedRoute,
|
|
624795
625273
|
config.timeoutMs ?? 3e4,
|
|
624796
|
-
diagnostics
|
|
625274
|
+
diagnostics,
|
|
625275
|
+
sessionKey
|
|
624797
625276
|
);
|
|
624798
625277
|
if (repaired) {
|
|
624799
625278
|
return this.applyTelegramSilentReflectionNotes(repaired, reflectionNotes);
|
|
@@ -624804,7 +625283,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
|
|
|
624804
625283
|
text,
|
|
624805
625284
|
forcedRoute,
|
|
624806
625285
|
config.timeoutMs ?? 3e4,
|
|
624807
|
-
diagnostics
|
|
625286
|
+
diagnostics,
|
|
625287
|
+
sessionKey
|
|
624808
625288
|
);
|
|
624809
625289
|
if (strictRetry) {
|
|
624810
625290
|
return this.applyTelegramSilentReflectionNotes(strictRetry, reflectionNotes);
|
|
@@ -625050,34 +625530,25 @@ ${list}` : "No shared group target is currently known for this sender. Ask in th
|
|
|
625050
625530
|
return join131(this.repoRoot, ".omnius", "telegram-runner-state", safe);
|
|
625051
625531
|
}
|
|
625052
625532
|
buildTelegramAdminOverviewContext(currentSessionKey) {
|
|
625053
|
-
const sections = [];
|
|
625054
625533
|
this.ensureAllTelegramConversationsLoaded();
|
|
625055
625534
|
const chatEntries = [...this.chatHistory.entries()].filter(([sessionKey, history]) => sessionKey !== currentSessionKey && history.length > 0).sort(([, a2], [, b]) => (b[b.length - 1]?.ts ?? 0) - (a2[a2.length - 1]?.ts ?? 0)).slice(0, 18);
|
|
625535
|
+
if (chatEntries.length === 0) return "";
|
|
625536
|
+
const indexLines = [];
|
|
625056
625537
|
for (const [sessionKey, history] of chatEntries) {
|
|
625057
625538
|
const latest = history[history.length - 1];
|
|
625058
|
-
const
|
|
625059
|
-
|
|
625060
|
-
|
|
625061
|
-
}).
|
|
625062
|
-
const
|
|
625063
|
-
|
|
625064
|
-
|
|
625065
|
-
const cards = (this.chatMemoryCards.get(sessionKey) ?? []).slice(0, 4).map((card) => ` - ${card.title}: ${card.notes.slice(-1)[0] ?? ""}`).join("\n");
|
|
625066
|
-
sections.push([
|
|
625067
|
-
`- ${sessionKey} (chat_id ${String(latest.chatId ?? "unknown")}; ${latest.chatType || "chat"}${latest.chatTitle ? `: ${latest.chatTitle}` : ""})`,
|
|
625068
|
-
participants ? ` Participants: ${participants}` : "",
|
|
625069
|
-
` Latest: ${telegramHistorySpeaker(latest)}: ${truncateTelegramContextLine(latest.text, 180)}`,
|
|
625070
|
-
recent ? ` Recent:
|
|
625071
|
-
${recent}` : "",
|
|
625072
|
-
cards ? ` Memory cards:
|
|
625073
|
-
${cards}` : ""
|
|
625074
|
-
].filter(Boolean).join("\n"));
|
|
625075
|
-
}
|
|
625076
|
-
if (sections.length === 0) return "";
|
|
625539
|
+
const participantCount = this.chatParticipants.get(sessionKey)?.size ?? 0;
|
|
625540
|
+
const ageMs = Date.now() - (latest.ts ?? 0);
|
|
625541
|
+
const ageMin = Math.round(ageMs / 6e4);
|
|
625542
|
+
const ageStr = ageMin < 60 ? `${ageMin}m ago` : ageMin < 24 * 60 ? `${Math.round(ageMin / 60)}h ago` : `${Math.round(ageMin / (24 * 60))}d ago`;
|
|
625543
|
+
const label = latest.chatTitle ? `"${latest.chatTitle}"` : sessionKey;
|
|
625544
|
+
indexLines.push(`- ${label} (chat_id ${String(latest.chatId ?? "?")}; ${latest.chatType || "chat"}): ${participantCount} participants; last ${ageStr}; ${history.length} retained msgs`);
|
|
625545
|
+
}
|
|
625077
625546
|
return [
|
|
625078
|
-
"## Admin Telegram Omniscience",
|
|
625079
|
-
"
|
|
625080
|
-
|
|
625547
|
+
"## Admin Telegram Omniscience (index only)",
|
|
625548
|
+
"One-way context for the authenticated admin private DM. Other Telegram sessions the bot has observed are listed below with one line each.",
|
|
625549
|
+
"For details on a specific chat, use telegram_memory_search with the chat_id or topic — the always-loaded view is intentionally compact.",
|
|
625550
|
+
"Never inject admin/private DM content into public groups.",
|
|
625551
|
+
indexLines.join("\n")
|
|
625081
625552
|
].join("\n\n");
|
|
625082
625553
|
}
|
|
625083
625554
|
buildTelegramSessionContext(msg, toolContext, profile, modelTier) {
|
|
@@ -626197,8 +626668,9 @@ ${conversationStream}`
|
|
|
626197
626668
|
messages: this.buildTelegramChatMessages(msg, toolContext, mediaContext),
|
|
626198
626669
|
tools: [],
|
|
626199
626670
|
temperature: 0.4,
|
|
626200
|
-
maxTokens:
|
|
626201
|
-
timeoutMs: Math.max(config.timeoutMs ?? 3e5, 12e4)
|
|
626671
|
+
maxTokens: 1500,
|
|
626672
|
+
timeoutMs: Math.max(config.timeoutMs ?? 3e5, 12e4),
|
|
626673
|
+
responseFormat: TELEGRAM_CHAT_REPLY_RESPONSE_FORMAT
|
|
626202
626674
|
});
|
|
626203
626675
|
let accumulated = "";
|
|
626204
626676
|
let streamError;
|
|
@@ -626225,7 +626697,8 @@ ${conversationStream}`
|
|
|
626225
626697
|
} else {
|
|
626226
626698
|
this.bumpTelegramInferenceTokens(inferenceId, 1, 0);
|
|
626227
626699
|
accumulated += piece;
|
|
626228
|
-
|
|
626700
|
+
const partial = extractPartialTelegramReplyJson(accumulated);
|
|
626701
|
+
if (partial !== null) await onToken(partial);
|
|
626229
626702
|
}
|
|
626230
626703
|
}
|
|
626231
626704
|
} catch (err) {
|
|
@@ -626247,11 +626720,14 @@ ${conversationStream}`
|
|
|
626247
626720
|
}
|
|
626248
626721
|
this.updateTelegramInferenceFinal(inferenceId, result);
|
|
626249
626722
|
accumulated = result.choices[0]?.message?.content ?? "";
|
|
626250
|
-
|
|
626723
|
+
const fullExtracted = extractPartialTelegramReplyJson(accumulated);
|
|
626724
|
+
if (fullExtracted) await onToken(fullExtracted);
|
|
626251
626725
|
}
|
|
626252
626726
|
} finally {
|
|
626253
626727
|
this.deregisterTelegramInference(inferenceId);
|
|
626254
626728
|
}
|
|
626729
|
+
const extracted = extractFinalTelegramReplyJson(accumulated);
|
|
626730
|
+
if (extracted) return extracted;
|
|
626255
626731
|
return stripTelegramHiddenThinking(accumulated).trim();
|
|
626256
626732
|
}
|
|
626257
626733
|
retainTelegramVisibleReplyDraft(subAgent, draft, streamToolNames = subAgent.currentStreamToolNames) {
|