omnius 1.0.187 → 1.0.188
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +575 -132
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -1412,6 +1412,18 @@ var init_tool_executor = __esm({
|
|
|
1412
1412
|
import { EventEmitter } from "node:events";
|
|
1413
1413
|
import { totalmem, freemem } from "node:os";
|
|
1414
1414
|
import { exec } from "node:child_process";
|
|
1415
|
+
/**
 * Remove duplicate model records, keyed by `host:name`.
 *
 * The first record seen for each key wins and the relative order of the
 * surviving records is preserved. The input array is not modified.
 *
 * @param models Iterable of records exposing `host` and `name`.
 * @returns A new array containing one record per distinct `host:name`.
 */
function dedupeLoadedModels(models) {
  // A Map keeps insertion order, so the values come back first-seen-first.
  const firstByKey = new Map();
  for (const entry of models) {
    const id = `${entry.host}:${entry.name}`;
    if (!firstByKey.has(id)) {
      firstByKey.set(id, entry);
    }
  }
  return [...firstByKey.values()];
}
|
|
1415
1427
|
function ramSnapshotMB() {
|
|
1416
1428
|
const total = Math.round(totalmem() / (1024 * 1024));
|
|
1417
1429
|
const free = Math.round(freemem() / (1024 * 1024));
|
|
@@ -1690,6 +1702,104 @@ var init_model_broker = __esm({
|
|
|
1690
1702
|
this.emit("rejected", spec, reason);
|
|
1691
1703
|
return { kind: "reject", reason };
|
|
1692
1704
|
}
|
|
1705
|
+
/**
|
|
1706
|
+
* Acquire a short-lived load lease for media/subprocess generation.
|
|
1707
|
+
*
|
|
1708
|
+
* Media generation often needs to temporarily free VRAM occupied by Ollama
|
|
1709
|
+
* chat models. This helper refreshes Ollama state, asks the broker what must
|
|
1710
|
+
* be evicted, unloads those Ollama models with keep_alive=0, and returns a
|
|
1711
|
+
* lease whose release() clears transient inflight state, unloads any
|
|
1712
|
+
* Ollama-hosted requested model, and warms the evicted Ollama models again.
|
|
1713
|
+
*/
|
|
1714
|
+
async acquireTransientModelLoad(spec, options2 = {}) {
|
|
1715
|
+
const reason = options2.reason ?? `${spec.domain}-transient-load`;
|
|
1716
|
+
const evictedModels = [];
|
|
1717
|
+
let gpuIndex = null;
|
|
1718
|
+
let admitted = false;
|
|
1719
|
+
await this.pollOnce().catch(() => {
|
|
1720
|
+
});
|
|
1721
|
+
for (let attempt = 0; attempt < 4; attempt++) {
|
|
1722
|
+
const decision2 = await this.ensureModelLoadable(spec);
|
|
1723
|
+
if (decision2.kind === "wait-for-inflight") {
|
|
1724
|
+
const waited = await decision2.promise.catch((err) => ({
|
|
1725
|
+
kind: "reject",
|
|
1726
|
+
reason: err instanceof Error ? err.message : String(err)
|
|
1727
|
+
}));
|
|
1728
|
+
if (waited.kind === "ok") {
|
|
1729
|
+
gpuIndex = waited.gpuIndex ?? null;
|
|
1730
|
+
admitted = true;
|
|
1731
|
+
break;
|
|
1732
|
+
}
|
|
1733
|
+
if (waited.kind === "evict") {
|
|
1734
|
+
for (const target of waited.evictTargets) {
|
|
1735
|
+
if (await this.evict(target.host, target.name, reason))
|
|
1736
|
+
evictedModels.push(target);
|
|
1737
|
+
}
|
|
1738
|
+
await this.pollOnce().catch(() => {
|
|
1739
|
+
});
|
|
1740
|
+
continue;
|
|
1741
|
+
}
|
|
1742
|
+
if (waited.kind === "degrade")
|
|
1743
|
+
return waited;
|
|
1744
|
+
if (waited.kind === "reject")
|
|
1745
|
+
return waited;
|
|
1746
|
+
continue;
|
|
1747
|
+
}
|
|
1748
|
+
if (decision2.kind === "ok") {
|
|
1749
|
+
gpuIndex = decision2.gpuIndex ?? null;
|
|
1750
|
+
admitted = true;
|
|
1751
|
+
break;
|
|
1752
|
+
}
|
|
1753
|
+
if (decision2.kind === "evict") {
|
|
1754
|
+
for (const target of decision2.evictTargets) {
|
|
1755
|
+
const before = this._loaded.get(`${target.host}:${target.name}`) ?? target;
|
|
1756
|
+
if (await this.evict(target.host, target.name, reason)) {
|
|
1757
|
+
evictedModels.push(before);
|
|
1758
|
+
}
|
|
1759
|
+
}
|
|
1760
|
+
await this.pollOnce().catch(() => {
|
|
1761
|
+
});
|
|
1762
|
+
continue;
|
|
1763
|
+
}
|
|
1764
|
+
if (decision2.kind === "degrade")
|
|
1765
|
+
return decision2;
|
|
1766
|
+
return decision2;
|
|
1767
|
+
}
|
|
1768
|
+
if (!admitted) {
|
|
1769
|
+
return {
|
|
1770
|
+
kind: "reject",
|
|
1771
|
+
reason: `could not acquire transient load lease for ${spec.host}:${spec.name} after repeated evictions`
|
|
1772
|
+
};
|
|
1773
|
+
}
|
|
1774
|
+
const evictedOllamaModels = dedupeLoadedModels(evictedModels.filter((m2) => m2.host === "ollama"));
|
|
1775
|
+
const broker = this;
|
|
1776
|
+
let released = false;
|
|
1777
|
+
return {
|
|
1778
|
+
kind: "ok",
|
|
1779
|
+
lease: {
|
|
1780
|
+
spec,
|
|
1781
|
+
gpuIndex,
|
|
1782
|
+
evictedModels: dedupeLoadedModels(evictedModels),
|
|
1783
|
+
evictedOllamaModels,
|
|
1784
|
+
async release() {
|
|
1785
|
+
if (released)
|
|
1786
|
+
return;
|
|
1787
|
+
released = true;
|
|
1788
|
+
broker.clearInflight(spec.host, spec.name);
|
|
1789
|
+
if ((options2.unloadRequestedOllama ?? true) && spec.host === "ollama") {
|
|
1790
|
+
await broker.unloadOllamaModel(spec.name, `${reason}-complete`).catch(() => false);
|
|
1791
|
+
}
|
|
1792
|
+
if (options2.restoreOllama !== false && evictedOllamaModels.length > 0) {
|
|
1793
|
+
await broker.restoreOllamaModels(evictedOllamaModels, {
|
|
1794
|
+
keepAlive: options2.restoreKeepAlive ?? "30m"
|
|
1795
|
+
});
|
|
1796
|
+
}
|
|
1797
|
+
await broker.pollOnce().catch(() => {
|
|
1798
|
+
});
|
|
1799
|
+
}
|
|
1800
|
+
}
|
|
1801
|
+
};
|
|
1802
|
+
}
|
|
1693
1803
|
/**
|
|
1694
1804
|
* Register a model that has been successfully loaded.
|
|
1695
1805
|
* Callers MUST call this after a successful load so the broker can track LRU.
|
|
@@ -1743,6 +1853,66 @@ var init_model_broker = __esm({
|
|
|
1743
1853
|
this.emit("evicted", m2, reason);
|
|
1744
1854
|
return actively;
|
|
1745
1855
|
}
|
|
1856
|
+
/** Force-unload an Ollama model even when it is not currently tracked. */
|
|
1857
|
+
async unloadOllamaModel(modelName, reason = "ollama-unload") {
|
|
1858
|
+
const key = `ollama:${modelName}`;
|
|
1859
|
+
const existing = this._loaded.get(key);
|
|
1860
|
+
const ok3 = await this.ollamaUnload(modelName).catch(() => false);
|
|
1861
|
+
if (existing) {
|
|
1862
|
+
this._loaded.delete(key);
|
|
1863
|
+
this.emit("evicted", existing, reason);
|
|
1864
|
+
}
|
|
1865
|
+
return ok3;
|
|
1866
|
+
}
|
|
1867
|
+
/** Best-effort warm/reload of an Ollama model after temporary eviction. */
|
|
1868
|
+
async warmOllamaModel(modelName, keepAlive = "30m") {
|
|
1869
|
+
const bodies = [
|
|
1870
|
+
{ model: modelName, prompt: "", stream: false, keep_alive: keepAlive, options: { num_predict: 0 } },
|
|
1871
|
+
{ model: modelName, prompt: "", stream: false, keep_alive: keepAlive, options: { num_predict: 1 } }
|
|
1872
|
+
];
|
|
1873
|
+
for (const body of bodies) {
|
|
1874
|
+
try {
|
|
1875
|
+
const res = await fetch(`${this._ollamaBaseUrl}/api/generate`, {
|
|
1876
|
+
method: "POST",
|
|
1877
|
+
headers: { "Content-Type": "application/json" },
|
|
1878
|
+
body: JSON.stringify(body),
|
|
1879
|
+
signal: AbortSignal.timeout(12e4)
|
|
1880
|
+
});
|
|
1881
|
+
if (!res.ok)
|
|
1882
|
+
continue;
|
|
1883
|
+
await this.refreshOllamaPs().catch(() => {
|
|
1884
|
+
});
|
|
1885
|
+
return true;
|
|
1886
|
+
} catch {
|
|
1887
|
+
}
|
|
1888
|
+
}
|
|
1889
|
+
try {
|
|
1890
|
+
const res = await fetch(`${this._ollamaBaseUrl}/api/generate`, {
|
|
1891
|
+
method: "POST",
|
|
1892
|
+
headers: { "Content-Type": "application/json" },
|
|
1893
|
+
body: JSON.stringify({
|
|
1894
|
+
model: modelName,
|
|
1895
|
+
stream: false,
|
|
1896
|
+
keep_alive: keepAlive
|
|
1897
|
+
}),
|
|
1898
|
+
signal: AbortSignal.timeout(12e4)
|
|
1899
|
+
});
|
|
1900
|
+
if (!res.ok)
|
|
1901
|
+
return false;
|
|
1902
|
+
await this.refreshOllamaPs().catch(() => {
|
|
1903
|
+
});
|
|
1904
|
+
return true;
|
|
1905
|
+
} catch {
|
|
1906
|
+
return false;
|
|
1907
|
+
}
|
|
1908
|
+
}
|
|
1909
|
+
/** Restore a set of previously evicted Ollama models, oldest first. */
|
|
1910
|
+
async restoreOllamaModels(models, options2 = {}) {
|
|
1911
|
+
const unique2 = dedupeLoadedModels(models.filter((m2) => m2.host === "ollama")).sort((a2, b) => a2.lastUsedAt - b.lastUsedAt);
|
|
1912
|
+
for (const model of unique2) {
|
|
1913
|
+
await this.warmOllamaModel(model.name, options2.keepAlive ?? "30m").catch(() => false);
|
|
1914
|
+
}
|
|
1915
|
+
}
|
|
1746
1916
|
// ------------------------------------------------------------------
|
|
1747
1917
|
// Internal — Ollama
|
|
1748
1918
|
// ------------------------------------------------------------------
|
|
@@ -1885,7 +2055,7 @@ var init_model_broker = __esm({
|
|
|
1885
2055
|
);
|
|
1886
2056
|
const idle = (m2) => now - m2.lastUsedAt > this.idleEvictMs;
|
|
1887
2057
|
const onTargetGpu = (m2) => req2.targetGpu === void 0 || req2.targetGpu === null ? true : m2.gpuIndex === req2.targetGpu;
|
|
1888
|
-
const evictable = [...this._loaded.values()].filter((m2) => m2.priority <= req2.requestingPriority).filter(sameDomainOk).filter(onTargetGpu).sort((a2, b) => {
|
|
2058
|
+
const evictable = [...this._loaded.values()].filter((m2) => m2.priority <= req2.requestingPriority).filter(sameDomainOk).filter(onTargetGpu).filter((m2) => !this.hasActiveSlotForModel(m2)).sort((a2, b) => {
|
|
1889
2059
|
const aIdle = idle(a2) ? 0 : 1;
|
|
1890
2060
|
const bIdle = idle(b) ? 0 : 1;
|
|
1891
2061
|
if (aIdle !== bIdle)
|
|
@@ -1931,6 +2101,13 @@ var init_model_broker = __esm({
|
|
|
1931
2101
|
n2++;
|
|
1932
2102
|
return n2;
|
|
1933
2103
|
}
|
|
2104
|
+
hasActiveSlotForModel(model) {
|
|
2105
|
+
for (const slot of this._activeSlots.values()) {
|
|
2106
|
+
if (slot.model === model.name)
|
|
2107
|
+
return true;
|
|
2108
|
+
}
|
|
2109
|
+
return false;
|
|
2110
|
+
}
|
|
1934
2111
|
// ------------------------------------------------------------------
|
|
1935
2112
|
// Internal — fallback resolution
|
|
1936
2113
|
// ------------------------------------------------------------------
|
|
@@ -22761,6 +22938,17 @@ function evictModelsToFreeSpace(args) {
|
|
|
22761
22938
|
writeMeta(meta);
|
|
22762
22939
|
return { evicted, bytesFreed, finalFreeBytes: disk.freeBytes };
|
|
22763
22940
|
}
|
|
22941
|
+
/**
 * Estimate how many bytes could be freed by evicting cached repos.
 *
 * Sums, over every metadata entry whose repo is not listed in `keepRepos`,
 * the size reported by measureRepoCacheBytes(); when that is falsy the
 * entry's recorded `sizeBytes` is used instead, and negative values count
 * as zero.
 *
 * @param keepRepos Optional list of repo ids that must not be counted.
 * @returns Total estimated reclaimable bytes.
 */
function estimateReclaimableCacheBytes(keepRepos) {
  const protectedRepos = new Set(keepRepos ?? []);
  let reclaimable = 0;
  for (const { repo, sizeBytes } of readMeta().entries) {
    if (!protectedRepos.has(repo)) {
      const bytes = measureRepoCacheBytes(repo) || sizeBytes || 0;
      reclaimable += Math.max(0, bytes);
    }
  }
  return reclaimable;
}
|
|
22764
22952
|
function ensureDiskSpaceForDownload(args) {
|
|
22765
22953
|
ensureUnifiedCacheDirs();
|
|
22766
22954
|
const safetyMargin = args.safetyMarginBytes ?? 1 * 1024 ** 3;
|
|
@@ -22769,6 +22957,10 @@ function ensureDiskSpaceForDownload(args) {
|
|
|
22769
22957
|
if (disk.freeBytes >= target) {
|
|
22770
22958
|
return { ok: true, evicted: [], freeBytes: disk.freeBytes };
|
|
22771
22959
|
}
|
|
22960
|
+
const reclaimableBytes = estimateReclaimableCacheBytes(args.keepRepos);
|
|
22961
|
+
if (disk.freeBytes + reclaimableBytes < target) {
|
|
22962
|
+
throw new InsufficientDiskSpaceError(args.approxDownloadBytes, disk.freeBytes, disk.totalBytes, []);
|
|
22963
|
+
}
|
|
22772
22964
|
const evictionResult = evictModelsToFreeSpace({
|
|
22773
22965
|
neededBytes: args.approxDownloadBytes,
|
|
22774
22966
|
keepRepos: args.keepRepos,
|
|
@@ -259208,6 +259400,12 @@ function imageCandidateFor(model, requestedBackend) {
|
|
|
259208
259400
|
preset: getImageGenerationPreset(resolved)
|
|
259209
259401
|
};
|
|
259210
259402
|
}
|
|
259403
|
+
/**
 * Map an image candidate's backend to the host name used for the broker.
 *
 * @param candidate Record exposing `backend`.
 * @returns "ollama" for the ollama backend, "diffusers-py" for all others.
 */
function imageCandidateHost(candidate) {
  if (candidate.backend === "ollama") {
    return "ollama";
  }
  return "diffusers-py";
}
|
|
259406
|
+
/**
 * Convert an image candidate's preset `minVramGB` into whole megabytes.
 *
 * @param candidate Record with an optional `preset.minVramGB`.
 * @returns `minVramGB * 1024` rounded up, or undefined when the candidate
 *   has no preset or the preset does not define `minVramGB`.
 */
function imageCandidateEstimatedVramMB(candidate) {
  const minVramGB = candidate.preset?.minVramGB;
  if (minVramGB === undefined) {
    return undefined;
  }
  return Math.ceil(minVramGB * 1024);
}
|
|
259211
259409
|
function imageGenerationFallbackCandidates(requestedModel, requestedBackend, allowFallback = true) {
|
|
259212
259410
|
const ladder = imageGenerationQualityLadder();
|
|
259213
259411
|
const candidates = [];
|
|
@@ -259511,9 +259709,15 @@ function annotateImageFallbackSuccess(result, failed, winner) {
|
|
|
259511
259709
|
...failed.map((attempt, index) => ` ${formatImageAttempt(attempt.candidate, attempt.reason, index)}`),
|
|
259512
259710
|
""
|
|
259513
259711
|
].join("\n");
|
|
259712
|
+
const llmPrefix = [
|
|
259713
|
+
`Fallback ladder used ${winner.model} [${winner.backend}] after ${failed.length} failed attempt(s).`,
|
|
259714
|
+
...failed.map((attempt, index) => formatImageAttempt(attempt.candidate, attempt.reason, index))
|
|
259715
|
+
].join("\n");
|
|
259514
259716
|
return {
|
|
259515
259717
|
...result,
|
|
259516
|
-
output: prefix + result.output
|
|
259718
|
+
output: prefix + result.output,
|
|
259719
|
+
llmContent: result.llmContent ? `${llmPrefix}
|
|
259720
|
+
${result.llmContent}` : result.llmContent
|
|
259517
259721
|
};
|
|
259518
259722
|
}
|
|
259519
259723
|
function parseRunnerJson(stdout) {
|
|
@@ -260321,6 +260525,45 @@ if __name__ == "__main__":
|
|
|
260321
260525
|
this.lastProgressAt = now;
|
|
260322
260526
|
this.progressHandler(event);
|
|
260323
260527
|
}
|
|
260528
|
+
async acquireTransientLoadLease(args) {
|
|
260529
|
+
if (!args.candidate)
|
|
260530
|
+
return null;
|
|
260531
|
+
const broker = getModelBroker();
|
|
260532
|
+
const decision2 = await broker.acquireTransientModelLoad({
|
|
260533
|
+
name: args.candidate.model,
|
|
260534
|
+
domain: "image-gen",
|
|
260535
|
+
host: imageCandidateHost(args.candidate),
|
|
260536
|
+
owner: "image-generate-tool",
|
|
260537
|
+
estimatedVramMB: imageCandidateEstimatedVramMB(args.candidate)
|
|
260538
|
+
}, {
|
|
260539
|
+
reason: args.reason,
|
|
260540
|
+
restoreOllama: true,
|
|
260541
|
+
unloadRequestedOllama: true
|
|
260542
|
+
});
|
|
260543
|
+
if (decision2.kind === "reject") {
|
|
260544
|
+
return {
|
|
260545
|
+
success: false,
|
|
260546
|
+
output: "",
|
|
260547
|
+
error: `Image generation blocked by resource broker: ${decision2.reason}`,
|
|
260548
|
+
durationMs: performance.now() - args.start
|
|
260549
|
+
};
|
|
260550
|
+
}
|
|
260551
|
+
if (decision2.kind === "degrade") {
|
|
260552
|
+
return {
|
|
260553
|
+
success: false,
|
|
260554
|
+
output: "",
|
|
260555
|
+
error: `Image generation needs a broker fallback (${decision2.fallback.name}), but image candidate fallback must be selected by the image ladder: ${decision2.reason}`,
|
|
260556
|
+
durationMs: performance.now() - args.start
|
|
260557
|
+
};
|
|
260558
|
+
}
|
|
260559
|
+
if (decision2.lease.evictedOllamaModels.length > 0) {
|
|
260560
|
+
this.emitProgress({
|
|
260561
|
+
stage: "setup",
|
|
260562
|
+
message: `Temporarily unloaded ${decision2.lease.evictedOllamaModels.length} Ollama inference model(s) to free VRAM for image generation`
|
|
260563
|
+
});
|
|
260564
|
+
}
|
|
260565
|
+
return { lease: decision2.lease, gpuIndex: decision2.lease.gpuIndex };
|
|
260566
|
+
}
|
|
260324
260567
|
async execute(args) {
|
|
260325
260568
|
const start2 = performance.now();
|
|
260326
260569
|
const action = String(args["action"] ?? "generate");
|
|
@@ -260363,33 +260606,6 @@ if __name__ == "__main__":
|
|
|
260363
260606
|
const requestedBackend = args["backend"] ? String(args["backend"]) : this.defaultBackend;
|
|
260364
260607
|
const seed = optionalNumberArg(args["seed"]);
|
|
260365
260608
|
const candidates = imageGenerationFallbackCandidates(requestedModel, requestedBackend, generationFallbackEnabled(args));
|
|
260366
|
-
const broker = getModelBroker();
|
|
260367
|
-
const firstCandidate = candidates[0];
|
|
260368
|
-
let brokerGpuIndex = null;
|
|
260369
|
-
if (firstCandidate) {
|
|
260370
|
-
const decision2 = await broker.ensureModelLoadable({
|
|
260371
|
-
name: firstCandidate.model,
|
|
260372
|
-
domain: "image-gen",
|
|
260373
|
-
host: firstCandidate.backend === "ollama" ? "ollama" : "diffusers-py",
|
|
260374
|
-
owner: "image-generate-tool"
|
|
260375
|
-
});
|
|
260376
|
-
if (decision2.kind === "evict") {
|
|
260377
|
-
for (const target of decision2.evictTargets) {
|
|
260378
|
-
await broker.evict(target.host, target.name, "image-gen-needs-room");
|
|
260379
|
-
}
|
|
260380
|
-
brokerGpuIndex = decision2.gpuIndex ?? null;
|
|
260381
|
-
} else if (decision2.kind === "ok") {
|
|
260382
|
-
brokerGpuIndex = decision2.gpuIndex ?? null;
|
|
260383
|
-
} else if (decision2.kind === "reject") {
|
|
260384
|
-
return {
|
|
260385
|
-
success: false,
|
|
260386
|
-
output: "",
|
|
260387
|
-
error: `Image generation blocked by resource broker: ${decision2.reason}`,
|
|
260388
|
-
durationMs: performance.now() - start2
|
|
260389
|
-
};
|
|
260390
|
-
}
|
|
260391
|
-
}
|
|
260392
|
-
this._brokerGpuIndex = brokerGpuIndex;
|
|
260393
260609
|
try {
|
|
260394
260610
|
return await this.generateCandidateLadder({ candidates, prompt, args, seed, start: start2 });
|
|
260395
260611
|
} catch (err) {
|
|
@@ -260408,7 +260624,30 @@ if __name__ == "__main__":
|
|
|
260408
260624
|
stage: "setup",
|
|
260409
260625
|
message: `Preparing image model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
|
|
260410
260626
|
});
|
|
260411
|
-
const
|
|
260627
|
+
const leaseDecision = await this.acquireTransientLoadLease({
|
|
260628
|
+
candidate,
|
|
260629
|
+
reason: "image-prewarm-needs-room",
|
|
260630
|
+
start: args.start
|
|
260631
|
+
});
|
|
260632
|
+
if (leaseDecision && "success" in leaseDecision) {
|
|
260633
|
+
failed.push({ candidate, reason: summarizeToolResult(leaseDecision) });
|
|
260634
|
+
if (index < args.candidates.length - 1) {
|
|
260635
|
+
this.emitProgress({
|
|
260636
|
+
stage: "setup",
|
|
260637
|
+
message: `${candidate.model} did not fit current resources; trying ${args.candidates[index + 1].model}`
|
|
260638
|
+
});
|
|
260639
|
+
}
|
|
260640
|
+
continue;
|
|
260641
|
+
}
|
|
260642
|
+
const lease = leaseDecision?.lease;
|
|
260643
|
+
this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
|
|
260644
|
+
let result;
|
|
260645
|
+
try {
|
|
260646
|
+
result = candidate.backend === "ollama" ? await this.prewarmOllama({ model: candidate.model, start: args.start }) : candidate.backend === "sdcpp" ? await this.prewarmSdCpp({ model: candidate.model, start: args.start, python: args.args["python"] }) : await this.prewarmDiffusers({ model: candidate.model, start: args.start, python: args.args["python"] });
|
|
260647
|
+
} finally {
|
|
260648
|
+
await lease?.release();
|
|
260649
|
+
this._brokerGpuIndex = null;
|
|
260650
|
+
}
|
|
260412
260651
|
if (result.success)
|
|
260413
260652
|
return annotateImageFallbackSuccess(result, failed, candidate);
|
|
260414
260653
|
failed.push({ candidate, reason: summarizeToolResult(result) });
|
|
@@ -260447,7 +260686,30 @@ if __name__ == "__main__":
|
|
|
260447
260686
|
message: `Using image model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
|
|
260448
260687
|
});
|
|
260449
260688
|
const promptForCandidate = expansionEnabled ? await this.expandPromptForCandidate(args.prompt, candidate, index, args.candidates.length) : args.prompt;
|
|
260450
|
-
const
|
|
260689
|
+
const leaseDecision = await this.acquireTransientLoadLease({
|
|
260690
|
+
candidate,
|
|
260691
|
+
reason: "image-gen-needs-room",
|
|
260692
|
+
start: args.start
|
|
260693
|
+
});
|
|
260694
|
+
if (leaseDecision && "success" in leaseDecision) {
|
|
260695
|
+
failed.push({ candidate, reason: summarizeToolResult(leaseDecision) });
|
|
260696
|
+
if (index < args.candidates.length - 1) {
|
|
260697
|
+
this.emitProgress({
|
|
260698
|
+
stage: "setup",
|
|
260699
|
+
message: `${candidate.model} did not fit current resources; falling back to ${args.candidates[index + 1].model}`
|
|
260700
|
+
});
|
|
260701
|
+
}
|
|
260702
|
+
continue;
|
|
260703
|
+
}
|
|
260704
|
+
const lease = leaseDecision?.lease;
|
|
260705
|
+
this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
|
|
260706
|
+
let result;
|
|
260707
|
+
try {
|
|
260708
|
+
result = candidate.backend === "ollama" ? await this.generateWithOllama({ prompt: promptForCandidate, model: candidate.model, width, height, steps, start: args.start }) : candidate.backend === "sdcpp" ? await this.generateWithSdCpp({ prompt: promptForCandidate, model: candidate.model, width, height, steps, seed: args.seed, start: args.start, python: args.args["python"] }) : await this.generateWithDiffusers({ prompt: promptForCandidate, model: candidate.model, width, height, steps, guidance, seed: args.seed, start: args.start, python: args.args["python"] });
|
|
260709
|
+
} finally {
|
|
260710
|
+
await lease?.release();
|
|
260711
|
+
this._brokerGpuIndex = null;
|
|
260712
|
+
}
|
|
260451
260713
|
if (result.success) {
|
|
260452
260714
|
await this.writeImageSidecar(result, {
|
|
260453
260715
|
originalPrompt: args.prompt,
|
|
@@ -260671,6 +260933,17 @@ ${errText.slice(0, 1200)}`,
|
|
|
260671
260933
|
}
|
|
260672
260934
|
ensureUnifiedCacheDirs();
|
|
260673
260935
|
this.emitProgress({ stage: "load", message: `Downloading/loading image model ${args.model}` });
|
|
260936
|
+
const runnerEnv = { ...python.env };
|
|
260937
|
+
if (this._brokerGpuIndex !== null) {
|
|
260938
|
+
if (mediaBrokerGpuIndexIsCompatible(this._brokerGpuIndex, "image", runnerEnv)) {
|
|
260939
|
+
runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
|
|
260940
|
+
} else {
|
|
260941
|
+
this.emitProgress({
|
|
260942
|
+
stage: "setup",
|
|
260943
|
+
message: `Broker selected CUDA GPU ${this._brokerGpuIndex}, but image CUDA filtering excluded it; using CUDA_VISIBLE_DEVICES=${runnerEnv["CUDA_VISIBLE_DEVICES"] ?? "default"}`
|
|
260944
|
+
});
|
|
260945
|
+
}
|
|
260946
|
+
}
|
|
260674
260947
|
const result = await runProcess2(python.command, [
|
|
260675
260948
|
runner,
|
|
260676
260949
|
"--model",
|
|
@@ -260683,7 +260956,7 @@ ${errText.slice(0, 1200)}`,
|
|
|
260683
260956
|
], {
|
|
260684
260957
|
cwd: this.cwd,
|
|
260685
260958
|
timeoutMs: 18e5,
|
|
260686
|
-
env:
|
|
260959
|
+
env: runnerEnv,
|
|
260687
260960
|
progressLabel: `Downloading/loading ${args.model}`,
|
|
260688
260961
|
onProgress: (event) => this.emitProgress(event)
|
|
260689
260962
|
});
|
|
@@ -261735,6 +262008,18 @@ function audioCandidateFor(kind, model, requestedBackend) {
|
|
|
261735
262008
|
preset: getAudioGenerationPreset(model, kind)
|
|
261736
262009
|
};
|
|
261737
262010
|
}
|
|
262011
|
+
/**
 * Map an audio candidate's backend to the host name used for the broker.
 *
 * @param candidate Record exposing `backend`.
 * @returns null for the "project" backend, the backend name itself for
 *   "audiocraft" and "tangoflux", and "diffusers-py" for everything else.
 */
function audioCandidateHost(candidate) {
  switch (candidate.backend) {
    case "project":
      return null;
    case "audiocraft":
      return "audiocraft";
    case "tangoflux":
      return "tangoflux";
    default:
      return "diffusers-py";
  }
}
|
|
262020
|
+
/**
 * Convert an audio candidate's preset `minVramGB` into whole megabytes.
 *
 * A preset that does not carry a usable `minVramGB` yields undefined instead
 * of NaN, so callers never receive a non-numeric VRAM estimate.
 *
 * @param candidate Record with an optional `preset.minVramGB`.
 * @returns `minVramGB * 1024` rounded up, or undefined when no finite
 *   estimate is available.
 */
function audioCandidateEstimatedVramMB(candidate) {
  const minVramGB = candidate.preset?.minVramGB;
  if (minVramGB == null) {
    return undefined;
  }
  const megabytes = Math.ceil(minVramGB * 1024);
  return Number.isFinite(megabytes) ? megabytes : undefined;
}
|
|
261738
262023
|
function audioGenerationFallbackCandidates(kind, requestedModel, requestedBackend, allowFallback = true) {
|
|
261739
262024
|
const ladder = audioGenerationQualityLadder(kind);
|
|
261740
262025
|
const candidates = [];
|
|
@@ -261891,9 +262176,15 @@ function annotateAudioFallbackSuccess(result, failed, winner) {
|
|
|
261891
262176
|
...failed.map((attempt, index) => ` ${formatAudioAttempt(attempt.candidate, attempt.reason, index)}`),
|
|
261892
262177
|
""
|
|
261893
262178
|
].join("\n");
|
|
262179
|
+
const llmPrefix = [
|
|
262180
|
+
`Fallback ladder used ${winner.model} [${winner.backend}] after ${failed.length} failed attempt(s).`,
|
|
262181
|
+
...failed.map((attempt, index) => formatAudioAttempt(attempt.candidate, attempt.reason, index))
|
|
262182
|
+
].join("\n");
|
|
261894
262183
|
return {
|
|
261895
262184
|
...result,
|
|
261896
|
-
output: prefix + result.output
|
|
262185
|
+
output: prefix + result.output,
|
|
262186
|
+
llmContent: result.llmContent ? `${llmPrefix}
|
|
262187
|
+
${result.llmContent}` : result.llmContent
|
|
261897
262188
|
};
|
|
261898
262189
|
}
|
|
261899
262190
|
var DEFAULT_SOUND_MODEL, DEFAULT_MUSIC_MODEL, DIFFUSERS_AUDIO_PACKAGES, TRANSFORMERS_AUDIO_PACKAGES, AUDIOCRAFT_PACKAGES, STABLE_AUDIO_PACKAGES, TANGOFLUX_PACKAGES, AUDIO_GENERATION_MODEL_PRESETS, SOUND_GENERATION_QUALITY_LADDER, MUSIC_GENERATION_QUALITY_LADDER, DIFFUSERS_AUDIO_RUNNER, AUDIOCRAFT_RUNNER, TRANSFORMERS_AUDIO_RUNNER, TANGOFLUX_RUNNER, AudioGenerateTool;
|
|
@@ -262730,6 +263021,48 @@ if __name__ == "__main__":
|
|
|
262730
263021
|
this.lastProgressAt = now;
|
|
262731
263022
|
this.progressHandler(event);
|
|
262732
263023
|
}
|
|
263024
|
+
async acquireTransientLoadLease(args) {
|
|
263025
|
+
if (!args.candidate)
|
|
263026
|
+
return null;
|
|
263027
|
+
const host = audioCandidateHost(args.candidate);
|
|
263028
|
+
if (!host)
|
|
263029
|
+
return null;
|
|
263030
|
+
const broker = getModelBroker();
|
|
263031
|
+
const decision2 = await broker.acquireTransientModelLoad({
|
|
263032
|
+
name: args.candidate.model,
|
|
263033
|
+
domain: args.kind,
|
|
263034
|
+
host,
|
|
263035
|
+
owner: `audio-generate-tool/${args.kind}`,
|
|
263036
|
+
estimatedVramMB: audioCandidateEstimatedVramMB(args.candidate)
|
|
263037
|
+
}, {
|
|
263038
|
+
reason: args.reason,
|
|
263039
|
+
restoreOllama: true,
|
|
263040
|
+
unloadRequestedOllama: false
|
|
263041
|
+
});
|
|
263042
|
+
if (decision2.kind === "reject") {
|
|
263043
|
+
return {
|
|
263044
|
+
success: false,
|
|
263045
|
+
output: "",
|
|
263046
|
+
error: `${args.kind === "music" ? "Music" : "Sound"} generation blocked by resource broker: ${decision2.reason}`,
|
|
263047
|
+
durationMs: performance.now() - args.start
|
|
263048
|
+
};
|
|
263049
|
+
}
|
|
263050
|
+
if (decision2.kind === "degrade") {
|
|
263051
|
+
return {
|
|
263052
|
+
success: false,
|
|
263053
|
+
output: "",
|
|
263054
|
+
error: `${args.kind === "music" ? "Music" : "Sound"} generation needs a broker fallback (${decision2.fallback.name}), but audio candidate fallback must be selected by the audio ladder: ${decision2.reason}`,
|
|
263055
|
+
durationMs: performance.now() - args.start
|
|
263056
|
+
};
|
|
263057
|
+
}
|
|
263058
|
+
if (decision2.lease.evictedOllamaModels.length > 0) {
|
|
263059
|
+
this.emitProgress({
|
|
263060
|
+
stage: "setup",
|
|
263061
|
+
message: `Temporarily unloaded ${decision2.lease.evictedOllamaModels.length} Ollama inference model(s) to free VRAM for ${args.kind} generation`
|
|
263062
|
+
});
|
|
263063
|
+
}
|
|
263064
|
+
return { lease: decision2.lease, gpuIndex: decision2.lease.gpuIndex };
|
|
263065
|
+
}
|
|
262733
263066
|
async prewarmPythonBackend(args) {
|
|
262734
263067
|
const runner = await ensureAudioRunner(this.cwd, args.runnerBackend);
|
|
262735
263068
|
let python;
|
|
@@ -262776,6 +263109,17 @@ if __name__ == "__main__":
|
|
|
262776
263109
|
}
|
|
262777
263110
|
ensureUnifiedCacheDirs();
|
|
262778
263111
|
this.emitProgress({ stage: "load", message: `Downloading/loading ${args.kind} model ${args.model}` });
|
|
263112
|
+
const runnerEnv = { ...python.env };
|
|
263113
|
+
if (this._brokerGpuIndex !== null) {
|
|
263114
|
+
if (audioBrokerGpuIndexIsCompatible(this._brokerGpuIndex, runnerEnv)) {
|
|
263115
|
+
runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
|
|
263116
|
+
} else {
|
|
263117
|
+
this.emitProgress({
|
|
263118
|
+
stage: "setup",
|
|
263119
|
+
message: `Broker selected CUDA GPU ${this._brokerGpuIndex}, but audio CUDA filtering excluded it; using CUDA_VISIBLE_DEVICES=${runnerEnv["CUDA_VISIBLE_DEVICES"] ?? "default"}`
|
|
263120
|
+
});
|
|
263121
|
+
}
|
|
263122
|
+
}
|
|
262779
263123
|
const result = await runProcess3(python.command, [
|
|
262780
263124
|
runner,
|
|
262781
263125
|
"--kind",
|
|
@@ -262792,7 +263136,7 @@ if __name__ == "__main__":
|
|
|
262792
263136
|
], {
|
|
262793
263137
|
cwd: this.cwd,
|
|
262794
263138
|
timeoutMs: 18e5,
|
|
262795
|
-
env:
|
|
263139
|
+
env: runnerEnv,
|
|
262796
263140
|
progressLabel: `Downloading/loading ${args.model}`,
|
|
262797
263141
|
onProgress: (event) => this.emitProgress(event)
|
|
262798
263142
|
});
|
|
@@ -262872,33 +263216,6 @@ if __name__ == "__main__":
|
|
|
262872
263216
|
const candidates = audioGenerationFallbackCandidates(kind, requestedModel, requestedBackend, generationFallbackEnabled2(args));
|
|
262873
263217
|
const seed = optionalNumberArg2(args["seed"]);
|
|
262874
263218
|
const playback = playbackRequested(args);
|
|
262875
|
-
const broker = getModelBroker();
|
|
262876
|
-
const firstCandidate = candidates[0];
|
|
262877
|
-
let brokerGpuIndex = null;
|
|
262878
|
-
if (firstCandidate) {
|
|
262879
|
-
const decision2 = await broker.ensureModelLoadable({
|
|
262880
|
-
name: firstCandidate.model,
|
|
262881
|
-
domain: kind === "music" ? "music" : "sound",
|
|
262882
|
-
host: firstCandidate.backend === "audiocraft" ? "audiocraft" : firstCandidate.backend === "tangoflux" ? "tangoflux" : firstCandidate.backend === "transformers" ? "diffusers-py" : "diffusers-py",
|
|
262883
|
-
owner: `audio-generate-tool/${kind}`
|
|
262884
|
-
});
|
|
262885
|
-
if (decision2.kind === "evict") {
|
|
262886
|
-
for (const target of decision2.evictTargets) {
|
|
262887
|
-
await broker.evict(target.host, target.name, `${kind}-gen-needs-room`);
|
|
262888
|
-
}
|
|
262889
|
-
brokerGpuIndex = decision2.gpuIndex ?? null;
|
|
262890
|
-
} else if (decision2.kind === "ok") {
|
|
262891
|
-
brokerGpuIndex = decision2.gpuIndex ?? null;
|
|
262892
|
-
} else if (decision2.kind === "reject") {
|
|
262893
|
-
return {
|
|
262894
|
-
success: false,
|
|
262895
|
-
output: "",
|
|
262896
|
-
error: `${kind === "music" ? "Music" : "Sound"} generation blocked by resource broker: ${decision2.reason}`,
|
|
262897
|
-
durationMs: performance.now() - start2
|
|
262898
|
-
};
|
|
262899
|
-
}
|
|
262900
|
-
}
|
|
262901
|
-
this._brokerGpuIndex = brokerGpuIndex;
|
|
262902
263219
|
try {
|
|
262903
263220
|
return await this.generateCandidateLadder({ kind, candidates, prompt, args, seed, playback, start: start2 });
|
|
262904
263221
|
} catch (err) {
|
|
@@ -262918,15 +263235,39 @@ if __name__ == "__main__":
|
|
|
262918
263235
|
stage: "setup",
|
|
262919
263236
|
message: `Preparing ${args.kind} model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
|
|
262920
263237
|
});
|
|
262921
|
-
const
|
|
263238
|
+
const leaseDecision = await this.acquireTransientLoadLease({
|
|
262922
263239
|
kind: args.kind,
|
|
262923
|
-
|
|
262924
|
-
|
|
262925
|
-
|
|
262926
|
-
|
|
262927
|
-
|
|
262928
|
-
|
|
262929
|
-
|
|
263240
|
+
candidate,
|
|
263241
|
+
reason: `${args.kind}-prewarm-needs-room`,
|
|
263242
|
+
start: args.start
|
|
263243
|
+
});
|
|
263244
|
+
if (leaseDecision && "success" in leaseDecision) {
|
|
263245
|
+
failed.push({ candidate, reason: summarizeToolResult2(leaseDecision) });
|
|
263246
|
+
if (index < args.candidates.length - 1) {
|
|
263247
|
+
this.emitProgress({
|
|
263248
|
+
stage: "setup",
|
|
263249
|
+
message: `${candidate.model} did not fit current resources; trying ${args.candidates[index + 1].model}`
|
|
263250
|
+
});
|
|
263251
|
+
}
|
|
263252
|
+
continue;
|
|
263253
|
+
}
|
|
263254
|
+
const lease = leaseDecision?.lease;
|
|
263255
|
+
this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
|
|
263256
|
+
let result;
|
|
263257
|
+
try {
|
|
263258
|
+
result = candidate.backend === "project" ? this.projectProfileResult(args.kind, candidate, args.start) : await this.prewarmPythonBackend({
|
|
263259
|
+
kind: args.kind,
|
|
263260
|
+
backend: candidate.backend,
|
|
263261
|
+
runnerBackend: candidate.backend,
|
|
263262
|
+
model: candidate.model,
|
|
263263
|
+
duration,
|
|
263264
|
+
start: args.start,
|
|
263265
|
+
python: args.args["python"]
|
|
263266
|
+
});
|
|
263267
|
+
} finally {
|
|
263268
|
+
await lease?.release();
|
|
263269
|
+
this._brokerGpuIndex = null;
|
|
263270
|
+
}
|
|
262930
263271
|
if (result.success)
|
|
262931
263272
|
return annotateAudioFallbackSuccess(result, failed, candidate);
|
|
262932
263273
|
failed.push({ candidate, reason: summarizeToolResult2(result) });
|
|
@@ -262954,19 +263295,43 @@ if __name__ == "__main__":
|
|
|
262954
263295
|
stage: "setup",
|
|
262955
263296
|
message: `Using ${args.kind} model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
|
|
262956
263297
|
});
|
|
262957
|
-
const
|
|
263298
|
+
const leaseDecision = await this.acquireTransientLoadLease({
|
|
262958
263299
|
kind: args.kind,
|
|
262959
|
-
|
|
262960
|
-
|
|
262961
|
-
|
|
262962
|
-
|
|
262963
|
-
|
|
262964
|
-
|
|
262965
|
-
|
|
262966
|
-
|
|
262967
|
-
|
|
262968
|
-
|
|
262969
|
-
|
|
263300
|
+
candidate,
|
|
263301
|
+
reason: `${args.kind}-gen-needs-room`,
|
|
263302
|
+
start: args.start
|
|
263303
|
+
});
|
|
263304
|
+
if (leaseDecision && "success" in leaseDecision) {
|
|
263305
|
+
failed.push({ candidate, reason: summarizeToolResult2(leaseDecision) });
|
|
263306
|
+
if (index < args.candidates.length - 1) {
|
|
263307
|
+
this.emitProgress({
|
|
263308
|
+
stage: "setup",
|
|
263309
|
+
message: `${candidate.model} did not fit current resources; falling back to ${args.candidates[index + 1].model}`
|
|
263310
|
+
});
|
|
263311
|
+
}
|
|
263312
|
+
continue;
|
|
263313
|
+
}
|
|
263314
|
+
const lease = leaseDecision?.lease;
|
|
263315
|
+
this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
|
|
263316
|
+
let result;
|
|
263317
|
+
try {
|
|
263318
|
+
result = candidate.backend === "project" ? this.projectProfileResult(args.kind, candidate, args.start) : await this.generateWithPythonBackend({
|
|
263319
|
+
kind: args.kind,
|
|
263320
|
+
backend: candidate.backend,
|
|
263321
|
+
runnerBackend: candidate.backend,
|
|
263322
|
+
prompt: args.prompt,
|
|
263323
|
+
model: candidate.model,
|
|
263324
|
+
duration,
|
|
263325
|
+
steps,
|
|
263326
|
+
seed: args.seed,
|
|
263327
|
+
playback: args.playback,
|
|
263328
|
+
start: args.start,
|
|
263329
|
+
python: args.args["python"]
|
|
263330
|
+
});
|
|
263331
|
+
} finally {
|
|
263332
|
+
await lease?.release();
|
|
263333
|
+
this._brokerGpuIndex = null;
|
|
263334
|
+
}
|
|
262970
263335
|
if (result.success)
|
|
262971
263336
|
return annotateAudioFallbackSuccess(result, failed, candidate);
|
|
262972
263337
|
failed.push({ candidate, reason: summarizeToolResult2(result) });
|
|
@@ -263306,6 +263671,12 @@ function videoCandidateFor(model, requestedBackend, requestedKind) {
|
|
|
263306
263671
|
}
|
|
263307
263672
|
return { model, backend, preset };
|
|
263308
263673
|
}
|
|
263674
|
+
function videoCandidateHost(candidate) {
|
|
263675
|
+
return candidate.backend === "comfyui" ? "comfyui" : "diffusers-py";
|
|
263676
|
+
}
|
|
263677
|
+
function videoCandidateEstimatedVramMB(candidate) {
|
|
263678
|
+
return candidate.preset ? Math.ceil(candidate.preset.minVramGB * 1024) : void 0;
|
|
263679
|
+
}
|
|
263309
263680
|
function videoGenerationFallbackCandidates(requestedModel, requestedBackend, requestedKind, allowFallback = true, options2 = {}) {
|
|
263310
263681
|
const preferAudioVideo = Boolean(options2.preferNativeAudioVideo);
|
|
263311
263682
|
const baseLadderIds = preferAudioVideo ? [...VIDEO_AUDIO_QUALITY_LADDER, ...VIDEO_GENERATION_QUALITY_LADDER] : VIDEO_GENERATION_QUALITY_LADDER;
|
|
@@ -263871,9 +264242,15 @@ function annotateVideoFallbackSuccess(result, failed, winner) {
|
|
|
263871
264242
|
...failed.map((attempt, index) => ` ${formatVideoAttempt(attempt.candidate, attempt.reason, index)}`),
|
|
263872
264243
|
""
|
|
263873
264244
|
].join("\n");
|
|
264245
|
+
const llmPrefix = [
|
|
264246
|
+
`Fallback ladder used ${winner.model} [${winner.backend}] after ${failed.length} failed attempt(s).`,
|
|
264247
|
+
...failed.map((attempt, index) => formatVideoAttempt(attempt.candidate, attempt.reason, index))
|
|
264248
|
+
].join("\n");
|
|
263874
264249
|
return {
|
|
263875
264250
|
...result,
|
|
263876
|
-
output: prefix + result.output
|
|
264251
|
+
output: prefix + result.output,
|
|
264252
|
+
llmContent: result.llmContent ? `${llmPrefix}
|
|
264253
|
+
${result.llmContent}` : result.llmContent
|
|
263877
264254
|
};
|
|
263878
264255
|
}
|
|
263879
264256
|
function parseRunnerJson3(stdout) {
|
|
@@ -265240,6 +265617,45 @@ if __name__ == "__main__":
|
|
|
265240
265617
|
this.lastProgressAt = now;
|
|
265241
265618
|
this.progressHandler(event);
|
|
265242
265619
|
}
|
|
265620
|
+
async acquireTransientLoadLease(args) {
|
|
265621
|
+
if (!args.candidate)
|
|
265622
|
+
return null;
|
|
265623
|
+
const broker = getModelBroker();
|
|
265624
|
+
const decision2 = await broker.acquireTransientModelLoad({
|
|
265625
|
+
name: args.candidate.model,
|
|
265626
|
+
domain: "video-gen",
|
|
265627
|
+
host: videoCandidateHost(args.candidate),
|
|
265628
|
+
owner: "video-generate-tool",
|
|
265629
|
+
estimatedVramMB: videoCandidateEstimatedVramMB(args.candidate)
|
|
265630
|
+
}, {
|
|
265631
|
+
reason: args.reason,
|
|
265632
|
+
restoreOllama: true,
|
|
265633
|
+
unloadRequestedOllama: false
|
|
265634
|
+
});
|
|
265635
|
+
if (decision2.kind === "reject") {
|
|
265636
|
+
return {
|
|
265637
|
+
success: false,
|
|
265638
|
+
output: "",
|
|
265639
|
+
error: `Video generation blocked by resource broker: ${decision2.reason}`,
|
|
265640
|
+
durationMs: performance.now() - args.start
|
|
265641
|
+
};
|
|
265642
|
+
}
|
|
265643
|
+
if (decision2.kind === "degrade") {
|
|
265644
|
+
return {
|
|
265645
|
+
success: false,
|
|
265646
|
+
output: "",
|
|
265647
|
+
error: `Video generation needs a broker fallback (${decision2.fallback.name}), but video candidate fallback must be selected by the video ladder: ${decision2.reason}`,
|
|
265648
|
+
durationMs: performance.now() - args.start
|
|
265649
|
+
};
|
|
265650
|
+
}
|
|
265651
|
+
if (decision2.lease.evictedOllamaModels.length > 0) {
|
|
265652
|
+
this.emitProgress({
|
|
265653
|
+
stage: "setup",
|
|
265654
|
+
message: `Temporarily unloaded ${decision2.lease.evictedOllamaModels.length} Ollama inference model(s) to free VRAM for video generation`
|
|
265655
|
+
});
|
|
265656
|
+
}
|
|
265657
|
+
return { lease: decision2.lease, gpuIndex: decision2.lease.gpuIndex };
|
|
265658
|
+
}
|
|
265243
265659
|
async execute(args) {
|
|
265244
265660
|
const start2 = performance.now();
|
|
265245
265661
|
const action = String(args["action"] ?? "generate");
|
|
@@ -265295,35 +265711,6 @@ if __name__ == "__main__":
|
|
|
265295
265711
|
const withAudio = booleanArg3(args["with_audio"], false);
|
|
265296
265712
|
const audioInput = typeof args["audio_input"] === "string" && args["audio_input"].trim() ? String(args["audio_input"]).trim() : void 0;
|
|
265297
265713
|
const candidates = videoGenerationFallbackCandidates(requestedModel, requestedBackend, inferredKind, generationFallbackEnabled3(args), { preferNativeAudioVideo: withAudio || Boolean(audioInput) });
|
|
265298
|
-
const broker = getModelBroker();
|
|
265299
|
-
const firstCandidate = candidates[0];
|
|
265300
|
-
let brokerGpuIndex = null;
|
|
265301
|
-
if (firstCandidate) {
|
|
265302
|
-
const preset = firstCandidate.preset;
|
|
265303
|
-
const decision2 = await broker.ensureModelLoadable({
|
|
265304
|
-
name: firstCandidate.model,
|
|
265305
|
-
domain: "video-gen",
|
|
265306
|
-
host: firstCandidate.backend === "comfyui" ? "comfyui" : "diffusers-py",
|
|
265307
|
-
owner: "video-generate-tool",
|
|
265308
|
-
estimatedVramMB: preset ? preset.minVramGB * 1024 : void 0
|
|
265309
|
-
});
|
|
265310
|
-
if (decision2.kind === "evict") {
|
|
265311
|
-
for (const target of decision2.evictTargets) {
|
|
265312
|
-
await broker.evict(target.host, target.name, "video-gen-needs-room");
|
|
265313
|
-
}
|
|
265314
|
-
brokerGpuIndex = decision2.gpuIndex ?? null;
|
|
265315
|
-
} else if (decision2.kind === "ok") {
|
|
265316
|
-
brokerGpuIndex = decision2.gpuIndex ?? null;
|
|
265317
|
-
} else if (decision2.kind === "reject") {
|
|
265318
|
-
return {
|
|
265319
|
-
success: false,
|
|
265320
|
-
output: "",
|
|
265321
|
-
error: `Video generation blocked by resource broker: ${decision2.reason}`,
|
|
265322
|
-
durationMs: performance.now() - start2
|
|
265323
|
-
};
|
|
265324
|
-
}
|
|
265325
|
-
}
|
|
265326
|
-
this._brokerGpuIndex = brokerGpuIndex;
|
|
265327
265714
|
if (candidates.length === 0) {
|
|
265328
265715
|
return {
|
|
265329
265716
|
success: false,
|
|
@@ -265373,7 +265760,30 @@ if __name__ == "__main__":
|
|
|
265373
265760
|
failed.push({ candidate, reason: "ComfyUI backend not yet implemented." });
|
|
265374
265761
|
continue;
|
|
265375
265762
|
}
|
|
265376
|
-
const
|
|
265763
|
+
const leaseDecision = await this.acquireTransientLoadLease({
|
|
265764
|
+
candidate,
|
|
265765
|
+
reason: "video-prewarm-needs-room",
|
|
265766
|
+
start: args.start
|
|
265767
|
+
});
|
|
265768
|
+
if (leaseDecision && "success" in leaseDecision) {
|
|
265769
|
+
failed.push({ candidate, reason: summarizeToolResult3(leaseDecision) });
|
|
265770
|
+
if (index < args.candidates.length - 1) {
|
|
265771
|
+
this.emitProgress({
|
|
265772
|
+
stage: "setup",
|
|
265773
|
+
message: `${candidate.model} did not fit current resources; trying ${args.candidates[index + 1].model}`
|
|
265774
|
+
});
|
|
265775
|
+
}
|
|
265776
|
+
continue;
|
|
265777
|
+
}
|
|
265778
|
+
const lease = leaseDecision?.lease;
|
|
265779
|
+
this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
|
|
265780
|
+
let result;
|
|
265781
|
+
try {
|
|
265782
|
+
result = await this.prewarmDiffusers({ candidate, start: args.start, python: args.args["python"] });
|
|
265783
|
+
} finally {
|
|
265784
|
+
await lease?.release();
|
|
265785
|
+
this._brokerGpuIndex = null;
|
|
265786
|
+
}
|
|
265377
265787
|
if (result.success)
|
|
265378
265788
|
return annotateVideoFallbackSuccess(result, failed, candidate);
|
|
265379
265789
|
failed.push({ candidate, reason: summarizeToolResult3(result) });
|
|
@@ -265459,26 +265869,48 @@ if __name__ == "__main__":
|
|
|
265459
265869
|
start: args.start
|
|
265460
265870
|
});
|
|
265461
265871
|
} else {
|
|
265462
|
-
|
|
265463
|
-
|
|
265464
|
-
|
|
265465
|
-
|
|
265466
|
-
kind: args.kind,
|
|
265467
|
-
imageArg: args.imageArg,
|
|
265468
|
-
audioInput: args.audioInput,
|
|
265469
|
-
width,
|
|
265470
|
-
height,
|
|
265471
|
-
numFrames,
|
|
265472
|
-
fps,
|
|
265473
|
-
steps,
|
|
265474
|
-
guidance,
|
|
265475
|
-
negativePrompt,
|
|
265476
|
-
seed: args.seed,
|
|
265477
|
-
hfToken: hfTokenOverride,
|
|
265478
|
-
autoAcceptLicense,
|
|
265479
|
-
start: args.start,
|
|
265480
|
-
python: args.args["python"]
|
|
265872
|
+
const leaseDecision = await this.acquireTransientLoadLease({
|
|
265873
|
+
candidate,
|
|
265874
|
+
reason: "video-gen-needs-room",
|
|
265875
|
+
start: args.start
|
|
265481
265876
|
});
|
|
265877
|
+
if (leaseDecision && "success" in leaseDecision) {
|
|
265878
|
+
failed.push({ candidate, reason: summarizeToolResult3(leaseDecision) });
|
|
265879
|
+
if (index < args.candidates.length - 1) {
|
|
265880
|
+
this.emitProgress({
|
|
265881
|
+
stage: "setup",
|
|
265882
|
+
message: `${candidate.model} did not fit current resources; falling back to ${args.candidates[index + 1].model}`
|
|
265883
|
+
});
|
|
265884
|
+
}
|
|
265885
|
+
continue;
|
|
265886
|
+
}
|
|
265887
|
+
const lease = leaseDecision?.lease;
|
|
265888
|
+
this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
|
|
265889
|
+
try {
|
|
265890
|
+
result = await this.generateWithDiffusers({
|
|
265891
|
+
prompt: promptForCandidate,
|
|
265892
|
+
model: candidate.model,
|
|
265893
|
+
preset,
|
|
265894
|
+
kind: args.kind,
|
|
265895
|
+
imageArg: args.imageArg,
|
|
265896
|
+
audioInput: args.audioInput,
|
|
265897
|
+
width,
|
|
265898
|
+
height,
|
|
265899
|
+
numFrames,
|
|
265900
|
+
fps,
|
|
265901
|
+
steps,
|
|
265902
|
+
guidance,
|
|
265903
|
+
negativePrompt,
|
|
265904
|
+
seed: args.seed,
|
|
265905
|
+
hfToken: hfTokenOverride,
|
|
265906
|
+
autoAcceptLicense,
|
|
265907
|
+
start: args.start,
|
|
265908
|
+
python: args.args["python"]
|
|
265909
|
+
});
|
|
265910
|
+
} finally {
|
|
265911
|
+
await lease?.release();
|
|
265912
|
+
this._brokerGpuIndex = null;
|
|
265913
|
+
}
|
|
265482
265914
|
}
|
|
265483
265915
|
let nativeAudio = preset.nativeAudioVideo === true;
|
|
265484
265916
|
let audioPath;
|
|
@@ -265670,6 +266102,17 @@ ${llmAnnotation}` : result.llmContent;
|
|
|
265670
266102
|
}
|
|
265671
266103
|
ensureUnifiedCacheDirs();
|
|
265672
266104
|
this.emitProgress({ stage: "load", message: `Downloading/loading video model ${args.candidate.model}` });
|
|
266105
|
+
const runnerEnv = { ...python.env };
|
|
266106
|
+
if (this._brokerGpuIndex !== null) {
|
|
266107
|
+
if (mediaBrokerGpuIndexIsCompatible(this._brokerGpuIndex, "video", runnerEnv)) {
|
|
266108
|
+
runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
|
|
266109
|
+
} else {
|
|
266110
|
+
this.emitProgress({
|
|
266111
|
+
stage: "setup",
|
|
266112
|
+
message: `Broker selected CUDA GPU ${this._brokerGpuIndex}, but video CUDA filtering excluded it; using CUDA_VISIBLE_DEVICES=${runnerEnv["CUDA_VISIBLE_DEVICES"] ?? "default"}`
|
|
266113
|
+
});
|
|
266114
|
+
}
|
|
266115
|
+
}
|
|
265673
266116
|
const result = await runProcess4(python.command, [
|
|
265674
266117
|
runner,
|
|
265675
266118
|
"--model",
|
|
@@ -265685,7 +266128,7 @@ ${llmAnnotation}` : result.llmContent;
|
|
|
265685
266128
|
], {
|
|
265686
266129
|
cwd: this.cwd,
|
|
265687
266130
|
timeoutMs: 18e5,
|
|
265688
|
-
env:
|
|
266131
|
+
env: runnerEnv,
|
|
265689
266132
|
progressLabel: `Downloading/loading ${args.candidate.model}`,
|
|
265690
266133
|
onProgress: (event) => this.emitProgress(event)
|
|
265691
266134
|
});
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "omnius",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.188",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "omnius",
|
|
9
|
-
"version": "1.0.
|
|
9
|
+
"version": "1.0.188",
|
|
10
10
|
"bundleDependencies": [
|
|
11
11
|
"image-to-ascii"
|
|
12
12
|
],
|
package/package.json
CHANGED