omnius 1.0.187 → 1.0.188

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1412,6 +1412,18 @@ var init_tool_executor = __esm({
1412
1412
  import { EventEmitter } from "node:events";
1413
1413
  import { totalmem, freemem } from "node:os";
1414
1414
  import { exec } from "node:child_process";
/**
 * Drop repeated model records, keeping the first record seen for each
 * `host:name` pair. Input order of the surviving records is preserved.
 *
 * @param {Iterable<{host: string, name: string}>} models - Records to dedupe.
 * @returns {Array<{host: string, name: string}>} A new array of unique records.
 */
function dedupeLoadedModels(models) {
  // A Map iterates in insertion order, so the first record per key wins
  // and the output order matches the order of first appearance.
  const firstByIdentity = new Map();
  for (const entry of models) {
    const identity = `${entry.host}:${entry.name}`;
    if (!firstByIdentity.has(identity)) {
      firstByIdentity.set(identity, entry);
    }
  }
  return [...firstByIdentity.values()];
}
1415
1427
  function ramSnapshotMB() {
1416
1428
  const total = Math.round(totalmem() / (1024 * 1024));
1417
1429
  const free = Math.round(freemem() / (1024 * 1024));
@@ -1690,6 +1702,104 @@ var init_model_broker = __esm({
1690
1702
  this.emit("rejected", spec, reason);
1691
1703
  return { kind: "reject", reason };
1692
1704
  }
1705
+ /**
1706
+ * Acquire a short-lived load lease for media/subprocess generation.
1707
+ *
1708
+ * Media generation often needs to temporarily free VRAM occupied by Ollama
1709
+ * chat models. This helper refreshes Ollama state, asks the broker what must
1710
+ * be evicted, unloads those Ollama models with keep_alive=0, and returns a
1711
+ * lease whose release() clears transient inflight state, unloads any
1712
+ * Ollama-hosted requested model, and warms the evicted Ollama models again.
1713
+ */
1714
+ async acquireTransientModelLoad(spec, options2 = {}) {
1715
+ const reason = options2.reason ?? `${spec.domain}-transient-load`;
1716
+ const evictedModels = [];
1717
+ let gpuIndex = null;
1718
+ let admitted = false;
1719
+ await this.pollOnce().catch(() => {
1720
+ });
1721
+ for (let attempt = 0; attempt < 4; attempt++) {
1722
+ const decision2 = await this.ensureModelLoadable(spec);
1723
+ if (decision2.kind === "wait-for-inflight") {
1724
+ const waited = await decision2.promise.catch((err) => ({
1725
+ kind: "reject",
1726
+ reason: err instanceof Error ? err.message : String(err)
1727
+ }));
1728
+ if (waited.kind === "ok") {
1729
+ gpuIndex = waited.gpuIndex ?? null;
1730
+ admitted = true;
1731
+ break;
1732
+ }
1733
+ if (waited.kind === "evict") {
1734
+ for (const target of waited.evictTargets) {
1735
+ if (await this.evict(target.host, target.name, reason))
1736
+ evictedModels.push(target);
1737
+ }
1738
+ await this.pollOnce().catch(() => {
1739
+ });
1740
+ continue;
1741
+ }
1742
+ if (waited.kind === "degrade")
1743
+ return waited;
1744
+ if (waited.kind === "reject")
1745
+ return waited;
1746
+ continue;
1747
+ }
1748
+ if (decision2.kind === "ok") {
1749
+ gpuIndex = decision2.gpuIndex ?? null;
1750
+ admitted = true;
1751
+ break;
1752
+ }
1753
+ if (decision2.kind === "evict") {
1754
+ for (const target of decision2.evictTargets) {
1755
+ const before = this._loaded.get(`${target.host}:${target.name}`) ?? target;
1756
+ if (await this.evict(target.host, target.name, reason)) {
1757
+ evictedModels.push(before);
1758
+ }
1759
+ }
1760
+ await this.pollOnce().catch(() => {
1761
+ });
1762
+ continue;
1763
+ }
1764
+ if (decision2.kind === "degrade")
1765
+ return decision2;
1766
+ return decision2;
1767
+ }
1768
+ if (!admitted) {
1769
+ return {
1770
+ kind: "reject",
1771
+ reason: `could not acquire transient load lease for ${spec.host}:${spec.name} after repeated evictions`
1772
+ };
1773
+ }
1774
+ const evictedOllamaModels = dedupeLoadedModels(evictedModels.filter((m2) => m2.host === "ollama"));
1775
+ const broker = this;
1776
+ let released = false;
1777
+ return {
1778
+ kind: "ok",
1779
+ lease: {
1780
+ spec,
1781
+ gpuIndex,
1782
+ evictedModels: dedupeLoadedModels(evictedModels),
1783
+ evictedOllamaModels,
1784
+ async release() {
1785
+ if (released)
1786
+ return;
1787
+ released = true;
1788
+ broker.clearInflight(spec.host, spec.name);
1789
+ if ((options2.unloadRequestedOllama ?? true) && spec.host === "ollama") {
1790
+ await broker.unloadOllamaModel(spec.name, `${reason}-complete`).catch(() => false);
1791
+ }
1792
+ if (options2.restoreOllama !== false && evictedOllamaModels.length > 0) {
1793
+ await broker.restoreOllamaModels(evictedOllamaModels, {
1794
+ keepAlive: options2.restoreKeepAlive ?? "30m"
1795
+ });
1796
+ }
1797
+ await broker.pollOnce().catch(() => {
1798
+ });
1799
+ }
1800
+ }
1801
+ };
1802
+ }
1693
1803
  /**
1694
1804
  * Register a model that has been successfully loaded.
1695
1805
  * Callers MUST call this after a successful load so the broker can track LRU.
@@ -1743,6 +1853,66 @@ var init_model_broker = __esm({
1743
1853
  this.emit("evicted", m2, reason);
1744
1854
  return actively;
1745
1855
  }
1856
+ /** Force-unload an Ollama model even when it is not currently tracked. */
1857
+ async unloadOllamaModel(modelName, reason = "ollama-unload") {
1858
+ const key = `ollama:${modelName}`;
1859
+ const existing = this._loaded.get(key);
1860
+ const ok3 = await this.ollamaUnload(modelName).catch(() => false);
1861
+ if (existing) {
1862
+ this._loaded.delete(key);
1863
+ this.emit("evicted", existing, reason);
1864
+ }
1865
+ return ok3;
1866
+ }
1867
+ /** Best-effort warm/reload of an Ollama model after temporary eviction. */
1868
+ async warmOllamaModel(modelName, keepAlive = "30m") {
1869
+ const bodies = [
1870
+ { model: modelName, prompt: "", stream: false, keep_alive: keepAlive, options: { num_predict: 0 } },
1871
+ { model: modelName, prompt: "", stream: false, keep_alive: keepAlive, options: { num_predict: 1 } }
1872
+ ];
1873
+ for (const body of bodies) {
1874
+ try {
1875
+ const res = await fetch(`${this._ollamaBaseUrl}/api/generate`, {
1876
+ method: "POST",
1877
+ headers: { "Content-Type": "application/json" },
1878
+ body: JSON.stringify(body),
1879
+ signal: AbortSignal.timeout(12e4)
1880
+ });
1881
+ if (!res.ok)
1882
+ continue;
1883
+ await this.refreshOllamaPs().catch(() => {
1884
+ });
1885
+ return true;
1886
+ } catch {
1887
+ }
1888
+ }
1889
+ try {
1890
+ const res = await fetch(`${this._ollamaBaseUrl}/api/generate`, {
1891
+ method: "POST",
1892
+ headers: { "Content-Type": "application/json" },
1893
+ body: JSON.stringify({
1894
+ model: modelName,
1895
+ stream: false,
1896
+ keep_alive: keepAlive
1897
+ }),
1898
+ signal: AbortSignal.timeout(12e4)
1899
+ });
1900
+ if (!res.ok)
1901
+ return false;
1902
+ await this.refreshOllamaPs().catch(() => {
1903
+ });
1904
+ return true;
1905
+ } catch {
1906
+ return false;
1907
+ }
1908
+ }
1909
+ /** Restore a set of previously evicted Ollama models, oldest first. */
1910
+ async restoreOllamaModels(models, options2 = {}) {
1911
+ const unique2 = dedupeLoadedModels(models.filter((m2) => m2.host === "ollama")).sort((a2, b) => a2.lastUsedAt - b.lastUsedAt);
1912
+ for (const model of unique2) {
1913
+ await this.warmOllamaModel(model.name, options2.keepAlive ?? "30m").catch(() => false);
1914
+ }
1915
+ }
1746
1916
  // ------------------------------------------------------------------
1747
1917
  // Internal — Ollama
1748
1918
  // ------------------------------------------------------------------
@@ -1885,7 +2055,7 @@ var init_model_broker = __esm({
1885
2055
  );
1886
2056
  const idle = (m2) => now - m2.lastUsedAt > this.idleEvictMs;
1887
2057
  const onTargetGpu = (m2) => req2.targetGpu === void 0 || req2.targetGpu === null ? true : m2.gpuIndex === req2.targetGpu;
1888
- const evictable = [...this._loaded.values()].filter((m2) => m2.priority <= req2.requestingPriority).filter(sameDomainOk).filter(onTargetGpu).sort((a2, b) => {
2058
+ const evictable = [...this._loaded.values()].filter((m2) => m2.priority <= req2.requestingPriority).filter(sameDomainOk).filter(onTargetGpu).filter((m2) => !this.hasActiveSlotForModel(m2)).sort((a2, b) => {
1889
2059
  const aIdle = idle(a2) ? 0 : 1;
1890
2060
  const bIdle = idle(b) ? 0 : 1;
1891
2061
  if (aIdle !== bIdle)
@@ -1931,6 +2101,13 @@ var init_model_broker = __esm({
1931
2101
  n2++;
1932
2102
  return n2;
1933
2103
  }
2104
+ hasActiveSlotForModel(model) {
2105
+ for (const slot of this._activeSlots.values()) {
2106
+ if (slot.model === model.name)
2107
+ return true;
2108
+ }
2109
+ return false;
2110
+ }
1934
2111
  // ------------------------------------------------------------------
1935
2112
  // Internal — fallback resolution
1936
2113
  // ------------------------------------------------------------------
@@ -22761,6 +22938,17 @@ function evictModelsToFreeSpace(args) {
22761
22938
  writeMeta(meta);
22762
22939
  return { evicted, bytesFreed, finalFreeBytes: disk.freeBytes };
22763
22940
  }
/**
 * Estimate how many bytes could be freed by evicting every cached repo that
 * is not listed in `keepRepos`.
 *
 * For each candidate the measured cache size is preferred; when the
 * measurement is falsy the size recorded in the metadata entry is used, and
 * negative values count as zero.
 *
 * @param {Iterable<string>} [keepRepos] - Repos that must not be counted.
 * @returns {number} Sum of reclaimable bytes.
 */
function estimateReclaimableCacheBytes(keepRepos) {
  const protectedRepos = new Set(keepRepos ?? []);
  let reclaimable = 0;
  for (const entry of readMeta().entries) {
    if (!protectedRepos.has(entry.repo)) {
      const bytes = measureRepoCacheBytes(entry.repo) || entry.sizeBytes || 0;
      reclaimable += Math.max(0, bytes);
    }
  }
  return reclaimable;
}
22764
22952
  function ensureDiskSpaceForDownload(args) {
22765
22953
  ensureUnifiedCacheDirs();
22766
22954
  const safetyMargin = args.safetyMarginBytes ?? 1 * 1024 ** 3;
@@ -22769,6 +22957,10 @@ function ensureDiskSpaceForDownload(args) {
22769
22957
  if (disk.freeBytes >= target) {
22770
22958
  return { ok: true, evicted: [], freeBytes: disk.freeBytes };
22771
22959
  }
22960
+ const reclaimableBytes = estimateReclaimableCacheBytes(args.keepRepos);
22961
+ if (disk.freeBytes + reclaimableBytes < target) {
22962
+ throw new InsufficientDiskSpaceError(args.approxDownloadBytes, disk.freeBytes, disk.totalBytes, []);
22963
+ }
22772
22964
  const evictionResult = evictModelsToFreeSpace({
22773
22965
  neededBytes: args.approxDownloadBytes,
22774
22966
  keepRepos: args.keepRepos,
@@ -259208,6 +259400,12 @@ function imageCandidateFor(model, requestedBackend) {
259208
259400
  preset: getImageGenerationPreset(resolved)
259209
259401
  };
259210
259402
  }
/**
 * Map an image candidate's backend to the broker host name.
 *
 * @param {{backend: string}} candidate
 * @returns {"ollama"|"diffusers-py"} "ollama" for the Ollama backend, otherwise "diffusers-py".
 */
function imageCandidateHost(candidate) {
  if (candidate.backend === "ollama") {
    return "ollama";
  }
  return "diffusers-py";
}
/**
 * Estimated VRAM requirement of an image candidate in MiB, derived from the
 * preset's `minVramGB` (rounded up).
 *
 * @param {{preset?: {minVramGB?: number}}} candidate
 * @returns {number|undefined} MiB estimate, or `undefined` when the preset is
 *   missing or carries no usable `minVramGB` (absent, null, or not numeric).
 */
function imageCandidateEstimatedVramMB(candidate) {
  const minVramGB = candidate.preset?.minVramGB;
  // `== null` covers both undefined and null: "no estimate available".
  if (minVramGB == null) {
    return void 0;
  }
  const estimateMB = Math.ceil(minVramGB * 1024);
  // Never hand the broker NaN/Infinity; treat it as "unknown" instead.
  return Number.isFinite(estimateMB) ? estimateMB : void 0;
}
259211
259409
  function imageGenerationFallbackCandidates(requestedModel, requestedBackend, allowFallback = true) {
259212
259410
  const ladder = imageGenerationQualityLadder();
259213
259411
  const candidates = [];
@@ -259511,9 +259709,15 @@ function annotateImageFallbackSuccess(result, failed, winner) {
259511
259709
  ...failed.map((attempt, index) => ` ${formatImageAttempt(attempt.candidate, attempt.reason, index)}`),
259512
259710
  ""
259513
259711
  ].join("\n");
259712
+ const llmPrefix = [
259713
+ `Fallback ladder used ${winner.model} [${winner.backend}] after ${failed.length} failed attempt(s).`,
259714
+ ...failed.map((attempt, index) => formatImageAttempt(attempt.candidate, attempt.reason, index))
259715
+ ].join("\n");
259514
259716
  return {
259515
259717
  ...result,
259516
- output: prefix + result.output
259718
+ output: prefix + result.output,
259719
+ llmContent: result.llmContent ? `${llmPrefix}
259720
+ ${result.llmContent}` : result.llmContent
259517
259721
  };
259518
259722
  }
259519
259723
  function parseRunnerJson(stdout) {
@@ -260321,6 +260525,45 @@ if __name__ == "__main__":
260321
260525
  this.lastProgressAt = now;
260322
260526
  this.progressHandler(event);
260323
260527
  }
260528
+ async acquireTransientLoadLease(args) {
260529
+ if (!args.candidate)
260530
+ return null;
260531
+ const broker = getModelBroker();
260532
+ const decision2 = await broker.acquireTransientModelLoad({
260533
+ name: args.candidate.model,
260534
+ domain: "image-gen",
260535
+ host: imageCandidateHost(args.candidate),
260536
+ owner: "image-generate-tool",
260537
+ estimatedVramMB: imageCandidateEstimatedVramMB(args.candidate)
260538
+ }, {
260539
+ reason: args.reason,
260540
+ restoreOllama: true,
260541
+ unloadRequestedOllama: true
260542
+ });
260543
+ if (decision2.kind === "reject") {
260544
+ return {
260545
+ success: false,
260546
+ output: "",
260547
+ error: `Image generation blocked by resource broker: ${decision2.reason}`,
260548
+ durationMs: performance.now() - args.start
260549
+ };
260550
+ }
260551
+ if (decision2.kind === "degrade") {
260552
+ return {
260553
+ success: false,
260554
+ output: "",
260555
+ error: `Image generation needs a broker fallback (${decision2.fallback.name}), but image candidate fallback must be selected by the image ladder: ${decision2.reason}`,
260556
+ durationMs: performance.now() - args.start
260557
+ };
260558
+ }
260559
+ if (decision2.lease.evictedOllamaModels.length > 0) {
260560
+ this.emitProgress({
260561
+ stage: "setup",
260562
+ message: `Temporarily unloaded ${decision2.lease.evictedOllamaModels.length} Ollama inference model(s) to free VRAM for image generation`
260563
+ });
260564
+ }
260565
+ return { lease: decision2.lease, gpuIndex: decision2.lease.gpuIndex };
260566
+ }
260324
260567
  async execute(args) {
260325
260568
  const start2 = performance.now();
260326
260569
  const action = String(args["action"] ?? "generate");
@@ -260363,33 +260606,6 @@ if __name__ == "__main__":
260363
260606
  const requestedBackend = args["backend"] ? String(args["backend"]) : this.defaultBackend;
260364
260607
  const seed = optionalNumberArg(args["seed"]);
260365
260608
  const candidates = imageGenerationFallbackCandidates(requestedModel, requestedBackend, generationFallbackEnabled(args));
260366
- const broker = getModelBroker();
260367
- const firstCandidate = candidates[0];
260368
- let brokerGpuIndex = null;
260369
- if (firstCandidate) {
260370
- const decision2 = await broker.ensureModelLoadable({
260371
- name: firstCandidate.model,
260372
- domain: "image-gen",
260373
- host: firstCandidate.backend === "ollama" ? "ollama" : "diffusers-py",
260374
- owner: "image-generate-tool"
260375
- });
260376
- if (decision2.kind === "evict") {
260377
- for (const target of decision2.evictTargets) {
260378
- await broker.evict(target.host, target.name, "image-gen-needs-room");
260379
- }
260380
- brokerGpuIndex = decision2.gpuIndex ?? null;
260381
- } else if (decision2.kind === "ok") {
260382
- brokerGpuIndex = decision2.gpuIndex ?? null;
260383
- } else if (decision2.kind === "reject") {
260384
- return {
260385
- success: false,
260386
- output: "",
260387
- error: `Image generation blocked by resource broker: ${decision2.reason}`,
260388
- durationMs: performance.now() - start2
260389
- };
260390
- }
260391
- }
260392
- this._brokerGpuIndex = brokerGpuIndex;
260393
260609
  try {
260394
260610
  return await this.generateCandidateLadder({ candidates, prompt, args, seed, start: start2 });
260395
260611
  } catch (err) {
@@ -260408,7 +260624,30 @@ if __name__ == "__main__":
260408
260624
  stage: "setup",
260409
260625
  message: `Preparing image model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
260410
260626
  });
260411
- const result = candidate.backend === "ollama" ? await this.prewarmOllama({ model: candidate.model, start: args.start }) : candidate.backend === "sdcpp" ? await this.prewarmSdCpp({ model: candidate.model, start: args.start, python: args.args["python"] }) : await this.prewarmDiffusers({ model: candidate.model, start: args.start, python: args.args["python"] });
260627
+ const leaseDecision = await this.acquireTransientLoadLease({
260628
+ candidate,
260629
+ reason: "image-prewarm-needs-room",
260630
+ start: args.start
260631
+ });
260632
+ if (leaseDecision && "success" in leaseDecision) {
260633
+ failed.push({ candidate, reason: summarizeToolResult(leaseDecision) });
260634
+ if (index < args.candidates.length - 1) {
260635
+ this.emitProgress({
260636
+ stage: "setup",
260637
+ message: `${candidate.model} did not fit current resources; trying ${args.candidates[index + 1].model}`
260638
+ });
260639
+ }
260640
+ continue;
260641
+ }
260642
+ const lease = leaseDecision?.lease;
260643
+ this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
260644
+ let result;
260645
+ try {
260646
+ result = candidate.backend === "ollama" ? await this.prewarmOllama({ model: candidate.model, start: args.start }) : candidate.backend === "sdcpp" ? await this.prewarmSdCpp({ model: candidate.model, start: args.start, python: args.args["python"] }) : await this.prewarmDiffusers({ model: candidate.model, start: args.start, python: args.args["python"] });
260647
+ } finally {
260648
+ await lease?.release();
260649
+ this._brokerGpuIndex = null;
260650
+ }
260412
260651
  if (result.success)
260413
260652
  return annotateImageFallbackSuccess(result, failed, candidate);
260414
260653
  failed.push({ candidate, reason: summarizeToolResult(result) });
@@ -260447,7 +260686,30 @@ if __name__ == "__main__":
260447
260686
  message: `Using image model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
260448
260687
  });
260449
260688
  const promptForCandidate = expansionEnabled ? await this.expandPromptForCandidate(args.prompt, candidate, index, args.candidates.length) : args.prompt;
260450
- const result = candidate.backend === "ollama" ? await this.generateWithOllama({ prompt: promptForCandidate, model: candidate.model, width, height, steps, start: args.start }) : candidate.backend === "sdcpp" ? await this.generateWithSdCpp({ prompt: promptForCandidate, model: candidate.model, width, height, steps, seed: args.seed, start: args.start, python: args.args["python"] }) : await this.generateWithDiffusers({ prompt: promptForCandidate, model: candidate.model, width, height, steps, guidance, seed: args.seed, start: args.start, python: args.args["python"] });
260689
+ const leaseDecision = await this.acquireTransientLoadLease({
260690
+ candidate,
260691
+ reason: "image-gen-needs-room",
260692
+ start: args.start
260693
+ });
260694
+ if (leaseDecision && "success" in leaseDecision) {
260695
+ failed.push({ candidate, reason: summarizeToolResult(leaseDecision) });
260696
+ if (index < args.candidates.length - 1) {
260697
+ this.emitProgress({
260698
+ stage: "setup",
260699
+ message: `${candidate.model} did not fit current resources; falling back to ${args.candidates[index + 1].model}`
260700
+ });
260701
+ }
260702
+ continue;
260703
+ }
260704
+ const lease = leaseDecision?.lease;
260705
+ this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
260706
+ let result;
260707
+ try {
260708
+ result = candidate.backend === "ollama" ? await this.generateWithOllama({ prompt: promptForCandidate, model: candidate.model, width, height, steps, start: args.start }) : candidate.backend === "sdcpp" ? await this.generateWithSdCpp({ prompt: promptForCandidate, model: candidate.model, width, height, steps, seed: args.seed, start: args.start, python: args.args["python"] }) : await this.generateWithDiffusers({ prompt: promptForCandidate, model: candidate.model, width, height, steps, guidance, seed: args.seed, start: args.start, python: args.args["python"] });
260709
+ } finally {
260710
+ await lease?.release();
260711
+ this._brokerGpuIndex = null;
260712
+ }
260451
260713
  if (result.success) {
260452
260714
  await this.writeImageSidecar(result, {
260453
260715
  originalPrompt: args.prompt,
@@ -260671,6 +260933,17 @@ ${errText.slice(0, 1200)}`,
260671
260933
  }
260672
260934
  ensureUnifiedCacheDirs();
260673
260935
  this.emitProgress({ stage: "load", message: `Downloading/loading image model ${args.model}` });
260936
+ const runnerEnv = { ...python.env };
260937
+ if (this._brokerGpuIndex !== null) {
260938
+ if (mediaBrokerGpuIndexIsCompatible(this._brokerGpuIndex, "image", runnerEnv)) {
260939
+ runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
260940
+ } else {
260941
+ this.emitProgress({
260942
+ stage: "setup",
260943
+ message: `Broker selected CUDA GPU ${this._brokerGpuIndex}, but image CUDA filtering excluded it; using CUDA_VISIBLE_DEVICES=${runnerEnv["CUDA_VISIBLE_DEVICES"] ?? "default"}`
260944
+ });
260945
+ }
260946
+ }
260674
260947
  const result = await runProcess2(python.command, [
260675
260948
  runner,
260676
260949
  "--model",
@@ -260683,7 +260956,7 @@ ${errText.slice(0, 1200)}`,
260683
260956
  ], {
260684
260957
  cwd: this.cwd,
260685
260958
  timeoutMs: 18e5,
260686
- env: python.env,
260959
+ env: runnerEnv,
260687
260960
  progressLabel: `Downloading/loading ${args.model}`,
260688
260961
  onProgress: (event) => this.emitProgress(event)
260689
260962
  });
@@ -261735,6 +262008,18 @@ function audioCandidateFor(kind, model, requestedBackend) {
261735
262008
  preset: getAudioGenerationPreset(model, kind)
261736
262009
  };
261737
262010
  }
/**
 * Map an audio candidate's backend to the broker host name.
 *
 * @param {{backend: string}} candidate
 * @returns {string|null} `null` for the "project" backend, the backend's own
 *   name for "audiocraft" and "tangoflux", and "diffusers-py" for everything else.
 */
function audioCandidateHost(candidate) {
  switch (candidate.backend) {
    case "project":
      return null;
    case "audiocraft":
      return "audiocraft";
    case "tangoflux":
      return "tangoflux";
    default:
      return "diffusers-py";
  }
}
/**
 * Estimated VRAM requirement of an audio candidate in MiB, derived from the
 * preset's `minVramGB` (rounded up).
 *
 * @param {{preset?: {minVramGB?: number}}} candidate
 * @returns {number|undefined} MiB estimate, or `undefined` when the preset is
 *   missing or carries no usable `minVramGB` (absent, null, or not numeric).
 */
function audioCandidateEstimatedVramMB(candidate) {
  const minVramGB = candidate.preset?.minVramGB;
  // A preset without minVramGB previously produced NaN; report "unknown" instead.
  if (minVramGB == null) {
    return void 0;
  }
  const estimateMB = Math.ceil(minVramGB * 1024);
  return Number.isFinite(estimateMB) ? estimateMB : void 0;
}
261738
262023
  function audioGenerationFallbackCandidates(kind, requestedModel, requestedBackend, allowFallback = true) {
261739
262024
  const ladder = audioGenerationQualityLadder(kind);
261740
262025
  const candidates = [];
@@ -261891,9 +262176,15 @@ function annotateAudioFallbackSuccess(result, failed, winner) {
261891
262176
  ...failed.map((attempt, index) => ` ${formatAudioAttempt(attempt.candidate, attempt.reason, index)}`),
261892
262177
  ""
261893
262178
  ].join("\n");
262179
+ const llmPrefix = [
262180
+ `Fallback ladder used ${winner.model} [${winner.backend}] after ${failed.length} failed attempt(s).`,
262181
+ ...failed.map((attempt, index) => formatAudioAttempt(attempt.candidate, attempt.reason, index))
262182
+ ].join("\n");
261894
262183
  return {
261895
262184
  ...result,
261896
- output: prefix + result.output
262185
+ output: prefix + result.output,
262186
+ llmContent: result.llmContent ? `${llmPrefix}
262187
+ ${result.llmContent}` : result.llmContent
261897
262188
  };
261898
262189
  }
261899
262190
  var DEFAULT_SOUND_MODEL, DEFAULT_MUSIC_MODEL, DIFFUSERS_AUDIO_PACKAGES, TRANSFORMERS_AUDIO_PACKAGES, AUDIOCRAFT_PACKAGES, STABLE_AUDIO_PACKAGES, TANGOFLUX_PACKAGES, AUDIO_GENERATION_MODEL_PRESETS, SOUND_GENERATION_QUALITY_LADDER, MUSIC_GENERATION_QUALITY_LADDER, DIFFUSERS_AUDIO_RUNNER, AUDIOCRAFT_RUNNER, TRANSFORMERS_AUDIO_RUNNER, TANGOFLUX_RUNNER, AudioGenerateTool;
@@ -262730,6 +263021,48 @@ if __name__ == "__main__":
262730
263021
  this.lastProgressAt = now;
262731
263022
  this.progressHandler(event);
262732
263023
  }
263024
+ async acquireTransientLoadLease(args) {
263025
+ if (!args.candidate)
263026
+ return null;
263027
+ const host = audioCandidateHost(args.candidate);
263028
+ if (!host)
263029
+ return null;
263030
+ const broker = getModelBroker();
263031
+ const decision2 = await broker.acquireTransientModelLoad({
263032
+ name: args.candidate.model,
263033
+ domain: args.kind,
263034
+ host,
263035
+ owner: `audio-generate-tool/${args.kind}`,
263036
+ estimatedVramMB: audioCandidateEstimatedVramMB(args.candidate)
263037
+ }, {
263038
+ reason: args.reason,
263039
+ restoreOllama: true,
263040
+ unloadRequestedOllama: false
263041
+ });
263042
+ if (decision2.kind === "reject") {
263043
+ return {
263044
+ success: false,
263045
+ output: "",
263046
+ error: `${args.kind === "music" ? "Music" : "Sound"} generation blocked by resource broker: ${decision2.reason}`,
263047
+ durationMs: performance.now() - args.start
263048
+ };
263049
+ }
263050
+ if (decision2.kind === "degrade") {
263051
+ return {
263052
+ success: false,
263053
+ output: "",
263054
+ error: `${args.kind === "music" ? "Music" : "Sound"} generation needs a broker fallback (${decision2.fallback.name}), but audio candidate fallback must be selected by the audio ladder: ${decision2.reason}`,
263055
+ durationMs: performance.now() - args.start
263056
+ };
263057
+ }
263058
+ if (decision2.lease.evictedOllamaModels.length > 0) {
263059
+ this.emitProgress({
263060
+ stage: "setup",
263061
+ message: `Temporarily unloaded ${decision2.lease.evictedOllamaModels.length} Ollama inference model(s) to free VRAM for ${args.kind} generation`
263062
+ });
263063
+ }
263064
+ return { lease: decision2.lease, gpuIndex: decision2.lease.gpuIndex };
263065
+ }
262733
263066
  async prewarmPythonBackend(args) {
262734
263067
  const runner = await ensureAudioRunner(this.cwd, args.runnerBackend);
262735
263068
  let python;
@@ -262776,6 +263109,17 @@ if __name__ == "__main__":
262776
263109
  }
262777
263110
  ensureUnifiedCacheDirs();
262778
263111
  this.emitProgress({ stage: "load", message: `Downloading/loading ${args.kind} model ${args.model}` });
263112
+ const runnerEnv = { ...python.env };
263113
+ if (this._brokerGpuIndex !== null) {
263114
+ if (audioBrokerGpuIndexIsCompatible(this._brokerGpuIndex, runnerEnv)) {
263115
+ runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
263116
+ } else {
263117
+ this.emitProgress({
263118
+ stage: "setup",
263119
+ message: `Broker selected CUDA GPU ${this._brokerGpuIndex}, but audio CUDA filtering excluded it; using CUDA_VISIBLE_DEVICES=${runnerEnv["CUDA_VISIBLE_DEVICES"] ?? "default"}`
263120
+ });
263121
+ }
263122
+ }
262779
263123
  const result = await runProcess3(python.command, [
262780
263124
  runner,
262781
263125
  "--kind",
@@ -262792,7 +263136,7 @@ if __name__ == "__main__":
262792
263136
  ], {
262793
263137
  cwd: this.cwd,
262794
263138
  timeoutMs: 18e5,
262795
- env: python.env,
263139
+ env: runnerEnv,
262796
263140
  progressLabel: `Downloading/loading ${args.model}`,
262797
263141
  onProgress: (event) => this.emitProgress(event)
262798
263142
  });
@@ -262872,33 +263216,6 @@ if __name__ == "__main__":
262872
263216
  const candidates = audioGenerationFallbackCandidates(kind, requestedModel, requestedBackend, generationFallbackEnabled2(args));
262873
263217
  const seed = optionalNumberArg2(args["seed"]);
262874
263218
  const playback = playbackRequested(args);
262875
- const broker = getModelBroker();
262876
- const firstCandidate = candidates[0];
262877
- let brokerGpuIndex = null;
262878
- if (firstCandidate) {
262879
- const decision2 = await broker.ensureModelLoadable({
262880
- name: firstCandidate.model,
262881
- domain: kind === "music" ? "music" : "sound",
262882
- host: firstCandidate.backend === "audiocraft" ? "audiocraft" : firstCandidate.backend === "tangoflux" ? "tangoflux" : firstCandidate.backend === "transformers" ? "diffusers-py" : "diffusers-py",
262883
- owner: `audio-generate-tool/${kind}`
262884
- });
262885
- if (decision2.kind === "evict") {
262886
- for (const target of decision2.evictTargets) {
262887
- await broker.evict(target.host, target.name, `${kind}-gen-needs-room`);
262888
- }
262889
- brokerGpuIndex = decision2.gpuIndex ?? null;
262890
- } else if (decision2.kind === "ok") {
262891
- brokerGpuIndex = decision2.gpuIndex ?? null;
262892
- } else if (decision2.kind === "reject") {
262893
- return {
262894
- success: false,
262895
- output: "",
262896
- error: `${kind === "music" ? "Music" : "Sound"} generation blocked by resource broker: ${decision2.reason}`,
262897
- durationMs: performance.now() - start2
262898
- };
262899
- }
262900
- }
262901
- this._brokerGpuIndex = brokerGpuIndex;
262902
263219
  try {
262903
263220
  return await this.generateCandidateLadder({ kind, candidates, prompt, args, seed, playback, start: start2 });
262904
263221
  } catch (err) {
@@ -262918,15 +263235,39 @@ if __name__ == "__main__":
262918
263235
  stage: "setup",
262919
263236
  message: `Preparing ${args.kind} model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
262920
263237
  });
262921
- const result = candidate.backend === "project" ? this.projectProfileResult(args.kind, candidate, args.start) : await this.prewarmPythonBackend({
263238
+ const leaseDecision = await this.acquireTransientLoadLease({
262922
263239
  kind: args.kind,
262923
- backend: candidate.backend,
262924
- runnerBackend: candidate.backend,
262925
- model: candidate.model,
262926
- duration,
262927
- start: args.start,
262928
- python: args.args["python"]
262929
- });
263240
+ candidate,
263241
+ reason: `${args.kind}-prewarm-needs-room`,
263242
+ start: args.start
263243
+ });
263244
+ if (leaseDecision && "success" in leaseDecision) {
263245
+ failed.push({ candidate, reason: summarizeToolResult2(leaseDecision) });
263246
+ if (index < args.candidates.length - 1) {
263247
+ this.emitProgress({
263248
+ stage: "setup",
263249
+ message: `${candidate.model} did not fit current resources; trying ${args.candidates[index + 1].model}`
263250
+ });
263251
+ }
263252
+ continue;
263253
+ }
263254
+ const lease = leaseDecision?.lease;
263255
+ this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
263256
+ let result;
263257
+ try {
263258
+ result = candidate.backend === "project" ? this.projectProfileResult(args.kind, candidate, args.start) : await this.prewarmPythonBackend({
263259
+ kind: args.kind,
263260
+ backend: candidate.backend,
263261
+ runnerBackend: candidate.backend,
263262
+ model: candidate.model,
263263
+ duration,
263264
+ start: args.start,
263265
+ python: args.args["python"]
263266
+ });
263267
+ } finally {
263268
+ await lease?.release();
263269
+ this._brokerGpuIndex = null;
263270
+ }
262930
263271
  if (result.success)
262931
263272
  return annotateAudioFallbackSuccess(result, failed, candidate);
262932
263273
  failed.push({ candidate, reason: summarizeToolResult2(result) });
@@ -262954,19 +263295,43 @@ if __name__ == "__main__":
262954
263295
  stage: "setup",
262955
263296
  message: `Using ${args.kind} model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
262956
263297
  });
262957
- const result = candidate.backend === "project" ? this.projectProfileResult(args.kind, candidate, args.start) : await this.generateWithPythonBackend({
263298
+ const leaseDecision = await this.acquireTransientLoadLease({
262958
263299
  kind: args.kind,
262959
- backend: candidate.backend,
262960
- runnerBackend: candidate.backend,
262961
- prompt: args.prompt,
262962
- model: candidate.model,
262963
- duration,
262964
- steps,
262965
- seed: args.seed,
262966
- playback: args.playback,
262967
- start: args.start,
262968
- python: args.args["python"]
262969
- });
263300
+ candidate,
263301
+ reason: `${args.kind}-gen-needs-room`,
263302
+ start: args.start
263303
+ });
263304
+ if (leaseDecision && "success" in leaseDecision) {
263305
+ failed.push({ candidate, reason: summarizeToolResult2(leaseDecision) });
263306
+ if (index < args.candidates.length - 1) {
263307
+ this.emitProgress({
263308
+ stage: "setup",
263309
+ message: `${candidate.model} did not fit current resources; falling back to ${args.candidates[index + 1].model}`
263310
+ });
263311
+ }
263312
+ continue;
263313
+ }
263314
+ const lease = leaseDecision?.lease;
263315
+ this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
263316
+ let result;
263317
+ try {
263318
+ result = candidate.backend === "project" ? this.projectProfileResult(args.kind, candidate, args.start) : await this.generateWithPythonBackend({
263319
+ kind: args.kind,
263320
+ backend: candidate.backend,
263321
+ runnerBackend: candidate.backend,
263322
+ prompt: args.prompt,
263323
+ model: candidate.model,
263324
+ duration,
263325
+ steps,
263326
+ seed: args.seed,
263327
+ playback: args.playback,
263328
+ start: args.start,
263329
+ python: args.args["python"]
263330
+ });
263331
+ } finally {
263332
+ await lease?.release();
263333
+ this._brokerGpuIndex = null;
263334
+ }
262970
263335
  if (result.success)
262971
263336
  return annotateAudioFallbackSuccess(result, failed, candidate);
262972
263337
  failed.push({ candidate, reason: summarizeToolResult2(result) });
@@ -263306,6 +263671,12 @@ function videoCandidateFor(model, requestedBackend, requestedKind) {
263306
263671
  }
263307
263672
  return { model, backend, preset };
263308
263673
  }
263674
+ function videoCandidateHost(candidate) {
263675
+ return candidate.backend === "comfyui" ? "comfyui" : "diffusers-py";
263676
+ }
263677
+ function videoCandidateEstimatedVramMB(candidate) {
263678
+ return candidate.preset ? Math.ceil(candidate.preset.minVramGB * 1024) : void 0;
263679
+ }
263309
263680
  function videoGenerationFallbackCandidates(requestedModel, requestedBackend, requestedKind, allowFallback = true, options2 = {}) {
263310
263681
  const preferAudioVideo = Boolean(options2.preferNativeAudioVideo);
263311
263682
  const baseLadderIds = preferAudioVideo ? [...VIDEO_AUDIO_QUALITY_LADDER, ...VIDEO_GENERATION_QUALITY_LADDER] : VIDEO_GENERATION_QUALITY_LADDER;
@@ -263871,9 +264242,15 @@ function annotateVideoFallbackSuccess(result, failed, winner) {
263871
264242
  ...failed.map((attempt, index) => ` ${formatVideoAttempt(attempt.candidate, attempt.reason, index)}`),
263872
264243
  ""
263873
264244
  ].join("\n");
264245
+ const llmPrefix = [
264246
+ `Fallback ladder used ${winner.model} [${winner.backend}] after ${failed.length} failed attempt(s).`,
264247
+ ...failed.map((attempt, index) => formatVideoAttempt(attempt.candidate, attempt.reason, index))
264248
+ ].join("\n");
263874
264249
  return {
263875
264250
  ...result,
263876
- output: prefix + result.output
264251
+ output: prefix + result.output,
264252
+ llmContent: result.llmContent ? `${llmPrefix}
264253
+ ${result.llmContent}` : result.llmContent
263877
264254
  };
263878
264255
  }
263879
264256
  function parseRunnerJson3(stdout) {
@@ -265240,6 +265617,45 @@ if __name__ == "__main__":
265240
265617
  this.lastProgressAt = now;
265241
265618
  this.progressHandler(event);
265242
265619
  }
265620
+ async acquireTransientLoadLease(args) {
265621
+ if (!args.candidate)
265622
+ return null;
265623
+ const broker = getModelBroker();
265624
+ const decision2 = await broker.acquireTransientModelLoad({
265625
+ name: args.candidate.model,
265626
+ domain: "video-gen",
265627
+ host: videoCandidateHost(args.candidate),
265628
+ owner: "video-generate-tool",
265629
+ estimatedVramMB: videoCandidateEstimatedVramMB(args.candidate)
265630
+ }, {
265631
+ reason: args.reason,
265632
+ restoreOllama: true,
265633
+ unloadRequestedOllama: false
265634
+ });
265635
+ if (decision2.kind === "reject") {
265636
+ return {
265637
+ success: false,
265638
+ output: "",
265639
+ error: `Video generation blocked by resource broker: ${decision2.reason}`,
265640
+ durationMs: performance.now() - args.start
265641
+ };
265642
+ }
265643
+ if (decision2.kind === "degrade") {
265644
+ return {
265645
+ success: false,
265646
+ output: "",
265647
+ error: `Video generation needs a broker fallback (${decision2.fallback.name}), but video candidate fallback must be selected by the video ladder: ${decision2.reason}`,
265648
+ durationMs: performance.now() - args.start
265649
+ };
265650
+ }
265651
+ if (decision2.lease.evictedOllamaModels.length > 0) {
265652
+ this.emitProgress({
265653
+ stage: "setup",
265654
+ message: `Temporarily unloaded ${decision2.lease.evictedOllamaModels.length} Ollama inference model(s) to free VRAM for video generation`
265655
+ });
265656
+ }
265657
+ return { lease: decision2.lease, gpuIndex: decision2.lease.gpuIndex };
265658
+ }
265243
265659
  async execute(args) {
265244
265660
  const start2 = performance.now();
265245
265661
  const action = String(args["action"] ?? "generate");
@@ -265295,35 +265711,6 @@ if __name__ == "__main__":
265295
265711
  const withAudio = booleanArg3(args["with_audio"], false);
265296
265712
  const audioInput = typeof args["audio_input"] === "string" && args["audio_input"].trim() ? String(args["audio_input"]).trim() : void 0;
265297
265713
  const candidates = videoGenerationFallbackCandidates(requestedModel, requestedBackend, inferredKind, generationFallbackEnabled3(args), { preferNativeAudioVideo: withAudio || Boolean(audioInput) });
265298
- const broker = getModelBroker();
265299
- const firstCandidate = candidates[0];
265300
- let brokerGpuIndex = null;
265301
- if (firstCandidate) {
265302
- const preset = firstCandidate.preset;
265303
- const decision2 = await broker.ensureModelLoadable({
265304
- name: firstCandidate.model,
265305
- domain: "video-gen",
265306
- host: firstCandidate.backend === "comfyui" ? "comfyui" : "diffusers-py",
265307
- owner: "video-generate-tool",
265308
- estimatedVramMB: preset ? preset.minVramGB * 1024 : void 0
265309
- });
265310
- if (decision2.kind === "evict") {
265311
- for (const target of decision2.evictTargets) {
265312
- await broker.evict(target.host, target.name, "video-gen-needs-room");
265313
- }
265314
- brokerGpuIndex = decision2.gpuIndex ?? null;
265315
- } else if (decision2.kind === "ok") {
265316
- brokerGpuIndex = decision2.gpuIndex ?? null;
265317
- } else if (decision2.kind === "reject") {
265318
- return {
265319
- success: false,
265320
- output: "",
265321
- error: `Video generation blocked by resource broker: ${decision2.reason}`,
265322
- durationMs: performance.now() - start2
265323
- };
265324
- }
265325
- }
265326
- this._brokerGpuIndex = brokerGpuIndex;
265327
265714
  if (candidates.length === 0) {
265328
265715
  return {
265329
265716
  success: false,
@@ -265373,7 +265760,30 @@ if __name__ == "__main__":
265373
265760
  failed.push({ candidate, reason: "ComfyUI backend not yet implemented." });
265374
265761
  continue;
265375
265762
  }
265376
- const result = await this.prewarmDiffusers({ candidate, start: args.start, python: args.args["python"] });
265763
+ const leaseDecision = await this.acquireTransientLoadLease({
265764
+ candidate,
265765
+ reason: "video-prewarm-needs-room",
265766
+ start: args.start
265767
+ });
265768
+ if (leaseDecision && "success" in leaseDecision) {
265769
+ failed.push({ candidate, reason: summarizeToolResult3(leaseDecision) });
265770
+ if (index < args.candidates.length - 1) {
265771
+ this.emitProgress({
265772
+ stage: "setup",
265773
+ message: `${candidate.model} did not fit current resources; trying ${args.candidates[index + 1].model}`
265774
+ });
265775
+ }
265776
+ continue;
265777
+ }
265778
+ const lease = leaseDecision?.lease;
265779
+ this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
265780
+ let result;
265781
+ try {
265782
+ result = await this.prewarmDiffusers({ candidate, start: args.start, python: args.args["python"] });
265783
+ } finally {
265784
+ await lease?.release();
265785
+ this._brokerGpuIndex = null;
265786
+ }
265377
265787
  if (result.success)
265378
265788
  return annotateVideoFallbackSuccess(result, failed, candidate);
265379
265789
  failed.push({ candidate, reason: summarizeToolResult3(result) });
@@ -265459,26 +265869,48 @@ if __name__ == "__main__":
265459
265869
  start: args.start
265460
265870
  });
265461
265871
  } else {
265462
- result = await this.generateWithDiffusers({
265463
- prompt: promptForCandidate,
265464
- model: candidate.model,
265465
- preset,
265466
- kind: args.kind,
265467
- imageArg: args.imageArg,
265468
- audioInput: args.audioInput,
265469
- width,
265470
- height,
265471
- numFrames,
265472
- fps,
265473
- steps,
265474
- guidance,
265475
- negativePrompt,
265476
- seed: args.seed,
265477
- hfToken: hfTokenOverride,
265478
- autoAcceptLicense,
265479
- start: args.start,
265480
- python: args.args["python"]
265872
+ const leaseDecision = await this.acquireTransientLoadLease({
265873
+ candidate,
265874
+ reason: "video-gen-needs-room",
265875
+ start: args.start
265481
265876
  });
265877
+ if (leaseDecision && "success" in leaseDecision) {
265878
+ failed.push({ candidate, reason: summarizeToolResult3(leaseDecision) });
265879
+ if (index < args.candidates.length - 1) {
265880
+ this.emitProgress({
265881
+ stage: "setup",
265882
+ message: `${candidate.model} did not fit current resources; falling back to ${args.candidates[index + 1].model}`
265883
+ });
265884
+ }
265885
+ continue;
265886
+ }
265887
+ const lease = leaseDecision?.lease;
265888
+ this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
265889
+ try {
265890
+ result = await this.generateWithDiffusers({
265891
+ prompt: promptForCandidate,
265892
+ model: candidate.model,
265893
+ preset,
265894
+ kind: args.kind,
265895
+ imageArg: args.imageArg,
265896
+ audioInput: args.audioInput,
265897
+ width,
265898
+ height,
265899
+ numFrames,
265900
+ fps,
265901
+ steps,
265902
+ guidance,
265903
+ negativePrompt,
265904
+ seed: args.seed,
265905
+ hfToken: hfTokenOverride,
265906
+ autoAcceptLicense,
265907
+ start: args.start,
265908
+ python: args.args["python"]
265909
+ });
265910
+ } finally {
265911
+ await lease?.release();
265912
+ this._brokerGpuIndex = null;
265913
+ }
265482
265914
  }
265483
265915
  let nativeAudio = preset.nativeAudioVideo === true;
265484
265916
  let audioPath;
@@ -265670,6 +266102,17 @@ ${llmAnnotation}` : result.llmContent;
265670
266102
  }
265671
266103
  ensureUnifiedCacheDirs();
265672
266104
  this.emitProgress({ stage: "load", message: `Downloading/loading video model ${args.candidate.model}` });
266105
+ const runnerEnv = { ...python.env };
266106
+ if (this._brokerGpuIndex !== null) {
266107
+ if (mediaBrokerGpuIndexIsCompatible(this._brokerGpuIndex, "video", runnerEnv)) {
266108
+ runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
266109
+ } else {
266110
+ this.emitProgress({
266111
+ stage: "setup",
266112
+ message: `Broker selected CUDA GPU ${this._brokerGpuIndex}, but video CUDA filtering excluded it; using CUDA_VISIBLE_DEVICES=${runnerEnv["CUDA_VISIBLE_DEVICES"] ?? "default"}`
266113
+ });
266114
+ }
266115
+ }
265673
266116
  const result = await runProcess4(python.command, [
265674
266117
  runner,
265675
266118
  "--model",
@@ -265685,7 +266128,7 @@ ${llmAnnotation}` : result.llmContent;
265685
266128
  ], {
265686
266129
  cwd: this.cwd,
265687
266130
  timeoutMs: 18e5,
265688
- env: python.env,
266131
+ env: runnerEnv,
265689
266132
  progressLabel: `Downloading/loading ${args.candidate.model}`,
265690
266133
  onProgress: (event) => this.emitProgress(event)
265691
266134
  });
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.187",
3
+ "version": "1.0.188",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "omnius",
9
- "version": "1.0.187",
9
+ "version": "1.0.188",
10
10
  "bundleDependencies": [
11
11
  "image-to-ascii"
12
12
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.187",
3
+ "version": "1.0.188",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",