npm - omnius - Versions diffs - 1.0.187 → 1.0.188 - Mend

omnius 1.0.187 → 1.0.188

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (3)

package/dist/index.js CHANGED Viewed

@@ -1412,6 +1412,18 @@ var init_tool_executor = __esm({
 import { EventEmitter } from "node:events";
 import { totalmem, freemem } from "node:os";
 import { exec } from "node:child_process";
+function dedupeLoadedModels(models) {
+  const seen = /* @__PURE__ */ new Set();
+  const out = [];
+  for (const model of models) {
+    const key = `${model.host}:${model.name}`;
+    if (seen.has(key))
+      continue;
+    seen.add(key);
+    out.push(model);
+  }
+  return out;
+}
 function ramSnapshotMB() {
   const total = Math.round(totalmem() / (1024 * 1024));
   const free = Math.round(freemem() / (1024 * 1024));
@@ -1690,6 +1702,104 @@ var init_model_broker = __esm({
         this.emit("rejected", spec, reason);
         return { kind: "reject", reason };
       }
+      /**
+       * Acquire a short-lived load lease for media/subprocess generation.
+       *
+       * Media generation often needs to temporarily free VRAM occupied by Ollama
+       * chat models. This helper refreshes Ollama state, asks the broker what must
+       * be evicted, unloads those Ollama models with keep_alive=0, and returns a
+       * lease whose release() clears transient inflight state, unloads any
+       * Ollama-hosted requested model, and warms the evicted Ollama models again.
+       */
+      async acquireTransientModelLoad(spec, options2 = {}) {
+        const reason = options2.reason ?? `${spec.domain}-transient-load`;
+        const evictedModels = [];
+        let gpuIndex = null;
+        let admitted = false;
+        await this.pollOnce().catch(() => {
+        });
+        for (let attempt = 0; attempt < 4; attempt++) {
+          const decision2 = await this.ensureModelLoadable(spec);
+          if (decision2.kind === "wait-for-inflight") {
+            const waited = await decision2.promise.catch((err) => ({
+              kind: "reject",
+              reason: err instanceof Error ? err.message : String(err)
+            }));
+            if (waited.kind === "ok") {
+              gpuIndex = waited.gpuIndex ?? null;
+              admitted = true;
+              break;
+            }
+            if (waited.kind === "evict") {
+              for (const target of waited.evictTargets) {
+                if (await this.evict(target.host, target.name, reason))
+                  evictedModels.push(target);
+              }
+              await this.pollOnce().catch(() => {
+              });
+              continue;
+            }
+            if (waited.kind === "degrade")
+              return waited;
+            if (waited.kind === "reject")
+              return waited;
+            continue;
+          }
+          if (decision2.kind === "ok") {
+            gpuIndex = decision2.gpuIndex ?? null;
+            admitted = true;
+            break;
+          }
+          if (decision2.kind === "evict") {
+            for (const target of decision2.evictTargets) {
+              const before = this._loaded.get(`${target.host}:${target.name}`) ?? target;
+              if (await this.evict(target.host, target.name, reason)) {
+                evictedModels.push(before);
+              }
+            }
+            await this.pollOnce().catch(() => {
+            });
+            continue;
+          }
+          if (decision2.kind === "degrade")
+            return decision2;
+          return decision2;
+        }
+        if (!admitted) {
+          return {
+            kind: "reject",
+            reason: `could not acquire transient load lease for ${spec.host}:${spec.name} after repeated evictions`
+          };
+        }
+        const evictedOllamaModels = dedupeLoadedModels(evictedModels.filter((m2) => m2.host === "ollama"));
+        const broker = this;
+        let released = false;
+        return {
+          kind: "ok",
+          lease: {
+            spec,
+            gpuIndex,
+            evictedModels: dedupeLoadedModels(evictedModels),
+            evictedOllamaModels,
+            async release() {
+              if (released)
+                return;
+              released = true;
+              broker.clearInflight(spec.host, spec.name);
+              if ((options2.unloadRequestedOllama ?? true) && spec.host === "ollama") {
+                await broker.unloadOllamaModel(spec.name, `${reason}-complete`).catch(() => false);
+              }
+              if (options2.restoreOllama !== false && evictedOllamaModels.length > 0) {
+                await broker.restoreOllamaModels(evictedOllamaModels, {
+                  keepAlive: options2.restoreKeepAlive ?? "30m"
+                });
+              }
+              await broker.pollOnce().catch(() => {
+              });
+            }
+          }
+        };
+      }
       /**
        * Register a model that has been successfully loaded.
        * Callers MUST call this after a successful load so the broker can track LRU.
@@ -1743,6 +1853,66 @@ var init_model_broker = __esm({
         this.emit("evicted", m2, reason);
         return actively;
       }
+      /** Force-unload an Ollama model even when it is not currently tracked. */
+      async unloadOllamaModel(modelName, reason = "ollama-unload") {
+        const key = `ollama:${modelName}`;
+        const existing = this._loaded.get(key);
+        const ok3 = await this.ollamaUnload(modelName).catch(() => false);
+        if (existing) {
+          this._loaded.delete(key);
+          this.emit("evicted", existing, reason);
+        }
+        return ok3;
+      }
+      /** Best-effort warm/reload of an Ollama model after temporary eviction. */
+      async warmOllamaModel(modelName, keepAlive = "30m") {
+        const bodies = [
+          { model: modelName, prompt: "", stream: false, keep_alive: keepAlive, options: { num_predict: 0 } },
+          { model: modelName, prompt: "", stream: false, keep_alive: keepAlive, options: { num_predict: 1 } }
+        ];
+        for (const body of bodies) {
+          try {
+            const res = await fetch(`${this._ollamaBaseUrl}/api/generate`, {
+              method: "POST",
+              headers: { "Content-Type": "application/json" },
+              body: JSON.stringify(body),
+              signal: AbortSignal.timeout(12e4)
+            });
+            if (!res.ok)
+              continue;
+            await this.refreshOllamaPs().catch(() => {
+            });
+            return true;
+          } catch {
+          }
+        }
+        try {
+          const res = await fetch(`${this._ollamaBaseUrl}/api/generate`, {
+            method: "POST",
+            headers: { "Content-Type": "application/json" },
+            body: JSON.stringify({
+              model: modelName,
+              stream: false,
+              keep_alive: keepAlive
+            }),
+            signal: AbortSignal.timeout(12e4)
+          });
+          if (!res.ok)
+            return false;
+          await this.refreshOllamaPs().catch(() => {
+          });
+          return true;
+        } catch {
+          return false;
+        }
+      }
+      /** Restore a set of previously evicted Ollama models, oldest first. */
+      async restoreOllamaModels(models, options2 = {}) {
+        const unique2 = dedupeLoadedModels(models.filter((m2) => m2.host === "ollama")).sort((a2, b) => a2.lastUsedAt - b.lastUsedAt);
+        for (const model of unique2) {
+          await this.warmOllamaModel(model.name, options2.keepAlive ?? "30m").catch(() => false);
+        }
+      }
       // ------------------------------------------------------------------
       // Internal — Ollama
       // ------------------------------------------------------------------
@@ -1885,7 +2055,7 @@ var init_model_broker = __esm({
         );
         const idle = (m2) => now - m2.lastUsedAt > this.idleEvictMs;
         const onTargetGpu = (m2) => req2.targetGpu === void 0 || req2.targetGpu === null ? true : m2.gpuIndex === req2.targetGpu;
-        const evictable = [...this._loaded.values()].filter((m2) => m2.priority <= req2.requestingPriority).filter(sameDomainOk).filter(onTargetGpu).sort((a2, b) => {
+        const evictable = [...this._loaded.values()].filter((m2) => m2.priority <= req2.requestingPriority).filter(sameDomainOk).filter(onTargetGpu).filter((m2) => !this.hasActiveSlotForModel(m2)).sort((a2, b) => {
           const aIdle = idle(a2) ? 0 : 1;
           const bIdle = idle(b) ? 0 : 1;
           if (aIdle !== bIdle)
@@ -1931,6 +2101,13 @@ var init_model_broker = __esm({
             n2++;
         return n2;
       }
+      hasActiveSlotForModel(model) {
+        for (const slot of this._activeSlots.values()) {
+          if (slot.model === model.name)
+            return true;
+        }
+        return false;
+      }
       // ------------------------------------------------------------------
       // Internal — fallback resolution
       // ------------------------------------------------------------------
@@ -22761,6 +22938,17 @@ function evictModelsToFreeSpace(args) {
   writeMeta(meta);
   return { evicted, bytesFreed, finalFreeBytes: disk.freeBytes };
 }
+function estimateReclaimableCacheBytes(keepRepos) {
+  const keep = new Set(keepRepos ?? []);
+  let total = 0;
+  for (const entry of readMeta().entries) {
+    if (keep.has(entry.repo))
+      continue;
+    const measured = measureRepoCacheBytes(entry.repo);
+    total += Math.max(0, measured || entry.sizeBytes || 0);
+  }
+  return total;
+}
 function ensureDiskSpaceForDownload(args) {
   ensureUnifiedCacheDirs();
   const safetyMargin = args.safetyMarginBytes ?? 1 * 1024 ** 3;
@@ -22769,6 +22957,10 @@ function ensureDiskSpaceForDownload(args) {
   if (disk.freeBytes >= target) {
     return { ok: true, evicted: [], freeBytes: disk.freeBytes };
   }
+  const reclaimableBytes = estimateReclaimableCacheBytes(args.keepRepos);
+  if (disk.freeBytes + reclaimableBytes < target) {
+    throw new InsufficientDiskSpaceError(args.approxDownloadBytes, disk.freeBytes, disk.totalBytes, []);
+  }
   const evictionResult = evictModelsToFreeSpace({
     neededBytes: args.approxDownloadBytes,
     keepRepos: args.keepRepos,
@@ -259208,6 +259400,12 @@ function imageCandidateFor(model, requestedBackend) {
     preset: getImageGenerationPreset(resolved)
   };
 }
+function imageCandidateHost(candidate) {
+  return candidate.backend === "ollama" ? "ollama" : "diffusers-py";
+}
+function imageCandidateEstimatedVramMB(candidate) {
+  return candidate.preset?.minVramGB !== void 0 ? Math.ceil(candidate.preset.minVramGB * 1024) : void 0;
+}
 function imageGenerationFallbackCandidates(requestedModel, requestedBackend, allowFallback = true) {
   const ladder = imageGenerationQualityLadder();
   const candidates = [];
@@ -259511,9 +259709,15 @@ function annotateImageFallbackSuccess(result, failed, winner) {
     ...failed.map((attempt, index) => `  ${formatImageAttempt(attempt.candidate, attempt.reason, index)}`),
     ""
   ].join("\n");
+  const llmPrefix = [
+    `Fallback ladder used ${winner.model} [${winner.backend}] after ${failed.length} failed attempt(s).`,
+    ...failed.map((attempt, index) => formatImageAttempt(attempt.candidate, attempt.reason, index))
+  ].join("\n");
   return {
     ...result,
-    output: prefix + result.output
+    output: prefix + result.output,
+    llmContent: result.llmContent ? `${llmPrefix}
+${result.llmContent}` : result.llmContent
   };
 }
 function parseRunnerJson(stdout) {
@@ -260321,6 +260525,45 @@ if __name__ == "__main__":
         this.lastProgressAt = now;
         this.progressHandler(event);
       }
+      async acquireTransientLoadLease(args) {
+        if (!args.candidate)
+          return null;
+        const broker = getModelBroker();
+        const decision2 = await broker.acquireTransientModelLoad({
+          name: args.candidate.model,
+          domain: "image-gen",
+          host: imageCandidateHost(args.candidate),
+          owner: "image-generate-tool",
+          estimatedVramMB: imageCandidateEstimatedVramMB(args.candidate)
+        }, {
+          reason: args.reason,
+          restoreOllama: true,
+          unloadRequestedOllama: true
+        });
+        if (decision2.kind === "reject") {
+          return {
+            success: false,
+            output: "",
+            error: `Image generation blocked by resource broker: ${decision2.reason}`,
+            durationMs: performance.now() - args.start
+          };
+        }
+        if (decision2.kind === "degrade") {
+          return {
+            success: false,
+            output: "",
+            error: `Image generation needs a broker fallback (${decision2.fallback.name}), but image candidate fallback must be selected by the image ladder: ${decision2.reason}`,
+            durationMs: performance.now() - args.start
+          };
+        }
+        if (decision2.lease.evictedOllamaModels.length > 0) {
+          this.emitProgress({
+            stage: "setup",
+            message: `Temporarily unloaded ${decision2.lease.evictedOllamaModels.length} Ollama inference model(s) to free VRAM for image generation`
+          });
+        }
+        return { lease: decision2.lease, gpuIndex: decision2.lease.gpuIndex };
+      }
       async execute(args) {
         const start2 = performance.now();
         const action = String(args["action"] ?? "generate");
@@ -260363,33 +260606,6 @@ if __name__ == "__main__":
         const requestedBackend = args["backend"] ? String(args["backend"]) : this.defaultBackend;
         const seed = optionalNumberArg(args["seed"]);
         const candidates = imageGenerationFallbackCandidates(requestedModel, requestedBackend, generationFallbackEnabled(args));
-        const broker = getModelBroker();
-        const firstCandidate = candidates[0];
-        let brokerGpuIndex = null;
-        if (firstCandidate) {
-          const decision2 = await broker.ensureModelLoadable({
-            name: firstCandidate.model,
-            domain: "image-gen",
-            host: firstCandidate.backend === "ollama" ? "ollama" : "diffusers-py",
-            owner: "image-generate-tool"
-          });
-          if (decision2.kind === "evict") {
-            for (const target of decision2.evictTargets) {
-              await broker.evict(target.host, target.name, "image-gen-needs-room");
-            }
-            brokerGpuIndex = decision2.gpuIndex ?? null;
-          } else if (decision2.kind === "ok") {
-            brokerGpuIndex = decision2.gpuIndex ?? null;
-          } else if (decision2.kind === "reject") {
-            return {
-              success: false,
-              output: "",
-              error: `Image generation blocked by resource broker: ${decision2.reason}`,
-              durationMs: performance.now() - start2
-            };
-          }
-        }
-        this._brokerGpuIndex = brokerGpuIndex;
         try {
           return await this.generateCandidateLadder({ candidates, prompt, args, seed, start: start2 });
         } catch (err) {
@@ -260408,7 +260624,30 @@ if __name__ == "__main__":
             stage: "setup",
             message: `Preparing image model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
           });
-          const result = candidate.backend === "ollama" ? await this.prewarmOllama({ model: candidate.model, start: args.start }) : candidate.backend === "sdcpp" ? await this.prewarmSdCpp({ model: candidate.model, start: args.start, python: args.args["python"] }) : await this.prewarmDiffusers({ model: candidate.model, start: args.start, python: args.args["python"] });
+          const leaseDecision = await this.acquireTransientLoadLease({
+            candidate,
+            reason: "image-prewarm-needs-room",
+            start: args.start
+          });
+          if (leaseDecision && "success" in leaseDecision) {
+            failed.push({ candidate, reason: summarizeToolResult(leaseDecision) });
+            if (index < args.candidates.length - 1) {
+              this.emitProgress({
+                stage: "setup",
+                message: `${candidate.model} did not fit current resources; trying ${args.candidates[index + 1].model}`
+              });
+            }
+            continue;
+          }
+          const lease = leaseDecision?.lease;
+          this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
+          let result;
+          try {
+            result = candidate.backend === "ollama" ? await this.prewarmOllama({ model: candidate.model, start: args.start }) : candidate.backend === "sdcpp" ? await this.prewarmSdCpp({ model: candidate.model, start: args.start, python: args.args["python"] }) : await this.prewarmDiffusers({ model: candidate.model, start: args.start, python: args.args["python"] });
+          } finally {
+            await lease?.release();
+            this._brokerGpuIndex = null;
+          }
           if (result.success)
             return annotateImageFallbackSuccess(result, failed, candidate);
           failed.push({ candidate, reason: summarizeToolResult(result) });
@@ -260447,7 +260686,30 @@ if __name__ == "__main__":
             message: `Using image model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
           });
           const promptForCandidate = expansionEnabled ? await this.expandPromptForCandidate(args.prompt, candidate, index, args.candidates.length) : args.prompt;
-          const result = candidate.backend === "ollama" ? await this.generateWithOllama({ prompt: promptForCandidate, model: candidate.model, width, height, steps, start: args.start }) : candidate.backend === "sdcpp" ? await this.generateWithSdCpp({ prompt: promptForCandidate, model: candidate.model, width, height, steps, seed: args.seed, start: args.start, python: args.args["python"] }) : await this.generateWithDiffusers({ prompt: promptForCandidate, model: candidate.model, width, height, steps, guidance, seed: args.seed, start: args.start, python: args.args["python"] });
+          const leaseDecision = await this.acquireTransientLoadLease({
+            candidate,
+            reason: "image-gen-needs-room",
+            start: args.start
+          });
+          if (leaseDecision && "success" in leaseDecision) {
+            failed.push({ candidate, reason: summarizeToolResult(leaseDecision) });
+            if (index < args.candidates.length - 1) {
+              this.emitProgress({
+                stage: "setup",
+                message: `${candidate.model} did not fit current resources; falling back to ${args.candidates[index + 1].model}`
+              });
+            }
+            continue;
+          }
+          const lease = leaseDecision?.lease;
+          this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
+          let result;
+          try {
+            result = candidate.backend === "ollama" ? await this.generateWithOllama({ prompt: promptForCandidate, model: candidate.model, width, height, steps, start: args.start }) : candidate.backend === "sdcpp" ? await this.generateWithSdCpp({ prompt: promptForCandidate, model: candidate.model, width, height, steps, seed: args.seed, start: args.start, python: args.args["python"] }) : await this.generateWithDiffusers({ prompt: promptForCandidate, model: candidate.model, width, height, steps, guidance, seed: args.seed, start: args.start, python: args.args["python"] });
+          } finally {
+            await lease?.release();
+            this._brokerGpuIndex = null;
+          }
           if (result.success) {
             await this.writeImageSidecar(result, {
               originalPrompt: args.prompt,
@@ -260671,6 +260933,17 @@ ${errText.slice(0, 1200)}`,
         }
         ensureUnifiedCacheDirs();
         this.emitProgress({ stage: "load", message: `Downloading/loading image model ${args.model}` });
+        const runnerEnv = { ...python.env };
+        if (this._brokerGpuIndex !== null) {
+          if (mediaBrokerGpuIndexIsCompatible(this._brokerGpuIndex, "image", runnerEnv)) {
+            runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
+          } else {
+            this.emitProgress({
+              stage: "setup",
+              message: `Broker selected CUDA GPU ${this._brokerGpuIndex}, but image CUDA filtering excluded it; using CUDA_VISIBLE_DEVICES=${runnerEnv["CUDA_VISIBLE_DEVICES"] ?? "default"}`
+            });
+          }
+        }
         const result = await runProcess2(python.command, [
           runner,
           "--model",
@@ -260683,7 +260956,7 @@ ${errText.slice(0, 1200)}`,
         ], {
           cwd: this.cwd,
           timeoutMs: 18e5,
-          env: python.env,
+          env: runnerEnv,
           progressLabel: `Downloading/loading ${args.model}`,
           onProgress: (event) => this.emitProgress(event)
         });
@@ -261735,6 +262008,18 @@ function audioCandidateFor(kind, model, requestedBackend) {
     preset: getAudioGenerationPreset(model, kind)
   };
 }
+function audioCandidateHost(candidate) {
+  if (candidate.backend === "project")
+    return null;
+  if (candidate.backend === "audiocraft")
+    return "audiocraft";
+  if (candidate.backend === "tangoflux")
+    return "tangoflux";
+  return "diffusers-py";
+}
+function audioCandidateEstimatedVramMB(candidate) {
+  return candidate.preset ? Math.ceil(candidate.preset.minVramGB * 1024) : void 0;
+}
 function audioGenerationFallbackCandidates(kind, requestedModel, requestedBackend, allowFallback = true) {
   const ladder = audioGenerationQualityLadder(kind);
   const candidates = [];
@@ -261891,9 +262176,15 @@ function annotateAudioFallbackSuccess(result, failed, winner) {
     ...failed.map((attempt, index) => `  ${formatAudioAttempt(attempt.candidate, attempt.reason, index)}`),
     ""
   ].join("\n");
+  const llmPrefix = [
+    `Fallback ladder used ${winner.model} [${winner.backend}] after ${failed.length} failed attempt(s).`,
+    ...failed.map((attempt, index) => formatAudioAttempt(attempt.candidate, attempt.reason, index))
+  ].join("\n");
   return {
     ...result,
-    output: prefix + result.output
+    output: prefix + result.output,
+    llmContent: result.llmContent ? `${llmPrefix}
+${result.llmContent}` : result.llmContent
   };
 }
 var DEFAULT_SOUND_MODEL, DEFAULT_MUSIC_MODEL, DIFFUSERS_AUDIO_PACKAGES, TRANSFORMERS_AUDIO_PACKAGES, AUDIOCRAFT_PACKAGES, STABLE_AUDIO_PACKAGES, TANGOFLUX_PACKAGES, AUDIO_GENERATION_MODEL_PRESETS, SOUND_GENERATION_QUALITY_LADDER, MUSIC_GENERATION_QUALITY_LADDER, DIFFUSERS_AUDIO_RUNNER, AUDIOCRAFT_RUNNER, TRANSFORMERS_AUDIO_RUNNER, TANGOFLUX_RUNNER, AudioGenerateTool;
@@ -262730,6 +263021,48 @@ if __name__ == "__main__":
         this.lastProgressAt = now;
         this.progressHandler(event);
       }
+      async acquireTransientLoadLease(args) {
+        if (!args.candidate)
+          return null;
+        const host = audioCandidateHost(args.candidate);
+        if (!host)
+          return null;
+        const broker = getModelBroker();
+        const decision2 = await broker.acquireTransientModelLoad({
+          name: args.candidate.model,
+          domain: args.kind,
+          host,
+          owner: `audio-generate-tool/${args.kind}`,
+          estimatedVramMB: audioCandidateEstimatedVramMB(args.candidate)
+        }, {
+          reason: args.reason,
+          restoreOllama: true,
+          unloadRequestedOllama: false
+        });
+        if (decision2.kind === "reject") {
+          return {
+            success: false,
+            output: "",
+            error: `${args.kind === "music" ? "Music" : "Sound"} generation blocked by resource broker: ${decision2.reason}`,
+            durationMs: performance.now() - args.start
+          };
+        }
+        if (decision2.kind === "degrade") {
+          return {
+            success: false,
+            output: "",
+            error: `${args.kind === "music" ? "Music" : "Sound"} generation needs a broker fallback (${decision2.fallback.name}), but audio candidate fallback must be selected by the audio ladder: ${decision2.reason}`,
+            durationMs: performance.now() - args.start
+          };
+        }
+        if (decision2.lease.evictedOllamaModels.length > 0) {
+          this.emitProgress({
+            stage: "setup",
+            message: `Temporarily unloaded ${decision2.lease.evictedOllamaModels.length} Ollama inference model(s) to free VRAM for ${args.kind} generation`
+          });
+        }
+        return { lease: decision2.lease, gpuIndex: decision2.lease.gpuIndex };
+      }
       async prewarmPythonBackend(args) {
         const runner = await ensureAudioRunner(this.cwd, args.runnerBackend);
         let python;
@@ -262776,6 +263109,17 @@ if __name__ == "__main__":
         }
         ensureUnifiedCacheDirs();
         this.emitProgress({ stage: "load", message: `Downloading/loading ${args.kind} model ${args.model}` });
+        const runnerEnv = { ...python.env };
+        if (this._brokerGpuIndex !== null) {
+          if (audioBrokerGpuIndexIsCompatible(this._brokerGpuIndex, runnerEnv)) {
+            runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
+          } else {
+            this.emitProgress({
+              stage: "setup",
+              message: `Broker selected CUDA GPU ${this._brokerGpuIndex}, but audio CUDA filtering excluded it; using CUDA_VISIBLE_DEVICES=${runnerEnv["CUDA_VISIBLE_DEVICES"] ?? "default"}`
+            });
+          }
+        }
         const result = await runProcess3(python.command, [
           runner,
           "--kind",
@@ -262792,7 +263136,7 @@ if __name__ == "__main__":
         ], {
           cwd: this.cwd,
           timeoutMs: 18e5,
-          env: python.env,
+          env: runnerEnv,
           progressLabel: `Downloading/loading ${args.model}`,
           onProgress: (event) => this.emitProgress(event)
         });
@@ -262872,33 +263216,6 @@ if __name__ == "__main__":
         const candidates = audioGenerationFallbackCandidates(kind, requestedModel, requestedBackend, generationFallbackEnabled2(args));
         const seed = optionalNumberArg2(args["seed"]);
         const playback = playbackRequested(args);
-        const broker = getModelBroker();
-        const firstCandidate = candidates[0];
-        let brokerGpuIndex = null;
-        if (firstCandidate) {
-          const decision2 = await broker.ensureModelLoadable({
-            name: firstCandidate.model,
-            domain: kind === "music" ? "music" : "sound",
-            host: firstCandidate.backend === "audiocraft" ? "audiocraft" : firstCandidate.backend === "tangoflux" ? "tangoflux" : firstCandidate.backend === "transformers" ? "diffusers-py" : "diffusers-py",
-            owner: `audio-generate-tool/${kind}`
-          });
-          if (decision2.kind === "evict") {
-            for (const target of decision2.evictTargets) {
-              await broker.evict(target.host, target.name, `${kind}-gen-needs-room`);
-            }
-            brokerGpuIndex = decision2.gpuIndex ?? null;
-          } else if (decision2.kind === "ok") {
-            brokerGpuIndex = decision2.gpuIndex ?? null;
-          } else if (decision2.kind === "reject") {
-            return {
-              success: false,
-              output: "",
-              error: `${kind === "music" ? "Music" : "Sound"} generation blocked by resource broker: ${decision2.reason}`,
-              durationMs: performance.now() - start2
-            };
-          }
-        }
-        this._brokerGpuIndex = brokerGpuIndex;
         try {
           return await this.generateCandidateLadder({ kind, candidates, prompt, args, seed, playback, start: start2 });
         } catch (err) {
@@ -262918,15 +263235,39 @@ if __name__ == "__main__":
             stage: "setup",
             message: `Preparing ${args.kind} model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
           });
-          const result = candidate.backend === "project" ? this.projectProfileResult(args.kind, candidate, args.start) : await this.prewarmPythonBackend({
+          const leaseDecision = await this.acquireTransientLoadLease({
             kind: args.kind,
-            backend: candidate.backend,
-            runnerBackend: candidate.backend,
-            model: candidate.model,
-            duration,
-            start: args.start,
-            python: args.args["python"]
-          });
+            candidate,
+            reason: `${args.kind}-prewarm-needs-room`,
+            start: args.start
+          });
+          if (leaseDecision && "success" in leaseDecision) {
+            failed.push({ candidate, reason: summarizeToolResult2(leaseDecision) });
+            if (index < args.candidates.length - 1) {
+              this.emitProgress({
+                stage: "setup",
+                message: `${candidate.model} did not fit current resources; trying ${args.candidates[index + 1].model}`
+              });
+            }
+            continue;
+          }
+          const lease = leaseDecision?.lease;
+          this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
+          let result;
+          try {
+            result = candidate.backend === "project" ? this.projectProfileResult(args.kind, candidate, args.start) : await this.prewarmPythonBackend({
+              kind: args.kind,
+              backend: candidate.backend,
+              runnerBackend: candidate.backend,
+              model: candidate.model,
+              duration,
+              start: args.start,
+              python: args.args["python"]
+            });
+          } finally {
+            await lease?.release();
+            this._brokerGpuIndex = null;
+          }
           if (result.success)
             return annotateAudioFallbackSuccess(result, failed, candidate);
           failed.push({ candidate, reason: summarizeToolResult2(result) });
@@ -262954,19 +263295,43 @@ if __name__ == "__main__":
             stage: "setup",
             message: `Using ${args.kind} model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
           });
-          const result = candidate.backend === "project" ? this.projectProfileResult(args.kind, candidate, args.start) : await this.generateWithPythonBackend({
+          const leaseDecision = await this.acquireTransientLoadLease({
             kind: args.kind,
-            backend: candidate.backend,
-            runnerBackend: candidate.backend,
-            prompt: args.prompt,
-            model: candidate.model,
-            duration,
-            steps,
-            seed: args.seed,
-            playback: args.playback,
-            start: args.start,
-            python: args.args["python"]
-          });
+            candidate,
+            reason: `${args.kind}-gen-needs-room`,
+            start: args.start
+          });
+          if (leaseDecision && "success" in leaseDecision) {
+            failed.push({ candidate, reason: summarizeToolResult2(leaseDecision) });
+            if (index < args.candidates.length - 1) {
+              this.emitProgress({
+                stage: "setup",
+                message: `${candidate.model} did not fit current resources; falling back to ${args.candidates[index + 1].model}`
+              });
+            }
+            continue;
+          }
+          const lease = leaseDecision?.lease;
+          this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
+          let result;
+          try {
+            result = candidate.backend === "project" ? this.projectProfileResult(args.kind, candidate, args.start) : await this.generateWithPythonBackend({
+              kind: args.kind,
+              backend: candidate.backend,
+              runnerBackend: candidate.backend,
+              prompt: args.prompt,
+              model: candidate.model,
+              duration,
+              steps,
+              seed: args.seed,
+              playback: args.playback,
+              start: args.start,
+              python: args.args["python"]
+            });
+          } finally {
+            await lease?.release();
+            this._brokerGpuIndex = null;
+          }
           if (result.success)
             return annotateAudioFallbackSuccess(result, failed, candidate);
           failed.push({ candidate, reason: summarizeToolResult2(result) });
@@ -263306,6 +263671,12 @@ function videoCandidateFor(model, requestedBackend, requestedKind) {
   }
   return { model, backend, preset };
 }
+function videoCandidateHost(candidate) { // Host label passed to the model broker: "comfyui" for the ComfyUI backend, "diffusers-py" for every other backend.
+  return candidate.backend === "comfyui" ? "comfyui" : "diffusers-py";
+}
+function videoCandidateEstimatedVramMB(candidate) { // VRAM estimate in MB derived from the preset's minVramGB; undefined when the candidate has no preset.
+  return candidate.preset ? Math.ceil(candidate.preset.minVramGB * 1024) : void 0; // Math.ceil rounds a fractional GB figure up to a whole number of MB.
+}
 function videoGenerationFallbackCandidates(requestedModel, requestedBackend, requestedKind, allowFallback = true, options2 = {}) {
   const preferAudioVideo = Boolean(options2.preferNativeAudioVideo);
   const baseLadderIds = preferAudioVideo ? [...VIDEO_AUDIO_QUALITY_LADDER, ...VIDEO_GENERATION_QUALITY_LADDER] : VIDEO_GENERATION_QUALITY_LADDER;
@@ -263871,9 +264242,15 @@ function annotateVideoFallbackSuccess(result, failed, winner) {
     ...failed.map((attempt, index) => `  ${formatVideoAttempt(attempt.candidate, attempt.reason, index)}`),
     ""
   ].join("\n");
+  const llmPrefix = [
+    `Fallback ladder used ${winner.model} [${winner.backend}] after ${failed.length} failed attempt(s).`,
+    ...failed.map((attempt, index) => formatVideoAttempt(attempt.candidate, attempt.reason, index))
+  ].join("\n");
   return {
     ...result,
-    output: prefix + result.output
+    output: prefix + result.output,
+    llmContent: result.llmContent ? `${llmPrefix}
+${result.llmContent}` : result.llmContent
   };
 }
 function parseRunnerJson3(stdout) {
@@ -265240,6 +265617,45 @@ if __name__ == "__main__":
         this.lastProgressAt = now;
         this.progressHandler(event);
       }
+      async acquireTransientLoadLease(args) { // Requests a transient load lease from the model broker for args.candidate; resolves to null (no candidate), a failed tool-result object (broker reject/degrade), or { lease, gpuIndex }.
+        if (!args.candidate) // Nothing to lease without a candidate.
+          return null;
+        const broker = getModelBroker();
+        const decision2 = await broker.acquireTransientModelLoad({
+          name: args.candidate.model,
+          domain: "video-gen",
+          host: videoCandidateHost(args.candidate),
+          owner: "video-generate-tool",
+          estimatedVramMB: videoCandidateEstimatedVramMB(args.candidate)
+        }, { // Second argument: lease options.
+          reason: args.reason,
+          restoreOllama: true,
+          unloadRequestedOllama: false
+        });
+        if (decision2.kind === "reject") { // Broker refused the load: report it as a failed tool result.
+          return {
+            success: false,
+            output: "",
+            error: `Video generation blocked by resource broker: ${decision2.reason}`,
+            durationMs: performance.now() - args.start
+          };
+        }
+        if (decision2.kind === "degrade") { // Broker proposed its own fallback model; this is also reported as a failure because fallback selection belongs to the video candidate ladder.
+          return {
+            success: false,
+            output: "",
+            error: `Video generation needs a broker fallback (${decision2.fallback.name}), but video candidate fallback must be selected by the video ladder: ${decision2.reason}`,
+            durationMs: performance.now() - args.start
+          };
+        }
+        if (decision2.lease.evictedOllamaModels.length > 0) { // Emit a progress note only when the lease lists evicted Ollama models.
+          this.emitProgress({
+            stage: "setup",
+            message: `Temporarily unloaded ${decision2.lease.evictedOllamaModels.length} Ollama inference model(s) to free VRAM for video generation`
+          });
+        }
+        return { lease: decision2.lease, gpuIndex: decision2.lease.gpuIndex }; // Success shape has no "success" key, unlike the failure objects returned above.
+      }
       async execute(args) {
         const start2 = performance.now();
         const action = String(args["action"] ?? "generate");
@@ -265295,35 +265711,6 @@ if __name__ == "__main__":
         const withAudio = booleanArg3(args["with_audio"], false);
         const audioInput = typeof args["audio_input"] === "string" && args["audio_input"].trim() ? String(args["audio_input"]).trim() : void 0;
         const candidates = videoGenerationFallbackCandidates(requestedModel, requestedBackend, inferredKind, generationFallbackEnabled3(args), { preferNativeAudioVideo: withAudio || Boolean(audioInput) });
-        const broker = getModelBroker();
-        const firstCandidate = candidates[0];
-        let brokerGpuIndex = null;
-        if (firstCandidate) {
-          const preset = firstCandidate.preset;
-          const decision2 = await broker.ensureModelLoadable({
-            name: firstCandidate.model,
-            domain: "video-gen",
-            host: firstCandidate.backend === "comfyui" ? "comfyui" : "diffusers-py",
-            owner: "video-generate-tool",
-            estimatedVramMB: preset ? preset.minVramGB * 1024 : void 0
-          });
-          if (decision2.kind === "evict") {
-            for (const target of decision2.evictTargets) {
-              await broker.evict(target.host, target.name, "video-gen-needs-room");
-            }
-            brokerGpuIndex = decision2.gpuIndex ?? null;
-          } else if (decision2.kind === "ok") {
-            brokerGpuIndex = decision2.gpuIndex ?? null;
-          } else if (decision2.kind === "reject") {
-            return {
-              success: false,
-              output: "",
-              error: `Video generation blocked by resource broker: ${decision2.reason}`,
-              durationMs: performance.now() - start2
-            };
-          }
-        }
-        this._brokerGpuIndex = brokerGpuIndex;
         if (candidates.length === 0) {
           return {
             success: false,
@@ -265373,7 +265760,30 @@ if __name__ == "__main__":
             failed.push({ candidate, reason: "ComfyUI backend not yet implemented." });
             continue;
           }
-          const result = await this.prewarmDiffusers({ candidate, start: args.start, python: args.args["python"] });
+          const leaseDecision = await this.acquireTransientLoadLease({
+            candidate,
+            reason: "video-prewarm-needs-room",
+            start: args.start
+          });
+          if (leaseDecision && "success" in leaseDecision) {
+            failed.push({ candidate, reason: summarizeToolResult3(leaseDecision) });
+            if (index < args.candidates.length - 1) {
+              this.emitProgress({
+                stage: "setup",
+                message: `${candidate.model} did not fit current resources; trying ${args.candidates[index + 1].model}`
+              });
+            }
+            continue;
+          }
+          const lease = leaseDecision?.lease;
+          this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
+          let result;
+          try {
+            result = await this.prewarmDiffusers({ candidate, start: args.start, python: args.args["python"] });
+          } finally {
+            await lease?.release();
+            this._brokerGpuIndex = null;
+          }
           if (result.success)
             return annotateVideoFallbackSuccess(result, failed, candidate);
           failed.push({ candidate, reason: summarizeToolResult3(result) });
@@ -265459,26 +265869,48 @@ if __name__ == "__main__":
               start: args.start
             });
           } else {
-            result = await this.generateWithDiffusers({
-              prompt: promptForCandidate,
-              model: candidate.model,
-              preset,
-              kind: args.kind,
-              imageArg: args.imageArg,
-              audioInput: args.audioInput,
-              width,
-              height,
-              numFrames,
-              fps,
-              steps,
-              guidance,
-              negativePrompt,
-              seed: args.seed,
-              hfToken: hfTokenOverride,
-              autoAcceptLicense,
-              start: args.start,
-              python: args.args["python"]
+            const leaseDecision = await this.acquireTransientLoadLease({
+              candidate,
+              reason: "video-gen-needs-room",
+              start: args.start
             });
+            if (leaseDecision && "success" in leaseDecision) {
+              failed.push({ candidate, reason: summarizeToolResult3(leaseDecision) });
+              if (index < args.candidates.length - 1) {
+                this.emitProgress({
+                  stage: "setup",
+                  message: `${candidate.model} did not fit current resources; falling back to ${args.candidates[index + 1].model}`
+                });
+              }
+              continue;
+            }
+            const lease = leaseDecision?.lease;
+            this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
+            try {
+              result = await this.generateWithDiffusers({
+                prompt: promptForCandidate,
+                model: candidate.model,
+                preset,
+                kind: args.kind,
+                imageArg: args.imageArg,
+                audioInput: args.audioInput,
+                width,
+                height,
+                numFrames,
+                fps,
+                steps,
+                guidance,
+                negativePrompt,
+                seed: args.seed,
+                hfToken: hfTokenOverride,
+                autoAcceptLicense,
+                start: args.start,
+                python: args.args["python"]
+              });
+            } finally {
+              await lease?.release();
+              this._brokerGpuIndex = null;
+            }
           }
           let nativeAudio = preset.nativeAudioVideo === true;
           let audioPath;
@@ -265670,6 +266102,17 @@ ${llmAnnotation}` : result.llmContent;
         }
         ensureUnifiedCacheDirs();
         this.emitProgress({ stage: "load", message: `Downloading/loading video model ${args.candidate.model}` });
+        const runnerEnv = { ...python.env };
+        if (this._brokerGpuIndex !== null) {
+          if (mediaBrokerGpuIndexIsCompatible(this._brokerGpuIndex, "video", runnerEnv)) {
+            runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
+          } else {
+            this.emitProgress({
+              stage: "setup",
+              message: `Broker selected CUDA GPU ${this._brokerGpuIndex}, but video CUDA filtering excluded it; using CUDA_VISIBLE_DEVICES=${runnerEnv["CUDA_VISIBLE_DEVICES"] ?? "default"}`
+            });
+          }
+        }
         const result = await runProcess4(python.command, [
           runner,
           "--model",
@@ -265685,7 +266128,7 @@ ${llmAnnotation}` : result.llmContent;
         ], {
           cwd: this.cwd,
           timeoutMs: 18e5,
-          env: python.env,
+          env: runnerEnv,
           progressLabel: `Downloading/loading ${args.candidate.model}`,
           onProgress: (event) => this.emitProgress(event)
         });

package/npm-shrinkwrap.json CHANGED Viewed

@@ -1,12 +1,12 @@
 {
   "name": "omnius",
-  "version": "1.0.187",
+  "version": "1.0.188",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "omnius",
-      "version": "1.0.187",
+      "version": "1.0.188",
       "bundleDependencies": [
         "image-to-ascii"
       ],

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "omnius",
-  "version": "1.0.187",
+  "version": "1.0.188",
   "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
   "type": "module",
   "main": "./dist/index.js",