npm - omnius - Versions diffs - 1.0.187 → 1.0.189 - Mend

omnius 1.0.187 → 1.0.189

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/index.js CHANGED Viewed

@@ -1412,6 +1412,18 @@ var init_tool_executor = __esm({
 import { EventEmitter } from "node:events";
 import { totalmem, freemem } from "node:os";
 import { exec } from "node:child_process";
+function dedupeLoadedModels(models) {
+  const seen = /* @__PURE__ */ new Set();
+  const out = [];
+  for (const model of models) {
+    const key = `${model.host}:${model.name}`;
+    if (seen.has(key))
+      continue;
+    seen.add(key);
+    out.push(model);
+  }
+  return out;
+}
 function ramSnapshotMB() {
   const total = Math.round(totalmem() / (1024 * 1024));
   const free = Math.round(freemem() / (1024 * 1024));
@@ -1690,6 +1702,104 @@ var init_model_broker = __esm({
         this.emit("rejected", spec, reason);
         return { kind: "reject", reason };
       }
+      /**
+       * Acquire a short-lived load lease for media/subprocess generation.
+       *
+       * Media generation often needs to temporarily free VRAM occupied by Ollama
+       * chat models. This helper refreshes Ollama state, asks the broker what must
+       * be evicted, unloads those Ollama models with keep_alive=0, and returns a
+       * lease whose release() clears transient inflight state, unloads any
+       * Ollama-hosted requested model, and warms the evicted Ollama models again.
+       */
+      async acquireTransientModelLoad(spec, options2 = {}) {
+        const reason = options2.reason ?? `${spec.domain}-transient-load`;
+        const evictedModels = [];
+        let gpuIndex = null;
+        let admitted = false;
+        await this.pollOnce().catch(() => {
+        });
+        for (let attempt = 0; attempt < 4; attempt++) {
+          const decision2 = await this.ensureModelLoadable(spec);
+          if (decision2.kind === "wait-for-inflight") {
+            const waited = await decision2.promise.catch((err) => ({
+              kind: "reject",
+              reason: err instanceof Error ? err.message : String(err)
+            }));
+            if (waited.kind === "ok") {
+              gpuIndex = waited.gpuIndex ?? null;
+              admitted = true;
+              break;
+            }
+            if (waited.kind === "evict") {
+              for (const target of waited.evictTargets) {
+                if (await this.evict(target.host, target.name, reason))
+                  evictedModels.push(target);
+              }
+              await this.pollOnce().catch(() => {
+              });
+              continue;
+            }
+            if (waited.kind === "degrade")
+              return waited;
+            if (waited.kind === "reject")
+              return waited;
+            continue;
+          }
+          if (decision2.kind === "ok") {
+            gpuIndex = decision2.gpuIndex ?? null;
+            admitted = true;
+            break;
+          }
+          if (decision2.kind === "evict") {
+            for (const target of decision2.evictTargets) {
+              const before = this._loaded.get(`${target.host}:${target.name}`) ?? target;
+              if (await this.evict(target.host, target.name, reason)) {
+                evictedModels.push(before);
+              }
+            }
+            await this.pollOnce().catch(() => {
+            });
+            continue;
+          }
+          if (decision2.kind === "degrade")
+            return decision2;
+          return decision2;
+        }
+        if (!admitted) {
+          return {
+            kind: "reject",
+            reason: `could not acquire transient load lease for ${spec.host}:${spec.name} after repeated evictions`
+          };
+        }
+        const evictedOllamaModels = dedupeLoadedModels(evictedModels.filter((m2) => m2.host === "ollama"));
+        const broker = this;
+        let released = false;
+        return {
+          kind: "ok",
+          lease: {
+            spec,
+            gpuIndex,
+            evictedModels: dedupeLoadedModels(evictedModels),
+            evictedOllamaModels,
+            async release() {
+              if (released)
+                return;
+              released = true;
+              broker.clearInflight(spec.host, spec.name);
+              if ((options2.unloadRequestedOllama ?? true) && spec.host === "ollama") {
+                await broker.unloadOllamaModel(spec.name, `${reason}-complete`).catch(() => false);
+              }
+              if (options2.restoreOllama !== false && evictedOllamaModels.length > 0) {
+                await broker.restoreOllamaModels(evictedOllamaModels, {
+                  keepAlive: options2.restoreKeepAlive ?? "30m"
+                });
+              }
+              await broker.pollOnce().catch(() => {
+              });
+            }
+          }
+        };
+      }
       /**
        * Register a model that has been successfully loaded.
        * Callers MUST call this after a successful load so the broker can track LRU.
@@ -1743,6 +1853,66 @@ var init_model_broker = __esm({
         this.emit("evicted", m2, reason);
         return actively;
       }
+      /** Force-unload an Ollama model even when it is not currently tracked. */
+      async unloadOllamaModel(modelName, reason = "ollama-unload") {
+        const key = `ollama:${modelName}`;
+        const existing = this._loaded.get(key);
+        const ok3 = await this.ollamaUnload(modelName).catch(() => false);
+        if (existing) {
+          this._loaded.delete(key);
+          this.emit("evicted", existing, reason);
+        }
+        return ok3;
+      }
+      /** Best-effort warm/reload of an Ollama model after temporary eviction. */
+      async warmOllamaModel(modelName, keepAlive = "30m") {
+        const bodies = [
+          { model: modelName, prompt: "", stream: false, keep_alive: keepAlive, options: { num_predict: 0 } },
+          { model: modelName, prompt: "", stream: false, keep_alive: keepAlive, options: { num_predict: 1 } }
+        ];
+        for (const body of bodies) {
+          try {
+            const res = await fetch(`${this._ollamaBaseUrl}/api/generate`, {
+              method: "POST",
+              headers: { "Content-Type": "application/json" },
+              body: JSON.stringify(body),
+              signal: AbortSignal.timeout(12e4)
+            });
+            if (!res.ok)
+              continue;
+            await this.refreshOllamaPs().catch(() => {
+            });
+            return true;
+          } catch {
+          }
+        }
+        try {
+          const res = await fetch(`${this._ollamaBaseUrl}/api/generate`, {
+            method: "POST",
+            headers: { "Content-Type": "application/json" },
+            body: JSON.stringify({
+              model: modelName,
+              stream: false,
+              keep_alive: keepAlive
+            }),
+            signal: AbortSignal.timeout(12e4)
+          });
+          if (!res.ok)
+            return false;
+          await this.refreshOllamaPs().catch(() => {
+          });
+          return true;
+        } catch {
+          return false;
+        }
+      }
+      /** Restore a set of previously evicted Ollama models, oldest first. */
+      async restoreOllamaModels(models, options2 = {}) {
+        const unique2 = dedupeLoadedModels(models.filter((m2) => m2.host === "ollama")).sort((a2, b) => a2.lastUsedAt - b.lastUsedAt);
+        for (const model of unique2) {
+          await this.warmOllamaModel(model.name, options2.keepAlive ?? "30m").catch(() => false);
+        }
+      }
       // ------------------------------------------------------------------
       // Internal — Ollama
       // ------------------------------------------------------------------
@@ -1885,7 +2055,7 @@ var init_model_broker = __esm({
         );
         const idle = (m2) => now - m2.lastUsedAt > this.idleEvictMs;
         const onTargetGpu = (m2) => req2.targetGpu === void 0 || req2.targetGpu === null ? true : m2.gpuIndex === req2.targetGpu;
-        const evictable = [...this._loaded.values()].filter((m2) => m2.priority <= req2.requestingPriority).filter(sameDomainOk).filter(onTargetGpu).sort((a2, b) => {
+        const evictable = [...this._loaded.values()].filter((m2) => m2.priority <= req2.requestingPriority).filter(sameDomainOk).filter(onTargetGpu).filter((m2) => !this.hasActiveSlotForModel(m2)).sort((a2, b) => {
           const aIdle = idle(a2) ? 0 : 1;
           const bIdle = idle(b) ? 0 : 1;
           if (aIdle !== bIdle)
@@ -1931,6 +2101,13 @@ var init_model_broker = __esm({
             n2++;
         return n2;
       }
+      hasActiveSlotForModel(model) {
+        for (const slot of this._activeSlots.values()) {
+          if (slot.model === model.name)
+            return true;
+        }
+        return false;
+      }
       // ------------------------------------------------------------------
       // Internal — fallback resolution
       // ------------------------------------------------------------------
@@ -22761,6 +22938,17 @@ function evictModelsToFreeSpace(args) {
   writeMeta(meta);
   return { evicted, bytesFreed, finalFreeBytes: disk.freeBytes };
 }
+function estimateReclaimableCacheBytes(keepRepos) {
+  const keep = new Set(keepRepos ?? []);
+  let total = 0;
+  for (const entry of readMeta().entries) {
+    if (keep.has(entry.repo))
+      continue;
+    const measured = measureRepoCacheBytes(entry.repo);
+    total += Math.max(0, measured || entry.sizeBytes || 0);
+  }
+  return total;
+}
 function ensureDiskSpaceForDownload(args) {
   ensureUnifiedCacheDirs();
   const safetyMargin = args.safetyMarginBytes ?? 1 * 1024 ** 3;
@@ -22769,6 +22957,10 @@ function ensureDiskSpaceForDownload(args) {
   if (disk.freeBytes >= target) {
     return { ok: true, evicted: [], freeBytes: disk.freeBytes };
   }
+  const reclaimableBytes = estimateReclaimableCacheBytes(args.keepRepos);
+  if (disk.freeBytes + reclaimableBytes < target) {
+    throw new InsufficientDiskSpaceError(args.approxDownloadBytes, disk.freeBytes, disk.totalBytes, []);
+  }
   const evictionResult = evictModelsToFreeSpace({
     neededBytes: args.approxDownloadBytes,
     keepRepos: args.keepRepos,
@@ -259208,6 +259400,12 @@ function imageCandidateFor(model, requestedBackend) {
     preset: getImageGenerationPreset(resolved)
   };
 }
+function imageCandidateHost(candidate) {
+  return candidate.backend === "ollama" ? "ollama" : "diffusers-py";
+}
+function imageCandidateEstimatedVramMB(candidate) {
+  return candidate.preset?.minVramGB !== void 0 ? Math.ceil(candidate.preset.minVramGB * 1024) : void 0;
+}
 function imageGenerationFallbackCandidates(requestedModel, requestedBackend, allowFallback = true) {
   const ladder = imageGenerationQualityLadder();
   const candidates = [];
@@ -259511,9 +259709,15 @@ function annotateImageFallbackSuccess(result, failed, winner) {
     ...failed.map((attempt, index) => `  ${formatImageAttempt(attempt.candidate, attempt.reason, index)}`),
     ""
   ].join("\n");
+  const llmPrefix = [
+    `Fallback ladder used ${winner.model} [${winner.backend}] after ${failed.length} failed attempt(s).`,
+    ...failed.map((attempt, index) => formatImageAttempt(attempt.candidate, attempt.reason, index))
+  ].join("\n");
   return {
     ...result,
-    output: prefix + result.output
+    output: prefix + result.output,
+    llmContent: result.llmContent ? `${llmPrefix}
+${result.llmContent}` : result.llmContent
   };
 }
 function parseRunnerJson(stdout) {
@@ -260321,6 +260525,45 @@ if __name__ == "__main__":
         this.lastProgressAt = now;
         this.progressHandler(event);
       }
+      async acquireTransientLoadLease(args) {
+        if (!args.candidate)
+          return null;
+        const broker = getModelBroker();
+        const decision2 = await broker.acquireTransientModelLoad({
+          name: args.candidate.model,
+          domain: "image-gen",
+          host: imageCandidateHost(args.candidate),
+          owner: "image-generate-tool",
+          estimatedVramMB: imageCandidateEstimatedVramMB(args.candidate)
+        }, {
+          reason: args.reason,
+          restoreOllama: true,
+          unloadRequestedOllama: true
+        });
+        if (decision2.kind === "reject") {
+          return {
+            success: false,
+            output: "",
+            error: `Image generation blocked by resource broker: ${decision2.reason}`,
+            durationMs: performance.now() - args.start
+          };
+        }
+        if (decision2.kind === "degrade") {
+          return {
+            success: false,
+            output: "",
+            error: `Image generation needs a broker fallback (${decision2.fallback.name}), but image candidate fallback must be selected by the image ladder: ${decision2.reason}`,
+            durationMs: performance.now() - args.start
+          };
+        }
+        if (decision2.lease.evictedOllamaModels.length > 0) {
+          this.emitProgress({
+            stage: "setup",
+            message: `Temporarily unloaded ${decision2.lease.evictedOllamaModels.length} Ollama inference model(s) to free VRAM for image generation`
+          });
+        }
+        return { lease: decision2.lease, gpuIndex: decision2.lease.gpuIndex };
+      }
       async execute(args) {
         const start2 = performance.now();
         const action = String(args["action"] ?? "generate");
@@ -260363,33 +260606,6 @@ if __name__ == "__main__":
         const requestedBackend = args["backend"] ? String(args["backend"]) : this.defaultBackend;
         const seed = optionalNumberArg(args["seed"]);
         const candidates = imageGenerationFallbackCandidates(requestedModel, requestedBackend, generationFallbackEnabled(args));
-        const broker = getModelBroker();
-        const firstCandidate = candidates[0];
-        let brokerGpuIndex = null;
-        if (firstCandidate) {
-          const decision2 = await broker.ensureModelLoadable({
-            name: firstCandidate.model,
-            domain: "image-gen",
-            host: firstCandidate.backend === "ollama" ? "ollama" : "diffusers-py",
-            owner: "image-generate-tool"
-          });
-          if (decision2.kind === "evict") {
-            for (const target of decision2.evictTargets) {
-              await broker.evict(target.host, target.name, "image-gen-needs-room");
-            }
-            brokerGpuIndex = decision2.gpuIndex ?? null;
-          } else if (decision2.kind === "ok") {
-            brokerGpuIndex = decision2.gpuIndex ?? null;
-          } else if (decision2.kind === "reject") {
-            return {
-              success: false,
-              output: "",
-              error: `Image generation blocked by resource broker: ${decision2.reason}`,
-              durationMs: performance.now() - start2
-            };
-          }
-        }
-        this._brokerGpuIndex = brokerGpuIndex;
         try {
           return await this.generateCandidateLadder({ candidates, prompt, args, seed, start: start2 });
         } catch (err) {
@@ -260408,7 +260624,30 @@ if __name__ == "__main__":
             stage: "setup",
             message: `Preparing image model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
           });
-          const result = candidate.backend === "ollama" ? await this.prewarmOllama({ model: candidate.model, start: args.start }) : candidate.backend === "sdcpp" ? await this.prewarmSdCpp({ model: candidate.model, start: args.start, python: args.args["python"] }) : await this.prewarmDiffusers({ model: candidate.model, start: args.start, python: args.args["python"] });
+          const leaseDecision = await this.acquireTransientLoadLease({
+            candidate,
+            reason: "image-prewarm-needs-room",
+            start: args.start
+          });
+          if (leaseDecision && "success" in leaseDecision) {
+            failed.push({ candidate, reason: summarizeToolResult(leaseDecision) });
+            if (index < args.candidates.length - 1) {
+              this.emitProgress({
+                stage: "setup",
+                message: `${candidate.model} did not fit current resources; trying ${args.candidates[index + 1].model}`
+              });
+            }
+            continue;
+          }
+          const lease = leaseDecision?.lease;
+          this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
+          let result;
+          try {
+            result = candidate.backend === "ollama" ? await this.prewarmOllama({ model: candidate.model, start: args.start }) : candidate.backend === "sdcpp" ? await this.prewarmSdCpp({ model: candidate.model, start: args.start, python: args.args["python"] }) : await this.prewarmDiffusers({ model: candidate.model, start: args.start, python: args.args["python"] });
+          } finally {
+            await lease?.release();
+            this._brokerGpuIndex = null;
+          }
           if (result.success)
             return annotateImageFallbackSuccess(result, failed, candidate);
           failed.push({ candidate, reason: summarizeToolResult(result) });
@@ -260447,7 +260686,30 @@ if __name__ == "__main__":
             message: `Using image model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
           });
           const promptForCandidate = expansionEnabled ? await this.expandPromptForCandidate(args.prompt, candidate, index, args.candidates.length) : args.prompt;
-          const result = candidate.backend === "ollama" ? await this.generateWithOllama({ prompt: promptForCandidate, model: candidate.model, width, height, steps, start: args.start }) : candidate.backend === "sdcpp" ? await this.generateWithSdCpp({ prompt: promptForCandidate, model: candidate.model, width, height, steps, seed: args.seed, start: args.start, python: args.args["python"] }) : await this.generateWithDiffusers({ prompt: promptForCandidate, model: candidate.model, width, height, steps, guidance, seed: args.seed, start: args.start, python: args.args["python"] });
+          const leaseDecision = await this.acquireTransientLoadLease({
+            candidate,
+            reason: "image-gen-needs-room",
+            start: args.start
+          });
+          if (leaseDecision && "success" in leaseDecision) {
+            failed.push({ candidate, reason: summarizeToolResult(leaseDecision) });
+            if (index < args.candidates.length - 1) {
+              this.emitProgress({
+                stage: "setup",
+                message: `${candidate.model} did not fit current resources; falling back to ${args.candidates[index + 1].model}`
+              });
+            }
+            continue;
+          }
+          const lease = leaseDecision?.lease;
+          this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
+          let result;
+          try {
+            result = candidate.backend === "ollama" ? await this.generateWithOllama({ prompt: promptForCandidate, model: candidate.model, width, height, steps, start: args.start }) : candidate.backend === "sdcpp" ? await this.generateWithSdCpp({ prompt: promptForCandidate, model: candidate.model, width, height, steps, seed: args.seed, start: args.start, python: args.args["python"] }) : await this.generateWithDiffusers({ prompt: promptForCandidate, model: candidate.model, width, height, steps, guidance, seed: args.seed, start: args.start, python: args.args["python"] });
+          } finally {
+            await lease?.release();
+            this._brokerGpuIndex = null;
+          }
           if (result.success) {
             await this.writeImageSidecar(result, {
               originalPrompt: args.prompt,
@@ -260671,6 +260933,17 @@ ${errText.slice(0, 1200)}`,
         }
         ensureUnifiedCacheDirs();
         this.emitProgress({ stage: "load", message: `Downloading/loading image model ${args.model}` });
+        const runnerEnv = { ...python.env };
+        if (this._brokerGpuIndex !== null) {
+          if (mediaBrokerGpuIndexIsCompatible(this._brokerGpuIndex, "image", runnerEnv)) {
+            runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
+          } else {
+            this.emitProgress({
+              stage: "setup",
+              message: `Broker selected CUDA GPU ${this._brokerGpuIndex}, but image CUDA filtering excluded it; using CUDA_VISIBLE_DEVICES=${runnerEnv["CUDA_VISIBLE_DEVICES"] ?? "default"}`
+            });
+          }
+        }
         const result = await runProcess2(python.command, [
           runner,
           "--model",
@@ -260683,7 +260956,7 @@ ${errText.slice(0, 1200)}`,
         ], {
           cwd: this.cwd,
           timeoutMs: 18e5,
-          env: python.env,
+          env: runnerEnv,
           progressLabel: `Downloading/loading ${args.model}`,
           onProgress: (event) => this.emitProgress(event)
         });
@@ -261735,6 +262008,18 @@ function audioCandidateFor(kind, model, requestedBackend) {
     preset: getAudioGenerationPreset(model, kind)
   };
 }
+function audioCandidateHost(candidate) {
+  if (candidate.backend === "project")
+    return null;
+  if (candidate.backend === "audiocraft")
+    return "audiocraft";
+  if (candidate.backend === "tangoflux")
+    return "tangoflux";
+  return "diffusers-py";
+}
+function audioCandidateEstimatedVramMB(candidate) {
+  return candidate.preset ? Math.ceil(candidate.preset.minVramGB * 1024) : void 0;
+}
 function audioGenerationFallbackCandidates(kind, requestedModel, requestedBackend, allowFallback = true) {
   const ladder = audioGenerationQualityLadder(kind);
   const candidates = [];
@@ -261891,9 +262176,15 @@ function annotateAudioFallbackSuccess(result, failed, winner) {
     ...failed.map((attempt, index) => `  ${formatAudioAttempt(attempt.candidate, attempt.reason, index)}`),
     ""
   ].join("\n");
+  const llmPrefix = [
+    `Fallback ladder used ${winner.model} [${winner.backend}] after ${failed.length} failed attempt(s).`,
+    ...failed.map((attempt, index) => formatAudioAttempt(attempt.candidate, attempt.reason, index))
+  ].join("\n");
   return {
     ...result,
-    output: prefix + result.output
+    output: prefix + result.output,
+    llmContent: result.llmContent ? `${llmPrefix}
+${result.llmContent}` : result.llmContent
   };
 }
 var DEFAULT_SOUND_MODEL, DEFAULT_MUSIC_MODEL, DIFFUSERS_AUDIO_PACKAGES, TRANSFORMERS_AUDIO_PACKAGES, AUDIOCRAFT_PACKAGES, STABLE_AUDIO_PACKAGES, TANGOFLUX_PACKAGES, AUDIO_GENERATION_MODEL_PRESETS, SOUND_GENERATION_QUALITY_LADDER, MUSIC_GENERATION_QUALITY_LADDER, DIFFUSERS_AUDIO_RUNNER, AUDIOCRAFT_RUNNER, TRANSFORMERS_AUDIO_RUNNER, TANGOFLUX_RUNNER, AudioGenerateTool;
@@ -262730,6 +263021,48 @@ if __name__ == "__main__":
         this.lastProgressAt = now;
         this.progressHandler(event);
       }
+      async acquireTransientLoadLease(args) {
+        if (!args.candidate)
+          return null;
+        const host = audioCandidateHost(args.candidate);
+        if (!host)
+          return null;
+        const broker = getModelBroker();
+        const decision2 = await broker.acquireTransientModelLoad({
+          name: args.candidate.model,
+          domain: args.kind,
+          host,
+          owner: `audio-generate-tool/${args.kind}`,
+          estimatedVramMB: audioCandidateEstimatedVramMB(args.candidate)
+        }, {
+          reason: args.reason,
+          restoreOllama: true,
+          unloadRequestedOllama: false
+        });
+        if (decision2.kind === "reject") {
+          return {
+            success: false,
+            output: "",
+            error: `${args.kind === "music" ? "Music" : "Sound"} generation blocked by resource broker: ${decision2.reason}`,
+            durationMs: performance.now() - args.start
+          };
+        }
+        if (decision2.kind === "degrade") {
+          return {
+            success: false,
+            output: "",
+            error: `${args.kind === "music" ? "Music" : "Sound"} generation needs a broker fallback (${decision2.fallback.name}), but audio candidate fallback must be selected by the audio ladder: ${decision2.reason}`,
+            durationMs: performance.now() - args.start
+          };
+        }
+        if (decision2.lease.evictedOllamaModels.length > 0) {
+          this.emitProgress({
+            stage: "setup",
+            message: `Temporarily unloaded ${decision2.lease.evictedOllamaModels.length} Ollama inference model(s) to free VRAM for ${args.kind} generation`
+          });
+        }
+        return { lease: decision2.lease, gpuIndex: decision2.lease.gpuIndex };
+      }
       async prewarmPythonBackend(args) {
         const runner = await ensureAudioRunner(this.cwd, args.runnerBackend);
         let python;
@@ -262776,6 +263109,17 @@ if __name__ == "__main__":
         }
         ensureUnifiedCacheDirs();
         this.emitProgress({ stage: "load", message: `Downloading/loading ${args.kind} model ${args.model}` });
+        const runnerEnv = { ...python.env };
+        if (this._brokerGpuIndex !== null) {
+          if (audioBrokerGpuIndexIsCompatible(this._brokerGpuIndex, runnerEnv)) {
+            runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
+          } else {
+            this.emitProgress({
+              stage: "setup",
+              message: `Broker selected CUDA GPU ${this._brokerGpuIndex}, but audio CUDA filtering excluded it; using CUDA_VISIBLE_DEVICES=${runnerEnv["CUDA_VISIBLE_DEVICES"] ?? "default"}`
+            });
+          }
+        }
         const result = await runProcess3(python.command, [
           runner,
           "--kind",
@@ -262792,7 +263136,7 @@ if __name__ == "__main__":
         ], {
           cwd: this.cwd,
           timeoutMs: 18e5,
-          env: python.env,
+          env: runnerEnv,
           progressLabel: `Downloading/loading ${args.model}`,
           onProgress: (event) => this.emitProgress(event)
         });
@@ -262872,33 +263216,6 @@ if __name__ == "__main__":
         const candidates = audioGenerationFallbackCandidates(kind, requestedModel, requestedBackend, generationFallbackEnabled2(args));
         const seed = optionalNumberArg2(args["seed"]);
         const playback = playbackRequested(args);
-        const broker = getModelBroker();
-        const firstCandidate = candidates[0];
-        let brokerGpuIndex = null;
-        if (firstCandidate) {
-          const decision2 = await broker.ensureModelLoadable({
-            name: firstCandidate.model,
-            domain: kind === "music" ? "music" : "sound",
-            host: firstCandidate.backend === "audiocraft" ? "audiocraft" : firstCandidate.backend === "tangoflux" ? "tangoflux" : firstCandidate.backend === "transformers" ? "diffusers-py" : "diffusers-py",
-            owner: `audio-generate-tool/${kind}`
-          });
-          if (decision2.kind === "evict") {
-            for (const target of decision2.evictTargets) {
-              await broker.evict(target.host, target.name, `${kind}-gen-needs-room`);
-            }
-            brokerGpuIndex = decision2.gpuIndex ?? null;
-          } else if (decision2.kind === "ok") {
-            brokerGpuIndex = decision2.gpuIndex ?? null;
-          } else if (decision2.kind === "reject") {
-            return {
-              success: false,
-              output: "",
-              error: `${kind === "music" ? "Music" : "Sound"} generation blocked by resource broker: ${decision2.reason}`,
-              durationMs: performance.now() - start2
-            };
-          }
-        }
-        this._brokerGpuIndex = brokerGpuIndex;
         try {
           return await this.generateCandidateLadder({ kind, candidates, prompt, args, seed, playback, start: start2 });
         } catch (err) {
@@ -262918,15 +263235,39 @@ if __name__ == "__main__":
             stage: "setup",
             message: `Preparing ${args.kind} model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
           });
-          const result = candidate.backend === "project" ? this.projectProfileResult(args.kind, candidate, args.start) : await this.prewarmPythonBackend({
+          const leaseDecision = await this.acquireTransientLoadLease({
             kind: args.kind,
-            backend: candidate.backend,
-            runnerBackend: candidate.backend,
-            model: candidate.model,
-            duration,
-            start: args.start,
-            python: args.args["python"]
-          });
+            candidate,
+            reason: `${args.kind}-prewarm-needs-room`,
+            start: args.start
+          });
+          if (leaseDecision && "success" in leaseDecision) {
+            failed.push({ candidate, reason: summarizeToolResult2(leaseDecision) });
+            if (index < args.candidates.length - 1) {
+              this.emitProgress({
+                stage: "setup",
+                message: `${candidate.model} did not fit current resources; trying ${args.candidates[index + 1].model}`
+              });
+            }
+            continue;
+          }
+          const lease = leaseDecision?.lease;
+          this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
+          let result;
+          try {
+            result = candidate.backend === "project" ? this.projectProfileResult(args.kind, candidate, args.start) : await this.prewarmPythonBackend({
+              kind: args.kind,
+              backend: candidate.backend,
+              runnerBackend: candidate.backend,
+              model: candidate.model,
+              duration,
+              start: args.start,
+              python: args.args["python"]
+            });
+          } finally {
+            await lease?.release();
+            this._brokerGpuIndex = null;
+          }
           if (result.success)
             return annotateAudioFallbackSuccess(result, failed, candidate);
           failed.push({ candidate, reason: summarizeToolResult2(result) });
@@ -262954,19 +263295,43 @@ if __name__ == "__main__":
             stage: "setup",
             message: `Using ${args.kind} model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
           });
-          const result = candidate.backend === "project" ? this.projectProfileResult(args.kind, candidate, args.start) : await this.generateWithPythonBackend({
+          const leaseDecision = await this.acquireTransientLoadLease({
             kind: args.kind,
-            backend: candidate.backend,
-            runnerBackend: candidate.backend,
-            prompt: args.prompt,
-            model: candidate.model,
-            duration,
-            steps,
-            seed: args.seed,
-            playback: args.playback,
-            start: args.start,
-            python: args.args["python"]
-          });
+            candidate,
+            reason: `${args.kind}-gen-needs-room`,
+            start: args.start
+          });
+          if (leaseDecision && "success" in leaseDecision) {
+            failed.push({ candidate, reason: summarizeToolResult2(leaseDecision) });
+            if (index < args.candidates.length - 1) {
+              this.emitProgress({
+                stage: "setup",
+                message: `${candidate.model} did not fit current resources; falling back to ${args.candidates[index + 1].model}`
+              });
+            }
+            continue;
+          }
+          const lease = leaseDecision?.lease;
+          this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
+          let result;
+          try {
+            result = candidate.backend === "project" ? this.projectProfileResult(args.kind, candidate, args.start) : await this.generateWithPythonBackend({
+              kind: args.kind,
+              backend: candidate.backend,
+              runnerBackend: candidate.backend,
+              prompt: args.prompt,
+              model: candidate.model,
+              duration,
+              steps,
+              seed: args.seed,
+              playback: args.playback,
+              start: args.start,
+              python: args.args["python"]
+            });
+          } finally {
+            await lease?.release();
+            this._brokerGpuIndex = null;
+          }
           if (result.success)
             return annotateAudioFallbackSuccess(result, failed, candidate);
           failed.push({ candidate, reason: summarizeToolResult2(result) });
@@ -263306,6 +263671,12 @@ function videoCandidateFor(model, requestedBackend, requestedKind) {
   }
   return { model, backend, preset };
 }
+/**
+ * Maps a video candidate to the host label passed to the model broker.
+ *
+ * @param {{ backend: string }} candidate - Video generation candidate.
+ * @returns {string} "comfyui" for the ComfyUI backend, "diffusers-py" for
+ *   every other backend value.
+ */
+function videoCandidateHost(candidate) {
+  if (candidate.backend === "comfyui") {
+    return "comfyui";
+  }
+  return "diffusers-py";
+}
+/**
+ * Converts a candidate preset's minimum VRAM (GB) into whole megabytes.
+ *
+ * @param {{ preset?: { minVramGB: number } }} candidate - Video candidate.
+ * @returns {number | undefined} minVramGB * 1024 rounded up, or undefined
+ *   when the candidate carries no preset.
+ */
+function videoCandidateEstimatedVramMB(candidate) {
+  const { preset } = candidate;
+  if (!preset) {
+    return void 0;
+  }
+  return Math.ceil(preset.minVramGB * 1024);
+}
 function videoGenerationFallbackCandidates(requestedModel, requestedBackend, requestedKind, allowFallback = true, options2 = {}) {
   const preferAudioVideo = Boolean(options2.preferNativeAudioVideo);
   const baseLadderIds = preferAudioVideo ? [...VIDEO_AUDIO_QUALITY_LADDER, ...VIDEO_GENERATION_QUALITY_LADDER] : VIDEO_GENERATION_QUALITY_LADDER;
@@ -263871,9 +264242,15 @@ function annotateVideoFallbackSuccess(result, failed, winner) {
     ...failed.map((attempt, index) => `  ${formatVideoAttempt(attempt.candidate, attempt.reason, index)}`),
     ""
   ].join("\n");
+  const llmPrefix = [
+    `Fallback ladder used ${winner.model} [${winner.backend}] after ${failed.length} failed attempt(s).`,
+    ...failed.map((attempt, index) => formatVideoAttempt(attempt.candidate, attempt.reason, index))
+  ].join("\n");
   return {
     ...result,
-    output: prefix + result.output
+    output: prefix + result.output,
+    llmContent: result.llmContent ? `${llmPrefix}
+${result.llmContent}` : result.llmContent
   };
 }
 function parseRunnerJson3(stdout) {
@@ -265240,6 +265617,45 @@ if __name__ == "__main__":
         this.lastProgressAt = now;
         this.progressHandler(event);
       }
+      /**
+       * Requests a transient model-load lease from the model broker for one
+       * video candidate.
+       *
+       * @param args - Object with `candidate` (video candidate, may be absent),
+       *   `reason` (forwarded to the broker options) and `start` (subtracted
+       *   from performance.now() to fill durationMs on failure results).
+       * @returns null when no candidate is supplied; a failed tool-result
+       *   object (`success: false`) when the broker decision is "reject" or
+       *   "degrade"; otherwise `{ lease, gpuIndex }` taken from the decision.
+       *   Callers in this file tell the two object shapes apart with
+       *   `"success" in leaseDecision`.
+       */
+      async acquireTransientLoadLease(args) {
+        // Nothing to lease without a candidate.
+        if (!args.candidate)
+          return null;
+        const broker = getModelBroker();
+        const decision2 = await broker.acquireTransientModelLoad({
+          name: args.candidate.model,
+          domain: "video-gen",
+          host: videoCandidateHost(args.candidate),
+          owner: "video-generate-tool",
+          estimatedVramMB: videoCandidateEstimatedVramMB(args.candidate)
+        }, {
+          reason: args.reason,
+          // NOTE(review): presumably asks release() to warm evicted Ollama
+          // models again and to leave the requested model alone — confirm
+          // against acquireTransientModelLoad's option handling.
+          restoreOllama: true,
+          unloadRequestedOllama: false
+        });
+        // Broker refused the load outright: report it as a failed attempt.
+        if (decision2.kind === "reject") {
+          return {
+            success: false,
+            output: "",
+            error: `Video generation blocked by resource broker: ${decision2.reason}`,
+            durationMs: performance.now() - args.start
+          };
+        }
+        // A broker-proposed fallback model is not followed here; it is also
+        // reported as a failed attempt.
+        if (decision2.kind === "degrade") {
+          return {
+            success: false,
+            output: "",
+            error: `Video generation needs a broker fallback (${decision2.fallback.name}), but video candidate fallback must be selected by the video ladder: ${decision2.reason}`,
+            durationMs: performance.now() - args.start
+          };
+        }
+        // Tell the user when Ollama models were evicted to make room.
+        if (decision2.lease.evictedOllamaModels.length > 0) {
+          this.emitProgress({
+            stage: "setup",
+            message: `Temporarily unloaded ${decision2.lease.evictedOllamaModels.length} Ollama inference model(s) to free VRAM for video generation`
+          });
+        }
+        return { lease: decision2.lease, gpuIndex: decision2.lease.gpuIndex };
+      }
       async execute(args) {
         const start2 = performance.now();
         const action = String(args["action"] ?? "generate");
@@ -265295,35 +265711,6 @@ if __name__ == "__main__":
         const withAudio = booleanArg3(args["with_audio"], false);
         const audioInput = typeof args["audio_input"] === "string" && args["audio_input"].trim() ? String(args["audio_input"]).trim() : void 0;
         const candidates = videoGenerationFallbackCandidates(requestedModel, requestedBackend, inferredKind, generationFallbackEnabled3(args), { preferNativeAudioVideo: withAudio || Boolean(audioInput) });
-        const broker = getModelBroker();
-        const firstCandidate = candidates[0];
-        let brokerGpuIndex = null;
-        if (firstCandidate) {
-          const preset = firstCandidate.preset;
-          const decision2 = await broker.ensureModelLoadable({
-            name: firstCandidate.model,
-            domain: "video-gen",
-            host: firstCandidate.backend === "comfyui" ? "comfyui" : "diffusers-py",
-            owner: "video-generate-tool",
-            estimatedVramMB: preset ? preset.minVramGB * 1024 : void 0
-          });
-          if (decision2.kind === "evict") {
-            for (const target of decision2.evictTargets) {
-              await broker.evict(target.host, target.name, "video-gen-needs-room");
-            }
-            brokerGpuIndex = decision2.gpuIndex ?? null;
-          } else if (decision2.kind === "ok") {
-            brokerGpuIndex = decision2.gpuIndex ?? null;
-          } else if (decision2.kind === "reject") {
-            return {
-              success: false,
-              output: "",
-              error: `Video generation blocked by resource broker: ${decision2.reason}`,
-              durationMs: performance.now() - start2
-            };
-          }
-        }
-        this._brokerGpuIndex = brokerGpuIndex;
         if (candidates.length === 0) {
           return {
             success: false,
@@ -265373,7 +265760,30 @@ if __name__ == "__main__":
             failed.push({ candidate, reason: "ComfyUI backend not yet implemented." });
             continue;
           }
-          const result = await this.prewarmDiffusers({ candidate, start: args.start, python: args.args["python"] });
+          const leaseDecision = await this.acquireTransientLoadLease({
+            candidate,
+            reason: "video-prewarm-needs-room",
+            start: args.start
+          });
+          if (leaseDecision && "success" in leaseDecision) {
+            failed.push({ candidate, reason: summarizeToolResult3(leaseDecision) });
+            if (index < args.candidates.length - 1) {
+              this.emitProgress({
+                stage: "setup",
+                message: `${candidate.model} did not fit current resources; trying ${args.candidates[index + 1].model}`
+              });
+            }
+            continue;
+          }
+          const lease = leaseDecision?.lease;
+          this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
+          let result;
+          try {
+            result = await this.prewarmDiffusers({ candidate, start: args.start, python: args.args["python"] });
+          } finally {
+            await lease?.release();
+            this._brokerGpuIndex = null;
+          }
           if (result.success)
             return annotateVideoFallbackSuccess(result, failed, candidate);
           failed.push({ candidate, reason: summarizeToolResult3(result) });
@@ -265459,26 +265869,48 @@ if __name__ == "__main__":
               start: args.start
             });
           } else {
-            result = await this.generateWithDiffusers({
-              prompt: promptForCandidate,
-              model: candidate.model,
-              preset,
-              kind: args.kind,
-              imageArg: args.imageArg,
-              audioInput: args.audioInput,
-              width,
-              height,
-              numFrames,
-              fps,
-              steps,
-              guidance,
-              negativePrompt,
-              seed: args.seed,
-              hfToken: hfTokenOverride,
-              autoAcceptLicense,
-              start: args.start,
-              python: args.args["python"]
+            const leaseDecision = await this.acquireTransientLoadLease({
+              candidate,
+              reason: "video-gen-needs-room",
+              start: args.start
             });
+            if (leaseDecision && "success" in leaseDecision) {
+              failed.push({ candidate, reason: summarizeToolResult3(leaseDecision) });
+              if (index < args.candidates.length - 1) {
+                this.emitProgress({
+                  stage: "setup",
+                  message: `${candidate.model} did not fit current resources; falling back to ${args.candidates[index + 1].model}`
+                });
+              }
+              continue;
+            }
+            const lease = leaseDecision?.lease;
+            this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
+            try {
+              result = await this.generateWithDiffusers({
+                prompt: promptForCandidate,
+                model: candidate.model,
+                preset,
+                kind: args.kind,
+                imageArg: args.imageArg,
+                audioInput: args.audioInput,
+                width,
+                height,
+                numFrames,
+                fps,
+                steps,
+                guidance,
+                negativePrompt,
+                seed: args.seed,
+                hfToken: hfTokenOverride,
+                autoAcceptLicense,
+                start: args.start,
+                python: args.args["python"]
+              });
+            } finally {
+              await lease?.release();
+              this._brokerGpuIndex = null;
+            }
           }
           let nativeAudio = preset.nativeAudioVideo === true;
           let audioPath;
@@ -265670,6 +266102,17 @@ ${llmAnnotation}` : result.llmContent;
         }
         ensureUnifiedCacheDirs();
         this.emitProgress({ stage: "load", message: `Downloading/loading video model ${args.candidate.model}` });
+        const runnerEnv = { ...python.env };
+        if (this._brokerGpuIndex !== null) {
+          if (mediaBrokerGpuIndexIsCompatible(this._brokerGpuIndex, "video", runnerEnv)) {
+            runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
+          } else {
+            this.emitProgress({
+              stage: "setup",
+              message: `Broker selected CUDA GPU ${this._brokerGpuIndex}, but video CUDA filtering excluded it; using CUDA_VISIBLE_DEVICES=${runnerEnv["CUDA_VISIBLE_DEVICES"] ?? "default"}`
+            });
+          }
+        }
         const result = await runProcess4(python.command, [
           runner,
           "--model",
@@ -265685,7 +266128,7 @@ ${llmAnnotation}` : result.llmContent;
         ], {
           cwd: this.cwd,
           timeoutMs: 18e5,
-          env: python.env,
+          env: runnerEnv,
           progressLabel: `Downloading/loading ${args.candidate.model}`,
           onProgress: (event) => this.emitProgress(event)
         });
@@ -568924,6 +569367,78 @@ var init_spinner = __esm({
   }
 });
+// packages/cli/src/tui/generative-progress.ts
+/**
+ * Resolves the generation kind handled by a tool.
+ *
+ * @param {string} toolName - Registered tool name.
+ * @returns {"image" | "audio" | "video" | "tts" | null} The generation kind,
+ *   or null when the tool is not one of the known generation tools.
+ */
+function generationKindForToolName(toolName) {
+  switch (toolName) {
+    case "generate_image":
+      return "image";
+    case "generate_audio":
+      return "audio";
+    case "generate_video":
+      return "video";
+    case "generate_tts":
+    case "create_audio_file":
+      return "tts";
+    default:
+      return null;
+  }
+}
+/**
+ * Renders one progress event as a single text line.
+ *
+ * With a finite percent the line is
+ * "<Kind> <stage>: [###---] <pct>% <message><bytes><elapsed>"; without one
+ * the bar and percent are omitted.
+ *
+ * @param {string} kind - Generation kind ("image", "audio", "video", "tts").
+ * @param {object} event - Progress event; reads stage, percent, message,
+ *   totalBytes, downloadedBytes and elapsedMs.
+ * @param {{ width?: number, surface?: string }} [options2] - Bar width
+ *   (clamped to 8..32); defaults to 12 for the "telegram" surface, else 20.
+ * @returns {string} The formatted progress line.
+ */
+function formatGenerativeProgress(kind, event, options2 = {}) {
+  const defaultWidth = options2.surface === "telegram" ? 12 : 20;
+  const requestedWidth = options2.width ?? defaultWidth;
+  const width = Math.max(8, Math.min(32, requestedWidth));
+  const prefix = `${kindLabel(kind)} ${stageLabel(event.stage)}:`;
+  const pct = finitePercent(event.percent);
+  const suffix = `${compactProgressMessage(event.message)}${formatProgressBytes(event)}${formatElapsed(event.elapsedMs)}`;
+  if (typeof pct !== "number") {
+    return `${prefix} ${suffix}`;
+  }
+  const filled = Math.max(0, Math.min(width, Math.round(pct / 100 * width)));
+  const bar = "#".repeat(filled) + "-".repeat(width - filled);
+  return `${prefix} [${bar}] ${pct}% ${suffix}`;
+}
+/**
+ * Display label for a generation kind: "TTS" for "tts", otherwise the kind
+ * with its first character upper-cased.
+ *
+ * @param {string} kind - Generation kind.
+ * @returns {string} Human-readable label.
+ */
+function kindLabel(kind) {
+  return kind === "tts" ? "TTS" : `${kind.charAt(0).toUpperCase()}${kind.slice(1)}`;
+}
+/**
+ * Normalizes a progress stage to the short label shown to users.
+ *
+ * The stage is stringified, trimmed and lower-cased first. "generate" is
+ * shown as "infer" and "hf_token_required" as "auth"; the other known
+ * stages map to themselves and anything else becomes "process".
+ *
+ * @param {unknown} stage - Raw stage value from a progress event.
+ * @returns {string} Display label for the stage.
+ */
+function stageLabel(stage) {
+  const key = String(stage || "process").trim().toLowerCase();
+  switch (key) {
+    case "generate":
+      return "infer";
+    case "hf_token_required":
+      return "auth";
+    case "setup":
+    case "download":
+    case "load":
+    case "save":
+    case "thumbnail":
+      return key;
+    default:
+      return "process";
+  }
+}
+/**
+ * Sanitizes a percent value.
+ *
+ * @param {unknown} value2 - Candidate percent.
+ * @returns {number | undefined} The value rounded to an integer and clamped
+ *   to 0..100, or undefined when it is not a finite number.
+ */
+function finitePercent(value2) {
+  if (typeof value2 === "number" && Number.isFinite(value2)) {
+    const rounded = Math.round(value2);
+    return Math.min(100, Math.max(0, rounded));
+  }
+  return void 0;
+}
+/**
+ * Builds the " (<downloaded> / <total>)" suffix of a progress line.
+ *
+ * @param {{ totalBytes?: number, downloadedBytes?: number }} event - Progress event.
+ * @returns {string} The suffix, or "" when totalBytes is not a finite
+ *   number greater than zero. A missing or non-finite downloadedBytes is
+ *   treated as 0 and negative values are clamped to 0.
+ */
+function formatProgressBytes(event) {
+  const { totalBytes, downloadedBytes } = event;
+  const hasTotal = typeof totalBytes === "number" && Number.isFinite(totalBytes) && totalBytes > 0;
+  if (!hasTotal) {
+    return "";
+  }
+  let downloaded = 0;
+  if (typeof downloadedBytes === "number" && Number.isFinite(downloadedBytes)) {
+    downloaded = Math.max(0, downloadedBytes);
+  }
+  return ` (${formatBytes3(downloaded)} / ${formatBytes3(totalBytes)})`;
+}
+/**
+ * Builds the " <n>s" elapsed-time suffix of a progress line.
+ *
+ * @param {unknown} elapsedMs2 - Elapsed time in milliseconds.
+ * @returns {string} " <seconds>s" (rounded) when the value is a finite
+ *   number above 1500 ms, otherwise "".
+ */
+function formatElapsed(elapsedMs2) {
+  const usable = typeof elapsedMs2 === "number" && Number.isFinite(elapsedMs2) && elapsedMs2 > 1500;
+  if (!usable) {
+    return "";
+  }
+  const seconds = Math.round(elapsedMs2 / 1e3);
+  return ` ${seconds}s`;
+}
+/**
+ * Collapses a progress message onto one line of at most 220 characters.
+ *
+ * Whitespace runs become single spaces and the result is trimmed. The
+ * "working" fallback is applied after normalization, so a whitespace-only
+ * message also falls back instead of producing an empty string.
+ *
+ * @param {unknown} message2 - Raw message from a progress event.
+ * @returns {string} Non-empty single-line message.
+ */
+function compactProgressMessage(message2) {
+  const collapsed = String(message2 || "").replace(/\s+/g, " ").trim();
+  return (collapsed || "working").slice(0, 220);
+}
+/**
+ * Formats a byte count with binary (1024-based) units.
+ *
+ * Bytes are printed as a rounded integer ("512B"); larger units use one
+ * decimal ("1.5KB"). Negative input is clamped to 0.
+ *
+ * @param {number} value2 - Size in bytes.
+ * @returns {string} Compact size such as "0B", "1.0KB" or "2.3GB".
+ */
+function formatBytes3(value2) {
+  const units = ["B", "KB", "MB", "GB", "TB"];
+  let amount = Math.max(0, value2);
+  let idx = 0;
+  while (amount >= 1024 && idx < units.length - 1) {
+    amount /= 1024;
+    idx++;
+  }
+  // toFixed(1) rounds 1023.95+ up to "1024.0"; move to the next unit so the
+  // output reads "1.0MB" rather than "1024.0KB".
+  if (idx > 0 && idx < units.length - 1 && Number(amount.toFixed(1)) >= 1024) {
+    amount /= 1024;
+    idx++;
+  }
+  return idx === 0 ? `${Math.round(amount)}B` : `${amount.toFixed(1)}${units[idx]}`;
+}
+// Module initializer for generative-progress.ts, registered through __esm.
+// The callback body holds only the "use strict" directive.
+// NOTE(review): __esm is defined outside this view — presumably the
+// bundler's lazy-init helper; confirm.
+var init_generative_progress = __esm({
+  "packages/cli/src/tui/generative-progress.ts"() {
+    "use strict";
+  }
+});
 // packages/cli/src/api/py-embed.ts
 var py_embed_exports = {};
 __export(py_embed_exports, {
@@ -575515,7 +576030,7 @@ async function fetchOllamaModels(baseUrl) {
     const family = m2.details?.family;
     return {
       name: m2.name,
-      size: formatBytes3(m2.size),
+      size: formatBytes4(m2.size),
       sizeBytes: m2.size,
       modified: formatRelativeTime(m2.modified_at),
       parameterSize: m2.details?.parameter_size,
@@ -575961,7 +576476,7 @@ async function queryModelCapabilities(baseUrl, modelName) {
     return caps;
   }
 }
-function formatBytes3(bytes) {
+function formatBytes4(bytes) {
   if (bytes < 1024) return `${bytes} B`;
   const units = ["KB", "MB", "GB", "TB"];
   let size = bytes;
@@ -595166,7 +595681,7 @@ function formatWorkspaceExplorer(result) {
   const width = Math.max(12, ...result.entries.map((entry) => entry.path.length));
   for (const entry of result.entries) {
     lines.push(
-      `  ${entry.path.padEnd(Math.min(width, 70)).slice(0, 70)}  ${entry.kind.padEnd(6)}  ${formatBytes4(entry.sizeBytes).padStart(8)}`
+      `  ${entry.path.padEnd(Math.min(width, 70)).slice(0, 70)}  ${entry.kind.padEnd(6)}  ${formatBytes5(entry.sizeBytes).padStart(8)}`
     );
   }
   lines.push("");
@@ -595188,7 +595703,7 @@ function previewWorkspaceFile(root, relPath, options2 = {}) {
     return [
       "",
       `  File Preview: ${relPath}`,
-      `  Size: ${formatBytes4(st.size)} (too large for inline preview)`,
+      `  Size: ${formatBytes5(st.size)} (too large for inline preview)`,
       ""
     ].join("\n");
   }
@@ -595199,7 +595714,7 @@ function previewWorkspaceFile(root, relPath, options2 = {}) {
   return [
     "",
     `  File Preview: ${relPath}`,
-    `  Size: ${formatBytes4(st.size)}    Lines: ${rawLines.length}${rawLines.length > maxLines ? " (truncated)" : ""}`,
+    `  Size: ${formatBytes5(st.size)}    Lines: ${rawLines.length}${rawLines.length > maxLines ? " (truncated)" : ""}`,
     "",
     ...visible.map((line, idx) => `  ${String(idx + 1).padStart(gutter)} | ${line}`),
     ""
@@ -595230,7 +595745,7 @@ function scoreWorkspaceFile(entry, query) {
   }
   return score;
 }
-function formatBytes4(bytes) {
+function formatBytes5(bytes) {
   if (bytes < 1024) return `${bytes} B`;
   if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
   return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
@@ -600895,7 +601410,7 @@ function describeTaskComplete(summary, completed, personality = 2, _stark = fals
   }
   return `Task completed, but no summary was generated to describe the outcome`;
 }
-function formatBytes5(bytes) {
+function formatBytes6(bytes) {
   if (bytes < 1024) return `${bytes}B`;
   if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(0)}KB`;
   return `${(bytes / (1024 * 1024)).toFixed(1)}MB`;
@@ -603605,7 +604120,7 @@ Error: ${err instanceof Error ? err.message : String(err)}`
               const pct = Math.round(received / contentLength * 100);
               if (pct === 25 || pct === 50 || pct === 75 || pct === 100) {
                 renderInfo(
-                  `  ${pct}% (${formatBytes5(received)} / ${formatBytes5(contentLength)})`
+                  `  ${pct}% (${formatBytes6(received)} / ${formatBytes6(contentLength)})`
                 );
               }
             }
@@ -603613,7 +604128,7 @@ Error: ${err instanceof Error ? err.message : String(err)}`
           const fullBuffer = Buffer.concat(chunks);
           writeFileSync54(onnxPath, fullBuffer);
           renderInfo(
-            `${model.label} model downloaded (${formatBytes5(fullBuffer.length)}).`
+            `${model.label} model downloaded (${formatBytes6(fullBuffer.length)}).`
           );
         }
       }
@@ -611742,15 +612257,7 @@ async function handleImageCommand(ctx3, arg, hasLocal) {
   return "handled";
 }
 function formatImageGenerationProgress(event) {
-  const pct = event.percent;
-  const elapsed = event.elapsedMs && event.elapsedMs > 1500 ? ` ${Math.round(event.elapsedMs / 1e3)}s` : "";
-  if (typeof pct === "number") {
-    const width = 20;
-    const filled = Math.max(0, Math.min(width, Math.round(pct / 100 * width)));
-    const bar = `${"#".repeat(filled)}${"-".repeat(width - filled)}`;
-    return `Image ${event.stage}: [${bar}] ${pct}% ${event.message}${elapsed}`;
-  }
-  return `Image ${event.stage}: ${event.message}${elapsed}`;
+  // Thin wrapper: delegates to formatGenerativeProgress with kind "image".
+  return formatGenerativeProgress("image", event);
 }
 function rateVideoPresetForHardware(preset, specs) {
   const min = preset.minVramGB;
@@ -612030,15 +612537,7 @@ async function handleVideoCommand(ctx3, arg, hasLocal) {
   return "handled";
 }
 function formatVideoGenerationProgress(event) {
-  const pct = event.percent;
-  const elapsed = event.elapsedMs && event.elapsedMs > 1500 ? ` ${Math.round(event.elapsedMs / 1e3)}s` : "";
-  if (typeof pct === "number") {
-    const width = 20;
-    const filled = Math.max(0, Math.min(width, Math.round(pct / 100 * width)));
-    const bar = `${"#".repeat(filled)}${"-".repeat(width - filled)}`;
-    return `Video ${event.stage}: [${bar}] ${pct}% ${event.message}${elapsed}`;
-  }
-  return `Video ${event.stage}: ${event.message}${elapsed}`;
+  // Thin wrapper: delegates to formatGenerativeProgress with kind "video".
+  return formatGenerativeProgress("video", event);
 }
 function activeAudioModel(settings, kind) {
   return kind === "music" ? settings.musicModel : settings.soundModel;
@@ -612346,16 +612845,7 @@ async function handleAudioGenerationCommand(ctx3, arg, hasLocal, kind) {
   return "handled";
 }
 function formatAudioGenerationProgress(event) {
-  const pct = event.percent;
-  const elapsed = event.elapsedMs && event.elapsedMs > 1500 ? ` ${Math.round(event.elapsedMs / 1e3)}s` : "";
-  const bytes = typeof event.totalBytes === "number" && event.totalBytes > 0 ? ` (${formatFileSize(event.downloadedBytes ?? 0)} / ${formatFileSize(event.totalBytes)})` : "";
-  if (typeof pct === "number") {
-    const width = 20;
-    const filled = Math.max(0, Math.min(width, Math.round(pct / 100 * width)));
-    const bar = `${"#".repeat(filled)}${"-".repeat(width - filled)}`;
-    return `Audio ${event.stage}: [${bar}] ${pct}% ${event.message}${bytes}${elapsed}`;
-  }
-  return `Audio ${event.stage}: ${event.message}${bytes}${elapsed}`;
+  // Thin wrapper: delegates to formatGenerativeProgress with kind "audio".
+  return formatGenerativeProgress("audio", event);
 }
 async function showHelpMenu(ctx3) {
   const slashCommands = getSlashHelpEntries();
@@ -617504,6 +617994,7 @@ var init_commands = __esm({
     "use strict";
     init_model_picker();
     init_render();
+    init_generative_progress();
     init_command_registry();
     init_hf_token_prompt();
     init_dist5();
@@ -626249,6 +626740,10 @@ function scopedTool(base3, root, mode) {
   if (typeof baseSetExpander === "function") {
     wrapper.setPromptExpander = (expander) => baseSetExpander.call(base3, expander);
   }
+  const baseSetProgress = base3.setProgressCallback;
+  if (typeof baseSetProgress === "function") {
+    wrapper.setProgressCallback = (handler) => baseSetProgress.call(base3, handler);
+  }
   return wrapper;
 }
 function withTelegramAutoAttachmentNotice(result, artifactCount) {
@@ -626557,6 +627052,16 @@ var init_telegram_creative_tools = __esm({
         },
         required: []
       };
+      progressHandler = null;
+      /**
+       * Registers the handler that emitProgress forwards events to.
+       *
+       * @param handler - Callback receiving progress events; replaces any
+       *   previously stored handler.
+       */
+      setProgressCallback(handler) {
+        this.progressHandler = handler;
+      }
+      /**
+       * Forwards a progress event to the registered handler, if any.
+       *
+       * @param start2 - performance.now() timestamp taken when execution began;
+       *   used to stamp the event with elapsedMs.
+       * @param event - Progress event fields (copied, not mutated).
+       */
+      emitProgress(start2, event) {
+        try {
+          this.progressHandler?.({ ...event, elapsedMs: performance.now() - start2 });
+        } catch {
+          // Exceptions thrown by the handler are swallowed here
+          // (presumably so progress reporting stays best-effort).
+        }
+      }
       async execute(args) {
         const start2 = performance.now();
         const text = typeof args["text"] === "string" && args["text"].trim() ? args["text"].trim() : typeof args["input"] === "string" && args["input"].trim() ? args["input"].trim() : typeof args["prompt"] === "string" && args["prompt"].trim() ? args["prompt"].trim() : "";
@@ -626590,8 +627095,10 @@ var init_telegram_creative_tools = __esm({
         }
         let result;
         try {
+          this.emitProgress(start2, { stage: "setup", message: "Preparing scoped TTS audio file" });
           await mkdir19(dirname37(guarded.path.abs), { recursive: true });
           const tts = new TtsGenerateTool();
+          this.emitProgress(start2, { stage: "load", message: "Starting TTS backend" });
           result = await tts.execute({
             text,
             output: guarded.path.abs,
@@ -626624,6 +627131,7 @@ ${(result.error || result.output || "").slice(0, 1200)}`,
         }
         rememberCreated(this.root, guarded.path.abs);
         const sizeKB = Math.round(statSync43(guarded.path.abs).size / 1024);
+        this.emitProgress(start2, { stage: "save", message: `Saved scoped audio file (${sizeKB}KB)` });
         return withTelegramAutoAttachmentNotice({
           success: true,
           output: `Created audio file: ${guarded.path.abs} (${sizeKB}KB WAV)
@@ -631246,7 +631754,13 @@ function normalizeTelegramCallbackQuery(update2) {
     data
   };
 }
-function adaptTool5(tool, todoSessionId) {
+function adaptTool5(tool, todoSessionId, progress) {
+  const progressTool = tool;
+  if (generationKindForToolName(tool.name) && typeof progressTool.setProgressCallback === "function") {
+    progressTool.setProgressCallback((event) => {
+      progress?.onProgress(tool.name, event);
+    });
+  }
   return {
     name: tool.name,
     description: tool.description,
@@ -631258,7 +631772,15 @@ function adaptTool5(tool, todoSessionId) {
       }
       try {
         const result = await tool.execute(args);
+        progress?.complete(tool.name, result);
         return { success: result.success, output: result.output, error: result.error, llmContent: result.llmContent };
+      } catch (err) {
+        progress?.complete(tool.name, {
+          success: false,
+          output: "",
+          error: err instanceof Error ? err.message : String(err)
+        });
+        throw err;
       } finally {
         if (todoSessionId && (tool.name === "todo_write" || tool.name === "todo_read")) {
           setTodoSessionId(previousTodoSession);
@@ -631429,6 +631951,7 @@ var init_telegram_bridge = __esm({
     init_scoped_personality();
     init_voice_soul();
     init_telegram_creative_tools();
+    init_generative_progress();
     init_omnius_directory();
     init_stimulation();
     init_pid_controller();
@@ -637910,6 +638433,91 @@ ${TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT}`);
       async sendChatAction(chatId, action) {
         await this.apiCall("sendChatAction", { chat_id: chatId, action });
       }
+      /**
+       * Builds a bridge that mirrors generation-tool progress into Telegram
+       * live messages, one message per tool name.
+       *
+       * @param chatId - Target chat; when undefined no bridge is created.
+       * @param msg - Originating message; its messageId (if any) is passed to
+       *   sendLiveMessage when the first progress message is sent.
+       * @returns undefined when chatId is undefined, otherwise an object with
+       *   `onProgress(toolName, event)` and `complete(toolName, result)`.
+       *
+       * NOTE(review): entries are never removed from `states`, so a later call
+       * to the same tool through this bridge keeps editing the same message —
+       * confirm this is intended.
+       */
+      createTelegramGenerativeProgressBridge(chatId, msg) {
+        if (chatId === void 0) return void 0;
+        // Per-tool render state, keyed by tool name.
+        const states = /* @__PURE__ */ new Map();
+        // Returns the state for a tool, creating it on first use.
+        const stateFor = (toolName) => {
+          let state = states.get(toolName);
+          if (!state) {
+            state = {
+              messageId: null,
+              pump: null,
+              queuedHtml: null,
+              lastRenderedAt: 0
+            };
+            states.set(toolName, state);
+          }
+          return state;
+        };
+        // Title used in the message header for a tool.
+        const kindTitle = (toolName) => {
+          const kind = generationKindForToolName(toolName);
+          if (!kind) return "Generation";
+          if (kind === "tts") return "TTS audio";
+          return `${kind.slice(0, 1).toUpperCase()}${kind.slice(1)}`;
+        };
+        // Queues HTML for delivery. Only the latest pending HTML is kept, and
+        // at most one pump per state sends or edits at a time.
+        const enqueue = (state, html) => {
+          state.queuedHtml = html;
+          if (state.pump) return;
+          state.pump = (async () => {
+            while (state.queuedHtml) {
+              const nextHtml = state.queuedHtml;
+              state.queuedHtml = null;
+              if (state.messageId === null) {
+                // First render: send a new message and remember its id.
+                const messageId = await this.sendLiveMessage(chatId, nextHtml, msg?.messageId);
+                if (!messageId) return;
+                state.messageId = messageId;
+              } else {
+                // Later renders edit the existing message in place.
+                await this.editLiveMessage(chatId, state.messageId, nextHtml);
+              }
+            }
+          })().catch(() => {
+            // Send/edit failures are swallowed.
+          }).finally(() => {
+            state.pump = null;
+            // Restart the pump if HTML was queued while it was finishing.
+            if (state.queuedHtml) enqueue(state, state.queuedHtml);
+          });
+        };
+        // Renders a progress event as Telegram HTML; null for non-generation tools.
+        const progressHtml = (toolName, event) => {
+          const kind = generationKindForToolName(toolName);
+          if (!kind) return null;
+          const rendered = redactTelegramLocalPaths(formatGenerativeProgress(kind, event, { surface: "telegram" }));
+          return `<b>${escapeTelegramHTML(kindTitle(toolName))} generation</b>
+<code>${escapeTelegramHTML(rendered)}</code>`;
+        };
+        // Renders the final status as Telegram HTML; failures include up to
+        // 900 characters of the error or output text.
+        const completeHtml = (toolName, result) => {
+          if (!generationKindForToolName(toolName)) return null;
+          if (result.success) {
+            return `<b>${escapeTelegramHTML(kindTitle(toolName))} generation complete</b>`;
+          }
+          const reason = redactTelegramLocalPaths((result.error || result.output || "Generation failed").slice(0, 900));
+          return `<b>${escapeTelegramHTML(kindTitle(toolName))} generation failed</b>
+<code>${escapeTelegramHTML(reason)}</code>`;
+        };
+        return {
+          onProgress: (toolName, event) => {
+            const html = progressHtml(toolName, event);
+            if (!html) return;
+            const state = stateFor(toolName);
+            const now = Date.now();
+            const stage = String(event.stage || "process");
+            const percent = typeof event.percent === "number" && Number.isFinite(event.percent) ? Math.round(event.percent) : void 0;
+            const terminalStage = stage === "save" || stage === "thumbnail" || stage === "hf_token_required";
+            // Throttle: render on the first event, a stage change, a percent
+            // move of 5+ points, 3.5s since the last render, or a terminal stage.
+            const shouldRender = state.lastRenderedAt === 0 || stage !== state.lastStage || typeof percent === "number" && (state.lastPercent === void 0 || Math.abs(percent - state.lastPercent) >= 5) || now - state.lastRenderedAt >= 3500 || terminalStage;
+            if (!shouldRender) return;
+            state.lastRenderedAt = now;
+            state.lastStage = stage;
+            state.lastPercent = percent;
+            enqueue(state, html);
+          },
+          complete: (toolName, result) => {
+            if (!generationKindForToolName(toolName)) return;
+            const state = states.get(toolName);
+            // A success with no prior progress message stays silent; a failure
+            // always gets a message, created on demand below.
+            if (!state && result.success) return;
+            const html = completeHtml(toolName, result);
+            if (!html) return;
+            enqueue(state ?? stateFor(toolName), html);
+          }
+        };
+      }
       // ── Live message streaming (editMessageText pattern) ─────────────────
       /**
        * Send a placeholder message that will be progressively edited with
@@ -639942,6 +640550,7 @@ Scoped workspace: ${scopedRoot}`,
         const imageDefaults = this.imageGenerationDefaultsForRepo(repoRoot);
         const audioDefaults = this.audioGenerationDefaultsForRepo(repoRoot);
         const videoDefaults = this.videoGenerationDefaultsForRepo(repoRoot);
+        const generativeProgress = this.createTelegramGenerativeProgressBridge(chatId, msg);
         const taskComplete = {
           name: "task_complete",
           description: "Internal completion signal for Telegram runs. Put the actual user-facing reply in assistant text before calling this. Use summary 'no_reply' only to silently skip responding; never write that sentinel as assistant text.",
@@ -640088,7 +640697,7 @@ Scoped workspace: ${scopedRoot}`,
             }
           }
         }
-        const unfilteredAdaptedTools = allTools.map((tool) => adaptTool5(tool, todoSessionId));
+        const unfilteredAdaptedTools = allTools.map((tool) => adaptTool5(tool, todoSessionId, generativeProgress));
         let adaptedTools = unfilteredAdaptedTools;
         adaptedTools = applyToolPolicy(adaptedTools, context2, this.toolPolicyConfig);
         if (context2 === "telegram-admin-dm") {
@@ -640103,9 +640712,9 @@ Scoped workspace: ${scopedRoot}`,
             imageDefaults,
             audioDefaults,
             videoDefaults
-          ).map((tool) => adaptTool5(tool, todoSessionId));
+          ).map((tool) => adaptTool5(tool, todoSessionId, generativeProgress));
           adaptedTools.push(...creativeTools);
-          adaptedTools.push(adaptTool5(this.buildTelegramSendFileTool(context2, repoRoot, chatId, msg), todoSessionId));
+          adaptedTools.push(adaptTool5(this.buildTelegramSendFileTool(context2, repoRoot, chatId, msg), todoSessionId, generativeProgress));
           adaptedTools = this.filterNonAdminTelegramTools(adaptedTools);
           adaptedTools = adaptedTools.map((tool) => this.applyTelegramPublicQuota(tool, context2, chatId, msg));
         }
@@ -642850,7 +643459,7 @@ function buildShellLiveBlockLines(state, width) {
   const w = Math.max(36, width);
   const inner = Math.max(1, w - 4);
   const elapsed = Math.max(0, Date.now() - state.startedAt);
-  const status = state.status === "running" ? `live ${formatElapsed(elapsed)}` : state.status;
+  const status = state.status === "running" ? `live ${formatElapsed2(elapsed)}` : state.status;
   const title = ` Shell ${status} `;
   const top = `╭${fitWithFill(`─${title}`, w - 2, "─")}╮`;
   const bottom = `╰${"─".repeat(w - 2)}╯`;
@@ -642895,7 +643504,7 @@ function fitWithFill(value2, width, fill) {
   if (chars.length > width) return chars.slice(0, width).join("");
   return value2 + fill.repeat(width - chars.length);
 }
-function formatElapsed(ms) {
+function formatElapsed2(ms) {
   const seconds = Math.floor(ms / 1e3);
   if (seconds < 60) return `${seconds}s`;
   const minutes = Math.floor(seconds / 60);
@@ -668394,6 +669003,12 @@ function getVersion4() {
   return "0.0.0";
 }
 function adaptTool6(tool) {
+  const progressTool = tool;
+  if (generationKindForToolName(tool.name) && typeof progressTool.setProgressCallback === "function") {
+    progressTool.setProgressCallback((event) => {
+      _generativeProgressSink?.(tool.name, event);
+    });
+  }
   return {
     name: tool.name,
     description: tool.description,
@@ -668704,6 +669319,17 @@ function audioGenerationDefaultsForRepo(repoRoot) {
 function createConfiguredAudioGenerateTool(repoRoot) { // Factory: returns a new AudioGenerateTool for repoRoot.
   return new AudioGenerateTool(repoRoot, audioGenerationDefaultsForRepo(repoRoot)); // second argument is whatever audioGenerationDefaultsForRepo(repoRoot) returns
 }
+function videoGenerationDefaultsForRepo(repoRoot) { // Builds a { model, backend, defaultKind } object from resolveSettings(repoRoot).
+  const settings = resolveSettings(repoRoot);
+  return {
+    model: typeof settings.videoModel === "string" && settings.videoModel.trim() ? settings.videoModel : void 0, // non-string or whitespace-only videoModel yields undefined; otherwise the value is passed through untrimmed (NOTE(review): confirm untrimmed is intended)
+    backend: settings.videoBackend, // forwarded unchanged; no validation in this function
+    defaultKind: settings.videoKind // forwarded unchanged; no validation in this function
+  };
+}
+function createConfiguredVideoGenerateTool(repoRoot) { // Factory: returns a new VideoGenerateTool for repoRoot.
+  return new VideoGenerateTool(repoRoot, videoGenerationDefaultsForRepo(repoRoot)); // second argument is the { model, backend, defaultKind } object built from the repo's settings
+}
 function buildSubAgentTools(repoRoot, config) {
   return [
     // File + search
@@ -668802,6 +669428,7 @@ function buildSubAgentTools(repoRoot, config) {
     new CameraCaptureTool(),
     createConfiguredImageGenerateTool(repoRoot, config.backendUrl),
     createConfiguredAudioGenerateTool(repoRoot),
+    createConfiguredVideoGenerateTool(repoRoot),
     // Hardware sensors + radios (read-only scans)
     new GpsLocationTool(),
     new WifiControlTool(),
@@ -668889,6 +669516,8 @@ function buildTools(repoRoot, config, contextWindowSize, modelTier) {
     createConfiguredImageGenerateTool(repoRoot, config.backendUrl),
     // Sound/music Generation — local Diffusers/AudioCraft/Stable Audio paths
     createConfiguredAudioGenerateTool(repoRoot),
+    // Video Generation — local Diffusers/ComfyUI video pipelines
+    createConfiguredVideoGenerateTool(repoRoot),
     // Structured file reading (CSV, JSON, Markdown, binary detection)
     new StructuredReadTool(repoRoot),
     // Vision tools (Moondream — desktop awareness + point-and-click)
@@ -669551,16 +670180,6 @@ async function renderAsciiPreviewForImage(imagePath, displayPath, title, writer)
     return "";
   }
 }
-function formatImageGenerationProgress2(event) {
-  const elapsed = event.elapsedMs && event.elapsedMs > 1500 ? ` ${Math.round(event.elapsedMs / 1e3)}s` : "";
-  if (typeof event.percent === "number") {
-    const width = 20;
-    const filled = Math.max(0, Math.min(width, Math.round(event.percent / 100 * width)));
-    const bar = `${"#".repeat(filled)}${"-".repeat(width - filled)}`;
-    return `Image ${event.stage}: [${bar}] ${event.percent}% ${event.message}${elapsed}`;
-  }
-  return `Image ${event.stage}: ${event.message}${elapsed}`;
-}
 async function renderAsciiPreviewForToolResult(toolName, output, repoRoot, writer) {
   if (!output) return;
   try {
@@ -670701,14 +671320,11 @@ ${entry.fullContent}`
       fn();
     }
   };
-  for (const tool of tools) {
-    const maybeProgressTool = tool;
-    if (typeof maybeProgressTool.setProgressCallback === "function") {
-      maybeProgressTool.setProgressCallback((event) => {
-        contentWrite(() => renderInfo(formatImageGenerationProgress2(event)));
-      });
-    }
-  }
+  _generativeProgressSink = (toolName, event) => { // Assigns the (toolName, event) progress handler to the module-level _generativeProgressSink variable.
+    const kind = generationKindForToolName(toolName);
+    if (!kind) return; // generationKindForToolName gave a falsy result for this tool name: nothing is rendered
+    contentWrite(() => renderInfo(formatGenerativeProgress(kind, event, { surface: "tui" }))); // formatted progress text is rendered via renderInfo inside a contentWrite callback
+  };
   let liveShellBlock = null;
   const scheduleLiveShellRepaint = () => {
     if (!liveShellBlock || liveShellBlock.repaintTimer || !statusBar?.isActive)
@@ -677484,7 +678100,7 @@ Rules:
     process.exit(1);
   }
 }
-var NEXUS_DIRECTORY_ORIGIN3, NEXUS_AGENT_DIRECTORY_URL, NEXUS_SPONSORS_URL3, _interactiveSessionActive, _interactiveSessionReason, _voiceChatSession2, taskManager, _apiCallbacks, _shellToolRef, _replToolRef, _fullSubAgentToolRef, _agentToolRef, _sendMessageToolRef, _agentLifecycleMgr, _activeRunnerRef, _parentRunnerForArchive, _wireSubAgentCallbacks, _wireAgentToolCallbacks, _wireSubAgentToolCallbacks, _autoUpdatedThisSession, _mcpManager, _pluginManager, _mcpTools, SELF_IMPROVE_INTERVAL, _tasksSinceImprove;
+var NEXUS_DIRECTORY_ORIGIN3, NEXUS_AGENT_DIRECTORY_URL, NEXUS_SPONSORS_URL3, _generativeProgressSink, _interactiveSessionActive, _interactiveSessionReason, _voiceChatSession2, taskManager, _apiCallbacks, _shellToolRef, _replToolRef, _fullSubAgentToolRef, _agentToolRef, _sendMessageToolRef, _agentLifecycleMgr, _activeRunnerRef, _parentRunnerForArchive, _wireSubAgentCallbacks, _wireAgentToolCallbacks, _wireSubAgentToolCallbacks, _autoUpdatedThisSession, _mcpManager, _pluginManager, _mcpTools, SELF_IMPROVE_INTERVAL, _tasksSinceImprove;
 var init_interactive = __esm({
   "packages/cli/src/tui/interactive.ts"() {
     "use strict";
@@ -677493,6 +678109,7 @@ var init_interactive = __esm({
     init_dist8();
     init_dist8();
     init_dist5();
+    init_generative_progress();
     init_dist();
     init_listen();
     init_voice_session();
@@ -677543,6 +678160,7 @@ var init_interactive = __esm({
     NEXUS_DIRECTORY_ORIGIN3 = (process.env["OMNIUS_NEXUS_DIRECTORY_ORIGIN"] || process.env["OMNIUS_NEXUS_SIGNALING_SERVER"] || "https://openagents.nexus").replace(/\/+$/, "");
     NEXUS_AGENT_DIRECTORY_URL = `${NEXUS_DIRECTORY_ORIGIN3}/api/v1/directory`;
     NEXUS_SPONSORS_URL3 = `${NEXUS_DIRECTORY_ORIGIN3}/api/v1/sponsors`;
+    _generativeProgressSink = null;
     _interactiveSessionActive = false;
     _interactiveSessionReason = "";
     _voiceChatSession2 = null;