npm - omnius - Versions diffs - 1.0.186 → 1.0.188 - Mend

omnius 1.0.186 → 1.0.188

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/index.js CHANGED Viewed

@@ -1412,6 +1412,18 @@ var init_tool_executor = __esm({
 import { EventEmitter } from "node:events";
 import { totalmem, freemem } from "node:os";
 import { exec } from "node:child_process";
+/**
+ * Drop repeated models, keeping the first occurrence of each `host:name` pair.
+ *
+ * @param models Iterable of objects exposing `host` and `name`.
+ * @returns A new array in first-seen order; the input is not modified.
+ */
+function dedupeLoadedModels(models) {
+  const firstByKey = /* @__PURE__ */ new Map();
+  for (const entry of models) {
+    const id = `${entry.host}:${entry.name}`;
+    if (!firstByKey.has(id))
+      firstByKey.set(id, entry);
+  }
+  return [...firstByKey.values()];
+}
 function ramSnapshotMB() {
   const total = Math.round(totalmem() / (1024 * 1024));
   const free = Math.round(freemem() / (1024 * 1024));
@@ -1690,6 +1702,104 @@ var init_model_broker = __esm({
         this.emit("rejected", spec, reason);
         return { kind: "reject", reason };
       }
+      /**
+       * Acquire a short-lived load lease for media/subprocess generation.
+       *
+       * Media generation often needs to temporarily free VRAM occupied by Ollama
+       * chat models. This helper refreshes Ollama state, asks the broker what must
+       * be evicted, unloads those Ollama models with keep_alive=0, and returns a
+       * lease whose release() clears transient inflight state, unloads any
+       * Ollama-hosted requested model, and warms the evicted Ollama models again.
+       *
+       * Makes at most four admission attempts; each "evict" decision evicts the
+       * listed targets, re-polls, and retries.
+       *
+       * @param spec Model request forwarded to ensureModelLoadable(); this method
+       *   itself reads spec.domain, spec.host and spec.name.
+       * @param options2 Optional settings: `reason` (default
+       *   `${spec.domain}-transient-load`), `unloadRequestedOllama` (default true),
+       *   `restoreOllama` (restore unless exactly false), `restoreKeepAlive`
+       *   (default "30m").
+       * @returns `{ kind: "ok", lease }` on admission; otherwise the broker's
+       *   non-ok decision, or a `{ kind: "reject", reason }` once attempts run out.
+       *
+       * NOTE(review): models evicted during earlier attempts are only restored by
+       * lease.release(); the non-ok return paths hand back no lease, so those
+       * models stay unloaded — confirm callers are fine with that.
+       */
+      async acquireTransientModelLoad(spec, options2 = {}) {
+        const reason = options2.reason ?? `${spec.domain}-transient-load`;
+        const evictedModels = [];
+        let gpuIndex = null;
+        let admitted = false;
+        // Best-effort state refresh; a failed poll must not block the request.
+        await this.pollOnce().catch(() => {
+        });
+        for (let attempt = 0; attempt < 4; attempt++) {
+          const decision2 = await this.ensureModelLoadable(spec);
+          if (decision2.kind === "wait-for-inflight") {
+            // A rejected in-flight promise is folded into a "reject" decision
+            // so it is handled by the same branches as a broker rejection.
+            const waited = await decision2.promise.catch((err) => ({
+              kind: "reject",
+              reason: err instanceof Error ? err.message : String(err)
+            }));
+            if (waited.kind === "ok") {
+              gpuIndex = waited.gpuIndex ?? null;
+              admitted = true;
+              break;
+            }
+            if (waited.kind === "evict") {
+              for (const target of waited.evictTargets) {
+                // Only record targets for which evict() reported success.
+                if (await this.evict(target.host, target.name, reason))
+                  evictedModels.push(target);
+              }
+              await this.pollOnce().catch(() => {
+              });
+              continue;
+            }
+            if (waited.kind === "degrade")
+              return waited;
+            if (waited.kind === "reject")
+              return waited;
+            // Any other outcome: retry with a fresh broker decision.
+            continue;
+          }
+          if (decision2.kind === "ok") {
+            gpuIndex = decision2.gpuIndex ?? null;
+            admitted = true;
+            break;
+          }
+          if (decision2.kind === "evict") {
+            for (const target of decision2.evictTargets) {
+              // Snapshot the tracked entry before evicting; fall back to the
+              // target itself when the model is not in _loaded.
+              const before = this._loaded.get(`${target.host}:${target.name}`) ?? target;
+              if (await this.evict(target.host, target.name, reason)) {
+                evictedModels.push(before);
+              }
+            }
+            await this.pollOnce().catch(() => {
+            });
+            continue;
+          }
+          // Every remaining decision kind is returned to the caller unchanged
+          // (both statements below return the same value).
+          if (decision2.kind === "degrade")
+            return decision2;
+          return decision2;
+        }
+        if (!admitted) {
+          return {
+            kind: "reject",
+            reason: `could not acquire transient load lease for ${spec.host}:${spec.name} after repeated evictions`
+          };
+        }
+        const evictedOllamaModels = dedupeLoadedModels(evictedModels.filter((m2) => m2.host === "ollama"));
+        // release() is an object method, so capture the broker for use inside it.
+        const broker = this;
+        let released = false;
+        return {
+          kind: "ok",
+          lease: {
+            spec,
+            gpuIndex,
+            evictedModels: dedupeLoadedModels(evictedModels),
+            evictedOllamaModels,
+            async release() {
+              // Second and later calls are no-ops.
+              if (released)
+                return;
+              released = true;
+              broker.clearInflight(spec.host, spec.name);
+              if ((options2.unloadRequestedOllama ?? true) && spec.host === "ollama") {
+                await broker.unloadOllamaModel(spec.name, `${reason}-complete`).catch(() => false);
+              }
+              if (options2.restoreOllama !== false && evictedOllamaModels.length > 0) {
+                await broker.restoreOllamaModels(evictedOllamaModels, {
+                  keepAlive: options2.restoreKeepAlive ?? "30m"
+                });
+              }
+              await broker.pollOnce().catch(() => {
+              });
+            }
+          }
+        };
+      }
       /**
        * Register a model that has been successfully loaded.
        * Callers MUST call this after a successful load so the broker can track LRU.
@@ -1743,6 +1853,66 @@ var init_model_broker = __esm({
         this.emit("evicted", m2, reason);
         return actively;
       }
+      /**
+       * Force-unload an Ollama model even when it is not currently tracked.
+       *
+       * A failed ollamaUnload() promise is reported as `false`. When the model
+       * was tracked under `ollama:<name>`, the entry is removed and an "evicted"
+       * event is emitted regardless of the unload result.
+       *
+       * @returns The value resolved by ollamaUnload(), or `false` if it rejected.
+       */
+      async unloadOllamaModel(modelName, reason = "ollama-unload") {
+        const trackingKey = `ollama:${modelName}`;
+        const tracked = this._loaded.get(trackingKey);
+        const unloaded = await this.ollamaUnload(modelName).catch(() => false);
+        if (!tracked)
+          return unloaded;
+        this._loaded.delete(trackingKey);
+        this.emit("evicted", tracked, reason);
+        return unloaded;
+      }
+      /**
+       * Best-effort warm/reload of an Ollama model after temporary eviction.
+       *
+       * Sends up to three POST requests to `/api/generate`, in order: an empty
+       * prompt with num_predict 0, an empty prompt with num_predict 1, then a
+       * body without a prompt. Each request is aborted after 120 s. The first
+       * response with an ok status wins; the loaded-model list is then refreshed
+       * (refresh errors ignored).
+       *
+       * @returns `true` once any request succeeds, `false` if all three fail.
+       */
+      async warmOllamaModel(modelName, keepAlive = "30m") {
+        const emptyPromptBody = (numPredict) => ({ model: modelName, prompt: "", stream: false, keep_alive: keepAlive, options: { num_predict: numPredict } });
+        const payloads = [
+          emptyPromptBody(0),
+          emptyPromptBody(1),
+          { model: modelName, stream: false, keep_alive: keepAlive }
+        ];
+        for (const payload of payloads) {
+          try {
+            const response = await fetch(`${this._ollamaBaseUrl}/api/generate`, {
+              method: "POST",
+              headers: { "Content-Type": "application/json" },
+              body: JSON.stringify(payload),
+              signal: AbortSignal.timeout(12e4)
+            });
+            if (response.ok) {
+              await this.refreshOllamaPs().catch(() => {
+              });
+              return true;
+            }
+          } catch {
+            // Network error or timeout: fall through to the next request shape.
+          }
+        }
+        return false;
+      }
+      /**
+       * Restore a set of previously evicted Ollama models, oldest first.
+       *
+       * Non-Ollama entries are ignored and duplicates collapsed; the remainder
+       * is warmed one at a time in ascending `lastUsedAt` order. Individual
+       * warm-up failures are ignored.
+       *
+       * @param models Candidate models (objects with `host`, `name`, `lastUsedAt`).
+       * @param options2 Optional `{ keepAlive }`, defaulting to "30m".
+       */
+      async restoreOllamaModels(models, options2 = {}) {
+        const ollamaOnly = models.filter((candidate) => candidate.host === "ollama");
+        const oldestFirst = dedupeLoadedModels(ollamaOnly);
+        oldestFirst.sort((left, right) => left.lastUsedAt - right.lastUsedAt);
+        for (const entry of oldestFirst) {
+          await this.warmOllamaModel(entry.name, options2.keepAlive ?? "30m").catch(() => false);
+        }
+      }
       // ------------------------------------------------------------------
       // Internal — Ollama
       // ------------------------------------------------------------------
@@ -1885,7 +2055,7 @@ var init_model_broker = __esm({
         );
         const idle = (m2) => now - m2.lastUsedAt > this.idleEvictMs;
         const onTargetGpu = (m2) => req2.targetGpu === void 0 || req2.targetGpu === null ? true : m2.gpuIndex === req2.targetGpu;
-        const evictable = [...this._loaded.values()].filter((m2) => m2.priority <= req2.requestingPriority).filter(sameDomainOk).filter(onTargetGpu).sort((a2, b) => {
+        const evictable = [...this._loaded.values()].filter((m2) => m2.priority <= req2.requestingPriority).filter(sameDomainOk).filter(onTargetGpu).filter((m2) => !this.hasActiveSlotForModel(m2)).sort((a2, b) => {
           const aIdle = idle(a2) ? 0 : 1;
           const bIdle = idle(b) ? 0 : 1;
           if (aIdle !== bIdle)
@@ -1931,6 +2101,13 @@ var init_model_broker = __esm({
             n2++;
         return n2;
       }
+      /**
+       * Report whether any active slot is currently bound to the given model.
+       *
+       * NOTE(review): slots are matched on `model.name` only; the host is not
+       * compared.
+       */
+      hasActiveSlotForModel(model) {
+        return [...this._activeSlots.values()].some((slot) => slot.model === model.name);
+      }
       // ------------------------------------------------------------------
       // Internal — fallback resolution
       // ------------------------------------------------------------------
@@ -22761,6 +22938,17 @@ function evictModelsToFreeSpace(args) {
   writeMeta(meta);
   return { evicted, bytesFreed, finalFreeBytes: disk.freeBytes };
 }
+/**
+ * Estimate how many bytes eviction could free from the model cache.
+ *
+ * Sums every metadata entry whose repo is not in `keepRepos`, preferring the
+ * measured on-disk size and falling back to the recorded `sizeBytes` when the
+ * measurement is falsy. Negative sizes count as zero.
+ *
+ * @param keepRepos Optional iterable of repo ids that must not be counted.
+ * @returns Total reclaimable bytes.
+ */
+function estimateReclaimableCacheBytes(keepRepos) {
+  const protectedRepos = new Set(keepRepos ?? []);
+  let reclaimable = 0;
+  for (const entry of readMeta().entries) {
+    if (!protectedRepos.has(entry.repo)) {
+      const bytes = measureRepoCacheBytes(entry.repo) || entry.sizeBytes || 0;
+      reclaimable += Math.max(0, bytes);
+    }
+  }
+  return reclaimable;
+}
 function ensureDiskSpaceForDownload(args) {
   ensureUnifiedCacheDirs();
   const safetyMargin = args.safetyMarginBytes ?? 1 * 1024 ** 3;
@@ -22769,6 +22957,10 @@ function ensureDiskSpaceForDownload(args) {
   if (disk.freeBytes >= target) {
     return { ok: true, evicted: [], freeBytes: disk.freeBytes };
   }
+  const reclaimableBytes = estimateReclaimableCacheBytes(args.keepRepos);
+  if (disk.freeBytes + reclaimableBytes < target) {
+    throw new InsufficientDiskSpaceError(args.approxDownloadBytes, disk.freeBytes, disk.totalBytes, []);
+  }
   const evictionResult = evictModelsToFreeSpace({
     neededBytes: args.approxDownloadBytes,
     keepRepos: args.keepRepos,
@@ -25161,11 +25353,154 @@ ${content}`
   }
 });
+// packages/execution/dist/tools/cuda-device-filter.js
+import { execFileSync as execFileSync3 } from "node:child_process";
+/** Trim an env value; blank or missing values become `undefined`. */
+function cleanEnvValue(value2) {
+  return value2?.trim() || void 0;
+}
+/** True when the value, trimmed and case-folded, is one of 1/true/yes/on. */
+function envFlagEnabled(value2) {
+  const normalized = (value2?.trim() ?? "").toLowerCase();
+  return ["1", "true", "yes", "on"].includes(normalized);
+}
+/**
+ * Extract the first decimal number (e.g. "8.6") from a string.
+ *
+ * @returns The parsed number, or `null` when the input is missing, contains
+ *   no digits, or does not parse to a finite value.
+ */
+function parseCudaComputeCapability(value2) {
+  const [token] = value2?.match(/(\d+(?:\.\d+)?)/) ?? [];
+  if (token === void 0)
+    return null;
+  const capability = Number(token);
+  if (!Number.isFinite(capability))
+    return null;
+  return capability;
+}
+/** Build the per-modality env-var prefix, e.g. "asr" -> "OMNIUS_ASR". */
+function modalityEnvPrefix(modality) {
+  return ["OMNIUS", modality.toUpperCase()].join("_");
+}
+/**
+ * Resolve the minimum CUDA compute capability for a modality.
+ *
+ * Reads `<PREFIX>_MIN_CUDA_CC` first and `OMNIUS_MEDIA_MIN_CUDA_CC` only when
+ * the former yields no number at all. A non-positive or missing result falls
+ * back to DEFAULT_MEDIA_MIN_CUDA_COMPUTE_CAPABILITY.
+ */
+function parseMinCudaComputeCapability(modality, env2) {
+  const specific = parseCudaComputeCapability(env2[`${modalityEnvPrefix(modality)}_MIN_CUDA_CC`]);
+  const configured = specific ?? parseCudaComputeCapability(env2["OMNIUS_MEDIA_MIN_CUDA_CC"]);
+  if (configured && configured > 0)
+    return configured;
+  return DEFAULT_MEDIA_MIN_CUDA_COMPUTE_CAPABILITY;
+}
+/** Split a comma-separated device list into trimmed, non-empty tokens. */
+function splitCudaVisibleDevices(value2) {
+  const tokens = [];
+  for (const piece of (value2 ?? "").split(",")) {
+    const token = piece.trim();
+    if (token)
+      tokens.push(token);
+  }
+  return tokens;
+}
+/**
+ * Parse CSV rows of the form `index, uuid, name..., compute_cap`.
+ *
+ * The first field is the index, the second the UUID and the last the compute
+ * capability; everything in between is re-joined with ", " as the name, so
+ * names containing commas survive. Rows with fewer than four fields or with
+ * an index that is not a non-negative integer are skipped.
+ *
+ * @returns Array of `{ index, uuid, name, computeCapability }`.
+ */
+function parseCudaDeviceInfo(text) {
+  const devices = [];
+  for (const rawLine of text.split(/\r?\n/)) {
+    const fields = rawLine.trim().split(",").map((field) => field.trim());
+    if (fields.length < 4)
+      continue;
+    const [indexText, uuidText, ...nameFields] = fields;
+    const capabilityText = nameFields.pop();
+    const index = Number.parseInt(indexText ?? "", 10);
+    if (!Number.isFinite(index) || index < 0)
+      continue;
+    devices.push({
+      index,
+      uuid: cleanEnvValue(uuidText),
+      name: cleanEnvValue(nameFields.join(", ")),
+      computeCapability: parseCudaComputeCapability(capabilityText)
+    });
+  }
+  return devices;
+}
+/**
+ * Query `nvidia-smi` for index, UUID, name and compute capability of each GPU.
+ *
+ * @returns Parsed device list, or an empty array when the command fails,
+ *   times out (5 s) or its output cannot be parsed.
+ */
+function detectCudaDevices() {
+  const query = ["--query-gpu=index,uuid,name,compute_cap", "--format=csv,noheader,nounits"];
+  const spawnOptions = {
+    encoding: "utf8",
+    timeout: 5e3,
+    stdio: ["ignore", "pipe", "ignore"]
+  };
+  try {
+    return parseCudaDeviceInfo(execFileSync3("nvidia-smi", query, spawnOptions));
+  } catch {
+    return [];
+  }
+}
+/**
+ * Decide the CUDA_VISIBLE_DEVICES value for a media subprocess.
+ *
+ * Resolution order:
+ *  1. An explicit override (`<PREFIX>_CUDA_VISIBLE_DEVICES`,
+ *     `OMNIUS_MEDIA_CUDA_VISIBLE_DEVICES`, or `OMNIUS_AUDIO_GPU` for the
+ *     "audio" modality) is returned verbatim.
+ *  2. With filtering disabled, or when no device meets the minimum compute
+ *     capability, the current CUDA_VISIBLE_DEVICES is returned unchanged.
+ *  3. Otherwise the current list is narrowed to compatible devices; if none
+ *     of the requested tokens is compatible (or nothing was requested), all
+ *     compatible device indexes are returned.
+ *
+ * Tokens are matched by index, by full UUID, or by an abbreviated UUID prefix
+ * (CUDA accepts abbreviated `GPU-…` forms). Lists containing any other token
+ * shape are passed through untouched.
+ *
+ * NOTE(review): index tokens are compared with nvidia-smi indexes; this
+ * presumably assumes CUDA_DEVICE_ORDER=PCI_BUS_ID — confirm.
+ *
+ * @param args `{ modality, env?, devices?, minComputeCapability? }`.
+ * @returns Comma-separated device list, or `undefined` when nothing applies.
+ */
+function resolveMediaCudaVisibleDevicesForEnv(args) {
+  const env2 = args.env ?? process.env;
+  const prefix = modalityEnvPrefix(args.modality);
+  const explicit = cleanEnvValue(env2[`${prefix}_CUDA_VISIBLE_DEVICES`]) ?? cleanEnvValue(env2["OMNIUS_MEDIA_CUDA_VISIBLE_DEVICES"]) ?? (args.modality === "audio" ? cleanEnvValue(env2["OMNIUS_AUDIO_GPU"]) : void 0);
+  if (explicit)
+    return explicit;
+  const current = cleanEnvValue(env2["CUDA_VISIBLE_DEVICES"]);
+  if (envFlagEnabled(env2[`${prefix}_DISABLE_CUDA_FILTER`]) || envFlagEnabled(env2["OMNIUS_MEDIA_DISABLE_CUDA_FILTER"])) {
+    return current;
+  }
+  const devices = args.devices ?? detectCudaDevices();
+  const minComputeCapability = args.minComputeCapability ?? parseMinCudaComputeCapability(args.modality, env2);
+  const compatible = devices.filter((device) => device.computeCapability !== null && device.computeCapability >= minComputeCapability);
+  if (compatible.length === 0)
+    return current;
+  const compatibleIndexes = compatible.map((device) => String(device.index));
+  if (!current)
+    return compatibleIndexes.join(",");
+  const requested = splitCudaVisibleDevices(current);
+  const canFilter = requested.length > 0 && requested.every((token) => /^\d+$/.test(token) || token.startsWith("GPU-"));
+  if (!canFilter)
+    return current;
+  const uuidPrefix = "GPU-";
+  const isCompatibleToken = (token) => compatible.some((device) => {
+    if (String(device.index) === token)
+      return true;
+    if (!device.uuid)
+      return false;
+    if (device.uuid === token)
+      return true;
+    // Abbreviated UUID: require at least one character beyond the "GPU-" prefix.
+    return token.startsWith(uuidPrefix) && token.length > uuidPrefix.length && device.uuid.startsWith(token);
+  });
+  const kept = requested.filter(isCompatibleToken);
+  return (kept.length > 0 ? kept : compatibleIndexes).join(",");
+}
+/**
+ * Check whether a broker-chosen GPU index passes the media CUDA filter.
+ *
+ * Answers `true` whenever the filter cannot decide: filtering disabled by
+ * env flag, no devices detected, unknown index, or a device with a `null`
+ * compute capability. Otherwise compares the device's capability with the
+ * modality's minimum.
+ */
+function mediaBrokerGpuIndexIsCompatible(gpuIndex, modality, env2 = process.env, devices = detectCudaDevices()) {
+  const prefix = modalityEnvPrefix(modality);
+  const filterDisabled = envFlagEnabled(env2[`${prefix}_DISABLE_CUDA_FILTER`]) || envFlagEnabled(env2["OMNIUS_MEDIA_DISABLE_CUDA_FILTER"]);
+  if (filterDisabled || devices.length === 0)
+    return true;
+  const requiredCapability = parseMinCudaComputeCapability(modality, env2);
+  const selected = devices.find((candidate) => candidate.index === gpuIndex);
+  const undecidable = !selected || selected.computeCapability === null;
+  return undecidable ? true : selected.computeCapability >= requiredCapability;
+}
+/**
+ * Write the resolved CUDA device selection into `env2` (mutated in place).
+ *
+ * When a device list is resolved, CUDA_VISIBLE_DEVICES is set and
+ * PYTORCH_NVML_BASED_CUDA_CHECK is ensured. A value already present on `env2`
+ * is kept, so caller-supplied overrides are not clobbered; otherwise the
+ * process-level value is used, defaulting to "1".
+ *
+ * @param env2 Environment object for the subprocess.
+ * @param modality Media modality used to pick env-var prefixes.
+ * @returns The same `env2` object.
+ */
+function applyMediaCudaDeviceFilterToEnv(env2, modality) {
+  const cudaVisibleDevices = resolveMediaCudaVisibleDevicesForEnv({ modality, env: env2 });
+  if (cudaVisibleDevices) {
+    env2["CUDA_VISIBLE_DEVICES"] = cudaVisibleDevices;
+    env2["PYTORCH_NVML_BASED_CUDA_CHECK"] = env2["PYTORCH_NVML_BASED_CUDA_CHECK"] ?? process.env["PYTORCH_NVML_BASED_CUDA_CHECK"] ?? "1";
+  }
+  return env2;
+}
+// Declared here and assigned inside the __esm initializer below.
+var DEFAULT_MEDIA_MIN_CUDA_COMPUTE_CAPABILITY;
+var init_cuda_device_filter = __esm({
+  "packages/execution/dist/tools/cuda-device-filter.js"() {
+    "use strict";
+    // Value parseMinCudaComputeCapability() falls back to when no *_MIN_CUDA_CC
+    // env override parses to a positive number.
+    DEFAULT_MEDIA_MIN_CUDA_COMPUTE_CAPABILITY = 7.5;
+  }
+});
 // packages/execution/dist/tools/transcribe-tool.js
 import { existsSync as existsSync29, mkdirSync as mkdirSync13, writeFileSync as writeFileSync14, readFileSync as readFileSync22, unlinkSync as unlinkSync3, readdirSync as readdirSync13 } from "node:fs";
 import { join as join32, basename as basename6, extname as extname3, resolve as resolve17 } from "node:path";
 import { homedir as homedir10 } from "node:os";
-import { execFileSync as execFileSync3, execSync as execSync15 } from "node:child_process";
+import { execFileSync as execFileSync4, execSync as execSync15 } from "node:child_process";
+/**
+ * Build the environment for transcription subprocesses: a copy of
+ * process.env overlaid with `extra`, then passed through the "asr" CUDA
+ * device filter.
+ */
+function transcriptionPythonEnv(extra = {}) {
+  return applyMediaCudaDeviceFilterToEnv({ ...process.env, ...extra }, "asr");
+}
+/**
+ * Run `fn` with the given variables temporarily applied to process.env.
+ *
+ * Every key of `env2` is set (or deleted when its value is `undefined`);
+ * previous values are put back in a `finally`, whether `fn` resolves or
+ * throws.
+ *
+ * NOTE(review): process.env is global, so overlapping async calls can observe
+ * or restore each other's values.
+ *
+ * @returns Whatever `fn` resolves to.
+ */
+async function withProcessEnv(env2, fn) {
+  const assign = (name, value2) => {
+    if (value2 === void 0)
+      delete process.env[name];
+    else
+      process.env[name] = value2;
+  };
+  const snapshot = [];
+  for (const [name, value2] of Object.entries(env2)) {
+    snapshot.push([name, process.env[name]]);
+    assign(name, value2);
+  }
+  try {
+    return await fn();
+  } finally {
+    for (const [name, prior] of snapshot)
+      assign(name, prior);
+  }
+}
 function whisperRamEstimate(model) {
   const m2 = model.toLowerCase();
   if (m2.includes("large"))
@@ -25251,6 +25586,7 @@ var init_transcribe_tool = __esm({
     "use strict";
     init_model_broker();
     init_network_egress_policy();
+    init_cuda_device_filter();
     AUDIO_EXTS = /* @__PURE__ */ new Set([
       ".mp3",
       ".wav",
@@ -25353,13 +25689,13 @@ var init_transcribe_tool = __esm({
           return this.execViaCli(filePath, model, diarize, start2);
         }
         try {
-          const result = await tc.transcribe(filePath, {
+          const result = await withProcessEnv(transcriptionPythonEnv(), () => tc.transcribe(filePath, {
             model,
             format: "json",
             diarize,
             wordTimestamps: true
             // Always get timestamps for structured output
-          });
+          }));
           const transcriptDir = join32(this.workingDir, ".omnius", "transcripts");
           mkdirSync13(transcriptDir, { recursive: true });
           const fileBase = basename6(filePath).replace(/\.[^.]+$/, "");
@@ -25453,7 +25789,8 @@ var init_transcribe_tool = __esm({
             timeout: 3e5,
             // 5 min max
             cwd: this.workingDir,
-            stdio: ["pipe", "pipe", "pipe"]
+            stdio: ["pipe", "pipe", "pipe"],
+            env: transcriptionPythonEnv()
           });
           return {
             success: true,
@@ -25522,7 +25859,7 @@ var init_transcribe_tool = __esm({
             }
             tmpFile = `${tmpBase}.mp3`;
             try {
-              execFileSync3("yt-dlp", ["-x", "--audio-format", "mp3", "--audio-quality", "5", "-o", `${tmpBase}.%(ext)s`, url], { timeout: 3e5, stdio: ["pipe", "pipe", "pipe"] });
+              execFileSync4("yt-dlp", ["-x", "--audio-format", "mp3", "--audio-quality", "5", "-o", `${tmpBase}.%(ext)s`, url], { timeout: 3e5, stdio: ["pipe", "pipe", "pipe"] });
               if (!existsSync29(tmpFile)) {
                 const files = readdirSync13(tmpDir).filter((f2) => f2.startsWith(`download-`) && f2 !== ".gitkeep");
                 const match = files.find((f2) => f2.includes(basename6(tmpBase)));
@@ -25647,13 +25984,13 @@ ${result.output}`,
         try {
           let title = "download";
           try {
-            title = execFileSync3("yt-dlp", ["--get-title", url], { timeout: 15e3, stdio: "pipe" }).toString().trim().replace(/[<>:"/\\|?*]/g, "_").slice(0, 100);
+            title = execFileSync4("yt-dlp", ["--get-title", url], { timeout: 15e3, stdio: "pipe" }).toString().trim().replace(/[<>:"/\\|?*]/g, "_").slice(0, 100);
           } catch {
           }
           if (format3 === "mp4") {
             const outPath = join32(outputDir, `${title}.mp4`);
             const outTemplate = join32(outputDir, `${title}.%(ext)s`);
-            execFileSync3("yt-dlp", [
+            execFileSync4("yt-dlp", [
               "-f",
               "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best",
               "--merge-output-format",
@@ -25673,7 +26010,7 @@ Format: mp4`,
           } else {
             const outPath = join32(outputDir, `${title}.mp3`);
             const outTemplate = join32(outputDir, `${title}.%(ext)s`);
-            execFileSync3("yt-dlp", ["-x", "--audio-format", "mp3", "--audio-quality", "0", "-o", outTemplate, url], { timeout: 6e5, stdio: "pipe", cwd: outputDir });
+            execFileSync4("yt-dlp", ["-x", "--audio-format", "mp3", "--audio-quality", "0", "-o", outTemplate, url], { timeout: 6e5, stdio: "pipe", cwd: outputDir });
             const actualPath = existsSync29(outPath) ? outPath : outTemplate.replace("%(ext)s", "mp3");
             return {
               success: true,
@@ -259063,6 +259400,12 @@ function imageCandidateFor(model, requestedBackend) {
     preset: getImageGenerationPreset(resolved)
   };
 }
+/** Map an image candidate to its broker host: "ollama" or "diffusers-py". */
+function imageCandidateHost(candidate) {
+  if (candidate.backend === "ollama")
+    return "ollama";
+  return "diffusers-py";
+}
+/**
+ * Convert the candidate preset's `minVramGB` to whole megabytes (rounded up).
+ *
+ * @returns The estimate in MB, or `undefined` when the preset or its
+ *   `minVramGB` is absent.
+ */
+function imageCandidateEstimatedVramMB(candidate) {
+  const minVramGB = candidate.preset?.minVramGB;
+  if (minVramGB === void 0)
+    return void 0;
+  return Math.ceil(minVramGB * 1024);
+}
 function imageGenerationFallbackCandidates(requestedModel, requestedBackend, allowFallback = true) {
   const ladder = imageGenerationQualityLadder();
   const candidates = [];
@@ -259246,7 +259589,9 @@ function formatDiffusersFailure(stderrOrStdout) {
 Note: ${note}`)].filter(Boolean).join("");
 }
 function imageGenerationPythonEnv(_repoRoot) {
-  return unifiedPythonEnv();
+  const env2 = unifiedPythonEnv();
+  applyMediaCudaDeviceFilterToEnv(env2, "image");
+  return env2;
 }
 function approxImageDownloadBytes(preset) {
   if (preset?.approxDownloadGB)
@@ -259364,9 +259709,15 @@ function annotateImageFallbackSuccess(result, failed, winner) {
     ...failed.map((attempt, index) => `  ${formatImageAttempt(attempt.candidate, attempt.reason, index)}`),
     ""
   ].join("\n");
+  const llmPrefix = [
+    `Fallback ladder used ${winner.model} [${winner.backend}] after ${failed.length} failed attempt(s).`,
+    ...failed.map((attempt, index) => formatImageAttempt(attempt.candidate, attempt.reason, index))
+  ].join("\n");
   return {
     ...result,
-    output: prefix + result.output
+    output: prefix + result.output,
+    llmContent: result.llmContent ? `${llmPrefix}
+${result.llmContent}` : result.llmContent
   };
 }
 function parseRunnerJson(stdout) {
@@ -259389,6 +259740,7 @@ var init_image_generate = __esm({
     init_venv_paths();
     init_model_store();
     init_hf_media_models();
+    init_cuda_device_filter();
     DEFAULT_DIFFUSERS_IMAGE_MODEL = "Efficient-Large-Model/SANA1.5_1.6B_1024px_diffusers";
     DEFAULT_OLLAMA_IMAGE_MODEL = "x/flux2-klein";
     LEGACY_SDXL_TURBO_MODEL = "stabilityai/sdxl-turbo";
@@ -260173,6 +260525,45 @@ if __name__ == "__main__":
         this.lastProgressAt = now;
         this.progressHandler(event);
       }
+      /**
+       * Ask the model broker for a transient load lease for one image candidate.
+       *
+       * @param args `{ candidate, reason, start }`; `start` is the
+       *   performance.now() baseline used for `durationMs` in failure results.
+       * @returns `null` when there is no candidate; a failed tool result
+       *   (`success: false`) when the broker rejects or asks to degrade;
+       *   otherwise `{ lease, gpuIndex }`. A progress event is emitted when
+       *   Ollama models were evicted to make room.
+       */
+      async acquireTransientLoadLease(args) {
+        const { candidate } = args;
+        if (!candidate)
+          return null;
+        const broker = getModelBroker();
+        const spec = {
+          name: candidate.model,
+          domain: "image-gen",
+          host: imageCandidateHost(candidate),
+          owner: "image-generate-tool",
+          estimatedVramMB: imageCandidateEstimatedVramMB(candidate)
+        };
+        const outcome = await broker.acquireTransientModelLoad(spec, {
+          reason: args.reason,
+          restoreOllama: true,
+          unloadRequestedOllama: true
+        });
+        // Shared shape for both broker refusals.
+        const blocked = (error) => ({
+          success: false,
+          output: "",
+          error,
+          durationMs: performance.now() - args.start
+        });
+        switch (outcome.kind) {
+          case "reject":
+            return blocked(`Image generation blocked by resource broker: ${outcome.reason}`);
+          case "degrade":
+            return blocked(`Image generation needs a broker fallback (${outcome.fallback.name}), but image candidate fallback must be selected by the image ladder: ${outcome.reason}`);
+        }
+        const { lease } = outcome;
+        if (lease.evictedOllamaModels.length > 0) {
+          this.emitProgress({
+            stage: "setup",
+            message: `Temporarily unloaded ${lease.evictedOllamaModels.length} Ollama inference model(s) to free VRAM for image generation`
+          });
+        }
+        return { lease, gpuIndex: lease.gpuIndex };
+      }
       async execute(args) {
         const start2 = performance.now();
         const action = String(args["action"] ?? "generate");
@@ -260215,33 +260606,6 @@ if __name__ == "__main__":
         const requestedBackend = args["backend"] ? String(args["backend"]) : this.defaultBackend;
         const seed = optionalNumberArg(args["seed"]);
         const candidates = imageGenerationFallbackCandidates(requestedModel, requestedBackend, generationFallbackEnabled(args));
-        const broker = getModelBroker();
-        const firstCandidate = candidates[0];
-        let brokerGpuIndex = null;
-        if (firstCandidate) {
-          const decision2 = await broker.ensureModelLoadable({
-            name: firstCandidate.model,
-            domain: "image-gen",
-            host: firstCandidate.backend === "ollama" ? "ollama" : "diffusers-py",
-            owner: "image-generate-tool"
-          });
-          if (decision2.kind === "evict") {
-            for (const target of decision2.evictTargets) {
-              await broker.evict(target.host, target.name, "image-gen-needs-room");
-            }
-            brokerGpuIndex = decision2.gpuIndex ?? null;
-          } else if (decision2.kind === "ok") {
-            brokerGpuIndex = decision2.gpuIndex ?? null;
-          } else if (decision2.kind === "reject") {
-            return {
-              success: false,
-              output: "",
-              error: `Image generation blocked by resource broker: ${decision2.reason}`,
-              durationMs: performance.now() - start2
-            };
-          }
-        }
-        this._brokerGpuIndex = brokerGpuIndex;
         try {
           return await this.generateCandidateLadder({ candidates, prompt, args, seed, start: start2 });
         } catch (err) {
@@ -260260,7 +260624,30 @@ if __name__ == "__main__":
             stage: "setup",
             message: `Preparing image model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
           });
-          const result = candidate.backend === "ollama" ? await this.prewarmOllama({ model: candidate.model, start: args.start }) : candidate.backend === "sdcpp" ? await this.prewarmSdCpp({ model: candidate.model, start: args.start, python: args.args["python"] }) : await this.prewarmDiffusers({ model: candidate.model, start: args.start, python: args.args["python"] });
+          const leaseDecision = await this.acquireTransientLoadLease({
+            candidate,
+            reason: "image-prewarm-needs-room",
+            start: args.start
+          });
+          if (leaseDecision && "success" in leaseDecision) {
+            failed.push({ candidate, reason: summarizeToolResult(leaseDecision) });
+            if (index < args.candidates.length - 1) {
+              this.emitProgress({
+                stage: "setup",
+                message: `${candidate.model} did not fit current resources; trying ${args.candidates[index + 1].model}`
+              });
+            }
+            continue;
+          }
+          const lease = leaseDecision?.lease;
+          this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
+          let result;
+          try {
+            result = candidate.backend === "ollama" ? await this.prewarmOllama({ model: candidate.model, start: args.start }) : candidate.backend === "sdcpp" ? await this.prewarmSdCpp({ model: candidate.model, start: args.start, python: args.args["python"] }) : await this.prewarmDiffusers({ model: candidate.model, start: args.start, python: args.args["python"] });
+          } finally {
+            await lease?.release();
+            this._brokerGpuIndex = null;
+          }
           if (result.success)
             return annotateImageFallbackSuccess(result, failed, candidate);
           failed.push({ candidate, reason: summarizeToolResult(result) });
@@ -260299,7 +260686,30 @@ if __name__ == "__main__":
             message: `Using image model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
           });
           const promptForCandidate = expansionEnabled ? await this.expandPromptForCandidate(args.prompt, candidate, index, args.candidates.length) : args.prompt;
-          const result = candidate.backend === "ollama" ? await this.generateWithOllama({ prompt: promptForCandidate, model: candidate.model, width, height, steps, start: args.start }) : candidate.backend === "sdcpp" ? await this.generateWithSdCpp({ prompt: promptForCandidate, model: candidate.model, width, height, steps, seed: args.seed, start: args.start, python: args.args["python"] }) : await this.generateWithDiffusers({ prompt: promptForCandidate, model: candidate.model, width, height, steps, guidance, seed: args.seed, start: args.start, python: args.args["python"] });
+          const leaseDecision = await this.acquireTransientLoadLease({
+            candidate,
+            reason: "image-gen-needs-room",
+            start: args.start
+          });
+          if (leaseDecision && "success" in leaseDecision) {
+            failed.push({ candidate, reason: summarizeToolResult(leaseDecision) });
+            if (index < args.candidates.length - 1) {
+              this.emitProgress({
+                stage: "setup",
+                message: `${candidate.model} did not fit current resources; falling back to ${args.candidates[index + 1].model}`
+              });
+            }
+            continue;
+          }
+          const lease = leaseDecision?.lease;
+          this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
+          let result;
+          try {
+            result = candidate.backend === "ollama" ? await this.generateWithOllama({ prompt: promptForCandidate, model: candidate.model, width, height, steps, start: args.start }) : candidate.backend === "sdcpp" ? await this.generateWithSdCpp({ prompt: promptForCandidate, model: candidate.model, width, height, steps, seed: args.seed, start: args.start, python: args.args["python"] }) : await this.generateWithDiffusers({ prompt: promptForCandidate, model: candidate.model, width, height, steps, guidance, seed: args.seed, start: args.start, python: args.args["python"] });
+          } finally {
+            await lease?.release();
+            this._brokerGpuIndex = null;
+          }
           if (result.success) {
             await this.writeImageSidecar(result, {
               originalPrompt: args.prompt,
@@ -260523,6 +260933,17 @@ ${errText.slice(0, 1200)}`,
         }
         ensureUnifiedCacheDirs();
         this.emitProgress({ stage: "load", message: `Downloading/loading image model ${args.model}` });
+        const runnerEnv = { ...python.env };
+        if (this._brokerGpuIndex !== null) {
+          if (mediaBrokerGpuIndexIsCompatible(this._brokerGpuIndex, "image", runnerEnv)) {
+            runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
+          } else {
+            this.emitProgress({
+              stage: "setup",
+              message: `Broker selected CUDA GPU ${this._brokerGpuIndex}, but image CUDA filtering excluded it; using CUDA_VISIBLE_DEVICES=${runnerEnv["CUDA_VISIBLE_DEVICES"] ?? "default"}`
+            });
+          }
+        }
         const result = await runProcess2(python.command, [
           runner,
           "--model",
@@ -260535,7 +260956,7 @@ ${errText.slice(0, 1200)}`,
         ], {
           cwd: this.cwd,
           timeoutMs: 18e5,
-          env: python.env,
+          env: runnerEnv,
           progressLabel: `Downloading/loading ${args.model}`,
           onProgress: (event) => this.emitProgress(event)
         });
@@ -260746,7 +261167,14 @@ ${errText.slice(0, 800)}`,
         this.emitProgress({ stage: "load", message: `Starting image generation with ${args.model}` });
         const runnerEnv = { ...python.env };
         if (this._brokerGpuIndex !== null) {
-          runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
+          if (mediaBrokerGpuIndexIsCompatible(this._brokerGpuIndex, "image", runnerEnv)) {
+            runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
+          } else {
+            this.emitProgress({
+              stage: "setup",
+              message: `Broker selected CUDA GPU ${this._brokerGpuIndex}, but image CUDA filtering excluded it; using CUDA_VISIBLE_DEVICES=${runnerEnv["CUDA_VISIBLE_DEVICES"] ?? "default"}`
+            });
+          }
         }
         const result = await runProcess2(python.command, argv, {
           cwd: this.cwd,
@@ -260948,6 +261376,7 @@ __export(audio_generate_exports, {
   AudioGenerateTool: () => AudioGenerateTool,
   DEFAULT_MUSIC_MODEL: () => DEFAULT_MUSIC_MODEL,
   DEFAULT_SOUND_MODEL: () => DEFAULT_SOUND_MODEL,
+  audioBrokerGpuIndexIsCompatible: () => audioBrokerGpuIndexIsCompatible,
   audioGenerationDir: () => audioGenerationDir,
   audioGenerationFallbackCandidates: () => audioGenerationFallbackCandidates,
   audioGenerationModelPresets: () => audioGenerationModelPresets,
@@ -260958,9 +261387,11 @@ __export(audio_generate_exports, {
   findNonGatedAudioFallback: () => findNonGatedAudioFallback,
   getAudioGenerationPreset: () => getAudioGenerationPreset,
   inferAudioGenerationBackend: () => inferAudioGenerationBackend,
-  isAudioPresetGated: () => isAudioPresetGated
+  isAudioPresetGated: () => isAudioPresetGated,
+  parseAudioCudaDeviceInfo: () => parseAudioCudaDeviceInfo,
+  resolveAudioCudaVisibleDevicesForEnv: () => resolveAudioCudaVisibleDevicesForEnv
 });
-import { execFileSync as execFileSync4, spawn as spawn8 } from "node:child_process";
+import { spawn as spawn8 } from "node:child_process";
 import { existsSync as existsSync32, readdirSync as readdirSync14, statSync as statSync13 } from "node:fs";
 import { chmod as chmod4, mkdir as mkdir13, writeFile as writeFile18 } from "node:fs/promises";
 import { join as join42 } from "node:path";
@@ -261025,25 +261456,36 @@ function backendPackages(backend) {
     return TANGOFLUX_PACKAGES;
   return DIFFUSERS_AUDIO_PACKAGES;
 }
+function splitCudaVisibleDevices2(value2) {
+  return (value2 ?? "").split(",").map((part) => part.trim()).filter(Boolean);
+}
+function parseAudioCudaDeviceInfo(text) {
+  return parseCudaDeviceInfo(text);
+}
+function detectAudioCudaDevices() {
+  return detectCudaDevices();
+}
+function resolveAudioCudaVisibleDevicesForEnv(args = {}) {
+  return resolveMediaCudaVisibleDevicesForEnv({
+    modality: "audio",
+    env: args.env,
+    devices: args.devices,
+    minComputeCapability: args.minComputeCapability
+  });
+}
+function audioBrokerGpuIndexIsCompatible(gpuIndex, env2 = process.env, devices = detectAudioCudaDevices()) {
+  return mediaBrokerGpuIndexIsCompatible(gpuIndex, "audio", env2, devices);
+}
 function detectLegacyCudaComputeCapability() {
-  try {
-    const out = execFileSync4("nvidia-smi", ["--query-gpu=compute_cap,name", "--format=csv,noheader,nounits"], {
-      encoding: "utf8",
-      timeout: 5e3,
-      stdio: ["ignore", "pipe", "ignore"]
-    }).trim();
-    const first2 = out.split(/\r?\n/).map((line) => line.trim()).find(Boolean);
-    const match = first2?.match(/^(\d+)\.(\d+)\s*,?\s*(.*)$/);
-    if (!match)
-      return null;
-    const major = Number(match[1]);
-    const minor = Number(match[2]);
-    if (!Number.isFinite(major) || !Number.isFinite(minor))
-      return null;
-    return { major, minor, name: match[3]?.trim() || void 0 };
-  } catch {
+  const devices = detectAudioCudaDevices();
+  const visible = new Set(splitCudaVisibleDevices2(resolveAudioCudaVisibleDevicesForEnv({ devices })));
+  const candidates = visible.size > 0 ? devices.filter((device) => visible.has(String(device.index)) || device.uuid && visible.has(device.uuid)) : devices;
+  const legacy = candidates.find((device) => device.computeCapability !== null && isLegacyCudaCapability(Math.floor(device.computeCapability), Math.round(device.computeCapability % 1 * 10)));
+  if (!legacy || legacy.computeCapability === null)
     return null;
-  }
+  const major = Math.floor(legacy.computeCapability);
+  const minor = Math.round(legacy.computeCapability % 1 * 10);
+  return { major, minor, name: legacy.name };
 }
 function isLegacyCudaCapability(major, minor) {
   return major < 7 || major === 7 && minor < 5;
@@ -261087,7 +261529,9 @@ function backendImportCheck(backend) {
   return "import torch, diffusers, scipy\nfrom diffusers import AudioLDMPipeline\n";
 }
 function audioGenerationPythonEnv(_repoRoot) {
-  return unifiedPythonEnv();
+  const env2 = unifiedPythonEnv();
+  applyMediaCudaDeviceFilterToEnv(env2, "audio");
+  return env2;
 }
 function approxAudioDownloadBytes(preset) {
   if (preset?.approxDownloadGB)
@@ -261341,8 +261785,8 @@ function formatAudioSetupFailure(backend, text) {
   if (lowered.includes("cuda") && lowered.includes("not available")) {
     notes2.push("CUDA was not available to the selected Python environment; install a Torch build matching this machine's CUDA runtime or use CPU-compatible settings.");
   }
-  if (lowered.includes("cudnn version") && lowered.includes("sm < 7.5")) {
-    notes2.push("The installed PyTorch wheel uses cuDNN 9 on a legacy CUDA GPU. Omnius now repairs audio-generation venvs by reinstalling PyTorch 2.3.1 from the cu118 index for SM < 7.5 hardware.");
+  if (lowered.includes("cudnn") && lowered.includes("incompatible") || lowered.includes("sm < 7.5") || lowered.includes("not compatible") && (lowered.includes("sm_") || lowered.includes("compute capability"))) {
+    notes2.push(`The installed PyTorch wheel is touching a legacy CUDA GPU. Audio generation auto-filters CUDA devices below SM 7.5; current resolved CUDA_VISIBLE_DEVICES=${resolveAudioCudaVisibleDevicesForEnv() ?? "unset"}. Override with OMNIUS_AUDIO_CUDA_VISIBLE_DEVICES or disable with OMNIUS_AUDIO_DISABLE_CUDA_FILTER=1.`);
   }
   return [body, ...notes2.map((note) => `
 ${note}`)].filter(Boolean).join("");
@@ -261564,6 +262008,18 @@ function audioCandidateFor(kind, model, requestedBackend) {
     preset: getAudioGenerationPreset(model, kind)
   };
 }
+function audioCandidateHost(candidate) {
+  if (candidate.backend === "project")
+    return null;
+  if (candidate.backend === "audiocraft")
+    return "audiocraft";
+  if (candidate.backend === "tangoflux")
+    return "tangoflux";
+  return "diffusers-py";
+}
+function audioCandidateEstimatedVramMB(candidate) {
+  return candidate.preset ? Math.ceil(candidate.preset.minVramGB * 1024) : void 0;
+}
 function audioGenerationFallbackCandidates(kind, requestedModel, requestedBackend, allowFallback = true) {
   const ladder = audioGenerationQualityLadder(kind);
   const candidates = [];
@@ -261720,9 +262176,15 @@ function annotateAudioFallbackSuccess(result, failed, winner) {
     ...failed.map((attempt, index) => `  ${formatAudioAttempt(attempt.candidate, attempt.reason, index)}`),
     ""
   ].join("\n");
+  const llmPrefix = [
+    `Fallback ladder used ${winner.model} [${winner.backend}] after ${failed.length} failed attempt(s).`,
+    ...failed.map((attempt, index) => formatAudioAttempt(attempt.candidate, attempt.reason, index))
+  ].join("\n");
   return {
     ...result,
-    output: prefix + result.output
+    output: prefix + result.output,
+    llmContent: result.llmContent ? `${llmPrefix}
+${result.llmContent}` : result.llmContent
   };
 }
 var DEFAULT_SOUND_MODEL, DEFAULT_MUSIC_MODEL, DIFFUSERS_AUDIO_PACKAGES, TRANSFORMERS_AUDIO_PACKAGES, AUDIOCRAFT_PACKAGES, STABLE_AUDIO_PACKAGES, TANGOFLUX_PACKAGES, AUDIO_GENERATION_MODEL_PRESETS, SOUND_GENERATION_QUALITY_LADDER, MUSIC_GENERATION_QUALITY_LADDER, DIFFUSERS_AUDIO_RUNNER, AUDIOCRAFT_RUNNER, TRANSFORMERS_AUDIO_RUNNER, TANGOFLUX_RUNNER, AudioGenerateTool;
@@ -261733,6 +262195,7 @@ var init_audio_generate = __esm({
     init_venv_paths();
     init_model_store();
     init_hf_media_models();
+    init_cuda_device_filter();
     DEFAULT_SOUND_MODEL = "cvssp/audioldm-s-full-v2";
     DEFAULT_MUSIC_MODEL = "facebook/musicgen-small";
     DIFFUSERS_AUDIO_PACKAGES = [
@@ -262094,9 +262557,9 @@ var init_audio_generate = __esm({
 import argparse, json, os, sys, time
 from pathlib import Path
-# Broker-picked GPU pinning — must run before importing torch.
-_omnius_gpu = os.environ.get("OMNIUS_GPU_INDEX", "").strip()
-if _omnius_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ:
+# Broker/audio GPU pinning — must run before importing torch.
+_omnius_gpu = os.environ.get("OMNIUS_GPU_INDEX", "").strip() or os.environ.get("OMNIUS_AUDIO_GPU", "").strip()
+if _omnius_gpu:
     os.environ["CUDA_VISIBLE_DEVICES"] = _omnius_gpu
 def _format_bytes(value):
@@ -262239,9 +262702,14 @@ if __name__ == "__main__":
     main()
 `;
     AUDIOCRAFT_RUNNER = String.raw`#!/usr/bin/env python3
-import argparse, json, sys, time
+import argparse, json, os, sys, time
 from pathlib import Path
+# Broker/audio GPU pinning — must run before importing torch/audiocraft.
+_omnius_gpu = os.environ.get("OMNIUS_GPU_INDEX", "").strip() or os.environ.get("OMNIUS_AUDIO_GPU", "").strip()
+if _omnius_gpu:
+    os.environ["CUDA_VISIBLE_DEVICES"] = _omnius_gpu
 def _progress(stage, message, percent=None):
     payload = {"omnius_progress": True, "stage": stage, "message": message}
     if percent is not None:
@@ -262295,9 +262763,9 @@ if __name__ == "__main__":
 import argparse, json, os, sys, time
 from pathlib import Path
-# Broker-picked GPU pinning — must run before importing torch.
-_omnius_gpu = os.environ.get("OMNIUS_GPU_INDEX", "").strip()
-if _omnius_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ:
+# Broker/audio GPU pinning — must run before importing torch.
+_omnius_gpu = os.environ.get("OMNIUS_GPU_INDEX", "").strip() or os.environ.get("OMNIUS_AUDIO_GPU", "").strip()
+if _omnius_gpu:
     os.environ["CUDA_VISIBLE_DEVICES"] = _omnius_gpu
 def _format_bytes(value):
@@ -262411,9 +262879,14 @@ if __name__ == "__main__":
     main()
 `;
     TANGOFLUX_RUNNER = String.raw`#!/usr/bin/env python3
-import argparse, json, sys, time
+import argparse, json, os, sys, time
 from pathlib import Path
+# Broker/audio GPU pinning — must run before importing torch/tangoflux.
+_omnius_gpu = os.environ.get("OMNIUS_GPU_INDEX", "").strip() or os.environ.get("OMNIUS_AUDIO_GPU", "").strip()
+if _omnius_gpu:
+    os.environ["CUDA_VISIBLE_DEVICES"] = _omnius_gpu
 def _format_bytes(value):
     try:
         n = float(value)
@@ -262548,6 +263021,48 @@ if __name__ == "__main__":
         this.lastProgressAt = now;
         this.progressHandler(event);
       }
+      async acquireTransientLoadLease(args) {
+        if (!args.candidate)
+          return null;
+        const host = audioCandidateHost(args.candidate);
+        if (!host)
+          return null;
+        const broker = getModelBroker();
+        const decision2 = await broker.acquireTransientModelLoad({
+          name: args.candidate.model,
+          domain: args.kind,
+          host,
+          owner: `audio-generate-tool/${args.kind}`,
+          estimatedVramMB: audioCandidateEstimatedVramMB(args.candidate)
+        }, {
+          reason: args.reason,
+          restoreOllama: true,
+          unloadRequestedOllama: false
+        });
+        if (decision2.kind === "reject") {
+          return {
+            success: false,
+            output: "",
+            error: `${args.kind === "music" ? "Music" : "Sound"} generation blocked by resource broker: ${decision2.reason}`,
+            durationMs: performance.now() - args.start
+          };
+        }
+        if (decision2.kind === "degrade") {
+          return {
+            success: false,
+            output: "",
+            error: `${args.kind === "music" ? "Music" : "Sound"} generation needs a broker fallback (${decision2.fallback.name}), but audio candidate fallback must be selected by the audio ladder: ${decision2.reason}`,
+            durationMs: performance.now() - args.start
+          };
+        }
+        if (decision2.lease.evictedOllamaModels.length > 0) {
+          this.emitProgress({
+            stage: "setup",
+            message: `Temporarily unloaded ${decision2.lease.evictedOllamaModels.length} Ollama inference model(s) to free VRAM for ${args.kind} generation`
+          });
+        }
+        return { lease: decision2.lease, gpuIndex: decision2.lease.gpuIndex };
+      }
       async prewarmPythonBackend(args) {
         const runner = await ensureAudioRunner(this.cwd, args.runnerBackend);
         let python;
@@ -262594,6 +263109,17 @@ if __name__ == "__main__":
         }
         ensureUnifiedCacheDirs();
         this.emitProgress({ stage: "load", message: `Downloading/loading ${args.kind} model ${args.model}` });
+        const runnerEnv = { ...python.env };
+        if (this._brokerGpuIndex !== null) {
+          if (audioBrokerGpuIndexIsCompatible(this._brokerGpuIndex, runnerEnv)) {
+            runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
+          } else {
+            this.emitProgress({
+              stage: "setup",
+              message: `Broker selected CUDA GPU ${this._brokerGpuIndex}, but audio CUDA filtering excluded it; using CUDA_VISIBLE_DEVICES=${runnerEnv["CUDA_VISIBLE_DEVICES"] ?? "default"}`
+            });
+          }
+        }
         const result = await runProcess3(python.command, [
           runner,
           "--kind",
@@ -262610,7 +263136,7 @@ if __name__ == "__main__":
         ], {
           cwd: this.cwd,
           timeoutMs: 18e5,
-          env: python.env,
+          env: runnerEnv,
           progressLabel: `Downloading/loading ${args.model}`,
           onProgress: (event) => this.emitProgress(event)
         });
@@ -262690,33 +263216,6 @@ if __name__ == "__main__":
         const candidates = audioGenerationFallbackCandidates(kind, requestedModel, requestedBackend, generationFallbackEnabled2(args));
         const seed = optionalNumberArg2(args["seed"]);
         const playback = playbackRequested(args);
-        const broker = getModelBroker();
-        const firstCandidate = candidates[0];
-        let brokerGpuIndex = null;
-        if (firstCandidate) {
-          const decision2 = await broker.ensureModelLoadable({
-            name: firstCandidate.model,
-            domain: kind === "music" ? "music" : "sound",
-            host: firstCandidate.backend === "audiocraft" ? "audiocraft" : firstCandidate.backend === "tangoflux" ? "tangoflux" : firstCandidate.backend === "transformers" ? "diffusers-py" : "diffusers-py",
-            owner: `audio-generate-tool/${kind}`
-          });
-          if (decision2.kind === "evict") {
-            for (const target of decision2.evictTargets) {
-              await broker.evict(target.host, target.name, `${kind}-gen-needs-room`);
-            }
-            brokerGpuIndex = decision2.gpuIndex ?? null;
-          } else if (decision2.kind === "ok") {
-            brokerGpuIndex = decision2.gpuIndex ?? null;
-          } else if (decision2.kind === "reject") {
-            return {
-              success: false,
-              output: "",
-              error: `${kind === "music" ? "Music" : "Sound"} generation blocked by resource broker: ${decision2.reason}`,
-              durationMs: performance.now() - start2
-            };
-          }
-        }
-        this._brokerGpuIndex = brokerGpuIndex;
         try {
           return await this.generateCandidateLadder({ kind, candidates, prompt, args, seed, playback, start: start2 });
         } catch (err) {
@@ -262736,15 +263235,39 @@ if __name__ == "__main__":
             stage: "setup",
             message: `Preparing ${args.kind} model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
           });
-          const result = candidate.backend === "project" ? this.projectProfileResult(args.kind, candidate, args.start) : await this.prewarmPythonBackend({
+          const leaseDecision = await this.acquireTransientLoadLease({
             kind: args.kind,
-            backend: candidate.backend,
-            runnerBackend: candidate.backend,
-            model: candidate.model,
-            duration,
-            start: args.start,
-            python: args.args["python"]
-          });
+            candidate,
+            reason: `${args.kind}-prewarm-needs-room`,
+            start: args.start
+          });
+          if (leaseDecision && "success" in leaseDecision) {
+            failed.push({ candidate, reason: summarizeToolResult2(leaseDecision) });
+            if (index < args.candidates.length - 1) {
+              this.emitProgress({
+                stage: "setup",
+                message: `${candidate.model} did not fit current resources; trying ${args.candidates[index + 1].model}`
+              });
+            }
+            continue;
+          }
+          const lease = leaseDecision?.lease;
+          this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
+          let result;
+          try {
+            result = candidate.backend === "project" ? this.projectProfileResult(args.kind, candidate, args.start) : await this.prewarmPythonBackend({
+              kind: args.kind,
+              backend: candidate.backend,
+              runnerBackend: candidate.backend,
+              model: candidate.model,
+              duration,
+              start: args.start,
+              python: args.args["python"]
+            });
+          } finally {
+            await lease?.release();
+            this._brokerGpuIndex = null;
+          }
           if (result.success)
             return annotateAudioFallbackSuccess(result, failed, candidate);
           failed.push({ candidate, reason: summarizeToolResult2(result) });
@@ -262772,19 +263295,43 @@ if __name__ == "__main__":
             stage: "setup",
             message: `Using ${args.kind} model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
           });
-          const result = candidate.backend === "project" ? this.projectProfileResult(args.kind, candidate, args.start) : await this.generateWithPythonBackend({
+          const leaseDecision = await this.acquireTransientLoadLease({
             kind: args.kind,
-            backend: candidate.backend,
-            runnerBackend: candidate.backend,
-            prompt: args.prompt,
-            model: candidate.model,
-            duration,
-            steps,
-            seed: args.seed,
-            playback: args.playback,
-            start: args.start,
-            python: args.args["python"]
-          });
+            candidate,
+            reason: `${args.kind}-gen-needs-room`,
+            start: args.start
+          });
+          if (leaseDecision && "success" in leaseDecision) {
+            failed.push({ candidate, reason: summarizeToolResult2(leaseDecision) });
+            if (index < args.candidates.length - 1) {
+              this.emitProgress({
+                stage: "setup",
+                message: `${candidate.model} did not fit current resources; falling back to ${args.candidates[index + 1].model}`
+              });
+            }
+            continue;
+          }
+          const lease = leaseDecision?.lease;
+          this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
+          let result;
+          try {
+            result = candidate.backend === "project" ? this.projectProfileResult(args.kind, candidate, args.start) : await this.generateWithPythonBackend({
+              kind: args.kind,
+              backend: candidate.backend,
+              runnerBackend: candidate.backend,
+              prompt: args.prompt,
+              model: candidate.model,
+              duration,
+              steps,
+              seed: args.seed,
+              playback: args.playback,
+              start: args.start,
+              python: args.args["python"]
+            });
+          } finally {
+            await lease?.release();
+            this._brokerGpuIndex = null;
+          }
           if (result.success)
             return annotateAudioFallbackSuccess(result, failed, candidate);
           failed.push({ candidate, reason: summarizeToolResult2(result) });
@@ -262885,7 +263432,14 @@ if __name__ == "__main__":
         this.emitProgress({ stage: "load", message: `Starting ${args.kind} generation with ${args.model}` });
         const runnerEnv = { ...python.env };
         if (this._brokerGpuIndex !== null) {
-          runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
+          if (audioBrokerGpuIndexIsCompatible(this._brokerGpuIndex, runnerEnv)) {
+            runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
+          } else {
+            this.emitProgress({
+              stage: "setup",
+              message: `Broker selected CUDA GPU ${this._brokerGpuIndex}, but audio CUDA filtering excluded it; using CUDA_VISIBLE_DEVICES=${runnerEnv["CUDA_VISIBLE_DEVICES"] ?? "default"}`
+            });
+          }
         }
         const result = await runProcess3(python.command, argv, {
           cwd: this.cwd,
@@ -263117,6 +263671,12 @@ function videoCandidateFor(model, requestedBackend, requestedKind) {
   }
   return { model, backend, preset };
 }
+function videoCandidateHost(candidate) {
+  return candidate.backend === "comfyui" ? "comfyui" : "diffusers-py";
+}
+function videoCandidateEstimatedVramMB(candidate) {
+  return candidate.preset ? Math.ceil(candidate.preset.minVramGB * 1024) : void 0;
+}
 function videoGenerationFallbackCandidates(requestedModel, requestedBackend, requestedKind, allowFallback = true, options2 = {}) {
   const preferAudioVideo = Boolean(options2.preferNativeAudioVideo);
   const baseLadderIds = preferAudioVideo ? [...VIDEO_AUDIO_QUALITY_LADDER, ...VIDEO_GENERATION_QUALITY_LADDER] : VIDEO_GENERATION_QUALITY_LADDER;
@@ -263359,6 +263919,7 @@ function resolveHfToken() {
 }
 function videoGenerationPythonEnv(_repoRoot) {
   const env2 = unifiedPythonEnv();
+  applyMediaCudaDeviceFilterToEnv(env2, "video");
   const token = resolveHfToken();
   if (token) {
     env2["HF_TOKEN"] = token;
@@ -263681,9 +264242,15 @@ function annotateVideoFallbackSuccess(result, failed, winner) {
     ...failed.map((attempt, index) => `  ${formatVideoAttempt(attempt.candidate, attempt.reason, index)}`),
     ""
   ].join("\n");
+  const llmPrefix = [
+    `Fallback ladder used ${winner.model} [${winner.backend}] after ${failed.length} failed attempt(s).`,
+    ...failed.map((attempt, index) => formatVideoAttempt(attempt.candidate, attempt.reason, index))
+  ].join("\n");
   return {
     ...result,
-    output: prefix + result.output
+    output: prefix + result.output,
+    llmContent: result.llmContent ? `${llmPrefix}
+${result.llmContent}` : result.llmContent
   };
 }
 function parseRunnerJson3(stdout) {
@@ -263717,6 +264284,7 @@ var init_video_generate = __esm({
     init_venv_paths();
     init_model_store();
     init_hf_media_models();
+    init_cuda_device_filter();
     DEFAULT_DIFFUSERS_VIDEO_MODEL = "Efficient-Large-Model/SANA-Video_2B_480p";
     SANA_VIDEO_480P_MODEL = "Efficient-Large-Model/SANA-Video_2B_480p";
     SANA_VIDEO_720P_MODEL = "Efficient-Large-Model/SANA-Video_2B_720p";
@@ -265049,6 +265617,45 @@ if __name__ == "__main__":
         this.lastProgressAt = now;
         this.progressHandler(event);
       }
+      async acquireTransientLoadLease(args) {
+        if (!args.candidate)
+          return null;
+        const broker = getModelBroker();
+        const decision2 = await broker.acquireTransientModelLoad({
+          name: args.candidate.model,
+          domain: "video-gen",
+          host: videoCandidateHost(args.candidate),
+          owner: "video-generate-tool",
+          estimatedVramMB: videoCandidateEstimatedVramMB(args.candidate)
+        }, {
+          reason: args.reason,
+          restoreOllama: true,
+          unloadRequestedOllama: false
+        });
+        if (decision2.kind === "reject") {
+          return {
+            success: false,
+            output: "",
+            error: `Video generation blocked by resource broker: ${decision2.reason}`,
+            durationMs: performance.now() - args.start
+          };
+        }
+        if (decision2.kind === "degrade") {
+          return {
+            success: false,
+            output: "",
+            error: `Video generation needs a broker fallback (${decision2.fallback.name}), but video candidate fallback must be selected by the video ladder: ${decision2.reason}`,
+            durationMs: performance.now() - args.start
+          };
+        }
+        if (decision2.lease.evictedOllamaModels.length > 0) {
+          this.emitProgress({
+            stage: "setup",
+            message: `Temporarily unloaded ${decision2.lease.evictedOllamaModels.length} Ollama inference model(s) to free VRAM for video generation`
+          });
+        }
+        return { lease: decision2.lease, gpuIndex: decision2.lease.gpuIndex };
+      }
       async execute(args) {
         const start2 = performance.now();
         const action = String(args["action"] ?? "generate");
@@ -265104,35 +265711,6 @@ if __name__ == "__main__":
         const withAudio = booleanArg3(args["with_audio"], false);
         const audioInput = typeof args["audio_input"] === "string" && args["audio_input"].trim() ? String(args["audio_input"]).trim() : void 0;
         const candidates = videoGenerationFallbackCandidates(requestedModel, requestedBackend, inferredKind, generationFallbackEnabled3(args), { preferNativeAudioVideo: withAudio || Boolean(audioInput) });
-        const broker = getModelBroker();
-        const firstCandidate = candidates[0];
-        let brokerGpuIndex = null;
-        if (firstCandidate) {
-          const preset = firstCandidate.preset;
-          const decision2 = await broker.ensureModelLoadable({
-            name: firstCandidate.model,
-            domain: "video-gen",
-            host: firstCandidate.backend === "comfyui" ? "comfyui" : "diffusers-py",
-            owner: "video-generate-tool",
-            estimatedVramMB: preset ? preset.minVramGB * 1024 : void 0
-          });
-          if (decision2.kind === "evict") {
-            for (const target of decision2.evictTargets) {
-              await broker.evict(target.host, target.name, "video-gen-needs-room");
-            }
-            brokerGpuIndex = decision2.gpuIndex ?? null;
-          } else if (decision2.kind === "ok") {
-            brokerGpuIndex = decision2.gpuIndex ?? null;
-          } else if (decision2.kind === "reject") {
-            return {
-              success: false,
-              output: "",
-              error: `Video generation blocked by resource broker: ${decision2.reason}`,
-              durationMs: performance.now() - start2
-            };
-          }
-        }
-        this._brokerGpuIndex = brokerGpuIndex;
         if (candidates.length === 0) {
           return {
             success: false,
@@ -265182,7 +265760,30 @@ if __name__ == "__main__":
             failed.push({ candidate, reason: "ComfyUI backend not yet implemented." });
             continue;
           }
-          const result = await this.prewarmDiffusers({ candidate, start: args.start, python: args.args["python"] });
+          const leaseDecision = await this.acquireTransientLoadLease({
+            candidate,
+            reason: "video-prewarm-needs-room",
+            start: args.start
+          });
+          if (leaseDecision && "success" in leaseDecision) {
+            failed.push({ candidate, reason: summarizeToolResult3(leaseDecision) });
+            if (index < args.candidates.length - 1) {
+              this.emitProgress({
+                stage: "setup",
+                message: `${candidate.model} did not fit current resources; trying ${args.candidates[index + 1].model}`
+              });
+            }
+            continue;
+          }
+          const lease = leaseDecision?.lease;
+          this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
+          let result;
+          try {
+            result = await this.prewarmDiffusers({ candidate, start: args.start, python: args.args["python"] });
+          } finally {
+            await lease?.release();
+            this._brokerGpuIndex = null;
+          }
           if (result.success)
             return annotateVideoFallbackSuccess(result, failed, candidate);
           failed.push({ candidate, reason: summarizeToolResult3(result) });
@@ -265268,26 +265869,48 @@ if __name__ == "__main__":
               start: args.start
             });
           } else {
-            result = await this.generateWithDiffusers({
-              prompt: promptForCandidate,
-              model: candidate.model,
-              preset,
-              kind: args.kind,
-              imageArg: args.imageArg,
-              audioInput: args.audioInput,
-              width,
-              height,
-              numFrames,
-              fps,
-              steps,
-              guidance,
-              negativePrompt,
-              seed: args.seed,
-              hfToken: hfTokenOverride,
-              autoAcceptLicense,
-              start: args.start,
-              python: args.args["python"]
+            const leaseDecision = await this.acquireTransientLoadLease({
+              candidate,
+              reason: "video-gen-needs-room",
+              start: args.start
             });
+            if (leaseDecision && "success" in leaseDecision) {
+              failed.push({ candidate, reason: summarizeToolResult3(leaseDecision) });
+              if (index < args.candidates.length - 1) {
+                this.emitProgress({
+                  stage: "setup",
+                  message: `${candidate.model} did not fit current resources; falling back to ${args.candidates[index + 1].model}`
+                });
+              }
+              continue;
+            }
+            const lease = leaseDecision?.lease;
+            this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
+            try {
+              result = await this.generateWithDiffusers({
+                prompt: promptForCandidate,
+                model: candidate.model,
+                preset,
+                kind: args.kind,
+                imageArg: args.imageArg,
+                audioInput: args.audioInput,
+                width,
+                height,
+                numFrames,
+                fps,
+                steps,
+                guidance,
+                negativePrompt,
+                seed: args.seed,
+                hfToken: hfTokenOverride,
+                autoAcceptLicense,
+                start: args.start,
+                python: args.args["python"]
+              });
+            } finally {
+              await lease?.release();
+              this._brokerGpuIndex = null;
+            }
           }
           let nativeAudio = preset.nativeAudioVideo === true;
           let audioPath;
@@ -265479,6 +266102,17 @@ ${llmAnnotation}` : result.llmContent;
         }
         ensureUnifiedCacheDirs();
         this.emitProgress({ stage: "load", message: `Downloading/loading video model ${args.candidate.model}` });
+        const runnerEnv = { ...python.env };
+        if (this._brokerGpuIndex !== null) {
+          if (mediaBrokerGpuIndexIsCompatible(this._brokerGpuIndex, "video", runnerEnv)) {
+            runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
+          } else {
+            this.emitProgress({
+              stage: "setup",
+              message: `Broker selected CUDA GPU ${this._brokerGpuIndex}, but video CUDA filtering excluded it; using CUDA_VISIBLE_DEVICES=${runnerEnv["CUDA_VISIBLE_DEVICES"] ?? "default"}`
+            });
+          }
+        }
         const result = await runProcess4(python.command, [
           runner,
           "--model",
@@ -265494,7 +266128,7 @@ ${llmAnnotation}` : result.llmContent;
         ], {
           cwd: this.cwd,
           timeoutMs: 18e5,
-          env: python.env,
+          env: runnerEnv,
           progressLabel: `Downloading/loading ${args.candidate.model}`,
           onProgress: (event) => this.emitProgress(event)
         });
@@ -265555,7 +266189,14 @@ ${llmAnnotation}` : result.llmContent;
           runnerEnv["HUGGING_FACE_HUB_TOKEN"] = effectiveToken;
         }
         if (this._brokerGpuIndex !== null) {
-          runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
+          if (mediaBrokerGpuIndexIsCompatible(this._brokerGpuIndex, "video", runnerEnv)) {
+            runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
+          } else {
+            this.emitProgress({
+              stage: "setup",
+              message: `Broker selected CUDA GPU ${this._brokerGpuIndex}, but video CUDA filtering excluded it; using CUDA_VISIBLE_DEVICES=${runnerEnv["CUDA_VISIBLE_DEVICES"] ?? "default"}`
+            });
+          }
         }
         const argv = [
           runner,
@@ -266682,6 +267323,11 @@ import { readFileSync as readFileSync24, existsSync as existsSync34, statSync as
 import { execSync as execSync16, spawn as spawn10, spawnSync as spawnSync4 } from "node:child_process";
 import { resolve as resolve23, extname as extname6, basename as basename8, dirname as dirname9, join as join44 } from "node:path";
 import { fileURLToPath as fileURLToPath4 } from "node:url";
+function visionPythonEnv(extra = {}) { // Builds the environment for vision Python child processes: process.env overlaid with `extra`.
+  const env2 = { ...process.env, ...extra }; // fresh shallow copy; keys in `extra` win over process.env
+  applyMediaCudaDeviceFilterToEnv(env2, "vision"); // return value unused; presumably edits CUDA visibility in env2 in place — confirm in cuda_device_filter
+  return env2;
+}
 async function probeStation(endpoint) {
   try {
     const healthUrl = endpoint.replace(/\/v1\/?$/, "/health");
@@ -266752,7 +267398,8 @@ async function autoLaunchStation(port = 2020) {
     return false;
   return new Promise((resolvePromise) => {
     const child = spawn10(pythonBin, [launcherScript, "--port", String(port)], {
-      stdio: ["ignore", "pipe", "pipe"]
+      stdio: ["ignore", "pipe", "pipe"],
+      env: visionPythonEnv()
     });
     stationProcess = child;
     const cleanupStation = () => {
@@ -267067,7 +267714,11 @@ function tryHuggingFacePointBackend(options2) {
     hfPointUnavailable = "Python not found";
     return null;
   }
-  const deps = spawnSync4(python, ["-c", "import torch, transformers, PIL"], { stdio: "pipe", timeout: 1e4 });
+  const deps = spawnSync4(python, ["-c", "import torch, transformers, PIL"], {
+    stdio: "pipe",
+    timeout: 1e4,
+    env: visionPythonEnv()
+  });
   if (deps.status !== 0) {
     hfPointUnavailable = bufferishToString3(deps.stderr) || "Python dependencies torch, transformers, and pillow are not importable";
     return null;
@@ -267114,7 +267765,7 @@ print(json.dumps(last_result))
       encoding: "utf8",
       stdio: ["pipe", "pipe", "pipe"],
       timeout: Math.max(options2.timeoutMs ?? 6e4, 3e5),
-      env: { ...process.env }
+      env: visionPythonEnv()
     });
     if (run2.status !== 0) {
       hfPointUnavailable = run2.stderr || run2.stdout || "Hugging Face Moondream point backend failed";
@@ -267167,6 +267818,7 @@ var init_vision = __esm({
   "packages/execution/dist/tools/vision.js"() {
     "use strict";
     init_model_broker();
+    init_cuda_device_filter();
     moondreamClient = null;
     moondreamError = null;
     stationProcess = null;
@@ -520577,6 +521229,11 @@ import { execFileSync as execFileSync5, execSync as execSync30, spawn as spawn15
 import { copyFileSync as copyFileSync3, existsSync as existsSync47, statSync as statSync23, writeFileSync as writeFileSync19, mkdirSync as mkdirSync20, readdirSync as readdirSync18, writeSync } from "node:fs";
 import { basename as basename15, extname as extname10, isAbsolute as isAbsolute2, join as join63 } from "node:path";
 import { homedir as homedir16, tmpdir as tmpdir11 } from "node:os";
+function ttsPythonEnv(extra = {}) { // Builds the environment for TTS Python child processes: process.env overlaid with `extra`.
+  const env2 = { ...process.env, ...extra }; // fresh shallow copy; keys in `extra` win over process.env
+  applyMediaCudaDeviceFilterToEnv(env2, "tts"); // return value unused; presumably edits CUDA visibility in env2 in place — confirm in cuda_device_filter
+  return env2;
+}
 function hasCommand3(command) {
   try {
     if (process.platform === "win32") {
@@ -521144,7 +521801,7 @@ function ensureLuxttsDaemon() {
     const daemon = spawn15(venvPy, [inferScript], {
       stdio: ["pipe", "pipe", "pipe"],
       cwd: tmpdir11(),
-      env: { ...process.env, LUXTTS_REPO_PATH: repoDir }
+      env: ttsPythonEnv({ LUXTTS_REPO_PATH: repoDir })
     });
     _luxttsDaemon = daemon;
     _luxttsBuffer = "";
@@ -521224,6 +521881,7 @@ var init_audio_playback = __esm({
   "packages/execution/dist/tools/audio-playback.js"() {
     "use strict";
     init_hf_media_models();
+    init_cuda_device_filter();
     _luxttsDaemon = null;
     _luxttsReady = false;
     _luxttsRequestId = 0;
@@ -521605,7 +522263,7 @@ ${tried.map((line) => `- ${line}`).join("\n")}`,
         execFileSync5(venvPy, ["-c", pyScript, JSON.stringify({ text, output: outputPath3, clone_ref: cloneRef, repo: repoDir, speed })], {
           stdio: "pipe",
           timeout: 12e4,
-          env: { ...process.env, LUXTTS_REPO_PATH: repoDir }
+          env: ttsPythonEnv({ LUXTTS_REPO_PATH: repoDir })
         });
         return `${basename15(cloneRef)} (LuxTTS standalone)`;
       }
@@ -521619,7 +522277,8 @@ ${tried.map((line) => `- ${line}`).join("\n")}`,
           input: JSON.stringify({ text, output_path: outputPath3, voice_name: voice, lang, speed, total_step: totalStep }),
           encoding: "utf8",
           stdio: ["pipe", "pipe", "pipe"],
-          timeout: 18e4
+          timeout: 18e4,
+          env: ttsPythonEnv()
         });
         const line = stdout.trim().split(/\r?\n/).pop() || "";
         const parsed = JSON.parse(line);
@@ -523041,10 +523700,16 @@ import { execSync as execSync36 } from "node:child_process";
 import { existsSync as existsSync50, mkdirSync as mkdirSync22, writeFileSync as writeFileSync20, readFileSync as readFileSync35 } from "node:fs";
 import { join as join65 } from "node:path";
 import { homedir as homedir17, tmpdir as tmpdir13 } from "node:os";
+function audioAnalysisPythonEnv(extra = {}) { // Builds the environment for audio-analysis Python/pip child processes: process.env overlaid with `extra`.
+  const env2 = { ...process.env, ...extra }; // fresh shallow copy; keys in `extra` win over process.env
+  applyMediaCudaDeviceFilterToEnv(env2, "asr"); // uses the "asr" filter kind; return value unused, presumably edits env2 in place — confirm in cuda_device_filter
+  return env2;
+}
 var VENV_DIR, VENV_PIP, VENV_PYTHON, AudioAnalyzeTool;
 var init_audio_analyze = __esm({
   "packages/execution/dist/tools/audio-analyze.js"() {
     "use strict";
+    init_cuda_device_filter();
     VENV_DIR = join65(homedir17(), ".omnius", "audio-ml-venv");
     VENV_PIP = join65(VENV_DIR, "bin", "pip");
     VENV_PYTHON = join65(VENV_DIR, "bin", "python3");
@@ -523334,15 +523999,15 @@ Context saved to: ${contextFile}`,
       /** Ensure Python venv with required packages */
       async ensureVenv(packages) {
         if (!existsSync50(VENV_PYTHON)) {
-          execSync36(`python3 -m venv ${VENV_DIR}`, { timeout: 3e4, stdio: "pipe" });
+          execSync36(`python3 -m venv ${VENV_DIR}`, { timeout: 3e4, stdio: "pipe", env: audioAnalysisPythonEnv() });
         }
         for (const pkg of packages) {
           const importName = pkg.replace(/[<>=!].*/g, "").replace(/-/g, "_");
           try {
-            execSync36(`${VENV_PYTHON} -c "import ${importName}"`, { timeout: 1e4, stdio: "pipe" });
+            execSync36(`${VENV_PYTHON} -c "import ${importName}"`, { timeout: 1e4, stdio: "pipe", env: audioAnalysisPythonEnv() });
           } catch {
             try {
-              execSync36(`${VENV_PIP} install "${pkg}"`, { timeout: 3e5, stdio: "pipe" });
+              execSync36(`${VENV_PIP} install "${pkg}"`, { timeout: 3e5, stdio: "pipe", env: audioAnalysisPythonEnv() });
             } catch {
             }
           }
@@ -523356,7 +524021,7 @@ Context saved to: ${contextFile}`,
           const output = execSync36(`${VENV_PYTHON} ${scriptFile}`, {
             encoding: "utf8",
             timeout: 3e5,
-            env: { ...process.env, TF_CPP_MIN_LOG_LEVEL: "3", TF_ENABLE_ONEDNN_OPTS: "0", PYTHONUNBUFFERED: "1" }
+            env: audioAnalysisPythonEnv({ TF_CPP_MIN_LOG_LEVEL: "3", TF_ENABLE_ONEDNN_OPTS: "0", PYTHONUNBUFFERED: "1" })
           });
           try {
             const result = JSON.parse(output.trim().split("\n").pop());
@@ -524192,10 +524857,16 @@ import { execSync as execSync38 } from "node:child_process";
 import { existsSync as existsSync52, mkdirSync as mkdirSync24, writeFileSync as writeFileSync22, readFileSync as readFileSync37 } from "node:fs";
 import { join as join67 } from "node:path";
 import { homedir as homedir19, tmpdir as tmpdir15 } from "node:os";
+function visualMemoryPythonEnv(extra = {}) { // Builds the environment for visual-memory Python/pip child processes: process.env overlaid with `extra`.
+  const env2 = { ...process.env, ...extra }; // fresh shallow copy; keys in `extra` win over process.env
+  applyMediaCudaDeviceFilterToEnv(env2, "vision"); // uses the "vision" filter kind; return value unused, presumably edits env2 in place — confirm in cuda_device_filter
+  return env2;
+}
 var VMEM_DIR, VENV_DIR2, VENV_PY, VENV_PIP2, VisualMemoryTool;
 var init_visual_memory = __esm({
   "packages/execution/dist/tools/visual-memory.js"() {
     "use strict";
+    init_cuda_device_filter();
     VMEM_DIR = join67(homedir19(), ".omnius", "visual-memory");
     VENV_DIR2 = join67(homedir19(), ".omnius", "vision-ml-venv");
     VENV_PY = join67(VENV_DIR2, "bin", "python3");
@@ -524723,19 +525394,23 @@ ${objects.join("\n") || "  (none taught)"}`,
       async ensureVenv() {
         if (existsSync52(VENV_PY)) {
           try {
-            execSync38(`${VENV_PY} -c "import insightface, transformers, torch"`, { timeout: 15e3, stdio: "pipe" });
+            execSync38(`${VENV_PY} -c "import insightface, transformers, torch"`, {
+              timeout: 15e3,
+              stdio: "pipe",
+              env: visualMemoryPythonEnv()
+            });
             return true;
           } catch {
           }
         }
         try {
           if (!existsSync52(VENV_PY)) {
-            execSync38(`python3 -m venv ${VENV_DIR2}`, { timeout: 3e4, stdio: "pipe" });
+            execSync38(`python3 -m venv ${VENV_DIR2}`, { timeout: 3e4, stdio: "pipe", env: visualMemoryPythonEnv() });
           }
-          execSync38(`${VENV_PIP2} install "setuptools<81" wheel`, { timeout: 6e4, stdio: "pipe" });
-          execSync38(`${VENV_PIP2} install torch torchvision`, { timeout: 6e5, stdio: "pipe" });
-          execSync38(`${VENV_PIP2} install insightface onnxruntime opencv-python-headless`, { timeout: 3e5, stdio: "pipe" });
-          execSync38(`${VENV_PIP2} install transformers pillow`, { timeout: 3e5, stdio: "pipe" });
+          execSync38(`${VENV_PIP2} install "setuptools<81" wheel`, { timeout: 6e4, stdio: "pipe", env: visualMemoryPythonEnv() });
+          execSync38(`${VENV_PIP2} install torch torchvision`, { timeout: 6e5, stdio: "pipe", env: visualMemoryPythonEnv() });
+          execSync38(`${VENV_PIP2} install insightface onnxruntime opencv-python-headless`, { timeout: 3e5, stdio: "pipe", env: visualMemoryPythonEnv() });
+          execSync38(`${VENV_PIP2} install transformers pillow`, { timeout: 3e5, stdio: "pipe", env: visualMemoryPythonEnv() });
           return true;
         } catch {
           return false;
@@ -524748,7 +525423,7 @@ ${objects.join("\n") || "  (none taught)"}`,
           const output = execSync38(`${VENV_PY} ${scriptFile}`, {
             encoding: "utf8",
             timeout: timeoutMs,
-            env: { ...process.env, PYTHONUNBUFFERED: "1" }
+            env: visualMemoryPythonEnv({ PYTHONUNBUFFERED: "1" })
           });
           const lastLine = output.trim().split("\n").pop() || "{}";
           return JSON.parse(lastLine);
@@ -525364,6 +526039,11 @@ import { existsSync as existsSync54, mkdirSync as mkdirSync26, writeFileSync as
 import { dirname as dirname15, join as join69, resolve as resolve35 } from "node:path";
 import { tmpdir as tmpdir17, homedir as homedir21 } from "node:os";
 import { fileURLToPath as fileURLToPath8 } from "node:url";
+function asrPythonEnv(extra = {}) { // Builds the environment for ASR Python child processes: process.env overlaid with `extra`.
+  const env2 = { ...process.env, ...extra }; // fresh shallow copy; keys in `extra` win over process.env
+  applyMediaCudaDeviceFilterToEnv(env2, "asr"); // return value unused; presumably edits CUDA visibility in env2 in place — confirm in cuda_device_filter
+  return env2;
+}
 function _findNemotronScript() {
   const candidates = [];
   try {
@@ -525399,6 +526079,7 @@ var init_asr_listen = __esm({
   "packages/execution/dist/tools/asr-listen.js"() {
     "use strict";
     init_hf_media_models();
+    init_cuda_device_filter();
     AsrListenTool = class {
       name = "asr_listen";
       description = "Record from microphone and transcribe speech to text. Backends: 'whisper' (default, battle-tested openai-whisper / faster-whisper), 'nemotron' (nvidia/nemotron-speech-streaming-en-0.6b — faster streaming), or 'parallel' (runs BOTH engines on the same audio and returns a side-by-side comparison with per-engine latency and character counts). Actions: 'listen' to record + transcribe in one step, 'transcribe' to run on an existing file. Use this when you need to HEAR what a human is saying — ask a question via audio_playback speak, then use asr_listen to capture and transcribe their response.";
@@ -525633,7 +526314,7 @@ print(json.dumps({"ok": False, "error": "No whisper backend available"}))
               const output = execSync40(`"${pyPath}" "${scriptFile}"`, {
                 encoding: "utf8",
                 timeout: 12e4,
-                env: { ...process.env, PYTHONUNBUFFERED: "1" }
+                env: asrPythonEnv({ PYTHONUNBUFFERED: "1" })
               }).trim();
               const lines = output.split("\n");
               for (let i2 = lines.length - 1; i2 >= 0; i2--) {
@@ -525682,7 +526363,8 @@ print(json.dumps({"ok": False, "error": "No whisper backend available"}))
           const result = spawnSync7("python3", [script, "--file", audioFile], {
             encoding: "utf8",
             timeout: 6e5,
-            stdio: ["ignore", "pipe", "pipe"]
+            stdio: ["ignore", "pipe", "pipe"],
+            env: asrPythonEnv()
           });
           if (result.error) {
             return {
@@ -530791,6 +531473,7 @@ __export(dist_exports, {
   CustomTool: () => CustomTool,
   DEFAULT_DIFFUSERS_IMAGE_MODEL: () => DEFAULT_DIFFUSERS_IMAGE_MODEL,
   DEFAULT_DIFFUSERS_VIDEO_MODEL: () => DEFAULT_DIFFUSERS_VIDEO_MODEL,
+  DEFAULT_MEDIA_MIN_CUDA_COMPUTE_CAPABILITY: () => DEFAULT_MEDIA_MIN_CUDA_COMPUTE_CAPABILITY,
   DEFAULT_MUSIC_MODEL: () => DEFAULT_MUSIC_MODEL,
   DEFAULT_OLLAMA_IMAGE_MODEL: () => DEFAULT_OLLAMA_IMAGE_MODEL,
   DEFAULT_SOUND_MODEL: () => DEFAULT_SOUND_MODEL,
@@ -530905,6 +531588,7 @@ __export(dist_exports, {
   addProjectConstraint: () => addProjectConstraint,
   addSessionConstraint: () => addSessionConstraint,
   aliasTool: () => aliasTool,
+  applyMediaCudaDeviceFilterToEnv: () => applyMediaCudaDeviceFilterToEnv,
   applyPatch: () => applyPatch,
   applyToolResultTriage: () => applyToolResultTriage,
   artifactManifestFromBytes: () => artifactManifestFromBytes,
@@ -530945,6 +531629,7 @@ __export(dist_exports, {
   defaultExtensionForMime: () => defaultExtensionForMime,
   deleteMediaModelAdapter: () => deleteMediaModelAdapter,
   deleteTodos: () => deleteTodos,
+  detectCudaDevices: () => detectCudaDevices,
   detectElevationMethod: () => detectElevationMethod,
   detectLegacyCaches: () => detectLegacyCaches,
   detectSearchProvider: () => detectSearchProvider,
@@ -531046,6 +531731,7 @@ __export(dist_exports, {
   markSessionValidated: () => markSessionValidated,
   measureRepoCacheBytes: () => measureRepoCacheBytes,
   mediaBackendCompatibleWithModality: () => mediaBackendCompatibleWithModality,
+  mediaBrokerGpuIndexIsCompatible: () => mediaBrokerGpuIndexIsCompatible,
   mediaMimeFromPath: () => mediaMimeFromPath,
   mediaModelCatalogDir: () => mediaModelCatalogDir,
   mediaModelSlug: () => mediaModelSlug,
@@ -531056,6 +531742,8 @@ __export(dist_exports, {
   normalizeSponsorMediaConfig: () => normalizeSponsorMediaConfig,
   omniusHomeDir: () => omniusHomeDir,
   packetPath: () => packetPath,
+  parseCudaComputeCapability: () => parseCudaComputeCapability,
+  parseCudaDeviceInfo: () => parseCudaDeviceInfo,
   parseMcpMarkdown: () => parseMcpMarkdown,
   parseMcpToolName: () => parseMcpToolName,
   parseSponsorMediaCapability: () => parseSponsorMediaCapability,
@@ -531078,6 +531766,7 @@ __export(dist_exports, {
   renderCustomToolDocs: () => renderCustomToolDocs,
   resetDepCache: () => resetDepCache,
   resetMoondreamClient: () => resetMoondreamClient,
+  resolveMediaCudaVisibleDevicesForEnv: () => resolveMediaCudaVisibleDevicesForEnv,
   resolveMediaModel: () => resolveMediaModel,
   resolveSecret: () => resolveSecret,
   revokeSecret: () => revokeSecret,
@@ -531199,6 +531888,7 @@ var init_dist5 = __esm({
     init_embedding_store();
     init_image_generate();
     init_audio_generate();
+    init_cuda_device_filter();
     init_model_store();
     init_video_generate();
     init_sponsor_media();
@@ -589502,6 +590192,7 @@ ${CONTENT_BG_SEQ}`);
 });
 // packages/cli/src/tui/tui-select.ts
+import { AsyncLocalStorage } from "node:async_hooks";
 function ansi3(code8, text) { // Wraps `text` in an SGR escape (ESC[<code8>m ... ESC[0m) when isTTY2 is truthy; otherwise returns `text` unchanged.
   return isTTY2 ? `\x1B[${code8}m${text}\x1B[0m` : text;
 }
@@ -589511,6 +590202,48 @@ function fg2563(code8, text) {
 function stripAnsi3(s2) { // Removes SGR sequences (ESC[ digits/semicolons m) from s2; other escape sequences are not matched by this pattern.
   return s2.replace(/\x1B\[[0-9;]*m/g, "");
 }
+function stripTerminalControl(s2) { // Removes terminal escape sequences from s2 and returns the remaining text.
+  return s2.replace(/\x1B(?:\[[\d;?]*[ -/]*[@-~]|\][^\x07\x1B]*(?:\x07|\x1B\\)?|[@-Z\\-_])/g, ""); // alternatives: ESC[ ... final byte (CSI), ESC] ... with optional BEL or ESC\ terminator (OSC), or ESC plus one character in @-Z or \-_
+}
+function isNonInteractiveSelectSurface() { // True when the nonInteractiveSelectSurface AsyncLocalStorage holds a truthy store in the current async context.
+  return Boolean(nonInteractiveSelectSurface.getStore());
+}
+function runWithNonInteractiveSelectSurface(fn, opts = {}) { // Runs fn with `opts` as the nonInteractiveSelectSurface store and returns whatever fn returns.
+  return nonInteractiveSelectSurface.run(opts, fn); // the default {} is truthy, so isNonInteractiveSelectSurface() reports true inside fn
+}
+function renderNonInteractiveSelect(opts, currentTitle, skipSet) { // Writes a select menu to stdout as plain numbered text; it reads no input and returns nothing.
+  const surface = nonInteractiveSelectSurface.getStore(); // store set by runWithNonInteractiveSelectSurface, or undefined outside such a scope
+  const maxItems = Math.max(1, surface?.maxItems ?? 30); // cap on numbered rows: 30 when unset, never below 1
+  const lines = [];
+  if (currentTitle) lines.push(stripTerminalControl(stripAnsi3(currentTitle)));
+  if (lines.length) lines.push(""); // blank line after the title, only when a title was emitted
+  let idx = 1; // 1-based number printed for the next selectable item
+  let shown = 0;
+  let omitted = 0;
+  for (const item of opts.items) {
+    const isSkip = skipSet.has(item.key);
+    const labelPlain = stripTerminalControl(stripAnsi3(item.label)).trim();
+    const detailPlain = item.detail ? stripTerminalControl(stripAnsi3(item.detail)).trim() : "";
+    if (isSkip) { // items in skipSet print unnumbered and do not count toward maxItems
+      if (labelPlain) lines.push(labelPlain); // labels that are empty after stripping are dropped
+      continue;
+    }
+    if (shown >= maxItems) { // past the cap: only counted, reported below as "... N more"
+      omitted++;
+      idx++;
+      continue;
+    }
+    const num = String(idx).padStart(2, " ");
+    const detail = detailPlain ? ` - ${detailPlain}` : "";
+    lines.push(`  ${num}. ${labelPlain}${detail}`);
+    shown++;
+    idx++;
+  }
+  if (omitted > 0) lines.push(`  ... ${omitted} more`);
+  if (opts.customKeyHint) lines.push("", stripTerminalControl(stripAnsi3(opts.customKeyHint)));
+  lines.push("", surface?.hint ?? "(non-interactive: menu shown as text; open the TUI for selection)"); // surface.hint replaces the default footer when provided
+  process.stdout.write(lines.join("\n").trimEnd() + "\n"); // trailing whitespace trimmed, then exactly one final newline
+}
 function defaultRenderRow(item, focused, isActive) {
   const marker = isActive ? selectColors.green("●") : focused ? selectColors.blue("●") : selectColors.dim("○");
   const label = focused ? selectColors.blue(selectColors.bold(item.label)) : isActive ? selectColors.green(item.label) : item.label;
@@ -589535,27 +590268,8 @@ function tuiSelect(opts) {
   if (items.length === 0) {
     return Promise.resolve({ confirmed: false, key: null, index: -1 });
   }
-  if (!process.stdin.isTTY && process.env["OMNIUS_TUI_FORCE_INTERACTIVE"] !== "1") {
-    const lines = [];
-    if (currentTitle) lines.push(currentTitle);
-    if (lines.length) lines.push("");
-    let idx = 1;
-    for (const item of items) {
-      const isSkip = skipSet.has(item.key);
-      const labelPlain = stripAnsi3(item.label);
-      const detailPlain = item.detail ? stripAnsi3(item.detail) : "";
-      if (isSkip) {
-        lines.push(labelPlain);
-      } else {
-        const num = String(idx).padStart(2, " ");
-        const detail = detailPlain ? `  — ${detailPlain}` : "";
-        lines.push(`  ${num}. ${labelPlain}${detail}`);
-        idx++;
-      }
-    }
-    if (opts.customKeyHint) lines.push("", opts.customKeyHint);
-    lines.push("", "(non-interactive: list shown above; pick options by re-running this command from the TUI)");
-    process.stdout.write(lines.join("\n") + "\n");
+  if (isNonInteractiveSelectSurface() || !process.stdin.isTTY && process.env["OMNIUS_TUI_FORCE_INTERACTIVE"] !== "1") {
+    renderNonInteractiveSelect(opts, currentTitle, skipSet);
     return Promise.resolve({ confirmed: false, key: null, index: -1 });
   }
   const isSkippable = (idx) => skipSet.has(items[idx].key);
@@ -590136,7 +590850,7 @@ ${tuiBgSeq()}`);
     }
   });
 }
-var isTTY2, MENU_ACTIVE_GREEN_256, selectColors;
+var isTTY2, MENU_ACTIVE_GREEN_256, selectColors, nonInteractiveSelectSurface;
 var init_tui_select = __esm({
   "packages/cli/src/tui/tui-select.ts"() {
     "use strict";
@@ -590156,6 +590870,7 @@ var init_tui_select = __esm({
       /** Readable grey for non-matching items */
       matchDark: (t2) => fg2563(tuiTextDim(), t2)
     };
+    nonInteractiveSelectSurface = new AsyncLocalStorage();
   }
 });
@@ -590182,12 +590897,17 @@ import { join as join114, dirname as dirname32 } from "node:path";
 import { homedir as homedir36 } from "node:os";
 import { execSync as execSync50, spawn as spawn26 } from "node:child_process";
 import { fileURLToPath as fileURLToPath15 } from "node:url";
+function personaplexPythonEnv(extra = {}) { // Builds the environment for PersonaPlex Python/shell child processes: process.env overlaid with `extra`.
+  const env2 = { ...process.env, ...extra }; // fresh shallow copy; keys in `extra` win over process.env
+  applyMediaCudaDeviceFilterToEnv(env2, "voice"); // uses the "voice" filter kind; return value unused, presumably edits env2 in place — confirm in cuda_device_filter
+  return env2;
+}
 function execAsync(cmd, opts = {}) {
   return new Promise((resolve59, reject) => {
     const child = spawn26("bash", ["-c", cmd], {
       stdio: ["ignore", "pipe", "pipe"],
       timeout: opts.timeout ?? 3e5,
-      env: opts.env ?? process.env
+      env: personaplexPythonEnv(opts.env ?? {})
     });
     let stdout = "";
     let stderr = "";
@@ -590261,7 +590981,8 @@ function detectPersonaPlexCapability() {
     try {
       execSync50('python3 -c "import torch; assert torch.cuda.is_available()"', {
         timeout: 1e4,
-        stdio: "pipe"
+        stdio: "pipe",
+        env: personaplexPythonEnv()
       });
     } catch {
       const tier2 = selectWeightTier(vramGB);
@@ -590438,7 +591159,8 @@ async function installPersonaPlex(onInfo, weightTier) {
     const sitePackages = execSync50(`"${python}" -c "import moshi, os; print(os.path.dirname(moshi.__file__))"`, {
       encoding: "utf8",
       timeout: 5e3,
-      stdio: "pipe"
+      stdio: "pipe",
+      env: personaplexPythonEnv()
     }).trim();
     const serverFile = join114(sitePackages, "server.py");
     if (existsSync99(serverFile)) {
@@ -590455,7 +591177,8 @@ async function installPersonaPlex(onInfo, weightTier) {
     const sitePackages = execSync50(`"${python}" -c "import moshi, os; print(os.path.dirname(moshi.__file__))"`, {
       encoding: "utf8",
       timeout: 5e3,
-      stdio: "pipe"
+      stdio: "pipe",
+      env: personaplexPythonEnv()
     }).trim();
     const loadersFile = join114(sitePackages, "models", "loaders.py");
     if (existsSync99(loadersFile)) {
@@ -590559,7 +591282,8 @@ $2if filename.endswith(".safetensors"):`
     const sitePackages2 = execSync50(`"${python}" -c "import moshi, os; print(os.path.dirname(moshi.__file__))"`, {
       encoding: "utf8",
       timeout: 5e3,
-      stdio: "pipe"
+      stdio: "pipe",
+      env: personaplexPythonEnv()
     }).trim();
     const hybridDest = join114(sitePackages2, "hybrid_agent.py");
     const serverDest = join114(sitePackages2, "server.py");
@@ -590693,7 +591417,7 @@ async function startPersonaPlexDaemon(onInfo) {
       try {
         const weightPath = execSync50(
           `"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', '${repoInfo.file}', token=False))"`,
-          { encoding: "utf8", timeout: 6e4, stdio: "pipe" }
+          { encoding: "utf8", timeout: 6e4, stdio: "pipe", env: personaplexPythonEnv() }
         ).trim();
         if (existsSync99(weightPath)) {
           if (!existsSync99(cachedBf16)) {
@@ -590706,7 +591430,7 @@ state = {k: v.to(torch.bfloat16) if v.is_floating_point() else v for k, v in sta
 save_file(state, '${cachedBf16}')
 print('Converted')
 "`,
-              { timeout: 18e4, stdio: "pipe" }
+              { timeout: 18e4, stdio: "pipe", env: personaplexPythonEnv() }
             );
           }
           if (existsSync99(cachedBf16)) {
@@ -590732,13 +591456,13 @@ print('Converted')
       try {
         const weightPath = execSync50(
           `"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', '${repoInfo.file}'${repoInfo.needsToken ? "" : ", token=False"}))"`,
-          { encoding: "utf8", timeout: 3e4, stdio: "pipe" }
+          { encoding: "utf8", timeout: 3e4, stdio: "pipe", env: personaplexPythonEnv() }
         ).trim();
         if (existsSync99(dequantScript) && existsSync99(weightPath)) {
           try {
             execSync50(
               `"${venvPython2}" "${dequantScript}" --input "${weightPath}" --output "${cachedBf16}"`,
-              { timeout: 3e5, stdio: "pipe" }
+              { timeout: 3e5, stdio: "pipe", env: personaplexPythonEnv() }
             );
             if (existsSync99(cachedBf16)) {
               extraArgs.push("--moshi-weight", cachedBf16);
@@ -590751,7 +591475,7 @@ print('Converted')
         try {
           const mimiPath = execSync50(
             `"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', 'tokenizer-e351c8d8-checkpoint125.safetensors', token=False))"`,
-            { encoding: "utf8", timeout: 3e4, stdio: "pipe" }
+            { encoding: "utf8", timeout: 3e4, stdio: "pipe", env: personaplexPythonEnv() }
           ).trim();
           if (existsSync99(mimiPath)) extraArgs.push("--mimi-weight", mimiPath);
         } catch {
@@ -590759,7 +591483,7 @@ print('Converted')
         try {
           const tokPath = execSync50(
             `"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', 'tokenizer_spm_32k_3.model', token=False))"`,
-            { encoding: "utf8", timeout: 3e4, stdio: "pipe" }
+            { encoding: "utf8", timeout: 3e4, stdio: "pipe", env: personaplexPythonEnv() }
           ).trim();
           if (existsSync99(tokPath)) extraArgs.push("--tokenizer", tokPath);
         } catch {
@@ -590814,7 +591538,7 @@ print('Converted')
   ];
   if (hybridEnabled) serverArgs.push("--hybrid");
   if (needsOffload) serverArgs.push("--cpu-offload");
-  const serverEnv = { ...process.env };
+  const serverEnv = personaplexPythonEnv();
   if (hybridEnabled) {
     serverEnv["HYBRID_ENABLED"] = "1";
     serverEnv["HYBRID_LLM_MODEL"] = ollamaModel;
@@ -590959,7 +591683,7 @@ async function clonePersonaPlexVoice(inputWav, voiceName, onInfo) {
       "cuda"
     ], {
       stdio: ["ignore", "pipe", "pipe"],
-      env: { ...process.env },
+      env: personaplexPythonEnv(),
       cwd: PERSONAPLEX_DIR
     });
     let output = "";
@@ -591135,6 +591859,7 @@ var init_personaplex = __esm({
     init_render();
     init_daemon_registry();
     init_typed_node_events();
+    init_dist5();
     WEIGHT_REPOS = {
       original: { repo: "nvidia/personaplex-7b-v1", file: "model.safetensors", sizeGB: 15.6, needsToken: true },
       nf4: { repo: "cudabenchmarktest/personaplex-7b-nf4", file: "model-nf4.safetensors", sizeGB: 4.1, needsToken: false },
@@ -599715,6 +600440,11 @@ import {
   spawn as nodeSpawn
 } from "node:child_process";
 import { createRequire as createRequire6 } from "node:module";
+function voicePythonEnv(extra = {}) { // Builds the environment for voice-module Python/shell child processes: process.env overlaid with `extra`.
+  const env2 = { ...process.env, ...extra }; // fresh shallow copy; keys in `extra` win over process.env
+  applyMediaCudaDeviceFilterToEnv(env2, "tts"); // uses the "tts" filter kind; return value unused, presumably edits env2 in place — confirm in cuda_device_filter
+  return env2;
+}
 function sanitizeForTTS(text) { // Strips Markdown markup, emoji ranges, and symbol/box-drawing characters from text, collapses runs of whitespace to one space, and trims the result.
   return text.replace(/^#{1,6}\s+/gm, "").replace(/\*{1,3}([^*]+)\*{1,3}/g, "$1").replace(/_{1,3}([^_]+)_{1,3}/g, "$1").replace(/~~([^~]+)~~/g, "$1").replace(/`([^`]+)`/g, "$1").replace(/```[\s\S]*?```/g, "").replace(/\[([^\]]+)\]\([^)]+\)/g, "$1").replace(/!\[([^\]]*)\]\([^)]+\)/g, "$1").replace(/^[\s]*[-*+]\s+/gm, "").replace(/^[\s]*\d+\.\s+/gm, "").replace(/^>\s+/gm, "").replace(/^[-*_]{3,}$/gm, "").replace(/\[[ xX]\]\s*/g, "").replace(/[\u{1F600}-\u{1F64F}]/gu, "").replace(/[\u{1F300}-\u{1F5FF}]/gu, "").replace(/[\u{1F680}-\u{1F6FF}]/gu, "").replace(/[\u{1F1E0}-\u{1F1FF}]/gu, "").replace(/[\u{2600}-\u{26FF}]/gu, "").replace(/[\u{2700}-\u{27BF}]/gu, "").replace(/[\u{FE00}-\u{FE0F}]/gu, "").replace(/[\u{1F900}-\u{1F9FF}]/gu, "").replace(/[\u{1FA00}-\u{1FA6F}]/gu, "").replace(/[\u{1FA70}-\u{1FAFF}]/gu, "").replace(/[\u{200D}]/gu, "").replace(/[\u{20E3}]/gu, "").replace(/[✓✔✗✘✕✖⚠️⏸⏹⏵●○◆◇■□▪▫►▼▲◀⬆⬇⬅➡↑↓←→⇐⇒⇑⇓]/g, "").replace(/[─━│┃┌┐└┘├┤┬┴┼╔╗╚╝╠╣╦╩╬⎿⎾▕▏⏐░▒▓█⠀-⣿]/g, "").replace(/\s{2,}/g, " ").trim(); // NOTE(review): the inline-code and link replacements run before the fenced-code and image replacements, so those later two patterns look unable to match their intended input — confirm and reorder if so
 }
@@ -600620,6 +601350,7 @@ var init_voice = __esm({
     init_typed_node_events();
     init_render();
     init_daemon_registry();
+    init_dist5();
     VOICE_MODELS = {
       glados: {
         id: "glados",
@@ -602134,7 +602865,8 @@ except Exception as exc:
         return new Promise((resolve59, reject) => {
           const proc = nodeSpawn("sh", ["-c", command], {
             stdio: ["ignore", "pipe", "pipe"],
-            cwd: tmpdir20()
+            cwd: tmpdir20(),
+            env: voicePythonEnv()
           });
           let stdout = "";
           let stderr = "";
@@ -602909,7 +603641,7 @@ if __name__ == '__main__':
         const venvPy = luxttsVenvPy2();
         if (!existsSync109(venvPy)) return false;
         return new Promise((resolve59) => {
-          const env2 = { ...process.env, LUXTTS_REPO_PATH: luxttsRepoDir2() };
+          const env2 = voicePythonEnv({ LUXTTS_REPO_PATH: luxttsRepoDir2() });
           const daemon = nodeSpawn(venvPy, [luxttsInferScript2()], {
             stdio: ["pipe", "pipe", "pipe"],
             cwd: tmpdir20(),
@@ -625604,6 +626336,146 @@ var init_telegram_stats_menu = __esm({
   }
 });
+// packages/cli/src/tui/telegram-command-menu.ts
+function isBareTelegramGenerativeCommand(input) {
+  const trimmed = input.trim();
+  if (!trimmed.startsWith("/")) return false;
+  const parts = trimmed.split(/\s+/);
+  const name10 = (parts[0] ?? "").slice(1).split("@")[0]?.toLowerCase() ?? "";
+  return parts.length === 1 && GENERATIVE_COMMANDS.has(name10);
+}
+function buildTelegramCommandMenuItems(scope) {
+  const commands = listCommandRegistry({ includePlanned: false }).filter((cmd) => cmd.implementationStatus === "implemented").filter((cmd) => scope === "admin" || ["help", "start"].includes(cmd.name));
+  const seen = /* @__PURE__ */ new Set();
+  const items = [];
+  for (const cmd of commands) {
+    const signature = cmd.signatures[0]?.signature;
+    if (!signature || seen.has(cmd.name)) continue;
+    seen.add(cmd.name);
+    items.push({
+      label: `/${cmd.name}`,
+      command: `/${cmd.name}`,
+      description: cmd.signatures[0]?.description ?? signature,
+      adminOnly: scope === "admin"
+    });
+  }
+  return items.sort((a2, b) => a2.label.localeCompare(b.label));
+}
+function buildTelegramGenerativeMenuItems(commandName) {
+  const name10 = commandName.replace(/^\//, "").toLowerCase();
+  if (!GENERATIVE_COMMANDS.has(name10)) return [];
+  const title = name10[0].toUpperCase() + name10.slice(1);
+  return [
+    { label: `${title} models`, command: `/${name10} list`, description: `List available ${name10} models and hardware fit.` },
+    { label: `${title} setup`, command: `/${name10} setup`, description: `Show setup commands for the ${name10} backend.` }
+  ];
+}
+function encodeTelegramCommandMenuCallback(action, value2) {
+  const data = `${CALLBACK_PREFIX2}:${action[0]}:${value2}`;
+  return Buffer.byteLength(data, "utf8") <= MAX_CALLBACK_DATA_BYTES ? data : data.slice(0, MAX_CALLBACK_DATA_BYTES);
+}
+function decodeTelegramCommandMenuCallback(data) {
+  const parts = data.split(":");
+  if (parts.length !== 3 || parts[0] !== CALLBACK_PREFIX2) return null;
+  const action = parts[1] === "p" ? "page" : parts[1] === "r" ? "run" : parts[1] === "c" ? "close" : null;
+  if (!action) return null;
+  return { action, value: parts[2] ?? "" };
+}
+function renderTelegramCommandMenu(state) {
+  const totalPages = Math.max(1, Math.ceil(state.items.length / PAGE_SIZE2));
+  const page2 = Math.max(0, Math.min(state.page, totalPages - 1));
+  const start2 = page2 * PAGE_SIZE2;
+  const visible = state.items.slice(start2, start2 + PAGE_SIZE2);
+  const title = state.kind === "generative" ? "Generative command" : "Commands";
+  const scope = state.scope === "admin" ? "admin" : "public";
+  const lines = [
+    `<b>${escapeHTML3(title)}</b>`,
+    `<i>${escapeHTML3(scope)} scope - page ${page2 + 1}/${totalPages}</i>`,
+    "",
+    ...visible.flatMap((item) => [
+      `<code>${escapeHTML3(item.command)}</code>`,
+      escapeHTML3(item.description)
+    ])
+  ];
+  const keyboard = visible.map((item, offset) => [{
+    text: item.label.slice(0, 32),
+    callback_data: encodeTelegramCommandMenuCallback("run", start2 + offset)
+  }]);
+  const nav = [];
+  nav.push({ text: "Close", callback_data: encodeTelegramCommandMenuCallback("close", 0) });
+  if (page2 > 0) nav.push({ text: "Prev", callback_data: encodeTelegramCommandMenuCallback("page", page2 - 1) });
+  nav.push({ text: `${page2 + 1}/${totalPages}`, callback_data: encodeTelegramCommandMenuCallback("page", page2) });
+  if (page2 < totalPages - 1) nav.push({ text: "Next", callback_data: encodeTelegramCommandMenuCallback("page", page2 + 1) });
+  keyboard.push(nav);
+  return { text: lines.join("\n"), reply_markup: { inline_keyboard: keyboard } };
+}
+function handleTelegramCommandMenuCallback(data, state, now = Date.now()) {
+  const decoded = decodeTelegramCommandMenuCallback(data);
+  if (!decoded) return null;
+  if (state.expiresAt <= now) return null;
+  if (decoded.action === "close") return { close: true };
+  if (decoded.action === "page") {
+    const totalPages = Math.max(1, Math.ceil(state.items.length / PAGE_SIZE2));
+    const page2 = Math.max(0, Math.min(Number.parseInt(decoded.value, 10) || 0, totalPages - 1));
+    const newState = { ...state, page: page2 };
+    return { newState, render: renderTelegramCommandMenu(newState) };
+  }
+  const index = Number.parseInt(decoded.value, 10);
+  const item = Number.isFinite(index) ? state.items[index] : void 0;
+  return item ? { command: item.command } : null;
+}
+function escapeHTML3(text) {
+  return text.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
+}
+var CALLBACK_PREFIX2, PAGE_SIZE2, TTL_MS, MAX_CALLBACK_DATA_BYTES, GENERATIVE_COMMANDS, TelegramCommandMenuStateStore;
+var init_telegram_command_menu = __esm({
+  "packages/cli/src/tui/telegram-command-menu.ts"() {
+    "use strict";
+    init_command_registry();
+    CALLBACK_PREFIX2 = "ocm";
+    PAGE_SIZE2 = 8;
+    TTL_MS = 10 * 60 * 1e3;
+    MAX_CALLBACK_DATA_BYTES = 64;
+    GENERATIVE_COMMANDS = /* @__PURE__ */ new Set(["image", "video", "sound", "music"]);
+    TelegramCommandMenuStateStore = class {
+      states = /* @__PURE__ */ new Map();
+      key(chatId, messageId) {
+        return `${chatId}:${messageId}`;
+      }
+      create(input, now = Date.now()) {
+        return {
+          ...input,
+          createdAt: now,
+          expiresAt: now + TTL_MS
+        };
+      }
+      set(state) {
+        this.states.set(this.key(state.chatId, state.messageId), state);
+      }
+      get(chatId, messageId, now = Date.now()) {
+        const state = this.states.get(this.key(chatId, messageId));
+        if (!state) return void 0;
+        if (state.expiresAt <= now) {
+          this.delete(chatId, messageId);
+          return void 0;
+        }
+        return state;
+      }
+      delete(chatId, messageId) {
+        this.states.delete(this.key(chatId, messageId));
+      }
+      prune(now = Date.now()) {
+        for (const [key, state] of this.states) {
+          if (state.expiresAt <= now) this.states.delete(key);
+        }
+      }
+      clear() {
+        this.states.clear();
+      }
+    };
+  }
+});
 // packages/cli/src/tui/telegram-creative-tools.ts
 import { createCipheriv as createCipheriv4, createDecipheriv as createDecipheriv4, randomBytes as randomBytes23 } from "node:crypto";
 import {
@@ -630996,6 +631868,7 @@ var init_telegram_bridge = __esm({
     init_command_registry();
     init_telegram_help_menu();
     init_telegram_stats_menu();
+    init_telegram_command_menu();
     init_scoped_personality();
     init_voice_soul();
     init_telegram_creative_tools();
@@ -631606,6 +632479,8 @@ Telegram link integrity contract:
       statsMenuTimers = null;
       /** Prune expired stats menu states every 5 minutes */
       statsMenuPruneTimer = null;
+      /** Telegram-native command and generative command menus */
+      telegramCommandMenuStates = new TelegramCommandMenuStateStore();
       /** Command handler for admin DM slash commands (wired from interactive.ts) */
       commandHandler = null;
       /** Callback fired after a Telegram user completes the TUI-only admin auth challenge */
@@ -631938,6 +632813,10 @@ Telegram link integrity contract:
         const name10 = this.telegramSlashName(input);
         return name10 === "help" || name10 === "h" || name10 === "commands" || name10 === "cmds";
       }
+      isTelegramCommandsMenuCommand(input) {
+        const name10 = this.telegramSlashName(input);
+        return name10 === "commands" || name10 === "cmds";
+      }
       isTelegramStatsCommand(input) {
         const name10 = this.telegramSlashName(input);
         return name10 === "stats" || name10 === "metrics";
@@ -632531,6 +633410,49 @@ ${message2}`)
           this.helpMenuTimers.startTimer(state);
         }
       }
+      async replyWithTelegramCommandMenu(msg, isAdmin, kind, commandName) {
+        const scope = isAdmin ? "admin" : "public";
+        const items = kind === "generative" ? buildTelegramGenerativeMenuItems(commandName ?? "") : buildTelegramCommandMenuItems(scope);
+        if (items.length === 0) {
+          await this.replyToTelegramMessage(msg, "No Telegram command menu entries are available.");
+          return;
+        }
+        if (msg.guestQueryId || !isAdmin) {
+          const lines = items.slice(0, 24).map((item) => `${item.command} - ${item.description}`);
+          const text = ["Available commands:", "", ...lines].join("\n");
+          if (msg.guestQueryId) {
+            await this.answerGuestQuery(msg.guestQueryId, text);
+          } else {
+            await this.replyToTelegramMessage(msg, text);
+          }
+          return;
+        }
+        const previewState = this.telegramCommandMenuStates.create({
+          chatId: msg.chatId,
+          messageId: 0,
+          invokerMessageId: msg.messageId,
+          fromUserId: msg.fromUserId ?? 0,
+          scope,
+          kind,
+          page: 0,
+          items
+        });
+        const menu = renderTelegramCommandMenu(previewState);
+        const sent = await this.apiCall("sendMessage", {
+          chat_id: msg.chatId,
+          text: menu.text,
+          parse_mode: "HTML",
+          reply_markup: JSON.stringify(menu.reply_markup),
+          ...msg.chatType !== "private" ? { reply_to_message_id: msg.messageId } : {}
+        });
+        if (sent.ok && sent.result?.message_id) {
+          this.telegramCommandMenuStates.prune();
+          this.telegramCommandMenuStates.set({
+            ...previewState,
+            messageId: sent.result.message_id
+          });
+        }
+      }
       collectSessionMetricsSnapshot() {
         if (this._metricsProvider) {
           try {
@@ -637339,6 +638261,7 @@ ${TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT}`);
         this.telegramActiveWorkGenerations.clear();
         this.telegramActiveWorkStartedAtMs.clear();
         this.telegramAdminLivePanels.clear();
+        this.telegramCommandMenuStates.clear();
         this.flushTelegramViewWrites();
         this.flushTelegramTuiWrites();
         this.telegramActiveInferences.clear();
@@ -637717,6 +638640,10 @@ ${summary}` : ""
           return;
         }
         const isAdmin = this.isAdminUser(msg);
+        if (msg.text.trim().startsWith("/") && this.isTelegramCommandsMenuCommand(normalizedCommandText)) {
+          await this.replyWithTelegramCommandMenu(msg, isAdmin, "commands");
+          return;
+        }
         if (msg.text.trim().startsWith("/") && this.isTelegramHelpCommand(normalizedCommandText)) {
           await this.replyWithTelegramHelp(msg, isAdmin);
           return;
@@ -637751,6 +638678,10 @@ ${summary}` : ""
         const toolContext = this.resolveToolContext(msg, isAdmin);
         const isAdminDM = toolContext === "telegram-admin-dm";
         const sessionKey = this.sessionKeyForMessage(msg);
+        if (isAdminDM && isBareTelegramGenerativeCommand(normalizedCommandText)) {
+          await this.replyWithTelegramCommandMenu(msg, isAdmin, "generative", telegramSlash);
+          return;
+        }
         if (msg.text.trim().startsWith("/") && TELEGRAM_REMINDER_SLASH_COMMANDS.has(telegramSlash)) {
           await this.handleTelegramReminderSlash(msg, normalizedCommandText, toolContext);
           return;
@@ -640485,6 +641416,90 @@ Scoped workspace: ${scopedRoot}`,
         return Boolean(result.ok);
       }
       async handleTelegramCallbackQuery(callback) {
+        const commandMenuDecoded = decodeTelegramCommandMenuCallback(callback.data);
+        if (commandMenuDecoded) {
+          let answerText2 = "";
+          let alert2 = false;
+          let answered = false;
+          try {
+            const chatId = callback.chatId;
+            const messageId = callback.messageId;
+            if (!chatId || !messageId) {
+              answerText2 = "Cannot identify menu message.";
+              alert2 = true;
+              return;
+            }
+            const menuState = this.telegramCommandMenuStates.get(chatId, messageId);
+            if (!menuState) {
+              answerText2 = "This command menu expired. Send /commands for a fresh one.";
+              alert2 = true;
+              return;
+            }
+            const isAdmin = this.isAdminActor(callback.fromUserId, callback.username);
+            if (callback.fromUserId !== menuState.fromUserId && !isAdmin) {
+              answerText2 = "Only the user who opened this menu can use it.";
+              alert2 = true;
+              return;
+            }
+            if (!isAdmin) {
+              answerText2 = "That command requires Telegram admin authentication.";
+              alert2 = true;
+              return;
+            }
+            const result = handleTelegramCommandMenuCallback(callback.data, menuState);
+            if (!result) {
+              answerText2 = "Unknown or expired command menu action.";
+              alert2 = true;
+              return;
+            }
+            if (result.close) {
+              this.telegramCommandMenuStates.delete(chatId, messageId);
+              await this.apiCall("deleteMessage", { chat_id: chatId, message_id: messageId }).catch(() => {
+              });
+              if (menuState.invokerMessageId) {
+                await this.apiCall("deleteMessage", { chat_id: chatId, message_id: menuState.invokerMessageId }).catch(() => {
+                });
+              }
+              answered = await this.answerCallbackQuery(callback.id).catch(() => false);
+              return;
+            }
+            if (result.render && result.newState) {
+              this.telegramCommandMenuStates.set(result.newState);
+              await this.apiCall("editMessageText", {
+                chat_id: chatId,
+                message_id: messageId,
+                text: result.render.text,
+                parse_mode: "HTML",
+                reply_markup: JSON.stringify(result.render.reply_markup)
+              });
+              return;
+            }
+            if (result.command) {
+              if (!this.commandHandler) {
+                answerText2 = "No command handler is available.";
+                alert2 = true;
+                return;
+              }
+              answered = await this.answerCallbackQuery(callback.id, `Running ${result.command}...`).catch(() => false);
+              const output = await this.commandHandler(result.command);
+              if (output) {
+                await this.sendMessageHTML(chatId, convertMarkdownToTelegramHTML(output));
+              }
+              return;
+            }
+          } catch (err) {
+            answerText2 = err instanceof Error ? err.message : String(err);
+            alert2 = true;
+          } finally {
+            if (answered) {
+            } else if (answerText2) {
+              await this.answerCallbackQuery(callback.id, answerText2.slice(0, 180), alert2).catch(() => false);
+            } else {
+              await this.answerCallbackQuery(callback.id).catch(() => false);
+            }
+          }
+          return;
+        }
         const helpDecoded = decodeHelpCallback(callback.data);
         if (helpDecoded) {
           let answerText2 = "";
@@ -673904,14 +674919,24 @@ Respond concisely and safely. Remember: you are talking to the general public.`;
       telegramBridge.setCommandHandler(async (input) => {
         const captured = [];
         const origWrite = process.stdout.write;
-        process.stdout.write = function(chunk, ..._args) {
+        process.stdout.write = function(chunk, ...args) {
           if (typeof chunk === "string") {
             captured.push(chunk);
+          } else if (Buffer.isBuffer(chunk)) {
+            captured.push(chunk.toString("utf8"));
           }
+          const cb = args.find((arg) => typeof arg === "function");
+          if (cb) cb();
           return true;
         };
         try {
-          const result = await handleSlashCommand(input, commandCtx);
+          const result = await runWithNonInteractiveSelectSurface(
+            () => handleSlashCommand(input, commandCtx),
+            {
+              maxItems: 24,
+              hint: "(Telegram: interactive menu shown as text; use concrete slash arguments or open the TUI to select)"
+            }
+          );
           process.stdout.write = origWrite;
           if (statusBar.isActive) statusBar.handleResize();
           if (result === "exit") {
@@ -673925,7 +674950,7 @@ Respond concisely and safely. Remember: you are talking to the general public.`;
             return `Skill invoked: ${result.name}`;
           }
           const raw = captured.join("");
-          const clean5 = raw.replace(/\x1B\[[0-9;]*[A-Za-z]/g, "").replace(/\x1B\][^\x07]*\x07/g, "").replace(/\x1B[()][A-Z0-9]/g, "").replace(/\x1B\[?\??[0-9;]*[a-zA-Z]/g, "").replace(/\x1B/g, "").replace(/[─━│┃┌┐└┘├┤┬┴┼╔╗╚╝╠╣╦╩╬⎿⎾▕▏⏐]/g, "").replace(/\n{3,}/g, "\n\n").trim();
+          const clean5 = raw.replace(/\x1B(?:\[[\d;?]*[ -/]*[@-~]|\][^\x07\x1B]*(?:\x07|\x1B\\)?|[@-Z\\-_])/g, "").replace(/\x1B/g, "").replace(/[─━│┃┌┐└┘├┤┬┴┼╔╗╚╝╠╣╦╩╬⎿⎾▕▏⏐]/g, "").replace(/\n{3,}/g, "\n\n").trim();
           if (!clean5) return null;
           return clean5.length > 3900 ? clean5.slice(0, 3900) + "\n..." : clean5;
         } catch (err) {