omnius 1.0.186 → 1.0.188

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1412,6 +1412,18 @@ var init_tool_executor = __esm({
1412
1412
  import { EventEmitter } from "node:events";
1413
1413
  import { totalmem, freemem } from "node:os";
1414
1414
  import { exec } from "node:child_process";
1415
/**
 * Drop repeated model records, keeping the first occurrence of each
 * `host:name` pair and preserving the input order.
 *
 * @param models Iterable of records exposing `host` and `name`.
 * @returns A new array holding the first record seen for every `host:name` key.
 */
function dedupeLoadedModels(models) {
  // A Map iterates in insertion order, so first-seen order is retained.
  const firstByKey = new Map();
  for (const entry of models) {
    const id = `${entry.host}:${entry.name}`;
    if (!firstByKey.has(id)) {
      firstByKey.set(id, entry);
    }
  }
  return [...firstByKey.values()];
}
1415
1427
  function ramSnapshotMB() {
1416
1428
  const total = Math.round(totalmem() / (1024 * 1024));
1417
1429
  const free = Math.round(freemem() / (1024 * 1024));
@@ -1690,6 +1702,104 @@ var init_model_broker = __esm({
1690
1702
  this.emit("rejected", spec, reason);
1691
1703
  return { kind: "reject", reason };
1692
1704
  }
1705
+ /**
1706
+ * Acquire a short-lived load lease for media/subprocess generation.
1707
+ *
1708
+ * Media generation often needs to temporarily free VRAM occupied by Ollama
1709
+ * chat models. This helper refreshes Ollama state, asks the broker what must
1710
+ * be evicted, unloads those Ollama models with keep_alive=0, and returns a
1711
+ * lease whose release() clears transient inflight state, unloads any
1712
+ * Ollama-hosted requested model, and warms the evicted Ollama models again.
1713
+ */
1714
+ async acquireTransientModelLoad(spec, options2 = {}) {
1715
+ const reason = options2.reason ?? `${spec.domain}-transient-load`;
1716
+ const evictedModels = [];
1717
+ let gpuIndex = null;
1718
+ let admitted = false;
1719
+ await this.pollOnce().catch(() => {
1720
+ });
1721
+ for (let attempt = 0; attempt < 4; attempt++) {
1722
+ const decision2 = await this.ensureModelLoadable(spec);
1723
+ if (decision2.kind === "wait-for-inflight") {
1724
+ const waited = await decision2.promise.catch((err) => ({
1725
+ kind: "reject",
1726
+ reason: err instanceof Error ? err.message : String(err)
1727
+ }));
1728
+ if (waited.kind === "ok") {
1729
+ gpuIndex = waited.gpuIndex ?? null;
1730
+ admitted = true;
1731
+ break;
1732
+ }
1733
+ if (waited.kind === "evict") {
1734
+ for (const target of waited.evictTargets) {
1735
+ if (await this.evict(target.host, target.name, reason))
1736
+ evictedModels.push(target);
1737
+ }
1738
+ await this.pollOnce().catch(() => {
1739
+ });
1740
+ continue;
1741
+ }
1742
+ if (waited.kind === "degrade")
1743
+ return waited;
1744
+ if (waited.kind === "reject")
1745
+ return waited;
1746
+ continue;
1747
+ }
1748
+ if (decision2.kind === "ok") {
1749
+ gpuIndex = decision2.gpuIndex ?? null;
1750
+ admitted = true;
1751
+ break;
1752
+ }
1753
+ if (decision2.kind === "evict") {
1754
+ for (const target of decision2.evictTargets) {
1755
+ const before = this._loaded.get(`${target.host}:${target.name}`) ?? target;
1756
+ if (await this.evict(target.host, target.name, reason)) {
1757
+ evictedModels.push(before);
1758
+ }
1759
+ }
1760
+ await this.pollOnce().catch(() => {
1761
+ });
1762
+ continue;
1763
+ }
1764
+ if (decision2.kind === "degrade")
1765
+ return decision2;
1766
+ return decision2;
1767
+ }
1768
+ if (!admitted) {
1769
+ return {
1770
+ kind: "reject",
1771
+ reason: `could not acquire transient load lease for ${spec.host}:${spec.name} after repeated evictions`
1772
+ };
1773
+ }
1774
+ const evictedOllamaModels = dedupeLoadedModels(evictedModels.filter((m2) => m2.host === "ollama"));
1775
+ const broker = this;
1776
+ let released = false;
1777
+ return {
1778
+ kind: "ok",
1779
+ lease: {
1780
+ spec,
1781
+ gpuIndex,
1782
+ evictedModels: dedupeLoadedModels(evictedModels),
1783
+ evictedOllamaModels,
1784
+ async release() {
1785
+ if (released)
1786
+ return;
1787
+ released = true;
1788
+ broker.clearInflight(spec.host, spec.name);
1789
+ if ((options2.unloadRequestedOllama ?? true) && spec.host === "ollama") {
1790
+ await broker.unloadOllamaModel(spec.name, `${reason}-complete`).catch(() => false);
1791
+ }
1792
+ if (options2.restoreOllama !== false && evictedOllamaModels.length > 0) {
1793
+ await broker.restoreOllamaModels(evictedOllamaModels, {
1794
+ keepAlive: options2.restoreKeepAlive ?? "30m"
1795
+ });
1796
+ }
1797
+ await broker.pollOnce().catch(() => {
1798
+ });
1799
+ }
1800
+ }
1801
+ };
1802
+ }
1693
1803
  /**
1694
1804
  * Register a model that has been successfully loaded.
1695
1805
  * Callers MUST call this after a successful load so the broker can track LRU.
@@ -1743,6 +1853,66 @@ var init_model_broker = __esm({
1743
1853
  this.emit("evicted", m2, reason);
1744
1854
  return actively;
1745
1855
  }
1856
+ /** Force-unload an Ollama model even when it is not currently tracked. */
1857
+ async unloadOllamaModel(modelName, reason = "ollama-unload") {
1858
+ const key = `ollama:${modelName}`;
1859
+ const existing = this._loaded.get(key);
1860
+ const ok3 = await this.ollamaUnload(modelName).catch(() => false);
1861
+ if (existing) {
1862
+ this._loaded.delete(key);
1863
+ this.emit("evicted", existing, reason);
1864
+ }
1865
+ return ok3;
1866
+ }
1867
+ /** Best-effort warm/reload of an Ollama model after temporary eviction. */
1868
+ async warmOllamaModel(modelName, keepAlive = "30m") {
1869
+ const bodies = [
1870
+ { model: modelName, prompt: "", stream: false, keep_alive: keepAlive, options: { num_predict: 0 } },
1871
+ { model: modelName, prompt: "", stream: false, keep_alive: keepAlive, options: { num_predict: 1 } }
1872
+ ];
1873
+ for (const body of bodies) {
1874
+ try {
1875
+ const res = await fetch(`${this._ollamaBaseUrl}/api/generate`, {
1876
+ method: "POST",
1877
+ headers: { "Content-Type": "application/json" },
1878
+ body: JSON.stringify(body),
1879
+ signal: AbortSignal.timeout(12e4)
1880
+ });
1881
+ if (!res.ok)
1882
+ continue;
1883
+ await this.refreshOllamaPs().catch(() => {
1884
+ });
1885
+ return true;
1886
+ } catch {
1887
+ }
1888
+ }
1889
+ try {
1890
+ const res = await fetch(`${this._ollamaBaseUrl}/api/generate`, {
1891
+ method: "POST",
1892
+ headers: { "Content-Type": "application/json" },
1893
+ body: JSON.stringify({
1894
+ model: modelName,
1895
+ stream: false,
1896
+ keep_alive: keepAlive
1897
+ }),
1898
+ signal: AbortSignal.timeout(12e4)
1899
+ });
1900
+ if (!res.ok)
1901
+ return false;
1902
+ await this.refreshOllamaPs().catch(() => {
1903
+ });
1904
+ return true;
1905
+ } catch {
1906
+ return false;
1907
+ }
1908
+ }
1909
+ /** Restore a set of previously evicted Ollama models, oldest first. */
1910
+ async restoreOllamaModels(models, options2 = {}) {
1911
+ const unique2 = dedupeLoadedModels(models.filter((m2) => m2.host === "ollama")).sort((a2, b) => a2.lastUsedAt - b.lastUsedAt);
1912
+ for (const model of unique2) {
1913
+ await this.warmOllamaModel(model.name, options2.keepAlive ?? "30m").catch(() => false);
1914
+ }
1915
+ }
1746
1916
  // ------------------------------------------------------------------
1747
1917
  // Internal — Ollama
1748
1918
  // ------------------------------------------------------------------
@@ -1885,7 +2055,7 @@ var init_model_broker = __esm({
1885
2055
  );
1886
2056
  const idle = (m2) => now - m2.lastUsedAt > this.idleEvictMs;
1887
2057
  const onTargetGpu = (m2) => req2.targetGpu === void 0 || req2.targetGpu === null ? true : m2.gpuIndex === req2.targetGpu;
1888
- const evictable = [...this._loaded.values()].filter((m2) => m2.priority <= req2.requestingPriority).filter(sameDomainOk).filter(onTargetGpu).sort((a2, b) => {
2058
+ const evictable = [...this._loaded.values()].filter((m2) => m2.priority <= req2.requestingPriority).filter(sameDomainOk).filter(onTargetGpu).filter((m2) => !this.hasActiveSlotForModel(m2)).sort((a2, b) => {
1889
2059
  const aIdle = idle(a2) ? 0 : 1;
1890
2060
  const bIdle = idle(b) ? 0 : 1;
1891
2061
  if (aIdle !== bIdle)
@@ -1931,6 +2101,13 @@ var init_model_broker = __esm({
1931
2101
  n2++;
1932
2102
  return n2;
1933
2103
  }
2104
+ hasActiveSlotForModel(model) {
2105
+ for (const slot of this._activeSlots.values()) {
2106
+ if (slot.model === model.name)
2107
+ return true;
2108
+ }
2109
+ return false;
2110
+ }
1934
2111
  // ------------------------------------------------------------------
1935
2112
  // Internal — fallback resolution
1936
2113
  // ------------------------------------------------------------------
@@ -22761,6 +22938,17 @@ function evictModelsToFreeSpace(args) {
22761
22938
  writeMeta(meta);
22762
22939
  return { evicted, bytesFreed, finalFreeBytes: disk.freeBytes };
22763
22940
  }
22941
/**
 * Estimate how many bytes could be freed by evicting every cached repo that
 * is not listed in `keepRepos`.
 *
 * For each candidate the measured size is used when it is truthy, otherwise
 * the recorded `sizeBytes`, otherwise 0; negative values count as 0.
 *
 * @param keepRepos Optional list of repo identifiers that must not be counted.
 * @returns Sum of the per-repo estimates, in bytes.
 */
function estimateReclaimableCacheBytes(keepRepos) {
  const pinned = new Set(keepRepos ?? []);
  let reclaimable = 0;
  for (const entry of readMeta().entries) {
    if (!pinned.has(entry.repo)) {
      const bytes = measureRepoCacheBytes(entry.repo) || entry.sizeBytes || 0;
      reclaimable += Math.max(0, bytes);
    }
  }
  return reclaimable;
}
22764
22952
  function ensureDiskSpaceForDownload(args) {
22765
22953
  ensureUnifiedCacheDirs();
22766
22954
  const safetyMargin = args.safetyMarginBytes ?? 1 * 1024 ** 3;
@@ -22769,6 +22957,10 @@ function ensureDiskSpaceForDownload(args) {
22769
22957
  if (disk.freeBytes >= target) {
22770
22958
  return { ok: true, evicted: [], freeBytes: disk.freeBytes };
22771
22959
  }
22960
+ const reclaimableBytes = estimateReclaimableCacheBytes(args.keepRepos);
22961
+ if (disk.freeBytes + reclaimableBytes < target) {
22962
+ throw new InsufficientDiskSpaceError(args.approxDownloadBytes, disk.freeBytes, disk.totalBytes, []);
22963
+ }
22772
22964
  const evictionResult = evictModelsToFreeSpace({
22773
22965
  neededBytes: args.approxDownloadBytes,
22774
22966
  keepRepos: args.keepRepos,
@@ -25161,11 +25353,154 @@ ${content}`
25161
25353
  }
25162
25354
  });
25163
25355
 
25356
+ // packages/execution/dist/tools/cuda-device-filter.js
25357
+ import { execFileSync as execFileSync3 } from "node:child_process";
25358
/**
 * Normalise an environment value: trim it and treat missing or
 * whitespace-only input as absent.
 *
 * @param value2 Raw string, or undefined/null.
 * @returns The trimmed string, or undefined when nothing is left.
 */
function cleanEnvValue(value2) {
  return value2?.trim() || void 0;
}
25362
/**
 * Interpret an environment value as a boolean switch.
 *
 * @param value2 Raw string, or undefined/null.
 * @returns true only for `1`, `true`, `yes` or `on` (any letter case,
 *   surrounding whitespace ignored).
 */
function envFlagEnabled(value2) {
  const normalised = (value2?.trim() ?? "").toLowerCase();
  return ["1", "true", "yes", "on"].includes(normalised);
}
25365
/**
 * Extract the first decimal number (`N` or `N.M`) found in a string.
 *
 * @param value2 Text such as "8.6"; may be undefined/null.
 * @returns The number, or null when no digits are present or the value is
 *   not finite.
 */
function parseCudaComputeCapability(value2) {
  const found = value2?.match(/\d+(?:\.\d+)?/);
  const parsed = found ? Number(found[0]) : Number.NaN;
  return Number.isFinite(parsed) ? parsed : null;
}
25372
/**
 * Build the environment-variable prefix for a media modality.
 *
 * @param modality Modality name, e.g. "asr" or "image".
 * @returns `OMNIUS_` followed by the upper-cased modality.
 */
function modalityEnvPrefix(modality) {
  const suffix = modality.toUpperCase();
  return `OMNIUS_${suffix}`;
}
25375
/**
 * Resolve the minimum CUDA compute capability required for a modality.
 *
 * Reads `<PREFIX>_MIN_CUDA_CC` first and `OMNIUS_MEDIA_MIN_CUDA_CC` only when
 * the former yields no number; a missing or non-positive result falls back
 * to `DEFAULT_MEDIA_MIN_CUDA_COMPUTE_CAPABILITY`.
 *
 * @param modality Modality name used to build the variable prefix.
 * @param env2 Environment map to read from.
 * @returns The configured positive threshold, or the module default.
 */
function parseMinCudaComputeCapability(modality, env2) {
  const modalityKey = `${modalityEnvPrefix(modality)}_MIN_CUDA_CC`;
  const configured = parseCudaComputeCapability(env2[modalityKey]) ?? parseCudaComputeCapability(env2["OMNIUS_MEDIA_MIN_CUDA_CC"]);
  if (configured && configured > 0) {
    return configured;
  }
  return DEFAULT_MEDIA_MIN_CUDA_COMPUTE_CAPABILITY;
}
25380
/**
 * Split a comma-separated device list into trimmed, non-empty tokens.
 *
 * @param value2 Raw list such as "0, 1"; undefined/null yields an empty list.
 * @returns Array of tokens in their original order.
 */
function splitCudaVisibleDevices(value2) {
  const tokens = [];
  for (const piece of (value2 ?? "").split(",")) {
    const token = piece.trim();
    if (token) {
      tokens.push(token);
    }
  }
  return tokens;
}
25383
/**
 * Parse CSV rows of the form `index, uuid, name..., compute_cap`.
 *
 * Blank rows, rows with fewer than four comma-separated fields, and rows
 * whose index is not a non-negative integer are skipped. Any fields between
 * the uuid and the last field are re-joined with ", " to form the name.
 *
 * @param text Multi-line CSV text (LF or CRLF line endings).
 * @returns Array of `{ index, uuid, name, computeCapability }`.
 */
function parseCudaDeviceInfo(text) {
  const devices = [];
  for (const rawLine of text.split(/\r?\n/)) {
    const row = rawLine.trim();
    if (!row) {
      continue;
    }
    const fields = row.split(",").map((field) => field.trim());
    if (fields.length < 4) {
      continue;
    }
    // First field is the index, second the uuid, last the capability;
    // whatever remains in between belongs to the device name.
    const [indexField, uuidField, ...nameFields] = fields;
    const capabilityField = nameFields.pop();
    const index = Number.parseInt(indexField ?? "", 10);
    if (!Number.isFinite(index) || index < 0) {
      continue;
    }
    devices.push({
      index,
      uuid: cleanEnvValue(uuidField),
      name: cleanEnvValue(nameFields.join(", ")),
      computeCapability: parseCudaComputeCapability(capabilityField)
    });
  }
  return devices;
}
25402
/**
 * Query `nvidia-smi` for index, uuid, name and compute capability of each GPU.
 *
 * @returns Parsed device list, or an empty array when the command fails,
 *   times out (5 s), or is unavailable.
 */
function detectCudaDevices() {
  const query = ["--query-gpu=index,uuid,name,compute_cap", "--format=csv,noheader,nounits"];
  const spawnOptions = {
    encoding: "utf8",
    timeout: 5e3,
    stdio: ["ignore", "pipe", "ignore"]
  };
  try {
    return parseCudaDeviceInfo(execFileSync3("nvidia-smi", query, spawnOptions));
  } catch {
    return [];
  }
}
25414
/**
 * Decide which value `CUDA_VISIBLE_DEVICES` should have for a media subprocess.
 *
 * Resolution order:
 *  1. An explicit override: `<PREFIX>_CUDA_VISIBLE_DEVICES`, then
 *     `OMNIUS_MEDIA_CUDA_VISIBLE_DEVICES`, then (audio only) `OMNIUS_AUDIO_GPU`.
 *  2. If filtering is disabled, or no device meets the minimum compute
 *     capability, the current `CUDA_VISIBLE_DEVICES` (possibly undefined).
 *  3. If `CUDA_VISIBLE_DEVICES` is set and consists only of numeric or
 *     `GPU-…` tokens, the compatible subset of it — or every compatible
 *     index when none of the requested tokens is compatible. A list with any
 *     other kind of token is returned unchanged.
 *  4. Otherwise the comma-joined indices of all compatible devices.
 *
 * @param args `{ modality, env?, devices?, minComputeCapability? }`; `env`
 *   defaults to `process.env`, `devices` to `detectCudaDevices()`.
 * @returns The device list string, or undefined when nothing applies.
 */
function resolveMediaCudaVisibleDevicesForEnv(args) {
  const env2 = args.env ?? process.env;
  const prefix = modalityEnvPrefix(args.modality);
  const overrideKeys = [`${prefix}_CUDA_VISIBLE_DEVICES`, "OMNIUS_MEDIA_CUDA_VISIBLE_DEVICES"];
  if (args.modality === "audio") {
    overrideKeys.push("OMNIUS_AUDIO_GPU");
  }
  for (const key of overrideKeys) {
    const override = cleanEnvValue(env2[key]);
    if (override) {
      return override;
    }
  }
  const inherited = cleanEnvValue(env2["CUDA_VISIBLE_DEVICES"]);
  const filterDisabled = envFlagEnabled(env2[`${prefix}_DISABLE_CUDA_FILTER`]) || envFlagEnabled(env2["OMNIUS_MEDIA_DISABLE_CUDA_FILTER"]);
  if (filterDisabled) {
    return inherited;
  }
  const devices = args.devices ?? detectCudaDevices();
  const minComputeCapability = args.minComputeCapability ?? parseMinCudaComputeCapability(args.modality, env2);
  const compatible = devices.filter((device) => device.computeCapability !== null && device.computeCapability >= minComputeCapability);
  if (compatible.length === 0) {
    return inherited;
  }
  const allCompatibleIndices = compatible.map((device) => String(device.index)).join(",");
  if (!inherited) {
    return allCompatibleIndices;
  }
  const requested = splitCudaVisibleDevices(inherited);
  const filterable = requested.length > 0 && requested.every((token) => /^\d+$/.test(token) || token.startsWith("GPU-"));
  if (!filterable) {
    return inherited;
  }
  // A requested token is acceptable when it names a compatible device by index or uuid.
  const allowed = new Set(compatible.flatMap((device) => device.uuid ? [String(device.index), device.uuid] : [String(device.index)]));
  const kept = requested.filter((token) => allowed.has(token));
  return kept.length > 0 ? kept.join(",") : allCompatibleIndices;
}
25445
/**
 * Check whether a broker-selected GPU index satisfies the modality's minimum
 * compute capability.
 *
 * The check is permissive: it returns true when filtering is disabled, when
 * no devices are known, when the index is not among the known devices, or
 * when that device's capability is null.
 *
 * @param gpuIndex Device index chosen by the broker.
 * @param modality Modality name used to look up env settings.
 * @param env2 Environment map (defaults to `process.env`).
 * @param devices Known devices (defaults to `detectCudaDevices()`).
 * @returns false only when the device is known and below the threshold.
 */
function mediaBrokerGpuIndexIsCompatible(gpuIndex, modality, env2 = process.env, devices = detectCudaDevices()) {
  const prefix = modalityEnvPrefix(modality);
  const filterDisabled = envFlagEnabled(env2[`${prefix}_DISABLE_CUDA_FILTER`]) || envFlagEnabled(env2["OMNIUS_MEDIA_DISABLE_CUDA_FILTER"]);
  if (filterDisabled || devices.length === 0) {
    return true;
  }
  const required = parseMinCudaComputeCapability(modality, env2);
  const selected = devices.find((candidate) => candidate.index === gpuIndex);
  if (!selected || selected.computeCapability === null) {
    return true;
  }
  return selected.computeCapability >= required;
}
25458
/**
 * Apply the media CUDA device filter to an environment map, in place.
 *
 * When a device list is resolved for the modality, `CUDA_VISIBLE_DEVICES` is
 * set to it and `PYTORCH_NVML_BASED_CUDA_CHECK` is ensured to have a value.
 * A value already present in `env2` is kept; otherwise the parent process's
 * value is used, and "1" is the final fallback. (Previously only
 * `process.env` was consulted, so a value supplied in `env2` was overwritten.)
 *
 * @param env2 Environment map that will be handed to a subprocess; mutated.
 * @param modality Modality name, e.g. "asr" or "image".
 * @returns The same `env2` object.
 */
function applyMediaCudaDeviceFilterToEnv(env2, modality) {
  const cudaVisibleDevices = resolveMediaCudaVisibleDevicesForEnv({ modality, env: env2 });
  if (cudaVisibleDevices) {
    env2["CUDA_VISIBLE_DEVICES"] = cudaVisibleDevices;
    env2["PYTORCH_NVML_BASED_CUDA_CHECK"] = env2["PYTORCH_NVML_BASED_CUDA_CHECK"] ?? process.env["PYTORCH_NVML_BASED_CUDA_CHECK"] ?? "1";
  }
  return env2;
}
25466
+ var DEFAULT_MEDIA_MIN_CUDA_COMPUTE_CAPABILITY;
25467
+ var init_cuda_device_filter = __esm({
25468
+ "packages/execution/dist/tools/cuda-device-filter.js"() {
25469
+ "use strict";
25470
+ DEFAULT_MEDIA_MIN_CUDA_COMPUTE_CAPABILITY = 7.5;
25471
+ }
25472
+ });
25473
+
25164
25474
  // packages/execution/dist/tools/transcribe-tool.js
25165
25475
  import { existsSync as existsSync29, mkdirSync as mkdirSync13, writeFileSync as writeFileSync14, readFileSync as readFileSync22, unlinkSync as unlinkSync3, readdirSync as readdirSync13 } from "node:fs";
25166
25476
  import { join as join32, basename as basename6, extname as extname3, resolve as resolve17 } from "node:path";
25167
25477
  import { homedir as homedir10 } from "node:os";
25168
- import { execFileSync as execFileSync3, execSync as execSync15 } from "node:child_process";
25478
+ import { execFileSync as execFileSync4, execSync as execSync15 } from "node:child_process";
25479
/**
 * Build the environment for transcription subprocesses: a copy of
 * `process.env` overlaid with `extra`, then passed through the media CUDA
 * device filter for the "asr" modality.
 *
 * @param extra Additional variables that override the inherited ones.
 * @returns The new environment object.
 */
function transcriptionPythonEnv(extra = {}) {
  // The filter mutates and returns the object it is given.
  return applyMediaCudaDeviceFilterToEnv({ ...process.env, ...extra }, "asr");
}
25484
/**
 * Run `fn` with `process.env` temporarily overridden, restoring the previous
 * values afterwards whether `fn` resolves or throws.
 *
 * An entry whose value is undefined deletes that variable for the duration.
 * NOTE(review): `process.env` is process-global, so overlapping calls would
 * observe each other's overrides — confirm callers do not run concurrently.
 *
 * @param env2 Map of variable name to string value (or undefined to unset).
 * @param fn Callback to run; its awaited result is returned.
 * @returns Whatever `fn` resolves to.
 */
async function withProcessEnv(env2, fn) {
  const assign = (name, value) => {
    if (value === void 0) {
      delete process.env[name];
    } else {
      process.env[name] = value;
    }
  };
  // Capture each prior value right before overriding it.
  const snapshot = Object.entries(env2).map(([name, value]) => {
    const prior = process.env[name];
    assign(name, value);
    return [name, prior];
  });
  try {
    return await fn();
  } finally {
    for (const [name, prior] of snapshot) {
      assign(name, prior);
    }
  }
}
25169
25504
  function whisperRamEstimate(model) {
25170
25505
  const m2 = model.toLowerCase();
25171
25506
  if (m2.includes("large"))
@@ -25251,6 +25586,7 @@ var init_transcribe_tool = __esm({
25251
25586
  "use strict";
25252
25587
  init_model_broker();
25253
25588
  init_network_egress_policy();
25589
+ init_cuda_device_filter();
25254
25590
  AUDIO_EXTS = /* @__PURE__ */ new Set([
25255
25591
  ".mp3",
25256
25592
  ".wav",
@@ -25353,13 +25689,13 @@ var init_transcribe_tool = __esm({
25353
25689
  return this.execViaCli(filePath, model, diarize, start2);
25354
25690
  }
25355
25691
  try {
25356
- const result = await tc.transcribe(filePath, {
25692
+ const result = await withProcessEnv(transcriptionPythonEnv(), () => tc.transcribe(filePath, {
25357
25693
  model,
25358
25694
  format: "json",
25359
25695
  diarize,
25360
25696
  wordTimestamps: true
25361
25697
  // Always get timestamps for structured output
25362
- });
25698
+ }));
25363
25699
  const transcriptDir = join32(this.workingDir, ".omnius", "transcripts");
25364
25700
  mkdirSync13(transcriptDir, { recursive: true });
25365
25701
  const fileBase = basename6(filePath).replace(/\.[^.]+$/, "");
@@ -25453,7 +25789,8 @@ var init_transcribe_tool = __esm({
25453
25789
  timeout: 3e5,
25454
25790
  // 5 min max
25455
25791
  cwd: this.workingDir,
25456
- stdio: ["pipe", "pipe", "pipe"]
25792
+ stdio: ["pipe", "pipe", "pipe"],
25793
+ env: transcriptionPythonEnv()
25457
25794
  });
25458
25795
  return {
25459
25796
  success: true,
@@ -25522,7 +25859,7 @@ var init_transcribe_tool = __esm({
25522
25859
  }
25523
25860
  tmpFile = `${tmpBase}.mp3`;
25524
25861
  try {
25525
- execFileSync3("yt-dlp", ["-x", "--audio-format", "mp3", "--audio-quality", "5", "-o", `${tmpBase}.%(ext)s`, url], { timeout: 3e5, stdio: ["pipe", "pipe", "pipe"] });
25862
+ execFileSync4("yt-dlp", ["-x", "--audio-format", "mp3", "--audio-quality", "5", "-o", `${tmpBase}.%(ext)s`, url], { timeout: 3e5, stdio: ["pipe", "pipe", "pipe"] });
25526
25863
  if (!existsSync29(tmpFile)) {
25527
25864
  const files = readdirSync13(tmpDir).filter((f2) => f2.startsWith(`download-`) && f2 !== ".gitkeep");
25528
25865
  const match = files.find((f2) => f2.includes(basename6(tmpBase)));
@@ -25647,13 +25984,13 @@ ${result.output}`,
25647
25984
  try {
25648
25985
  let title = "download";
25649
25986
  try {
25650
- title = execFileSync3("yt-dlp", ["--get-title", url], { timeout: 15e3, stdio: "pipe" }).toString().trim().replace(/[<>:"/\\|?*]/g, "_").slice(0, 100);
25987
+ title = execFileSync4("yt-dlp", ["--get-title", url], { timeout: 15e3, stdio: "pipe" }).toString().trim().replace(/[<>:"/\\|?*]/g, "_").slice(0, 100);
25651
25988
  } catch {
25652
25989
  }
25653
25990
  if (format3 === "mp4") {
25654
25991
  const outPath = join32(outputDir, `${title}.mp4`);
25655
25992
  const outTemplate = join32(outputDir, `${title}.%(ext)s`);
25656
- execFileSync3("yt-dlp", [
25993
+ execFileSync4("yt-dlp", [
25657
25994
  "-f",
25658
25995
  "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best",
25659
25996
  "--merge-output-format",
@@ -25673,7 +26010,7 @@ Format: mp4`,
25673
26010
  } else {
25674
26011
  const outPath = join32(outputDir, `${title}.mp3`);
25675
26012
  const outTemplate = join32(outputDir, `${title}.%(ext)s`);
25676
- execFileSync3("yt-dlp", ["-x", "--audio-format", "mp3", "--audio-quality", "0", "-o", outTemplate, url], { timeout: 6e5, stdio: "pipe", cwd: outputDir });
26013
+ execFileSync4("yt-dlp", ["-x", "--audio-format", "mp3", "--audio-quality", "0", "-o", outTemplate, url], { timeout: 6e5, stdio: "pipe", cwd: outputDir });
25677
26014
  const actualPath = existsSync29(outPath) ? outPath : outTemplate.replace("%(ext)s", "mp3");
25678
26015
  return {
25679
26016
  success: true,
@@ -259063,6 +259400,12 @@ function imageCandidateFor(model, requestedBackend) {
259063
259400
  preset: getImageGenerationPreset(resolved)
259064
259401
  };
259065
259402
  }
259403
/**
 * Map an image candidate's backend to the broker host label.
 *
 * @param candidate Object exposing `backend`.
 * @returns "ollama" for the ollama backend, "diffusers-py" for anything else.
 */
function imageCandidateHost(candidate) {
  if (candidate.backend === "ollama") {
    return "ollama";
  }
  return "diffusers-py";
}
259406
/**
 * Convert a candidate preset's `minVramGB` into whole megabytes.
 *
 * @param candidate Object with an optional `preset.minVramGB`.
 * @returns `minVramGB * 1024` rounded up, or undefined when the preset or
 *   the field is absent.
 */
function imageCandidateEstimatedVramMB(candidate) {
  const minVramGB = candidate.preset?.minVramGB;
  if (minVramGB === void 0) {
    return void 0;
  }
  return Math.ceil(minVramGB * 1024);
}
259066
259409
  function imageGenerationFallbackCandidates(requestedModel, requestedBackend, allowFallback = true) {
259067
259410
  const ladder = imageGenerationQualityLadder();
259068
259411
  const candidates = [];
@@ -259246,7 +259589,9 @@ function formatDiffusersFailure(stderrOrStdout) {
259246
259589
  Note: ${note}`)].filter(Boolean).join("");
259247
259590
  }
259248
259591
  function imageGenerationPythonEnv(_repoRoot) {
259249
- return unifiedPythonEnv();
259592
+ const env2 = unifiedPythonEnv();
259593
+ applyMediaCudaDeviceFilterToEnv(env2, "image");
259594
+ return env2;
259250
259595
  }
259251
259596
  function approxImageDownloadBytes(preset) {
259252
259597
  if (preset?.approxDownloadGB)
@@ -259364,9 +259709,15 @@ function annotateImageFallbackSuccess(result, failed, winner) {
259364
259709
  ...failed.map((attempt, index) => ` ${formatImageAttempt(attempt.candidate, attempt.reason, index)}`),
259365
259710
  ""
259366
259711
  ].join("\n");
259712
+ const llmPrefix = [
259713
+ `Fallback ladder used ${winner.model} [${winner.backend}] after ${failed.length} failed attempt(s).`,
259714
+ ...failed.map((attempt, index) => formatImageAttempt(attempt.candidate, attempt.reason, index))
259715
+ ].join("\n");
259367
259716
  return {
259368
259717
  ...result,
259369
- output: prefix + result.output
259718
+ output: prefix + result.output,
259719
+ llmContent: result.llmContent ? `${llmPrefix}
259720
+ ${result.llmContent}` : result.llmContent
259370
259721
  };
259371
259722
  }
259372
259723
  function parseRunnerJson(stdout) {
@@ -259389,6 +259740,7 @@ var init_image_generate = __esm({
259389
259740
  init_venv_paths();
259390
259741
  init_model_store();
259391
259742
  init_hf_media_models();
259743
+ init_cuda_device_filter();
259392
259744
  DEFAULT_DIFFUSERS_IMAGE_MODEL = "Efficient-Large-Model/SANA1.5_1.6B_1024px_diffusers";
259393
259745
  DEFAULT_OLLAMA_IMAGE_MODEL = "x/flux2-klein";
259394
259746
  LEGACY_SDXL_TURBO_MODEL = "stabilityai/sdxl-turbo";
@@ -260173,6 +260525,45 @@ if __name__ == "__main__":
260173
260525
  this.lastProgressAt = now;
260174
260526
  this.progressHandler(event);
260175
260527
  }
260528
+ async acquireTransientLoadLease(args) {
260529
+ if (!args.candidate)
260530
+ return null;
260531
+ const broker = getModelBroker();
260532
+ const decision2 = await broker.acquireTransientModelLoad({
260533
+ name: args.candidate.model,
260534
+ domain: "image-gen",
260535
+ host: imageCandidateHost(args.candidate),
260536
+ owner: "image-generate-tool",
260537
+ estimatedVramMB: imageCandidateEstimatedVramMB(args.candidate)
260538
+ }, {
260539
+ reason: args.reason,
260540
+ restoreOllama: true,
260541
+ unloadRequestedOllama: true
260542
+ });
260543
+ if (decision2.kind === "reject") {
260544
+ return {
260545
+ success: false,
260546
+ output: "",
260547
+ error: `Image generation blocked by resource broker: ${decision2.reason}`,
260548
+ durationMs: performance.now() - args.start
260549
+ };
260550
+ }
260551
+ if (decision2.kind === "degrade") {
260552
+ return {
260553
+ success: false,
260554
+ output: "",
260555
+ error: `Image generation needs a broker fallback (${decision2.fallback.name}), but image candidate fallback must be selected by the image ladder: ${decision2.reason}`,
260556
+ durationMs: performance.now() - args.start
260557
+ };
260558
+ }
260559
+ if (decision2.lease.evictedOllamaModels.length > 0) {
260560
+ this.emitProgress({
260561
+ stage: "setup",
260562
+ message: `Temporarily unloaded ${decision2.lease.evictedOllamaModels.length} Ollama inference model(s) to free VRAM for image generation`
260563
+ });
260564
+ }
260565
+ return { lease: decision2.lease, gpuIndex: decision2.lease.gpuIndex };
260566
+ }
260176
260567
  async execute(args) {
260177
260568
  const start2 = performance.now();
260178
260569
  const action = String(args["action"] ?? "generate");
@@ -260215,33 +260606,6 @@ if __name__ == "__main__":
260215
260606
  const requestedBackend = args["backend"] ? String(args["backend"]) : this.defaultBackend;
260216
260607
  const seed = optionalNumberArg(args["seed"]);
260217
260608
  const candidates = imageGenerationFallbackCandidates(requestedModel, requestedBackend, generationFallbackEnabled(args));
260218
- const broker = getModelBroker();
260219
- const firstCandidate = candidates[0];
260220
- let brokerGpuIndex = null;
260221
- if (firstCandidate) {
260222
- const decision2 = await broker.ensureModelLoadable({
260223
- name: firstCandidate.model,
260224
- domain: "image-gen",
260225
- host: firstCandidate.backend === "ollama" ? "ollama" : "diffusers-py",
260226
- owner: "image-generate-tool"
260227
- });
260228
- if (decision2.kind === "evict") {
260229
- for (const target of decision2.evictTargets) {
260230
- await broker.evict(target.host, target.name, "image-gen-needs-room");
260231
- }
260232
- brokerGpuIndex = decision2.gpuIndex ?? null;
260233
- } else if (decision2.kind === "ok") {
260234
- brokerGpuIndex = decision2.gpuIndex ?? null;
260235
- } else if (decision2.kind === "reject") {
260236
- return {
260237
- success: false,
260238
- output: "",
260239
- error: `Image generation blocked by resource broker: ${decision2.reason}`,
260240
- durationMs: performance.now() - start2
260241
- };
260242
- }
260243
- }
260244
- this._brokerGpuIndex = brokerGpuIndex;
260245
260609
  try {
260246
260610
  return await this.generateCandidateLadder({ candidates, prompt, args, seed, start: start2 });
260247
260611
  } catch (err) {
@@ -260260,7 +260624,30 @@ if __name__ == "__main__":
260260
260624
  stage: "setup",
260261
260625
  message: `Preparing image model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
260262
260626
  });
260263
- const result = candidate.backend === "ollama" ? await this.prewarmOllama({ model: candidate.model, start: args.start }) : candidate.backend === "sdcpp" ? await this.prewarmSdCpp({ model: candidate.model, start: args.start, python: args.args["python"] }) : await this.prewarmDiffusers({ model: candidate.model, start: args.start, python: args.args["python"] });
260627
+ const leaseDecision = await this.acquireTransientLoadLease({
260628
+ candidate,
260629
+ reason: "image-prewarm-needs-room",
260630
+ start: args.start
260631
+ });
260632
+ if (leaseDecision && "success" in leaseDecision) {
260633
+ failed.push({ candidate, reason: summarizeToolResult(leaseDecision) });
260634
+ if (index < args.candidates.length - 1) {
260635
+ this.emitProgress({
260636
+ stage: "setup",
260637
+ message: `${candidate.model} did not fit current resources; trying ${args.candidates[index + 1].model}`
260638
+ });
260639
+ }
260640
+ continue;
260641
+ }
260642
+ const lease = leaseDecision?.lease;
260643
+ this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
260644
+ let result;
260645
+ try {
260646
+ result = candidate.backend === "ollama" ? await this.prewarmOllama({ model: candidate.model, start: args.start }) : candidate.backend === "sdcpp" ? await this.prewarmSdCpp({ model: candidate.model, start: args.start, python: args.args["python"] }) : await this.prewarmDiffusers({ model: candidate.model, start: args.start, python: args.args["python"] });
260647
+ } finally {
260648
+ await lease?.release();
260649
+ this._brokerGpuIndex = null;
260650
+ }
260264
260651
  if (result.success)
260265
260652
  return annotateImageFallbackSuccess(result, failed, candidate);
260266
260653
  failed.push({ candidate, reason: summarizeToolResult(result) });
@@ -260299,7 +260686,30 @@ if __name__ == "__main__":
260299
260686
  message: `Using image model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
260300
260687
  });
260301
260688
  const promptForCandidate = expansionEnabled ? await this.expandPromptForCandidate(args.prompt, candidate, index, args.candidates.length) : args.prompt;
260302
- const result = candidate.backend === "ollama" ? await this.generateWithOllama({ prompt: promptForCandidate, model: candidate.model, width, height, steps, start: args.start }) : candidate.backend === "sdcpp" ? await this.generateWithSdCpp({ prompt: promptForCandidate, model: candidate.model, width, height, steps, seed: args.seed, start: args.start, python: args.args["python"] }) : await this.generateWithDiffusers({ prompt: promptForCandidate, model: candidate.model, width, height, steps, guidance, seed: args.seed, start: args.start, python: args.args["python"] });
260689
+ const leaseDecision = await this.acquireTransientLoadLease({
260690
+ candidate,
260691
+ reason: "image-gen-needs-room",
260692
+ start: args.start
260693
+ });
260694
+ if (leaseDecision && "success" in leaseDecision) {
260695
+ failed.push({ candidate, reason: summarizeToolResult(leaseDecision) });
260696
+ if (index < args.candidates.length - 1) {
260697
+ this.emitProgress({
260698
+ stage: "setup",
260699
+ message: `${candidate.model} did not fit current resources; falling back to ${args.candidates[index + 1].model}`
260700
+ });
260701
+ }
260702
+ continue;
260703
+ }
260704
+ const lease = leaseDecision?.lease;
260705
+ this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
260706
+ let result;
260707
+ try {
260708
+ result = candidate.backend === "ollama" ? await this.generateWithOllama({ prompt: promptForCandidate, model: candidate.model, width, height, steps, start: args.start }) : candidate.backend === "sdcpp" ? await this.generateWithSdCpp({ prompt: promptForCandidate, model: candidate.model, width, height, steps, seed: args.seed, start: args.start, python: args.args["python"] }) : await this.generateWithDiffusers({ prompt: promptForCandidate, model: candidate.model, width, height, steps, guidance, seed: args.seed, start: args.start, python: args.args["python"] });
260709
+ } finally {
260710
+ await lease?.release();
260711
+ this._brokerGpuIndex = null;
260712
+ }
260303
260713
  if (result.success) {
260304
260714
  await this.writeImageSidecar(result, {
260305
260715
  originalPrompt: args.prompt,
@@ -260523,6 +260933,17 @@ ${errText.slice(0, 1200)}`,
260523
260933
  }
260524
260934
  ensureUnifiedCacheDirs();
260525
260935
  this.emitProgress({ stage: "load", message: `Downloading/loading image model ${args.model}` });
260936
+ const runnerEnv = { ...python.env };
260937
+ if (this._brokerGpuIndex !== null) {
260938
+ if (mediaBrokerGpuIndexIsCompatible(this._brokerGpuIndex, "image", runnerEnv)) {
260939
+ runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
260940
+ } else {
260941
+ this.emitProgress({
260942
+ stage: "setup",
260943
+ message: `Broker selected CUDA GPU ${this._brokerGpuIndex}, but image CUDA filtering excluded it; using CUDA_VISIBLE_DEVICES=${runnerEnv["CUDA_VISIBLE_DEVICES"] ?? "default"}`
260944
+ });
260945
+ }
260946
+ }
260526
260947
  const result = await runProcess2(python.command, [
260527
260948
  runner,
260528
260949
  "--model",
@@ -260535,7 +260956,7 @@ ${errText.slice(0, 1200)}`,
260535
260956
  ], {
260536
260957
  cwd: this.cwd,
260537
260958
  timeoutMs: 18e5,
260538
- env: python.env,
260959
+ env: runnerEnv,
260539
260960
  progressLabel: `Downloading/loading ${args.model}`,
260540
260961
  onProgress: (event) => this.emitProgress(event)
260541
260962
  });
@@ -260746,7 +261167,14 @@ ${errText.slice(0, 800)}`,
260746
261167
  this.emitProgress({ stage: "load", message: `Starting image generation with ${args.model}` });
260747
261168
  const runnerEnv = { ...python.env };
260748
261169
  if (this._brokerGpuIndex !== null) {
260749
- runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
261170
+ if (mediaBrokerGpuIndexIsCompatible(this._brokerGpuIndex, "image", runnerEnv)) {
261171
+ runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
261172
+ } else {
261173
+ this.emitProgress({
261174
+ stage: "setup",
261175
+ message: `Broker selected CUDA GPU ${this._brokerGpuIndex}, but image CUDA filtering excluded it; using CUDA_VISIBLE_DEVICES=${runnerEnv["CUDA_VISIBLE_DEVICES"] ?? "default"}`
261176
+ });
261177
+ }
260750
261178
  }
260751
261179
  const result = await runProcess2(python.command, argv, {
260752
261180
  cwd: this.cwd,
@@ -260948,6 +261376,7 @@ __export(audio_generate_exports, {
260948
261376
  AudioGenerateTool: () => AudioGenerateTool,
260949
261377
  DEFAULT_MUSIC_MODEL: () => DEFAULT_MUSIC_MODEL,
260950
261378
  DEFAULT_SOUND_MODEL: () => DEFAULT_SOUND_MODEL,
261379
+ audioBrokerGpuIndexIsCompatible: () => audioBrokerGpuIndexIsCompatible,
260951
261380
  audioGenerationDir: () => audioGenerationDir,
260952
261381
  audioGenerationFallbackCandidates: () => audioGenerationFallbackCandidates,
260953
261382
  audioGenerationModelPresets: () => audioGenerationModelPresets,
@@ -260958,9 +261387,11 @@ __export(audio_generate_exports, {
260958
261387
  findNonGatedAudioFallback: () => findNonGatedAudioFallback,
260959
261388
  getAudioGenerationPreset: () => getAudioGenerationPreset,
260960
261389
  inferAudioGenerationBackend: () => inferAudioGenerationBackend,
260961
- isAudioPresetGated: () => isAudioPresetGated
261390
+ isAudioPresetGated: () => isAudioPresetGated,
261391
+ parseAudioCudaDeviceInfo: () => parseAudioCudaDeviceInfo,
261392
+ resolveAudioCudaVisibleDevicesForEnv: () => resolveAudioCudaVisibleDevicesForEnv
260962
261393
  });
260963
- import { execFileSync as execFileSync4, spawn as spawn8 } from "node:child_process";
261394
+ import { spawn as spawn8 } from "node:child_process";
260964
261395
  import { existsSync as existsSync32, readdirSync as readdirSync14, statSync as statSync13 } from "node:fs";
260965
261396
  import { chmod as chmod4, mkdir as mkdir13, writeFile as writeFile18 } from "node:fs/promises";
260966
261397
  import { join as join42 } from "node:path";
@@ -261025,25 +261456,36 @@ function backendPackages(backend) {
261025
261456
  return TANGOFLUX_PACKAGES;
261026
261457
  return DIFFUSERS_AUDIO_PACKAGES;
261027
261458
  }
261459
+ function splitCudaVisibleDevices2(value2) {
261460
+ return (value2 ?? "").split(",").map((part) => part.trim()).filter(Boolean);
261461
+ }
261462
+ function parseAudioCudaDeviceInfo(text) {
261463
+ return parseCudaDeviceInfo(text);
261464
+ }
261465
+ function detectAudioCudaDevices() {
261466
+ return detectCudaDevices();
261467
+ }
261468
+ function resolveAudioCudaVisibleDevicesForEnv(args = {}) {
261469
+ return resolveMediaCudaVisibleDevicesForEnv({
261470
+ modality: "audio",
261471
+ env: args.env,
261472
+ devices: args.devices,
261473
+ minComputeCapability: args.minComputeCapability
261474
+ });
261475
+ }
261476
+ function audioBrokerGpuIndexIsCompatible(gpuIndex, env2 = process.env, devices = detectAudioCudaDevices()) {
261477
+ return mediaBrokerGpuIndexIsCompatible(gpuIndex, "audio", env2, devices);
261478
+ }
261028
261479
  function detectLegacyCudaComputeCapability() {
261029
- try {
261030
- const out = execFileSync4("nvidia-smi", ["--query-gpu=compute_cap,name", "--format=csv,noheader,nounits"], {
261031
- encoding: "utf8",
261032
- timeout: 5e3,
261033
- stdio: ["ignore", "pipe", "ignore"]
261034
- }).trim();
261035
- const first2 = out.split(/\r?\n/).map((line) => line.trim()).find(Boolean);
261036
- const match = first2?.match(/^(\d+)\.(\d+)\s*,?\s*(.*)$/);
261037
- if (!match)
261038
- return null;
261039
- const major = Number(match[1]);
261040
- const minor = Number(match[2]);
261041
- if (!Number.isFinite(major) || !Number.isFinite(minor))
261042
- return null;
261043
- return { major, minor, name: match[3]?.trim() || void 0 };
261044
- } catch {
261480
+ const devices = detectAudioCudaDevices();
261481
+ const visible = new Set(splitCudaVisibleDevices2(resolveAudioCudaVisibleDevicesForEnv({ devices })));
261482
+ const candidates = visible.size > 0 ? devices.filter((device) => visible.has(String(device.index)) || device.uuid && visible.has(device.uuid)) : devices;
261483
+ const legacy = candidates.find((device) => device.computeCapability !== null && isLegacyCudaCapability(Math.floor(device.computeCapability), Math.round(device.computeCapability % 1 * 10)));
261484
+ if (!legacy || legacy.computeCapability === null)
261045
261485
  return null;
261046
- }
261486
+ const major = Math.floor(legacy.computeCapability);
261487
+ const minor = Math.round(legacy.computeCapability % 1 * 10);
261488
+ return { major, minor, name: legacy.name };
261047
261489
  }
261048
261490
  function isLegacyCudaCapability(major, minor) {
261049
261491
  return major < 7 || major === 7 && minor < 5;
@@ -261087,7 +261529,9 @@ function backendImportCheck(backend) {
261087
261529
  return "import torch, diffusers, scipy\nfrom diffusers import AudioLDMPipeline\n";
261088
261530
  }
261089
261531
  function audioGenerationPythonEnv(_repoRoot) {
261090
- return unifiedPythonEnv();
261532
+ const env2 = unifiedPythonEnv();
261533
+ applyMediaCudaDeviceFilterToEnv(env2, "audio");
261534
+ return env2;
261091
261535
  }
261092
261536
  function approxAudioDownloadBytes(preset) {
261093
261537
  if (preset?.approxDownloadGB)
@@ -261341,8 +261785,8 @@ function formatAudioSetupFailure(backend, text) {
261341
261785
  if (lowered.includes("cuda") && lowered.includes("not available")) {
261342
261786
  notes2.push("CUDA was not available to the selected Python environment; install a Torch build matching this machine's CUDA runtime or use CPU-compatible settings.");
261343
261787
  }
261344
- if (lowered.includes("cudnn version") && lowered.includes("sm < 7.5")) {
261345
- notes2.push("The installed PyTorch wheel uses cuDNN 9 on a legacy CUDA GPU. Omnius now repairs audio-generation venvs by reinstalling PyTorch 2.3.1 from the cu118 index for SM < 7.5 hardware.");
261788
+ if (lowered.includes("cudnn") && lowered.includes("incompatible") || lowered.includes("sm < 7.5") || lowered.includes("not compatible") && (lowered.includes("sm_") || lowered.includes("compute capability"))) {
261789
+ notes2.push(`The installed PyTorch wheel is touching a legacy CUDA GPU. Audio generation auto-filters CUDA devices below SM 7.5; current resolved CUDA_VISIBLE_DEVICES=${resolveAudioCudaVisibleDevicesForEnv() ?? "unset"}. Override with OMNIUS_AUDIO_CUDA_VISIBLE_DEVICES or disable with OMNIUS_AUDIO_DISABLE_CUDA_FILTER=1.`);
261346
261790
  }
261347
261791
  return [body, ...notes2.map((note) => `
261348
261792
  ${note}`)].filter(Boolean).join("");
@@ -261564,6 +262008,18 @@ function audioCandidateFor(kind, model, requestedBackend) {
261564
262008
  preset: getAudioGenerationPreset(model, kind)
261565
262009
  };
261566
262010
  }
262011
+ function audioCandidateHost(candidate) {
262012
+ if (candidate.backend === "project")
262013
+ return null;
262014
+ if (candidate.backend === "audiocraft")
262015
+ return "audiocraft";
262016
+ if (candidate.backend === "tangoflux")
262017
+ return "tangoflux";
262018
+ return "diffusers-py";
262019
+ }
262020
+ function audioCandidateEstimatedVramMB(candidate) {
262021
+ return candidate.preset ? Math.ceil(candidate.preset.minVramGB * 1024) : void 0;
262022
+ }
261567
262023
  function audioGenerationFallbackCandidates(kind, requestedModel, requestedBackend, allowFallback = true) {
261568
262024
  const ladder = audioGenerationQualityLadder(kind);
261569
262025
  const candidates = [];
@@ -261720,9 +262176,15 @@ function annotateAudioFallbackSuccess(result, failed, winner) {
261720
262176
  ...failed.map((attempt, index) => ` ${formatAudioAttempt(attempt.candidate, attempt.reason, index)}`),
261721
262177
  ""
261722
262178
  ].join("\n");
262179
+ const llmPrefix = [
262180
+ `Fallback ladder used ${winner.model} [${winner.backend}] after ${failed.length} failed attempt(s).`,
262181
+ ...failed.map((attempt, index) => formatAudioAttempt(attempt.candidate, attempt.reason, index))
262182
+ ].join("\n");
261723
262183
  return {
261724
262184
  ...result,
261725
- output: prefix + result.output
262185
+ output: prefix + result.output,
262186
+ llmContent: result.llmContent ? `${llmPrefix}
262187
+ ${result.llmContent}` : result.llmContent
261726
262188
  };
261727
262189
  }
261728
262190
  var DEFAULT_SOUND_MODEL, DEFAULT_MUSIC_MODEL, DIFFUSERS_AUDIO_PACKAGES, TRANSFORMERS_AUDIO_PACKAGES, AUDIOCRAFT_PACKAGES, STABLE_AUDIO_PACKAGES, TANGOFLUX_PACKAGES, AUDIO_GENERATION_MODEL_PRESETS, SOUND_GENERATION_QUALITY_LADDER, MUSIC_GENERATION_QUALITY_LADDER, DIFFUSERS_AUDIO_RUNNER, AUDIOCRAFT_RUNNER, TRANSFORMERS_AUDIO_RUNNER, TANGOFLUX_RUNNER, AudioGenerateTool;
@@ -261733,6 +262195,7 @@ var init_audio_generate = __esm({
261733
262195
  init_venv_paths();
261734
262196
  init_model_store();
261735
262197
  init_hf_media_models();
262198
+ init_cuda_device_filter();
261736
262199
  DEFAULT_SOUND_MODEL = "cvssp/audioldm-s-full-v2";
261737
262200
  DEFAULT_MUSIC_MODEL = "facebook/musicgen-small";
261738
262201
  DIFFUSERS_AUDIO_PACKAGES = [
@@ -262094,9 +262557,9 @@ var init_audio_generate = __esm({
262094
262557
  import argparse, json, os, sys, time
262095
262558
  from pathlib import Path
262096
262559
 
262097
- # Broker-picked GPU pinning — must run before importing torch.
262098
- _omnius_gpu = os.environ.get("OMNIUS_GPU_INDEX", "").strip()
262099
- if _omnius_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ:
262560
+ # Broker/audio GPU pinning — must run before importing torch.
262561
+ _omnius_gpu = os.environ.get("OMNIUS_GPU_INDEX", "").strip() or os.environ.get("OMNIUS_AUDIO_GPU", "").strip()
262562
+ if _omnius_gpu:
262100
262563
  os.environ["CUDA_VISIBLE_DEVICES"] = _omnius_gpu
262101
262564
 
262102
262565
  def _format_bytes(value):
@@ -262239,9 +262702,14 @@ if __name__ == "__main__":
262239
262702
  main()
262240
262703
  `;
262241
262704
  AUDIOCRAFT_RUNNER = String.raw`#!/usr/bin/env python3
262242
- import argparse, json, sys, time
262705
+ import argparse, json, os, sys, time
262243
262706
  from pathlib import Path
262244
262707
 
262708
+ # Broker/audio GPU pinning — must run before importing torch/audiocraft.
262709
+ _omnius_gpu = os.environ.get("OMNIUS_GPU_INDEX", "").strip() or os.environ.get("OMNIUS_AUDIO_GPU", "").strip()
262710
+ if _omnius_gpu:
262711
+ os.environ["CUDA_VISIBLE_DEVICES"] = _omnius_gpu
262712
+
262245
262713
  def _progress(stage, message, percent=None):
262246
262714
  payload = {"omnius_progress": True, "stage": stage, "message": message}
262247
262715
  if percent is not None:
@@ -262295,9 +262763,9 @@ if __name__ == "__main__":
262295
262763
  import argparse, json, os, sys, time
262296
262764
  from pathlib import Path
262297
262765
 
262298
- # Broker-picked GPU pinning — must run before importing torch.
262299
- _omnius_gpu = os.environ.get("OMNIUS_GPU_INDEX", "").strip()
262300
- if _omnius_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ:
262766
+ # Broker/audio GPU pinning — must run before importing torch.
262767
+ _omnius_gpu = os.environ.get("OMNIUS_GPU_INDEX", "").strip() or os.environ.get("OMNIUS_AUDIO_GPU", "").strip()
262768
+ if _omnius_gpu:
262301
262769
  os.environ["CUDA_VISIBLE_DEVICES"] = _omnius_gpu
262302
262770
 
262303
262771
  def _format_bytes(value):
@@ -262411,9 +262879,14 @@ if __name__ == "__main__":
262411
262879
  main()
262412
262880
  `;
262413
262881
  TANGOFLUX_RUNNER = String.raw`#!/usr/bin/env python3
262414
- import argparse, json, sys, time
262882
+ import argparse, json, os, sys, time
262415
262883
  from pathlib import Path
262416
262884
 
262885
+ # Broker/audio GPU pinning — must run before importing torch/tangoflux.
262886
+ _omnius_gpu = os.environ.get("OMNIUS_GPU_INDEX", "").strip() or os.environ.get("OMNIUS_AUDIO_GPU", "").strip()
262887
+ if _omnius_gpu:
262888
+ os.environ["CUDA_VISIBLE_DEVICES"] = _omnius_gpu
262889
+
262417
262890
  def _format_bytes(value):
262418
262891
  try:
262419
262892
  n = float(value)
@@ -262548,6 +263021,48 @@ if __name__ == "__main__":
262548
263021
  this.lastProgressAt = now;
262549
263022
  this.progressHandler(event);
262550
263023
  }
263024
+ async acquireTransientLoadLease(args) {
263025
+ if (!args.candidate)
263026
+ return null;
263027
+ const host = audioCandidateHost(args.candidate);
263028
+ if (!host)
263029
+ return null;
263030
+ const broker = getModelBroker();
263031
+ const decision2 = await broker.acquireTransientModelLoad({
263032
+ name: args.candidate.model,
263033
+ domain: args.kind,
263034
+ host,
263035
+ owner: `audio-generate-tool/${args.kind}`,
263036
+ estimatedVramMB: audioCandidateEstimatedVramMB(args.candidate)
263037
+ }, {
263038
+ reason: args.reason,
263039
+ restoreOllama: true,
263040
+ unloadRequestedOllama: false
263041
+ });
263042
+ if (decision2.kind === "reject") {
263043
+ return {
263044
+ success: false,
263045
+ output: "",
263046
+ error: `${args.kind === "music" ? "Music" : "Sound"} generation blocked by resource broker: ${decision2.reason}`,
263047
+ durationMs: performance.now() - args.start
263048
+ };
263049
+ }
263050
+ if (decision2.kind === "degrade") {
263051
+ return {
263052
+ success: false,
263053
+ output: "",
263054
+ error: `${args.kind === "music" ? "Music" : "Sound"} generation needs a broker fallback (${decision2.fallback.name}), but audio candidate fallback must be selected by the audio ladder: ${decision2.reason}`,
263055
+ durationMs: performance.now() - args.start
263056
+ };
263057
+ }
263058
+ if (decision2.lease.evictedOllamaModels.length > 0) {
263059
+ this.emitProgress({
263060
+ stage: "setup",
263061
+ message: `Temporarily unloaded ${decision2.lease.evictedOllamaModels.length} Ollama inference model(s) to free VRAM for ${args.kind} generation`
263062
+ });
263063
+ }
263064
+ return { lease: decision2.lease, gpuIndex: decision2.lease.gpuIndex };
263065
+ }
262551
263066
  async prewarmPythonBackend(args) {
262552
263067
  const runner = await ensureAudioRunner(this.cwd, args.runnerBackend);
262553
263068
  let python;
@@ -262594,6 +263109,17 @@ if __name__ == "__main__":
262594
263109
  }
262595
263110
  ensureUnifiedCacheDirs();
262596
263111
  this.emitProgress({ stage: "load", message: `Downloading/loading ${args.kind} model ${args.model}` });
263112
+ const runnerEnv = { ...python.env };
263113
+ if (this._brokerGpuIndex !== null) {
263114
+ if (audioBrokerGpuIndexIsCompatible(this._brokerGpuIndex, runnerEnv)) {
263115
+ runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
263116
+ } else {
263117
+ this.emitProgress({
263118
+ stage: "setup",
263119
+ message: `Broker selected CUDA GPU ${this._brokerGpuIndex}, but audio CUDA filtering excluded it; using CUDA_VISIBLE_DEVICES=${runnerEnv["CUDA_VISIBLE_DEVICES"] ?? "default"}`
263120
+ });
263121
+ }
263122
+ }
262597
263123
  const result = await runProcess3(python.command, [
262598
263124
  runner,
262599
263125
  "--kind",
@@ -262610,7 +263136,7 @@ if __name__ == "__main__":
262610
263136
  ], {
262611
263137
  cwd: this.cwd,
262612
263138
  timeoutMs: 18e5,
262613
- env: python.env,
263139
+ env: runnerEnv,
262614
263140
  progressLabel: `Downloading/loading ${args.model}`,
262615
263141
  onProgress: (event) => this.emitProgress(event)
262616
263142
  });
@@ -262690,33 +263216,6 @@ if __name__ == "__main__":
262690
263216
  const candidates = audioGenerationFallbackCandidates(kind, requestedModel, requestedBackend, generationFallbackEnabled2(args));
262691
263217
  const seed = optionalNumberArg2(args["seed"]);
262692
263218
  const playback = playbackRequested(args);
262693
- const broker = getModelBroker();
262694
- const firstCandidate = candidates[0];
262695
- let brokerGpuIndex = null;
262696
- if (firstCandidate) {
262697
- const decision2 = await broker.ensureModelLoadable({
262698
- name: firstCandidate.model,
262699
- domain: kind === "music" ? "music" : "sound",
262700
- host: firstCandidate.backend === "audiocraft" ? "audiocraft" : firstCandidate.backend === "tangoflux" ? "tangoflux" : firstCandidate.backend === "transformers" ? "diffusers-py" : "diffusers-py",
262701
- owner: `audio-generate-tool/${kind}`
262702
- });
262703
- if (decision2.kind === "evict") {
262704
- for (const target of decision2.evictTargets) {
262705
- await broker.evict(target.host, target.name, `${kind}-gen-needs-room`);
262706
- }
262707
- brokerGpuIndex = decision2.gpuIndex ?? null;
262708
- } else if (decision2.kind === "ok") {
262709
- brokerGpuIndex = decision2.gpuIndex ?? null;
262710
- } else if (decision2.kind === "reject") {
262711
- return {
262712
- success: false,
262713
- output: "",
262714
- error: `${kind === "music" ? "Music" : "Sound"} generation blocked by resource broker: ${decision2.reason}`,
262715
- durationMs: performance.now() - start2
262716
- };
262717
- }
262718
- }
262719
- this._brokerGpuIndex = brokerGpuIndex;
262720
263219
  try {
262721
263220
  return await this.generateCandidateLadder({ kind, candidates, prompt, args, seed, playback, start: start2 });
262722
263221
  } catch (err) {
@@ -262736,15 +263235,39 @@ if __name__ == "__main__":
262736
263235
  stage: "setup",
262737
263236
  message: `Preparing ${args.kind} model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
262738
263237
  });
262739
- const result = candidate.backend === "project" ? this.projectProfileResult(args.kind, candidate, args.start) : await this.prewarmPythonBackend({
263238
+ const leaseDecision = await this.acquireTransientLoadLease({
262740
263239
  kind: args.kind,
262741
- backend: candidate.backend,
262742
- runnerBackend: candidate.backend,
262743
- model: candidate.model,
262744
- duration,
262745
- start: args.start,
262746
- python: args.args["python"]
262747
- });
263240
+ candidate,
263241
+ reason: `${args.kind}-prewarm-needs-room`,
263242
+ start: args.start
263243
+ });
263244
+ if (leaseDecision && "success" in leaseDecision) {
263245
+ failed.push({ candidate, reason: summarizeToolResult2(leaseDecision) });
263246
+ if (index < args.candidates.length - 1) {
263247
+ this.emitProgress({
263248
+ stage: "setup",
263249
+ message: `${candidate.model} did not fit current resources; trying ${args.candidates[index + 1].model}`
263250
+ });
263251
+ }
263252
+ continue;
263253
+ }
263254
+ const lease = leaseDecision?.lease;
263255
+ this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
263256
+ let result;
263257
+ try {
263258
+ result = candidate.backend === "project" ? this.projectProfileResult(args.kind, candidate, args.start) : await this.prewarmPythonBackend({
263259
+ kind: args.kind,
263260
+ backend: candidate.backend,
263261
+ runnerBackend: candidate.backend,
263262
+ model: candidate.model,
263263
+ duration,
263264
+ start: args.start,
263265
+ python: args.args["python"]
263266
+ });
263267
+ } finally {
263268
+ await lease?.release();
263269
+ this._brokerGpuIndex = null;
263270
+ }
262748
263271
  if (result.success)
262749
263272
  return annotateAudioFallbackSuccess(result, failed, candidate);
262750
263273
  failed.push({ candidate, reason: summarizeToolResult2(result) });
@@ -262772,19 +263295,43 @@ if __name__ == "__main__":
262772
263295
  stage: "setup",
262773
263296
  message: `Using ${args.kind} model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
262774
263297
  });
262775
- const result = candidate.backend === "project" ? this.projectProfileResult(args.kind, candidate, args.start) : await this.generateWithPythonBackend({
263298
+ const leaseDecision = await this.acquireTransientLoadLease({
262776
263299
  kind: args.kind,
262777
- backend: candidate.backend,
262778
- runnerBackend: candidate.backend,
262779
- prompt: args.prompt,
262780
- model: candidate.model,
262781
- duration,
262782
- steps,
262783
- seed: args.seed,
262784
- playback: args.playback,
262785
- start: args.start,
262786
- python: args.args["python"]
262787
- });
263300
+ candidate,
263301
+ reason: `${args.kind}-gen-needs-room`,
263302
+ start: args.start
263303
+ });
263304
+ if (leaseDecision && "success" in leaseDecision) {
263305
+ failed.push({ candidate, reason: summarizeToolResult2(leaseDecision) });
263306
+ if (index < args.candidates.length - 1) {
263307
+ this.emitProgress({
263308
+ stage: "setup",
263309
+ message: `${candidate.model} did not fit current resources; falling back to ${args.candidates[index + 1].model}`
263310
+ });
263311
+ }
263312
+ continue;
263313
+ }
263314
+ const lease = leaseDecision?.lease;
263315
+ this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
263316
+ let result;
263317
+ try {
263318
+ result = candidate.backend === "project" ? this.projectProfileResult(args.kind, candidate, args.start) : await this.generateWithPythonBackend({
263319
+ kind: args.kind,
263320
+ backend: candidate.backend,
263321
+ runnerBackend: candidate.backend,
263322
+ prompt: args.prompt,
263323
+ model: candidate.model,
263324
+ duration,
263325
+ steps,
263326
+ seed: args.seed,
263327
+ playback: args.playback,
263328
+ start: args.start,
263329
+ python: args.args["python"]
263330
+ });
263331
+ } finally {
263332
+ await lease?.release();
263333
+ this._brokerGpuIndex = null;
263334
+ }
262788
263335
  if (result.success)
262789
263336
  return annotateAudioFallbackSuccess(result, failed, candidate);
262790
263337
  failed.push({ candidate, reason: summarizeToolResult2(result) });
@@ -262885,7 +263432,14 @@ if __name__ == "__main__":
262885
263432
  this.emitProgress({ stage: "load", message: `Starting ${args.kind} generation with ${args.model}` });
262886
263433
  const runnerEnv = { ...python.env };
262887
263434
  if (this._brokerGpuIndex !== null) {
262888
- runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
263435
+ if (audioBrokerGpuIndexIsCompatible(this._brokerGpuIndex, runnerEnv)) {
263436
+ runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
263437
+ } else {
263438
+ this.emitProgress({
263439
+ stage: "setup",
263440
+ message: `Broker selected CUDA GPU ${this._brokerGpuIndex}, but audio CUDA filtering excluded it; using CUDA_VISIBLE_DEVICES=${runnerEnv["CUDA_VISIBLE_DEVICES"] ?? "default"}`
263441
+ });
263442
+ }
262889
263443
  }
262890
263444
  const result = await runProcess3(python.command, argv, {
262891
263445
  cwd: this.cwd,
@@ -263117,6 +263671,12 @@ function videoCandidateFor(model, requestedBackend, requestedKind) {
263117
263671
  }
263118
263672
  return { model, backend, preset };
263119
263673
  }
263674
+ function videoCandidateHost(candidate) {
263675
+ return candidate.backend === "comfyui" ? "comfyui" : "diffusers-py";
263676
+ }
263677
+ function videoCandidateEstimatedVramMB(candidate) {
263678
+ return candidate.preset ? Math.ceil(candidate.preset.minVramGB * 1024) : void 0;
263679
+ }
263120
263680
  function videoGenerationFallbackCandidates(requestedModel, requestedBackend, requestedKind, allowFallback = true, options2 = {}) {
263121
263681
  const preferAudioVideo = Boolean(options2.preferNativeAudioVideo);
263122
263682
  const baseLadderIds = preferAudioVideo ? [...VIDEO_AUDIO_QUALITY_LADDER, ...VIDEO_GENERATION_QUALITY_LADDER] : VIDEO_GENERATION_QUALITY_LADDER;
@@ -263359,6 +263919,7 @@ function resolveHfToken() {
263359
263919
  }
263360
263920
  function videoGenerationPythonEnv(_repoRoot) {
263361
263921
  const env2 = unifiedPythonEnv();
263922
+ applyMediaCudaDeviceFilterToEnv(env2, "video");
263362
263923
  const token = resolveHfToken();
263363
263924
  if (token) {
263364
263925
  env2["HF_TOKEN"] = token;
@@ -263681,9 +264242,15 @@ function annotateVideoFallbackSuccess(result, failed, winner) {
263681
264242
  ...failed.map((attempt, index) => ` ${formatVideoAttempt(attempt.candidate, attempt.reason, index)}`),
263682
264243
  ""
263683
264244
  ].join("\n");
264245
+ const llmPrefix = [
264246
+ `Fallback ladder used ${winner.model} [${winner.backend}] after ${failed.length} failed attempt(s).`,
264247
+ ...failed.map((attempt, index) => formatVideoAttempt(attempt.candidate, attempt.reason, index))
264248
+ ].join("\n");
263684
264249
  return {
263685
264250
  ...result,
263686
- output: prefix + result.output
264251
+ output: prefix + result.output,
264252
+ llmContent: result.llmContent ? `${llmPrefix}
264253
+ ${result.llmContent}` : result.llmContent
263687
264254
  };
263688
264255
  }
263689
264256
  function parseRunnerJson3(stdout) {
@@ -263717,6 +264284,7 @@ var init_video_generate = __esm({
263717
264284
  init_venv_paths();
263718
264285
  init_model_store();
263719
264286
  init_hf_media_models();
264287
+ init_cuda_device_filter();
263720
264288
  DEFAULT_DIFFUSERS_VIDEO_MODEL = "Efficient-Large-Model/SANA-Video_2B_480p";
263721
264289
  SANA_VIDEO_480P_MODEL = "Efficient-Large-Model/SANA-Video_2B_480p";
263722
264290
  SANA_VIDEO_720P_MODEL = "Efficient-Large-Model/SANA-Video_2B_720p";
@@ -265049,6 +265617,45 @@ if __name__ == "__main__":
265049
265617
  this.lastProgressAt = now;
265050
265618
  this.progressHandler(event);
265051
265619
  }
265620
+ async acquireTransientLoadLease(args) {
265621
+ if (!args.candidate)
265622
+ return null;
265623
+ const broker = getModelBroker();
265624
+ const decision2 = await broker.acquireTransientModelLoad({
265625
+ name: args.candidate.model,
265626
+ domain: "video-gen",
265627
+ host: videoCandidateHost(args.candidate),
265628
+ owner: "video-generate-tool",
265629
+ estimatedVramMB: videoCandidateEstimatedVramMB(args.candidate)
265630
+ }, {
265631
+ reason: args.reason,
265632
+ restoreOllama: true,
265633
+ unloadRequestedOllama: false
265634
+ });
265635
+ if (decision2.kind === "reject") {
265636
+ return {
265637
+ success: false,
265638
+ output: "",
265639
+ error: `Video generation blocked by resource broker: ${decision2.reason}`,
265640
+ durationMs: performance.now() - args.start
265641
+ };
265642
+ }
265643
+ if (decision2.kind === "degrade") {
265644
+ return {
265645
+ success: false,
265646
+ output: "",
265647
+ error: `Video generation needs a broker fallback (${decision2.fallback.name}), but video candidate fallback must be selected by the video ladder: ${decision2.reason}`,
265648
+ durationMs: performance.now() - args.start
265649
+ };
265650
+ }
265651
+ if (decision2.lease.evictedOllamaModels.length > 0) {
265652
+ this.emitProgress({
265653
+ stage: "setup",
265654
+ message: `Temporarily unloaded ${decision2.lease.evictedOllamaModels.length} Ollama inference model(s) to free VRAM for video generation`
265655
+ });
265656
+ }
265657
+ return { lease: decision2.lease, gpuIndex: decision2.lease.gpuIndex };
265658
+ }
265052
265659
  async execute(args) {
265053
265660
  const start2 = performance.now();
265054
265661
  const action = String(args["action"] ?? "generate");
@@ -265104,35 +265711,6 @@ if __name__ == "__main__":
265104
265711
  const withAudio = booleanArg3(args["with_audio"], false);
265105
265712
  const audioInput = typeof args["audio_input"] === "string" && args["audio_input"].trim() ? String(args["audio_input"]).trim() : void 0;
265106
265713
  const candidates = videoGenerationFallbackCandidates(requestedModel, requestedBackend, inferredKind, generationFallbackEnabled3(args), { preferNativeAudioVideo: withAudio || Boolean(audioInput) });
265107
- const broker = getModelBroker();
265108
- const firstCandidate = candidates[0];
265109
- let brokerGpuIndex = null;
265110
- if (firstCandidate) {
265111
- const preset = firstCandidate.preset;
265112
- const decision2 = await broker.ensureModelLoadable({
265113
- name: firstCandidate.model,
265114
- domain: "video-gen",
265115
- host: firstCandidate.backend === "comfyui" ? "comfyui" : "diffusers-py",
265116
- owner: "video-generate-tool",
265117
- estimatedVramMB: preset ? preset.minVramGB * 1024 : void 0
265118
- });
265119
- if (decision2.kind === "evict") {
265120
- for (const target of decision2.evictTargets) {
265121
- await broker.evict(target.host, target.name, "video-gen-needs-room");
265122
- }
265123
- brokerGpuIndex = decision2.gpuIndex ?? null;
265124
- } else if (decision2.kind === "ok") {
265125
- brokerGpuIndex = decision2.gpuIndex ?? null;
265126
- } else if (decision2.kind === "reject") {
265127
- return {
265128
- success: false,
265129
- output: "",
265130
- error: `Video generation blocked by resource broker: ${decision2.reason}`,
265131
- durationMs: performance.now() - start2
265132
- };
265133
- }
265134
- }
265135
- this._brokerGpuIndex = brokerGpuIndex;
265136
265714
  if (candidates.length === 0) {
265137
265715
  return {
265138
265716
  success: false,
@@ -265182,7 +265760,30 @@ if __name__ == "__main__":
265182
265760
  failed.push({ candidate, reason: "ComfyUI backend not yet implemented." });
265183
265761
  continue;
265184
265762
  }
265185
- const result = await this.prewarmDiffusers({ candidate, start: args.start, python: args.args["python"] });
265763
+ const leaseDecision = await this.acquireTransientLoadLease({
265764
+ candidate,
265765
+ reason: "video-prewarm-needs-room",
265766
+ start: args.start
265767
+ });
265768
+ if (leaseDecision && "success" in leaseDecision) {
265769
+ failed.push({ candidate, reason: summarizeToolResult3(leaseDecision) });
265770
+ if (index < args.candidates.length - 1) {
265771
+ this.emitProgress({
265772
+ stage: "setup",
265773
+ message: `${candidate.model} did not fit current resources; trying ${args.candidates[index + 1].model}`
265774
+ });
265775
+ }
265776
+ continue;
265777
+ }
265778
+ const lease = leaseDecision?.lease;
265779
+ this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
265780
+ let result;
265781
+ try {
265782
+ result = await this.prewarmDiffusers({ candidate, start: args.start, python: args.args["python"] });
265783
+ } finally {
265784
+ await lease?.release();
265785
+ this._brokerGpuIndex = null;
265786
+ }
265186
265787
  if (result.success)
265187
265788
  return annotateVideoFallbackSuccess(result, failed, candidate);
265188
265789
  failed.push({ candidate, reason: summarizeToolResult3(result) });
@@ -265268,26 +265869,48 @@ if __name__ == "__main__":
265268
265869
  start: args.start
265269
265870
  });
265270
265871
  } else {
265271
- result = await this.generateWithDiffusers({
265272
- prompt: promptForCandidate,
265273
- model: candidate.model,
265274
- preset,
265275
- kind: args.kind,
265276
- imageArg: args.imageArg,
265277
- audioInput: args.audioInput,
265278
- width,
265279
- height,
265280
- numFrames,
265281
- fps,
265282
- steps,
265283
- guidance,
265284
- negativePrompt,
265285
- seed: args.seed,
265286
- hfToken: hfTokenOverride,
265287
- autoAcceptLicense,
265288
- start: args.start,
265289
- python: args.args["python"]
265872
+ const leaseDecision = await this.acquireTransientLoadLease({
265873
+ candidate,
265874
+ reason: "video-gen-needs-room",
265875
+ start: args.start
265290
265876
  });
265877
+ if (leaseDecision && "success" in leaseDecision) {
265878
+ failed.push({ candidate, reason: summarizeToolResult3(leaseDecision) });
265879
+ if (index < args.candidates.length - 1) {
265880
+ this.emitProgress({
265881
+ stage: "setup",
265882
+ message: `${candidate.model} did not fit current resources; falling back to ${args.candidates[index + 1].model}`
265883
+ });
265884
+ }
265885
+ continue;
265886
+ }
265887
+ const lease = leaseDecision?.lease;
265888
+ this._brokerGpuIndex = leaseDecision?.gpuIndex ?? null;
265889
+ try {
265890
+ result = await this.generateWithDiffusers({
265891
+ prompt: promptForCandidate,
265892
+ model: candidate.model,
265893
+ preset,
265894
+ kind: args.kind,
265895
+ imageArg: args.imageArg,
265896
+ audioInput: args.audioInput,
265897
+ width,
265898
+ height,
265899
+ numFrames,
265900
+ fps,
265901
+ steps,
265902
+ guidance,
265903
+ negativePrompt,
265904
+ seed: args.seed,
265905
+ hfToken: hfTokenOverride,
265906
+ autoAcceptLicense,
265907
+ start: args.start,
265908
+ python: args.args["python"]
265909
+ });
265910
+ } finally {
265911
+ await lease?.release();
265912
+ this._brokerGpuIndex = null;
265913
+ }
265291
265914
  }
265292
265915
  let nativeAudio = preset.nativeAudioVideo === true;
265293
265916
  let audioPath;
@@ -265479,6 +266102,17 @@ ${llmAnnotation}` : result.llmContent;
265479
266102
  }
265480
266103
  ensureUnifiedCacheDirs();
265481
266104
  this.emitProgress({ stage: "load", message: `Downloading/loading video model ${args.candidate.model}` });
266105
+ const runnerEnv = { ...python.env };
266106
+ if (this._brokerGpuIndex !== null) {
266107
+ if (mediaBrokerGpuIndexIsCompatible(this._brokerGpuIndex, "video", runnerEnv)) {
266108
+ runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
266109
+ } else {
266110
+ this.emitProgress({
266111
+ stage: "setup",
266112
+ message: `Broker selected CUDA GPU ${this._brokerGpuIndex}, but video CUDA filtering excluded it; using CUDA_VISIBLE_DEVICES=${runnerEnv["CUDA_VISIBLE_DEVICES"] ?? "default"}`
266113
+ });
266114
+ }
266115
+ }
265482
266116
  const result = await runProcess4(python.command, [
265483
266117
  runner,
265484
266118
  "--model",
@@ -265494,7 +266128,7 @@ ${llmAnnotation}` : result.llmContent;
265494
266128
  ], {
265495
266129
  cwd: this.cwd,
265496
266130
  timeoutMs: 18e5,
265497
- env: python.env,
266131
+ env: runnerEnv,
265498
266132
  progressLabel: `Downloading/loading ${args.candidate.model}`,
265499
266133
  onProgress: (event) => this.emitProgress(event)
265500
266134
  });
@@ -265555,7 +266189,14 @@ ${llmAnnotation}` : result.llmContent;
265555
266189
  runnerEnv["HUGGING_FACE_HUB_TOKEN"] = effectiveToken;
265556
266190
  }
265557
266191
  if (this._brokerGpuIndex !== null) {
265558
- runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
266192
+ if (mediaBrokerGpuIndexIsCompatible(this._brokerGpuIndex, "video", runnerEnv)) {
266193
+ runnerEnv["OMNIUS_GPU_INDEX"] = String(this._brokerGpuIndex);
266194
+ } else {
266195
+ this.emitProgress({
266196
+ stage: "setup",
266197
+ message: `Broker selected CUDA GPU ${this._brokerGpuIndex}, but video CUDA filtering excluded it; using CUDA_VISIBLE_DEVICES=${runnerEnv["CUDA_VISIBLE_DEVICES"] ?? "default"}`
266198
+ });
266199
+ }
265559
266200
  }
265560
266201
  const argv = [
265561
266202
  runner,
@@ -266682,6 +267323,11 @@ import { readFileSync as readFileSync24, existsSync as existsSync34, statSync as
266682
267323
  import { execSync as execSync16, spawn as spawn10, spawnSync as spawnSync4 } from "node:child_process";
266683
267324
  import { resolve as resolve23, extname as extname6, basename as basename8, dirname as dirname9, join as join44 } from "node:path";
266684
267325
  import { fileURLToPath as fileURLToPath4 } from "node:url";
267326
+ function visionPythonEnv(extra = {}) {
267327
+ const env2 = { ...process.env, ...extra };
267328
+ applyMediaCudaDeviceFilterToEnv(env2, "vision");
267329
+ return env2;
267330
+ }
266685
267331
  async function probeStation(endpoint) {
266686
267332
  try {
266687
267333
  const healthUrl = endpoint.replace(/\/v1\/?$/, "/health");
@@ -266752,7 +267398,8 @@ async function autoLaunchStation(port = 2020) {
266752
267398
  return false;
266753
267399
  return new Promise((resolvePromise) => {
266754
267400
  const child = spawn10(pythonBin, [launcherScript, "--port", String(port)], {
266755
- stdio: ["ignore", "pipe", "pipe"]
267401
+ stdio: ["ignore", "pipe", "pipe"],
267402
+ env: visionPythonEnv()
266756
267403
  });
266757
267404
  stationProcess = child;
266758
267405
  const cleanupStation = () => {
@@ -267067,7 +267714,11 @@ function tryHuggingFacePointBackend(options2) {
267067
267714
  hfPointUnavailable = "Python not found";
267068
267715
  return null;
267069
267716
  }
267070
- const deps = spawnSync4(python, ["-c", "import torch, transformers, PIL"], { stdio: "pipe", timeout: 1e4 });
267717
+ const deps = spawnSync4(python, ["-c", "import torch, transformers, PIL"], {
267718
+ stdio: "pipe",
267719
+ timeout: 1e4,
267720
+ env: visionPythonEnv()
267721
+ });
267071
267722
  if (deps.status !== 0) {
267072
267723
  hfPointUnavailable = bufferishToString3(deps.stderr) || "Python dependencies torch, transformers, and pillow are not importable";
267073
267724
  return null;
@@ -267114,7 +267765,7 @@ print(json.dumps(last_result))
267114
267765
  encoding: "utf8",
267115
267766
  stdio: ["pipe", "pipe", "pipe"],
267116
267767
  timeout: Math.max(options2.timeoutMs ?? 6e4, 3e5),
267117
- env: { ...process.env }
267768
+ env: visionPythonEnv()
267118
267769
  });
267119
267770
  if (run2.status !== 0) {
267120
267771
  hfPointUnavailable = run2.stderr || run2.stdout || "Hugging Face Moondream point backend failed";
@@ -267167,6 +267818,7 @@ var init_vision = __esm({
267167
267818
  "packages/execution/dist/tools/vision.js"() {
267168
267819
  "use strict";
267169
267820
  init_model_broker();
267821
+ init_cuda_device_filter();
267170
267822
  moondreamClient = null;
267171
267823
  moondreamError = null;
267172
267824
  stationProcess = null;
@@ -520577,6 +521229,11 @@ import { execFileSync as execFileSync5, execSync as execSync30, spawn as spawn15
520577
521229
  import { copyFileSync as copyFileSync3, existsSync as existsSync47, statSync as statSync23, writeFileSync as writeFileSync19, mkdirSync as mkdirSync20, readdirSync as readdirSync18, writeSync } from "node:fs";
520578
521230
  import { basename as basename15, extname as extname10, isAbsolute as isAbsolute2, join as join63 } from "node:path";
520579
521231
  import { homedir as homedir16, tmpdir as tmpdir11 } from "node:os";
521232
+ function ttsPythonEnv(extra = {}) {
521233
+ const env2 = { ...process.env, ...extra };
521234
+ applyMediaCudaDeviceFilterToEnv(env2, "tts");
521235
+ return env2;
521236
+ }
520580
521237
  function hasCommand3(command) {
520581
521238
  try {
520582
521239
  if (process.platform === "win32") {
@@ -521144,7 +521801,7 @@ function ensureLuxttsDaemon() {
521144
521801
  const daemon = spawn15(venvPy, [inferScript], {
521145
521802
  stdio: ["pipe", "pipe", "pipe"],
521146
521803
  cwd: tmpdir11(),
521147
- env: { ...process.env, LUXTTS_REPO_PATH: repoDir }
521804
+ env: ttsPythonEnv({ LUXTTS_REPO_PATH: repoDir })
521148
521805
  });
521149
521806
  _luxttsDaemon = daemon;
521150
521807
  _luxttsBuffer = "";
@@ -521224,6 +521881,7 @@ var init_audio_playback = __esm({
521224
521881
  "packages/execution/dist/tools/audio-playback.js"() {
521225
521882
  "use strict";
521226
521883
  init_hf_media_models();
521884
+ init_cuda_device_filter();
521227
521885
  _luxttsDaemon = null;
521228
521886
  _luxttsReady = false;
521229
521887
  _luxttsRequestId = 0;
@@ -521605,7 +522263,7 @@ ${tried.map((line) => `- ${line}`).join("\n")}`,
521605
522263
  execFileSync5(venvPy, ["-c", pyScript, JSON.stringify({ text, output: outputPath3, clone_ref: cloneRef, repo: repoDir, speed })], {
521606
522264
  stdio: "pipe",
521607
522265
  timeout: 12e4,
521608
- env: { ...process.env, LUXTTS_REPO_PATH: repoDir }
522266
+ env: ttsPythonEnv({ LUXTTS_REPO_PATH: repoDir })
521609
522267
  });
521610
522268
  return `${basename15(cloneRef)} (LuxTTS standalone)`;
521611
522269
  }
@@ -521619,7 +522277,8 @@ ${tried.map((line) => `- ${line}`).join("\n")}`,
521619
522277
  input: JSON.stringify({ text, output_path: outputPath3, voice_name: voice, lang, speed, total_step: totalStep }),
521620
522278
  encoding: "utf8",
521621
522279
  stdio: ["pipe", "pipe", "pipe"],
521622
- timeout: 18e4
522280
+ timeout: 18e4,
522281
+ env: ttsPythonEnv()
521623
522282
  });
521624
522283
  const line = stdout.trim().split(/\r?\n/).pop() || "";
521625
522284
  const parsed = JSON.parse(line);
@@ -523041,10 +523700,16 @@ import { execSync as execSync36 } from "node:child_process";
523041
523700
  import { existsSync as existsSync50, mkdirSync as mkdirSync22, writeFileSync as writeFileSync20, readFileSync as readFileSync35 } from "node:fs";
523042
523701
  import { join as join65 } from "node:path";
523043
523702
  import { homedir as homedir17, tmpdir as tmpdir13 } from "node:os";
523703
+ function audioAnalysisPythonEnv(extra = {}) {
523704
+ const env2 = { ...process.env, ...extra };
523705
+ applyMediaCudaDeviceFilterToEnv(env2, "asr");
523706
+ return env2;
523707
+ }
523044
523708
  var VENV_DIR, VENV_PIP, VENV_PYTHON, AudioAnalyzeTool;
523045
523709
  var init_audio_analyze = __esm({
523046
523710
  "packages/execution/dist/tools/audio-analyze.js"() {
523047
523711
  "use strict";
523712
+ init_cuda_device_filter();
523048
523713
  VENV_DIR = join65(homedir17(), ".omnius", "audio-ml-venv");
523049
523714
  VENV_PIP = join65(VENV_DIR, "bin", "pip");
523050
523715
  VENV_PYTHON = join65(VENV_DIR, "bin", "python3");
@@ -523334,15 +523999,15 @@ Context saved to: ${contextFile}`,
523334
523999
  /** Ensure Python venv with required packages */
523335
524000
  async ensureVenv(packages) {
523336
524001
  if (!existsSync50(VENV_PYTHON)) {
523337
- execSync36(`python3 -m venv ${VENV_DIR}`, { timeout: 3e4, stdio: "pipe" });
524002
+ execSync36(`python3 -m venv ${VENV_DIR}`, { timeout: 3e4, stdio: "pipe", env: audioAnalysisPythonEnv() });
523338
524003
  }
523339
524004
  for (const pkg of packages) {
523340
524005
  const importName = pkg.replace(/[<>=!].*/g, "").replace(/-/g, "_");
523341
524006
  try {
523342
- execSync36(`${VENV_PYTHON} -c "import ${importName}"`, { timeout: 1e4, stdio: "pipe" });
524007
+ execSync36(`${VENV_PYTHON} -c "import ${importName}"`, { timeout: 1e4, stdio: "pipe", env: audioAnalysisPythonEnv() });
523343
524008
  } catch {
523344
524009
  try {
523345
- execSync36(`${VENV_PIP} install "${pkg}"`, { timeout: 3e5, stdio: "pipe" });
524010
+ execSync36(`${VENV_PIP} install "${pkg}"`, { timeout: 3e5, stdio: "pipe", env: audioAnalysisPythonEnv() });
523346
524011
  } catch {
523347
524012
  }
523348
524013
  }
@@ -523356,7 +524021,7 @@ Context saved to: ${contextFile}`,
523356
524021
  const output = execSync36(`${VENV_PYTHON} ${scriptFile}`, {
523357
524022
  encoding: "utf8",
523358
524023
  timeout: 3e5,
523359
- env: { ...process.env, TF_CPP_MIN_LOG_LEVEL: "3", TF_ENABLE_ONEDNN_OPTS: "0", PYTHONUNBUFFERED: "1" }
524024
+ env: audioAnalysisPythonEnv({ TF_CPP_MIN_LOG_LEVEL: "3", TF_ENABLE_ONEDNN_OPTS: "0", PYTHONUNBUFFERED: "1" })
523360
524025
  });
523361
524026
  try {
523362
524027
  const result = JSON.parse(output.trim().split("\n").pop());
@@ -524192,10 +524857,16 @@ import { execSync as execSync38 } from "node:child_process";
524192
524857
  import { existsSync as existsSync52, mkdirSync as mkdirSync24, writeFileSync as writeFileSync22, readFileSync as readFileSync37 } from "node:fs";
524193
524858
  import { join as join67 } from "node:path";
524194
524859
  import { homedir as homedir19, tmpdir as tmpdir15 } from "node:os";
524860
+ function visualMemoryPythonEnv(extra = {}) {
524861
+ const env2 = { ...process.env, ...extra };
524862
+ applyMediaCudaDeviceFilterToEnv(env2, "vision");
524863
+ return env2;
524864
+ }
524195
524865
  var VMEM_DIR, VENV_DIR2, VENV_PY, VENV_PIP2, VisualMemoryTool;
524196
524866
  var init_visual_memory = __esm({
524197
524867
  "packages/execution/dist/tools/visual-memory.js"() {
524198
524868
  "use strict";
524869
+ init_cuda_device_filter();
524199
524870
  VMEM_DIR = join67(homedir19(), ".omnius", "visual-memory");
524200
524871
  VENV_DIR2 = join67(homedir19(), ".omnius", "vision-ml-venv");
524201
524872
  VENV_PY = join67(VENV_DIR2, "bin", "python3");
@@ -524723,19 +525394,23 @@ ${objects.join("\n") || " (none taught)"}`,
524723
525394
  async ensureVenv() {
524724
525395
  if (existsSync52(VENV_PY)) {
524725
525396
  try {
524726
- execSync38(`${VENV_PY} -c "import insightface, transformers, torch"`, { timeout: 15e3, stdio: "pipe" });
525397
+ execSync38(`${VENV_PY} -c "import insightface, transformers, torch"`, {
525398
+ timeout: 15e3,
525399
+ stdio: "pipe",
525400
+ env: visualMemoryPythonEnv()
525401
+ });
524727
525402
  return true;
524728
525403
  } catch {
524729
525404
  }
524730
525405
  }
524731
525406
  try {
524732
525407
  if (!existsSync52(VENV_PY)) {
524733
- execSync38(`python3 -m venv ${VENV_DIR2}`, { timeout: 3e4, stdio: "pipe" });
525408
+ execSync38(`python3 -m venv ${VENV_DIR2}`, { timeout: 3e4, stdio: "pipe", env: visualMemoryPythonEnv() });
524734
525409
  }
524735
- execSync38(`${VENV_PIP2} install "setuptools<81" wheel`, { timeout: 6e4, stdio: "pipe" });
524736
- execSync38(`${VENV_PIP2} install torch torchvision`, { timeout: 6e5, stdio: "pipe" });
524737
- execSync38(`${VENV_PIP2} install insightface onnxruntime opencv-python-headless`, { timeout: 3e5, stdio: "pipe" });
524738
- execSync38(`${VENV_PIP2} install transformers pillow`, { timeout: 3e5, stdio: "pipe" });
525410
+ execSync38(`${VENV_PIP2} install "setuptools<81" wheel`, { timeout: 6e4, stdio: "pipe", env: visualMemoryPythonEnv() });
525411
+ execSync38(`${VENV_PIP2} install torch torchvision`, { timeout: 6e5, stdio: "pipe", env: visualMemoryPythonEnv() });
525412
+ execSync38(`${VENV_PIP2} install insightface onnxruntime opencv-python-headless`, { timeout: 3e5, stdio: "pipe", env: visualMemoryPythonEnv() });
525413
+ execSync38(`${VENV_PIP2} install transformers pillow`, { timeout: 3e5, stdio: "pipe", env: visualMemoryPythonEnv() });
524739
525414
  return true;
524740
525415
  } catch {
524741
525416
  return false;
@@ -524748,7 +525423,7 @@ ${objects.join("\n") || " (none taught)"}`,
524748
525423
  const output = execSync38(`${VENV_PY} ${scriptFile}`, {
524749
525424
  encoding: "utf8",
524750
525425
  timeout: timeoutMs,
524751
- env: { ...process.env, PYTHONUNBUFFERED: "1" }
525426
+ env: visualMemoryPythonEnv({ PYTHONUNBUFFERED: "1" })
524752
525427
  });
524753
525428
  const lastLine = output.trim().split("\n").pop() || "{}";
524754
525429
  return JSON.parse(lastLine);
@@ -525364,6 +526039,11 @@ import { existsSync as existsSync54, mkdirSync as mkdirSync26, writeFileSync as
525364
526039
  import { dirname as dirname15, join as join69, resolve as resolve35 } from "node:path";
525365
526040
  import { tmpdir as tmpdir17, homedir as homedir21 } from "node:os";
525366
526041
  import { fileURLToPath as fileURLToPath8 } from "node:url";
526042
+ function asrPythonEnv(extra = {}) {
526043
+ const env2 = { ...process.env, ...extra };
526044
+ applyMediaCudaDeviceFilterToEnv(env2, "asr");
526045
+ return env2;
526046
+ }
525367
526047
  function _findNemotronScript() {
525368
526048
  const candidates = [];
525369
526049
  try {
@@ -525399,6 +526079,7 @@ var init_asr_listen = __esm({
525399
526079
  "packages/execution/dist/tools/asr-listen.js"() {
525400
526080
  "use strict";
525401
526081
  init_hf_media_models();
526082
+ init_cuda_device_filter();
525402
526083
  AsrListenTool = class {
525403
526084
  name = "asr_listen";
525404
526085
  description = "Record from microphone and transcribe speech to text. Backends: 'whisper' (default, battle-tested openai-whisper / faster-whisper), 'nemotron' (nvidia/nemotron-speech-streaming-en-0.6b — faster streaming), or 'parallel' (runs BOTH engines on the same audio and returns a side-by-side comparison with per-engine latency and character counts). Actions: 'listen' to record + transcribe in one step, 'transcribe' to run on an existing file. Use this when you need to HEAR what a human is saying — ask a question via audio_playback speak, then use asr_listen to capture and transcribe their response.";
@@ -525633,7 +526314,7 @@ print(json.dumps({"ok": False, "error": "No whisper backend available"}))
525633
526314
  const output = execSync40(`"${pyPath}" "${scriptFile}"`, {
525634
526315
  encoding: "utf8",
525635
526316
  timeout: 12e4,
525636
- env: { ...process.env, PYTHONUNBUFFERED: "1" }
526317
+ env: asrPythonEnv({ PYTHONUNBUFFERED: "1" })
525637
526318
  }).trim();
525638
526319
  const lines = output.split("\n");
525639
526320
  for (let i2 = lines.length - 1; i2 >= 0; i2--) {
@@ -525682,7 +526363,8 @@ print(json.dumps({"ok": False, "error": "No whisper backend available"}))
525682
526363
  const result = spawnSync7("python3", [script, "--file", audioFile], {
525683
526364
  encoding: "utf8",
525684
526365
  timeout: 6e5,
525685
- stdio: ["ignore", "pipe", "pipe"]
526366
+ stdio: ["ignore", "pipe", "pipe"],
526367
+ env: asrPythonEnv()
525686
526368
  });
525687
526369
  if (result.error) {
525688
526370
  return {
@@ -530791,6 +531473,7 @@ __export(dist_exports, {
530791
531473
  CustomTool: () => CustomTool,
530792
531474
  DEFAULT_DIFFUSERS_IMAGE_MODEL: () => DEFAULT_DIFFUSERS_IMAGE_MODEL,
530793
531475
  DEFAULT_DIFFUSERS_VIDEO_MODEL: () => DEFAULT_DIFFUSERS_VIDEO_MODEL,
531476
+ DEFAULT_MEDIA_MIN_CUDA_COMPUTE_CAPABILITY: () => DEFAULT_MEDIA_MIN_CUDA_COMPUTE_CAPABILITY,
530794
531477
  DEFAULT_MUSIC_MODEL: () => DEFAULT_MUSIC_MODEL,
530795
531478
  DEFAULT_OLLAMA_IMAGE_MODEL: () => DEFAULT_OLLAMA_IMAGE_MODEL,
530796
531479
  DEFAULT_SOUND_MODEL: () => DEFAULT_SOUND_MODEL,
@@ -530905,6 +531588,7 @@ __export(dist_exports, {
530905
531588
  addProjectConstraint: () => addProjectConstraint,
530906
531589
  addSessionConstraint: () => addSessionConstraint,
530907
531590
  aliasTool: () => aliasTool,
531591
+ applyMediaCudaDeviceFilterToEnv: () => applyMediaCudaDeviceFilterToEnv,
530908
531592
  applyPatch: () => applyPatch,
530909
531593
  applyToolResultTriage: () => applyToolResultTriage,
530910
531594
  artifactManifestFromBytes: () => artifactManifestFromBytes,
@@ -530945,6 +531629,7 @@ __export(dist_exports, {
530945
531629
  defaultExtensionForMime: () => defaultExtensionForMime,
530946
531630
  deleteMediaModelAdapter: () => deleteMediaModelAdapter,
530947
531631
  deleteTodos: () => deleteTodos,
531632
+ detectCudaDevices: () => detectCudaDevices,
530948
531633
  detectElevationMethod: () => detectElevationMethod,
530949
531634
  detectLegacyCaches: () => detectLegacyCaches,
530950
531635
  detectSearchProvider: () => detectSearchProvider,
@@ -531046,6 +531731,7 @@ __export(dist_exports, {
531046
531731
  markSessionValidated: () => markSessionValidated,
531047
531732
  measureRepoCacheBytes: () => measureRepoCacheBytes,
531048
531733
  mediaBackendCompatibleWithModality: () => mediaBackendCompatibleWithModality,
531734
+ mediaBrokerGpuIndexIsCompatible: () => mediaBrokerGpuIndexIsCompatible,
531049
531735
  mediaMimeFromPath: () => mediaMimeFromPath,
531050
531736
  mediaModelCatalogDir: () => mediaModelCatalogDir,
531051
531737
  mediaModelSlug: () => mediaModelSlug,
@@ -531056,6 +531742,8 @@ __export(dist_exports, {
531056
531742
  normalizeSponsorMediaConfig: () => normalizeSponsorMediaConfig,
531057
531743
  omniusHomeDir: () => omniusHomeDir,
531058
531744
  packetPath: () => packetPath,
531745
+ parseCudaComputeCapability: () => parseCudaComputeCapability,
531746
+ parseCudaDeviceInfo: () => parseCudaDeviceInfo,
531059
531747
  parseMcpMarkdown: () => parseMcpMarkdown,
531060
531748
  parseMcpToolName: () => parseMcpToolName,
531061
531749
  parseSponsorMediaCapability: () => parseSponsorMediaCapability,
@@ -531078,6 +531766,7 @@ __export(dist_exports, {
531078
531766
  renderCustomToolDocs: () => renderCustomToolDocs,
531079
531767
  resetDepCache: () => resetDepCache,
531080
531768
  resetMoondreamClient: () => resetMoondreamClient,
531769
+ resolveMediaCudaVisibleDevicesForEnv: () => resolveMediaCudaVisibleDevicesForEnv,
531081
531770
  resolveMediaModel: () => resolveMediaModel,
531082
531771
  resolveSecret: () => resolveSecret,
531083
531772
  revokeSecret: () => revokeSecret,
@@ -531199,6 +531888,7 @@ var init_dist5 = __esm({
531199
531888
  init_embedding_store();
531200
531889
  init_image_generate();
531201
531890
  init_audio_generate();
531891
+ init_cuda_device_filter();
531202
531892
  init_model_store();
531203
531893
  init_video_generate();
531204
531894
  init_sponsor_media();
@@ -589502,6 +590192,7 @@ ${CONTENT_BG_SEQ}`);
589502
590192
  });
589503
590193
 
589504
590194
  // packages/cli/src/tui/tui-select.ts
590195
+ import { AsyncLocalStorage } from "node:async_hooks";
589505
590196
  function ansi3(code8, text) {
589506
590197
  return isTTY2 ? `\x1B[${code8}m${text}\x1B[0m` : text;
589507
590198
  }
@@ -589511,6 +590202,48 @@ function fg2563(code8, text) {
589511
590202
  function stripAnsi3(s2) {
589512
590203
  return s2.replace(/\x1B\[[0-9;]*m/g, "");
589513
590204
  }
590205
+ function stripTerminalControl(s2) {
590206
+ return s2.replace(/\x1B(?:\[[\d;?]*[ -/]*[@-~]|\][^\x07\x1B]*(?:\x07|\x1B\\)?|[@-Z\\-_])/g, "");
590207
+ }
590208
+ function isNonInteractiveSelectSurface() {
590209
+ return Boolean(nonInteractiveSelectSurface.getStore());
590210
+ }
590211
+ function runWithNonInteractiveSelectSurface(fn, opts = {}) {
590212
+ return nonInteractiveSelectSurface.run(opts, fn);
590213
+ }
590214
+ function renderNonInteractiveSelect(opts, currentTitle, skipSet) {
590215
+ const surface = nonInteractiveSelectSurface.getStore();
590216
+ const maxItems = Math.max(1, surface?.maxItems ?? 30);
590217
+ const lines = [];
590218
+ if (currentTitle) lines.push(stripTerminalControl(stripAnsi3(currentTitle)));
590219
+ if (lines.length) lines.push("");
590220
+ let idx = 1;
590221
+ let shown = 0;
590222
+ let omitted = 0;
590223
+ for (const item of opts.items) {
590224
+ const isSkip = skipSet.has(item.key);
590225
+ const labelPlain = stripTerminalControl(stripAnsi3(item.label)).trim();
590226
+ const detailPlain = item.detail ? stripTerminalControl(stripAnsi3(item.detail)).trim() : "";
590227
+ if (isSkip) {
590228
+ if (labelPlain) lines.push(labelPlain);
590229
+ continue;
590230
+ }
590231
+ if (shown >= maxItems) {
590232
+ omitted++;
590233
+ idx++;
590234
+ continue;
590235
+ }
590236
+ const num = String(idx).padStart(2, " ");
590237
+ const detail = detailPlain ? ` - ${detailPlain}` : "";
590238
+ lines.push(` ${num}. ${labelPlain}${detail}`);
590239
+ shown++;
590240
+ idx++;
590241
+ }
590242
+ if (omitted > 0) lines.push(` ... ${omitted} more`);
590243
+ if (opts.customKeyHint) lines.push("", stripTerminalControl(stripAnsi3(opts.customKeyHint)));
590244
+ lines.push("", surface?.hint ?? "(non-interactive: menu shown as text; open the TUI for selection)");
590245
+ process.stdout.write(lines.join("\n").trimEnd() + "\n");
590246
+ }
589514
590247
  function defaultRenderRow(item, focused, isActive) {
589515
590248
  const marker = isActive ? selectColors.green("●") : focused ? selectColors.blue("●") : selectColors.dim("○");
589516
590249
  const label = focused ? selectColors.blue(selectColors.bold(item.label)) : isActive ? selectColors.green(item.label) : item.label;
@@ -589535,27 +590268,8 @@ function tuiSelect(opts) {
589535
590268
  if (items.length === 0) {
589536
590269
  return Promise.resolve({ confirmed: false, key: null, index: -1 });
589537
590270
  }
589538
- if (!process.stdin.isTTY && process.env["OMNIUS_TUI_FORCE_INTERACTIVE"] !== "1") {
589539
- const lines = [];
589540
- if (currentTitle) lines.push(currentTitle);
589541
- if (lines.length) lines.push("");
589542
- let idx = 1;
589543
- for (const item of items) {
589544
- const isSkip = skipSet.has(item.key);
589545
- const labelPlain = stripAnsi3(item.label);
589546
- const detailPlain = item.detail ? stripAnsi3(item.detail) : "";
589547
- if (isSkip) {
589548
- lines.push(labelPlain);
589549
- } else {
589550
- const num = String(idx).padStart(2, " ");
589551
- const detail = detailPlain ? ` — ${detailPlain}` : "";
589552
- lines.push(` ${num}. ${labelPlain}${detail}`);
589553
- idx++;
589554
- }
589555
- }
589556
- if (opts.customKeyHint) lines.push("", opts.customKeyHint);
589557
- lines.push("", "(non-interactive: list shown above; pick options by re-running this command from the TUI)");
589558
- process.stdout.write(lines.join("\n") + "\n");
590271
+ if (isNonInteractiveSelectSurface() || !process.stdin.isTTY && process.env["OMNIUS_TUI_FORCE_INTERACTIVE"] !== "1") {
590272
+ renderNonInteractiveSelect(opts, currentTitle, skipSet);
589559
590273
  return Promise.resolve({ confirmed: false, key: null, index: -1 });
589560
590274
  }
589561
590275
  const isSkippable = (idx) => skipSet.has(items[idx].key);
@@ -590136,7 +590850,7 @@ ${tuiBgSeq()}`);
590136
590850
  }
590137
590851
  });
590138
590852
  }
590139
- var isTTY2, MENU_ACTIVE_GREEN_256, selectColors;
590853
+ var isTTY2, MENU_ACTIVE_GREEN_256, selectColors, nonInteractiveSelectSurface;
590140
590854
  var init_tui_select = __esm({
590141
590855
  "packages/cli/src/tui/tui-select.ts"() {
590142
590856
  "use strict";
@@ -590156,6 +590870,7 @@ var init_tui_select = __esm({
590156
590870
  /** Readable grey for non-matching items */
590157
590871
  matchDark: (t2) => fg2563(tuiTextDim(), t2)
590158
590872
  };
590873
+ nonInteractiveSelectSurface = new AsyncLocalStorage();
590159
590874
  }
590160
590875
  });
590161
590876
 
@@ -590182,12 +590897,17 @@ import { join as join114, dirname as dirname32 } from "node:path";
590182
590897
  import { homedir as homedir36 } from "node:os";
590183
590898
  import { execSync as execSync50, spawn as spawn26 } from "node:child_process";
590184
590899
  import { fileURLToPath as fileURLToPath15 } from "node:url";
590900
+ function personaplexPythonEnv(extra = {}) {
590901
+ const env2 = { ...process.env, ...extra };
590902
+ applyMediaCudaDeviceFilterToEnv(env2, "voice");
590903
+ return env2;
590904
+ }
590185
590905
  function execAsync(cmd, opts = {}) {
590186
590906
  return new Promise((resolve59, reject) => {
590187
590907
  const child = spawn26("bash", ["-c", cmd], {
590188
590908
  stdio: ["ignore", "pipe", "pipe"],
590189
590909
  timeout: opts.timeout ?? 3e5,
590190
- env: opts.env ?? process.env
590910
+ env: personaplexPythonEnv(opts.env ?? {})
590191
590911
  });
590192
590912
  let stdout = "";
590193
590913
  let stderr = "";
@@ -590261,7 +590981,8 @@ function detectPersonaPlexCapability() {
590261
590981
  try {
590262
590982
  execSync50('python3 -c "import torch; assert torch.cuda.is_available()"', {
590263
590983
  timeout: 1e4,
590264
- stdio: "pipe"
590984
+ stdio: "pipe",
590985
+ env: personaplexPythonEnv()
590265
590986
  });
590266
590987
  } catch {
590267
590988
  const tier2 = selectWeightTier(vramGB);
@@ -590438,7 +591159,8 @@ async function installPersonaPlex(onInfo, weightTier) {
590438
591159
  const sitePackages = execSync50(`"${python}" -c "import moshi, os; print(os.path.dirname(moshi.__file__))"`, {
590439
591160
  encoding: "utf8",
590440
591161
  timeout: 5e3,
590441
- stdio: "pipe"
591162
+ stdio: "pipe",
591163
+ env: personaplexPythonEnv()
590442
591164
  }).trim();
590443
591165
  const serverFile = join114(sitePackages, "server.py");
590444
591166
  if (existsSync99(serverFile)) {
@@ -590455,7 +591177,8 @@ async function installPersonaPlex(onInfo, weightTier) {
590455
591177
  const sitePackages = execSync50(`"${python}" -c "import moshi, os; print(os.path.dirname(moshi.__file__))"`, {
590456
591178
  encoding: "utf8",
590457
591179
  timeout: 5e3,
590458
- stdio: "pipe"
591180
+ stdio: "pipe",
591181
+ env: personaplexPythonEnv()
590459
591182
  }).trim();
590460
591183
  const loadersFile = join114(sitePackages, "models", "loaders.py");
590461
591184
  if (existsSync99(loadersFile)) {
@@ -590559,7 +591282,8 @@ $2if filename.endswith(".safetensors"):`
590559
591282
  const sitePackages2 = execSync50(`"${python}" -c "import moshi, os; print(os.path.dirname(moshi.__file__))"`, {
590560
591283
  encoding: "utf8",
590561
591284
  timeout: 5e3,
590562
- stdio: "pipe"
591285
+ stdio: "pipe",
591286
+ env: personaplexPythonEnv()
590563
591287
  }).trim();
590564
591288
  const hybridDest = join114(sitePackages2, "hybrid_agent.py");
590565
591289
  const serverDest = join114(sitePackages2, "server.py");
@@ -590693,7 +591417,7 @@ async function startPersonaPlexDaemon(onInfo) {
590693
591417
  try {
590694
591418
  const weightPath = execSync50(
590695
591419
  `"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', '${repoInfo.file}', token=False))"`,
590696
- { encoding: "utf8", timeout: 6e4, stdio: "pipe" }
591420
+ { encoding: "utf8", timeout: 6e4, stdio: "pipe", env: personaplexPythonEnv() }
590697
591421
  ).trim();
590698
591422
  if (existsSync99(weightPath)) {
590699
591423
  if (!existsSync99(cachedBf16)) {
@@ -590706,7 +591430,7 @@ state = {k: v.to(torch.bfloat16) if v.is_floating_point() else v for k, v in sta
590706
591430
  save_file(state, '${cachedBf16}')
590707
591431
  print('Converted')
590708
591432
  "`,
590709
- { timeout: 18e4, stdio: "pipe" }
591433
+ { timeout: 18e4, stdio: "pipe", env: personaplexPythonEnv() }
590710
591434
  );
590711
591435
  }
590712
591436
  if (existsSync99(cachedBf16)) {
@@ -590732,13 +591456,13 @@ print('Converted')
590732
591456
  try {
590733
591457
  const weightPath = execSync50(
590734
591458
  `"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', '${repoInfo.file}'${repoInfo.needsToken ? "" : ", token=False"}))"`,
590735
- { encoding: "utf8", timeout: 3e4, stdio: "pipe" }
591459
+ { encoding: "utf8", timeout: 3e4, stdio: "pipe", env: personaplexPythonEnv() }
590736
591460
  ).trim();
590737
591461
  if (existsSync99(dequantScript) && existsSync99(weightPath)) {
590738
591462
  try {
590739
591463
  execSync50(
590740
591464
  `"${venvPython2}" "${dequantScript}" --input "${weightPath}" --output "${cachedBf16}"`,
590741
- { timeout: 3e5, stdio: "pipe" }
591465
+ { timeout: 3e5, stdio: "pipe", env: personaplexPythonEnv() }
590742
591466
  );
590743
591467
  if (existsSync99(cachedBf16)) {
590744
591468
  extraArgs.push("--moshi-weight", cachedBf16);
@@ -590751,7 +591475,7 @@ print('Converted')
590751
591475
  try {
590752
591476
  const mimiPath = execSync50(
590753
591477
  `"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', 'tokenizer-e351c8d8-checkpoint125.safetensors', token=False))"`,
590754
- { encoding: "utf8", timeout: 3e4, stdio: "pipe" }
591478
+ { encoding: "utf8", timeout: 3e4, stdio: "pipe", env: personaplexPythonEnv() }
590755
591479
  ).trim();
590756
591480
  if (existsSync99(mimiPath)) extraArgs.push("--mimi-weight", mimiPath);
590757
591481
  } catch {
@@ -590759,7 +591483,7 @@ print('Converted')
590759
591483
  try {
590760
591484
  const tokPath = execSync50(
590761
591485
  `"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', 'tokenizer_spm_32k_3.model', token=False))"`,
590762
- { encoding: "utf8", timeout: 3e4, stdio: "pipe" }
591486
+ { encoding: "utf8", timeout: 3e4, stdio: "pipe", env: personaplexPythonEnv() }
590763
591487
  ).trim();
590764
591488
  if (existsSync99(tokPath)) extraArgs.push("--tokenizer", tokPath);
590765
591489
  } catch {
@@ -590814,7 +591538,7 @@ print('Converted')
590814
591538
  ];
590815
591539
  if (hybridEnabled) serverArgs.push("--hybrid");
590816
591540
  if (needsOffload) serverArgs.push("--cpu-offload");
590817
- const serverEnv = { ...process.env };
591541
+ const serverEnv = personaplexPythonEnv();
590818
591542
  if (hybridEnabled) {
590819
591543
  serverEnv["HYBRID_ENABLED"] = "1";
590820
591544
  serverEnv["HYBRID_LLM_MODEL"] = ollamaModel;
@@ -590959,7 +591683,7 @@ async function clonePersonaPlexVoice(inputWav, voiceName, onInfo) {
590959
591683
  "cuda"
590960
591684
  ], {
590961
591685
  stdio: ["ignore", "pipe", "pipe"],
590962
- env: { ...process.env },
591686
+ env: personaplexPythonEnv(),
590963
591687
  cwd: PERSONAPLEX_DIR
590964
591688
  });
590965
591689
  let output = "";
@@ -591135,6 +591859,7 @@ var init_personaplex = __esm({
591135
591859
  init_render();
591136
591860
  init_daemon_registry();
591137
591861
  init_typed_node_events();
591862
+ init_dist5();
591138
591863
  WEIGHT_REPOS = {
591139
591864
  original: { repo: "nvidia/personaplex-7b-v1", file: "model.safetensors", sizeGB: 15.6, needsToken: true },
591140
591865
  nf4: { repo: "cudabenchmarktest/personaplex-7b-nf4", file: "model-nf4.safetensors", sizeGB: 4.1, needsToken: false },
@@ -599715,6 +600440,11 @@ import {
599715
600440
  spawn as nodeSpawn
599716
600441
  } from "node:child_process";
599717
600442
  import { createRequire as createRequire6 } from "node:module";
600443
+ function voicePythonEnv(extra = {}) {
600444
+ const env2 = { ...process.env, ...extra };
600445
+ applyMediaCudaDeviceFilterToEnv(env2, "tts");
600446
+ return env2;
600447
+ }
599718
600448
  function sanitizeForTTS(text) {
599719
600449
  return text.replace(/^#{1,6}\s+/gm, "").replace(/\*{1,3}([^*]+)\*{1,3}/g, "$1").replace(/_{1,3}([^_]+)_{1,3}/g, "$1").replace(/~~([^~]+)~~/g, "$1").replace(/`([^`]+)`/g, "$1").replace(/```[\s\S]*?```/g, "").replace(/\[([^\]]+)\]\([^)]+\)/g, "$1").replace(/!\[([^\]]*)\]\([^)]+\)/g, "$1").replace(/^[\s]*[-*+]\s+/gm, "").replace(/^[\s]*\d+\.\s+/gm, "").replace(/^>\s+/gm, "").replace(/^[-*_]{3,}$/gm, "").replace(/\[[ xX]\]\s*/g, "").replace(/[\u{1F600}-\u{1F64F}]/gu, "").replace(/[\u{1F300}-\u{1F5FF}]/gu, "").replace(/[\u{1F680}-\u{1F6FF}]/gu, "").replace(/[\u{1F1E0}-\u{1F1FF}]/gu, "").replace(/[\u{2600}-\u{26FF}]/gu, "").replace(/[\u{2700}-\u{27BF}]/gu, "").replace(/[\u{FE00}-\u{FE0F}]/gu, "").replace(/[\u{1F900}-\u{1F9FF}]/gu, "").replace(/[\u{1FA00}-\u{1FA6F}]/gu, "").replace(/[\u{1FA70}-\u{1FAFF}]/gu, "").replace(/[\u{200D}]/gu, "").replace(/[\u{20E3}]/gu, "").replace(/[✓✔✗✘✕✖⚠️⏸⏹⏵●○◆◇■□▪▫►▼▲◀⬆⬇⬅➡↑↓←→⇐⇒⇑⇓]/g, "").replace(/[─━│┃┌┐└┘├┤┬┴┼╔╗╚╝╠╣╦╩╬⎿⎾▕▏⏐░▒▓█⠀-⣿]/g, "").replace(/\s{2,}/g, " ").trim();
599720
600450
  }
@@ -600620,6 +601350,7 @@ var init_voice = __esm({
600620
601350
  init_typed_node_events();
600621
601351
  init_render();
600622
601352
  init_daemon_registry();
601353
+ init_dist5();
600623
601354
  VOICE_MODELS = {
600624
601355
  glados: {
600625
601356
  id: "glados",
@@ -602134,7 +602865,8 @@ except Exception as exc:
602134
602865
  return new Promise((resolve59, reject) => {
602135
602866
  const proc = nodeSpawn("sh", ["-c", command], {
602136
602867
  stdio: ["ignore", "pipe", "pipe"],
602137
- cwd: tmpdir20()
602868
+ cwd: tmpdir20(),
602869
+ env: voicePythonEnv()
602138
602870
  });
602139
602871
  let stdout = "";
602140
602872
  let stderr = "";
@@ -602909,7 +603641,7 @@ if __name__ == '__main__':
602909
603641
  const venvPy = luxttsVenvPy2();
602910
603642
  if (!existsSync109(venvPy)) return false;
602911
603643
  return new Promise((resolve59) => {
602912
- const env2 = { ...process.env, LUXTTS_REPO_PATH: luxttsRepoDir2() };
603644
+ const env2 = voicePythonEnv({ LUXTTS_REPO_PATH: luxttsRepoDir2() });
602913
603645
  const daemon = nodeSpawn(venvPy, [luxttsInferScript2()], {
602914
603646
  stdio: ["pipe", "pipe", "pipe"],
602915
603647
  cwd: tmpdir20(),
@@ -625604,6 +626336,146 @@ var init_telegram_stats_menu = __esm({
625604
626336
  }
625605
626337
  });
625606
626338
 
626339
+ // packages/cli/src/tui/telegram-command-menu.ts
626340
+ function isBareTelegramGenerativeCommand(input) {
626341
+ const trimmed = input.trim();
626342
+ if (!trimmed.startsWith("/")) return false;
626343
+ const parts = trimmed.split(/\s+/);
626344
+ const name10 = (parts[0] ?? "").slice(1).split("@")[0]?.toLowerCase() ?? "";
626345
+ return parts.length === 1 && GENERATIVE_COMMANDS.has(name10);
626346
+ }
626347
+ function buildTelegramCommandMenuItems(scope) {
626348
+ const commands = listCommandRegistry({ includePlanned: false }).filter((cmd) => cmd.implementationStatus === "implemented").filter((cmd) => scope === "admin" || ["help", "start"].includes(cmd.name));
626349
+ const seen = /* @__PURE__ */ new Set();
626350
+ const items = [];
626351
+ for (const cmd of commands) {
626352
+ const signature = cmd.signatures[0]?.signature;
626353
+ if (!signature || seen.has(cmd.name)) continue;
626354
+ seen.add(cmd.name);
626355
+ items.push({
626356
+ label: `/${cmd.name}`,
626357
+ command: `/${cmd.name}`,
626358
+ description: cmd.signatures[0]?.description ?? signature,
626359
+ adminOnly: scope === "admin"
626360
+ });
626361
+ }
626362
+ return items.sort((a2, b) => a2.label.localeCompare(b.label));
626363
+ }
626364
+ function buildTelegramGenerativeMenuItems(commandName) {
626365
+ const name10 = commandName.replace(/^\//, "").toLowerCase();
626366
+ if (!GENERATIVE_COMMANDS.has(name10)) return [];
626367
+ const title = name10[0].toUpperCase() + name10.slice(1);
626368
+ return [
626369
+ { label: `${title} models`, command: `/${name10} list`, description: `List available ${name10} models and hardware fit.` },
626370
+ { label: `${title} setup`, command: `/${name10} setup`, description: `Show setup commands for the ${name10} backend.` }
626371
+ ];
626372
+ }
626373
+ function encodeTelegramCommandMenuCallback(action, value2) {
626374
+ const data = `${CALLBACK_PREFIX2}:${action[0]}:${value2}`;
626375
+ return Buffer.byteLength(data, "utf8") <= MAX_CALLBACK_DATA_BYTES ? data : data.slice(0, MAX_CALLBACK_DATA_BYTES);
626376
+ }
626377
+ function decodeTelegramCommandMenuCallback(data) {
626378
+ const parts = data.split(":");
626379
+ if (parts.length !== 3 || parts[0] !== CALLBACK_PREFIX2) return null;
626380
+ const action = parts[1] === "p" ? "page" : parts[1] === "r" ? "run" : parts[1] === "c" ? "close" : null;
626381
+ if (!action) return null;
626382
+ return { action, value: parts[2] ?? "" };
626383
+ }
626384
+ function renderTelegramCommandMenu(state) {
626385
+ const totalPages = Math.max(1, Math.ceil(state.items.length / PAGE_SIZE2));
626386
+ const page2 = Math.max(0, Math.min(state.page, totalPages - 1));
626387
+ const start2 = page2 * PAGE_SIZE2;
626388
+ const visible = state.items.slice(start2, start2 + PAGE_SIZE2);
626389
+ const title = state.kind === "generative" ? "Generative command" : "Commands";
626390
+ const scope = state.scope === "admin" ? "admin" : "public";
626391
+ const lines = [
626392
+ `<b>${escapeHTML3(title)}</b>`,
626393
+ `<i>${escapeHTML3(scope)} scope - page ${page2 + 1}/${totalPages}</i>`,
626394
+ "",
626395
+ ...visible.flatMap((item) => [
626396
+ `<code>${escapeHTML3(item.command)}</code>`,
626397
+ escapeHTML3(item.description)
626398
+ ])
626399
+ ];
626400
+ const keyboard = visible.map((item, offset) => [{
626401
+ text: item.label.slice(0, 32),
626402
+ callback_data: encodeTelegramCommandMenuCallback("run", start2 + offset)
626403
+ }]);
626404
+ const nav = [];
626405
+ nav.push({ text: "Close", callback_data: encodeTelegramCommandMenuCallback("close", 0) });
626406
+ if (page2 > 0) nav.push({ text: "Prev", callback_data: encodeTelegramCommandMenuCallback("page", page2 - 1) });
626407
+ nav.push({ text: `${page2 + 1}/${totalPages}`, callback_data: encodeTelegramCommandMenuCallback("page", page2) });
626408
+ if (page2 < totalPages - 1) nav.push({ text: "Next", callback_data: encodeTelegramCommandMenuCallback("page", page2 + 1) });
626409
+ keyboard.push(nav);
626410
+ return { text: lines.join("\n"), reply_markup: { inline_keyboard: keyboard } };
626411
+ }
626412
+ function handleTelegramCommandMenuCallback(data, state, now = Date.now()) {
626413
+ const decoded = decodeTelegramCommandMenuCallback(data);
626414
+ if (!decoded) return null;
626415
+ if (state.expiresAt <= now) return null;
626416
+ if (decoded.action === "close") return { close: true };
626417
+ if (decoded.action === "page") {
626418
+ const totalPages = Math.max(1, Math.ceil(state.items.length / PAGE_SIZE2));
626419
+ const page2 = Math.max(0, Math.min(Number.parseInt(decoded.value, 10) || 0, totalPages - 1));
626420
+ const newState = { ...state, page: page2 };
626421
+ return { newState, render: renderTelegramCommandMenu(newState) };
626422
+ }
626423
+ const index = Number.parseInt(decoded.value, 10);
626424
+ const item = Number.isFinite(index) ? state.items[index] : void 0;
626425
+ return item ? { command: item.command } : null;
626426
+ }
626427
+ function escapeHTML3(text) {
626428
+ return text.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
626429
+ }
626430
+ var CALLBACK_PREFIX2, PAGE_SIZE2, TTL_MS, MAX_CALLBACK_DATA_BYTES, GENERATIVE_COMMANDS, TelegramCommandMenuStateStore;
626431
+ var init_telegram_command_menu = __esm({
626432
+ "packages/cli/src/tui/telegram-command-menu.ts"() {
626433
+ "use strict";
626434
+ init_command_registry();
626435
+ CALLBACK_PREFIX2 = "ocm";
626436
+ PAGE_SIZE2 = 8;
626437
+ TTL_MS = 10 * 60 * 1e3;
626438
+ MAX_CALLBACK_DATA_BYTES = 64;
626439
+ GENERATIVE_COMMANDS = /* @__PURE__ */ new Set(["image", "video", "sound", "music"]);
626440
+ TelegramCommandMenuStateStore = class {
626441
+ states = /* @__PURE__ */ new Map();
626442
+ key(chatId, messageId) {
626443
+ return `${chatId}:${messageId}`;
626444
+ }
626445
+ create(input, now = Date.now()) {
626446
+ return {
626447
+ ...input,
626448
+ createdAt: now,
626449
+ expiresAt: now + TTL_MS
626450
+ };
626451
+ }
626452
+ set(state) {
626453
+ this.states.set(this.key(state.chatId, state.messageId), state);
626454
+ }
626455
+ get(chatId, messageId, now = Date.now()) {
626456
+ const state = this.states.get(this.key(chatId, messageId));
626457
+ if (!state) return void 0;
626458
+ if (state.expiresAt <= now) {
626459
+ this.delete(chatId, messageId);
626460
+ return void 0;
626461
+ }
626462
+ return state;
626463
+ }
626464
+ delete(chatId, messageId) {
626465
+ this.states.delete(this.key(chatId, messageId));
626466
+ }
626467
+ prune(now = Date.now()) {
626468
+ for (const [key, state] of this.states) {
626469
+ if (state.expiresAt <= now) this.states.delete(key);
626470
+ }
626471
+ }
626472
+ clear() {
626473
+ this.states.clear();
626474
+ }
626475
+ };
626476
+ }
626477
+ });
626478
+
625607
626479
  // packages/cli/src/tui/telegram-creative-tools.ts
625608
626480
  import { createCipheriv as createCipheriv4, createDecipheriv as createDecipheriv4, randomBytes as randomBytes23 } from "node:crypto";
625609
626481
  import {
@@ -630996,6 +631868,7 @@ var init_telegram_bridge = __esm({
630996
631868
  init_command_registry();
630997
631869
  init_telegram_help_menu();
630998
631870
  init_telegram_stats_menu();
631871
+ init_telegram_command_menu();
630999
631872
  init_scoped_personality();
631000
631873
  init_voice_soul();
631001
631874
  init_telegram_creative_tools();
@@ -631606,6 +632479,8 @@ Telegram link integrity contract:
631606
632479
  statsMenuTimers = null;
631607
632480
  /** Prune expired stats menu states every 5 minutes */
631608
632481
  statsMenuPruneTimer = null;
632482
+ /** Telegram-native command and generative command menus */
632483
+ telegramCommandMenuStates = new TelegramCommandMenuStateStore();
631609
632484
  /** Command handler for admin DM slash commands (wired from interactive.ts) */
631610
632485
  commandHandler = null;
631611
632486
  /** Callback fired after a Telegram user completes the TUI-only admin auth challenge */
@@ -631938,6 +632813,10 @@ Telegram link integrity contract:
631938
632813
  const name10 = this.telegramSlashName(input);
631939
632814
  return name10 === "help" || name10 === "h" || name10 === "commands" || name10 === "cmds";
631940
632815
  }
632816
+ isTelegramCommandsMenuCommand(input) {
632817
+ const name10 = this.telegramSlashName(input);
632818
+ return name10 === "commands" || name10 === "cmds";
632819
+ }
631941
632820
  isTelegramStatsCommand(input) {
631942
632821
  const name10 = this.telegramSlashName(input);
631943
632822
  return name10 === "stats" || name10 === "metrics";
@@ -632531,6 +633410,49 @@ ${message2}`)
632531
633410
  this.helpMenuTimers.startTimer(state);
632532
633411
  }
632533
633412
  }
633413
+ async replyWithTelegramCommandMenu(msg, isAdmin, kind, commandName) {
633414
+ const scope = isAdmin ? "admin" : "public";
633415
+ const items = kind === "generative" ? buildTelegramGenerativeMenuItems(commandName ?? "") : buildTelegramCommandMenuItems(scope);
633416
+ if (items.length === 0) {
633417
+ await this.replyToTelegramMessage(msg, "No Telegram command menu entries are available.");
633418
+ return;
633419
+ }
633420
+ if (msg.guestQueryId || !isAdmin) {
633421
+ const lines = items.slice(0, 24).map((item) => `${item.command} - ${item.description}`);
633422
+ const text = ["Available commands:", "", ...lines].join("\n");
633423
+ if (msg.guestQueryId) {
633424
+ await this.answerGuestQuery(msg.guestQueryId, text);
633425
+ } else {
633426
+ await this.replyToTelegramMessage(msg, text);
633427
+ }
633428
+ return;
633429
+ }
633430
+ const previewState = this.telegramCommandMenuStates.create({
633431
+ chatId: msg.chatId,
633432
+ messageId: 0,
633433
+ invokerMessageId: msg.messageId,
633434
+ fromUserId: msg.fromUserId ?? 0,
633435
+ scope,
633436
+ kind,
633437
+ page: 0,
633438
+ items
633439
+ });
633440
+ const menu = renderTelegramCommandMenu(previewState);
633441
+ const sent = await this.apiCall("sendMessage", {
633442
+ chat_id: msg.chatId,
633443
+ text: menu.text,
633444
+ parse_mode: "HTML",
633445
+ reply_markup: JSON.stringify(menu.reply_markup),
633446
+ ...msg.chatType !== "private" ? { reply_to_message_id: msg.messageId } : {}
633447
+ });
633448
+ if (sent.ok && sent.result?.message_id) {
633449
+ this.telegramCommandMenuStates.prune();
633450
+ this.telegramCommandMenuStates.set({
633451
+ ...previewState,
633452
+ messageId: sent.result.message_id
633453
+ });
633454
+ }
633455
+ }
632534
633456
  collectSessionMetricsSnapshot() {
632535
633457
  if (this._metricsProvider) {
632536
633458
  try {
@@ -637339,6 +638261,7 @@ ${TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT}`);
637339
638261
  this.telegramActiveWorkGenerations.clear();
637340
638262
  this.telegramActiveWorkStartedAtMs.clear();
637341
638263
  this.telegramAdminLivePanels.clear();
638264
+ this.telegramCommandMenuStates.clear();
637342
638265
  this.flushTelegramViewWrites();
637343
638266
  this.flushTelegramTuiWrites();
637344
638267
  this.telegramActiveInferences.clear();
@@ -637717,6 +638640,10 @@ ${summary}` : ""
637717
638640
  return;
637718
638641
  }
637719
638642
  const isAdmin = this.isAdminUser(msg);
638643
+ if (msg.text.trim().startsWith("/") && this.isTelegramCommandsMenuCommand(normalizedCommandText)) {
638644
+ await this.replyWithTelegramCommandMenu(msg, isAdmin, "commands");
638645
+ return;
638646
+ }
637720
638647
  if (msg.text.trim().startsWith("/") && this.isTelegramHelpCommand(normalizedCommandText)) {
637721
638648
  await this.replyWithTelegramHelp(msg, isAdmin);
637722
638649
  return;
@@ -637751,6 +638678,10 @@ ${summary}` : ""
637751
638678
  const toolContext = this.resolveToolContext(msg, isAdmin);
637752
638679
  const isAdminDM = toolContext === "telegram-admin-dm";
637753
638680
  const sessionKey = this.sessionKeyForMessage(msg);
638681
+ if (isAdminDM && isBareTelegramGenerativeCommand(normalizedCommandText)) {
638682
+ await this.replyWithTelegramCommandMenu(msg, isAdmin, "generative", telegramSlash);
638683
+ return;
638684
+ }
637754
638685
  if (msg.text.trim().startsWith("/") && TELEGRAM_REMINDER_SLASH_COMMANDS.has(telegramSlash)) {
637755
638686
  await this.handleTelegramReminderSlash(msg, normalizedCommandText, toolContext);
637756
638687
  return;
@@ -640485,6 +641416,90 @@ Scoped workspace: ${scopedRoot}`,
640485
641416
  return Boolean(result.ok);
640486
641417
  }
640487
641418
  async handleTelegramCallbackQuery(callback) {
641419
+ const commandMenuDecoded = decodeTelegramCommandMenuCallback(callback.data);
641420
+ if (commandMenuDecoded) {
641421
+ let answerText2 = "";
641422
+ let alert2 = false;
641423
+ let answered = false;
641424
+ try {
641425
+ const chatId = callback.chatId;
641426
+ const messageId = callback.messageId;
641427
+ if (!chatId || !messageId) {
641428
+ answerText2 = "Cannot identify menu message.";
641429
+ alert2 = true;
641430
+ return;
641431
+ }
641432
+ const menuState = this.telegramCommandMenuStates.get(chatId, messageId);
641433
+ if (!menuState) {
641434
+ answerText2 = "This command menu expired. Send /commands for a fresh one.";
641435
+ alert2 = true;
641436
+ return;
641437
+ }
641438
+ const isAdmin = this.isAdminActor(callback.fromUserId, callback.username);
641439
+ if (callback.fromUserId !== menuState.fromUserId && !isAdmin) {
641440
+ answerText2 = "Only the user who opened this menu can use it.";
641441
+ alert2 = true;
641442
+ return;
641443
+ }
641444
+ if (!isAdmin) {
641445
+ answerText2 = "That command requires Telegram admin authentication.";
641446
+ alert2 = true;
641447
+ return;
641448
+ }
641449
+ const result = handleTelegramCommandMenuCallback(callback.data, menuState);
641450
+ if (!result) {
641451
+ answerText2 = "Unknown or expired command menu action.";
641452
+ alert2 = true;
641453
+ return;
641454
+ }
641455
+ if (result.close) {
641456
+ this.telegramCommandMenuStates.delete(chatId, messageId);
641457
+ await this.apiCall("deleteMessage", { chat_id: chatId, message_id: messageId }).catch(() => {
641458
+ });
641459
+ if (menuState.invokerMessageId) {
641460
+ await this.apiCall("deleteMessage", { chat_id: chatId, message_id: menuState.invokerMessageId }).catch(() => {
641461
+ });
641462
+ }
641463
+ answered = await this.answerCallbackQuery(callback.id).catch(() => false);
641464
+ return;
641465
+ }
641466
+ if (result.render && result.newState) {
641467
+ this.telegramCommandMenuStates.set(result.newState);
641468
+ await this.apiCall("editMessageText", {
641469
+ chat_id: chatId,
641470
+ message_id: messageId,
641471
+ text: result.render.text,
641472
+ parse_mode: "HTML",
641473
+ reply_markup: JSON.stringify(result.render.reply_markup)
641474
+ });
641475
+ return;
641476
+ }
641477
+ if (result.command) {
641478
+ if (!this.commandHandler) {
641479
+ answerText2 = "No command handler is available.";
641480
+ alert2 = true;
641481
+ return;
641482
+ }
641483
+ answered = await this.answerCallbackQuery(callback.id, `Running ${result.command}...`).catch(() => false);
641484
+ const output = await this.commandHandler(result.command);
641485
+ if (output) {
641486
+ await this.sendMessageHTML(chatId, convertMarkdownToTelegramHTML(output));
641487
+ }
641488
+ return;
641489
+ }
641490
+ } catch (err) {
641491
+ answerText2 = err instanceof Error ? err.message : String(err);
641492
+ alert2 = true;
641493
+ } finally {
641494
+ if (answered) {
641495
+ } else if (answerText2) {
641496
+ await this.answerCallbackQuery(callback.id, answerText2.slice(0, 180), alert2).catch(() => false);
641497
+ } else {
641498
+ await this.answerCallbackQuery(callback.id).catch(() => false);
641499
+ }
641500
+ }
641501
+ return;
641502
+ }
640488
641503
  const helpDecoded = decodeHelpCallback(callback.data);
640489
641504
  if (helpDecoded) {
640490
641505
  let answerText2 = "";
@@ -673904,14 +674919,24 @@ Respond concisely and safely. Remember: you are talking to the general public.`;
673904
674919
  telegramBridge.setCommandHandler(async (input) => {
673905
674920
  const captured = [];
673906
674921
  const origWrite = process.stdout.write;
673907
- process.stdout.write = function(chunk, ..._args) {
674922
+ process.stdout.write = function(chunk, ...args) {
673908
674923
  if (typeof chunk === "string") {
673909
674924
  captured.push(chunk);
674925
+ } else if (Buffer.isBuffer(chunk)) {
674926
+ captured.push(chunk.toString("utf8"));
673910
674927
  }
674928
+ const cb = args.find((arg) => typeof arg === "function");
674929
+ if (cb) cb();
673911
674930
  return true;
673912
674931
  };
673913
674932
  try {
673914
- const result = await handleSlashCommand(input, commandCtx);
674933
+ const result = await runWithNonInteractiveSelectSurface(
674934
+ () => handleSlashCommand(input, commandCtx),
674935
+ {
674936
+ maxItems: 24,
674937
+ hint: "(Telegram: interactive menu shown as text; use concrete slash arguments or open the TUI to select)"
674938
+ }
674939
+ );
673915
674940
  process.stdout.write = origWrite;
673916
674941
  if (statusBar.isActive) statusBar.handleResize();
673917
674942
  if (result === "exit") {
@@ -673925,7 +674950,7 @@ Respond concisely and safely. Remember: you are talking to the general public.`;
673925
674950
  return `Skill invoked: ${result.name}`;
673926
674951
  }
673927
674952
  const raw = captured.join("");
673928
- const clean5 = raw.replace(/\x1B\[[0-9;]*[A-Za-z]/g, "").replace(/\x1B\][^\x07]*\x07/g, "").replace(/\x1B[()][A-Z0-9]/g, "").replace(/\x1B\[?\??[0-9;]*[a-zA-Z]/g, "").replace(/\x1B/g, "").replace(/[─━│┃┌┐└┘├┤┬┴┼╔╗╚╝╠╣╦╩╬⎿⎾▕▏⏐]/g, "").replace(/\n{3,}/g, "\n\n").trim();
674953
+ const clean5 = raw.replace(/\x1B(?:\[[\d;?]*[ -/]*[@-~]|\][^\x07\x1B]*(?:\x07|\x1B\\)?|[@-Z\\-_])/g, "").replace(/\x1B/g, "").replace(/[─━│┃┌┐└┘├┤┬┴┼╔╗╚╝╠╣╦╩╬⎿⎾▕▏⏐]/g, "").replace(/\n{3,}/g, "\n\n").trim();
673929
674954
  if (!clean5) return null;
673930
674955
  return clean5.length > 3900 ? clean5.slice(0, 3900) + "\n..." : clean5;
673931
674956
  } catch (err) {