npm - omnius - Versions diffs - 1.0.14 → 1.0.15 - Mend

omnius 1.0.14 → 1.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/index.js CHANGED Viewed

@@ -1451,7 +1451,7 @@ var init_security_classifier = __esm({
       { match: /^(aiwg_setup|aiwg_health|aiwg_workflow)$/, info: CRITICAL_SENSITIVE },
       { match: /^(expose|sponsor|nexus_register|wallet_|x402|payment|spend)/, info: CRITICAL_SENSITIVE },
       // ── Hardware peripherals
-      { match: /^(camera_capture|audio_capture|audio_playback|audio_analyze|asr_listen)$/, info: HARDWARE_DEVICE },
+      { match: /^(camera_capture|audio_capture|audio_playback|play_sound|audio_analyze|asr_listen)$/, info: HARDWARE_DEVICE },
       { match: /^(wifi_control|bluetooth_scan|sdr_scan|flipper_zero|meshtastic|gps_location)$/, info: HARDWARE_DEVICE },
       { match: /^(desktop_click|desktop_describe|screenshot)$/, info: HARDWARE_DEVICE },
       { match: /^(jibberlink)$/, info: HARDWARE_DEVICE },
@@ -84452,7 +84452,7 @@ var require_mime_types = __commonJS({
   "../node_modules/mime-types/index.js"(exports) {
     "use strict";
     var db = require_mime_db();
-    var extname14 = __require("path").extname;
+    var extname15 = __require("path").extname;
     var EXTRACT_TYPE_REGEXP = /^\s*([^;\s]*)(?:;|\s|$)/;
     var TEXT_TYPE_REGEXP = /^text\//i;
     exports.charset = charset;
@@ -84506,7 +84506,7 @@ var require_mime_types = __commonJS({
       if (!path11 || typeof path11 !== "string") {
         return false;
       }
-      var extension4 = extname14("x." + path11).toLowerCase().substr(1);
+      var extension4 = extname15("x." + path11).toLowerCase().substr(1);
       if (!extension4) {
         return false;
       }
@@ -250538,6 +250538,19 @@ function trimProcessText(text, max = 1800) {
     return clean3;
   return clean3.slice(0, max - 20) + "\n... (truncated)";
 }
+function formatDiffusersFailure(stderrOrStdout) {
+  const text = trimProcessText(stderrOrStdout);
+  const lower = stderrOrStdout.toLowerCase();
+  const notes2 = [];
+  if (lower.includes("torchvision") && (lower.includes("not installed") || lower.includes("no module named"))) {
+    notes2.push("Missing torchvision was detected. The image-generation dependency set now includes torchvision; run /image prewarm again to repair the existing .omnius/image-gen/.venv.");
+  }
+  if (lower.includes("hf_token") || lower.includes("gated repo") || lower.includes("401") || lower.includes("unauthorized")) {
+    notes2.push("This model may require Hugging Face authentication or license acceptance. Set HF_TOKEN in the environment and accept the model license on Hugging Face, then prewarm again.");
+  }
+  return [text, ...notes2.map((note) => `
+Note: ${note}`)].filter(Boolean).join("");
+}
 function imageGenerationPythonEnv(repoRoot) {
   const root = imageGenerationDir(repoRoot);
   const hf = join36(root, "huggingface");
@@ -250595,7 +250608,7 @@ async function ensurePythonFor(repoRoot, kind, explicit, onProgress) {
 ${trimProcessText(created.stderr || created.stdout)}`);
     }
   }
-  const importCheck = kind === "diffusers" ? "import torch, diffusers, PIL\nfrom diffusers import AutoPipelineForText2Image\n" : "import stable_diffusion_cpp, PIL\n";
+  const importCheck = kind === "diffusers" ? "import torch, torchvision, diffusers, PIL\nfrom diffusers import AutoPipelineForText2Image\n" : "import stable_diffusion_cpp, PIL\n";
   if (await pythonCanImport(command, importCheck, repoRoot, pythonEnv)) {
     return { command, env: pythonEnv };
   }
@@ -250661,6 +250674,7 @@ var init_image_generate = __esm({
     DEFAULT_OLLAMA_IMAGE_MODEL = "x/z-image-turbo";
     DIFFUSERS_PYTHON_PACKAGES = [
       "torch",
+      "torchvision",
       "diffusers",
       "transformers",
       "accelerate",
@@ -250710,7 +250724,7 @@ var init_image_generate = __esm({
         id: "black-forest-labs/FLUX.1-dev",
         label: "FLUX.1 dev",
         backend: "diffusers",
-        install: 'python .omnius/image-gen/diffusers_text2image.py --model black-forest-labs/FLUX.1-dev --steps 28 --guidance 3.5 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
+        install: 'python3 .omnius/image-gen/diffusers_text2image.py --model black-forest-labs/FLUX.1-dev --steps 28 --guidance 3.5 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
         category: "Primary hyper-realistic baseline",
         sizeClass: "12B rectified-flow transformer",
         quality: "Top-tier open-weight photorealism, prompt adherence, texture detail, composition, and typography.",
@@ -250727,7 +250741,7 @@ var init_image_generate = __esm({
         id: "stabilityai/stable-diffusion-3.5-large",
         label: "Stable Diffusion 3.5 Large",
         backend: "diffusers",
-        install: 'python .omnius/image-gen/diffusers_text2image.py --model stabilityai/stable-diffusion-3.5-large --steps 28 --guidance 4.5 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
+        install: 'python3 .omnius/image-gen/diffusers_text2image.py --model stabilityai/stable-diffusion-3.5-large --steps 28 --guidance 4.5 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
         category: "Primary hyper-realistic baseline",
         sizeClass: "8B MMDiT",
         quality: "Serious open Stable Diffusion ecosystem baseline with strong realism, complex prompt understanding, typography, and controllability.",
@@ -250744,7 +250758,7 @@ var init_image_generate = __esm({
         id: "black-forest-labs/FLUX.1-schnell",
         label: "FLUX.1 schnell",
         backend: "diffusers",
-        install: 'python .omnius/image-gen/diffusers_text2image.py --model black-forest-labs/FLUX.1-schnell --steps 4 --guidance 0 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
+        install: 'python3 .omnius/image-gen/diffusers_text2image.py --model black-forest-labs/FLUX.1-schnell --steps 4 --guidance 0 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
         category: "Fast large-model iteration",
         sizeClass: "12B rectified-flow transformer",
         quality: "FLUX-style output with fewer steps; better for rapid iteration than absolute peak quality.",
@@ -250761,7 +250775,7 @@ var init_image_generate = __esm({
         id: "stabilityai/stable-diffusion-3.5-large-turbo",
         label: "SD3.5 Large Turbo",
         backend: "diffusers",
-        install: 'python .omnius/image-gen/diffusers_text2image.py --model stabilityai/stable-diffusion-3.5-large-turbo --steps 4 --guidance 0 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
+        install: 'python3 .omnius/image-gen/diffusers_text2image.py --model stabilityai/stable-diffusion-3.5-large-turbo --steps 4 --guidance 0 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
         category: "Fast large-model iteration",
         sizeClass: "8B distilled MMDiT",
         quality: "SD3.5-family quality optimized for fewer inference steps; throughput over peak fidelity.",
@@ -250778,7 +250792,7 @@ var init_image_generate = __esm({
         id: "Tencent-Hunyuan/HunyuanDiT-v1.2-Diffusers",
         label: "HunyuanDiT v1.2",
         backend: "diffusers",
-        install: 'python .omnius/image-gen/diffusers_text2image.py --model Tencent-Hunyuan/HunyuanDiT-v1.2-Diffusers --steps 30 --guidance 7.5 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
+        install: 'python3 .omnius/image-gen/diffusers_text2image.py --model Tencent-Hunyuan/HunyuanDiT-v1.2-Diffusers --steps 30 --guidance 7.5 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
         category: "Large multilingual diffusion",
         sizeClass: "Large DiT text-to-image",
         quality: "Strong bilingual English/Chinese prompt understanding with detailed, realistic multi-resolution output.",
@@ -250795,7 +250809,7 @@ var init_image_generate = __esm({
         id: "Tongyi-MAI/Z-Image-Turbo",
         label: "Z-Image-Turbo",
         backend: "diffusers",
-        install: 'python .omnius/image-gen/diffusers_text2image.py --model Tongyi-MAI/Z-Image-Turbo --steps 8 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
+        install: 'python3 .omnius/image-gen/diffusers_text2image.py --model Tongyi-MAI/Z-Image-Turbo --steps 8 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
         category: "Modern deployable",
         sizeClass: "6B image generation model",
         quality: "Efficient newer large-model quality; useful below full FLUX/SD3.5 hardware budgets.",
@@ -250811,7 +250825,7 @@ var init_image_generate = __esm({
         id: "black-forest-labs/FLUX.2-klein-4B",
         label: "FLUX.2 Klein 4B",
         backend: "diffusers",
-        install: 'python .omnius/image-gen/diffusers_text2image.py --model black-forest-labs/FLUX.2-klein-4B --steps 8 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
+        install: 'python3 .omnius/image-gen/diffusers_text2image.py --model black-forest-labs/FLUX.2-klein-4B --steps 8 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
         category: "Modern deployable",
         sizeClass: "4B compact FLUX-family",
         quality: "Bridge between practical deployment and modern FLUX-family visual quality.",
@@ -250843,7 +250857,7 @@ var init_image_generate = __esm({
         id: "segmind/tiny-sd",
         label: "Segmind Tiny-SD",
         backend: "diffusers",
-        install: 'python .omnius/image-gen/diffusers_text2image.py --model segmind/tiny-sd --prompt "..." --output .omnius/images/out.png',
+        install: 'python3 .omnius/image-gen/diffusers_text2image.py --model segmind/tiny-sd --prompt "..." --output .omnius/images/out.png',
         category: "Lightweight smoke test",
         sizeClass: "Small SD-compatible",
         quality: "Fast validation model; not a serious photorealism baseline.",
@@ -250860,7 +250874,7 @@ var init_image_generate = __esm({
         id: "nota-ai/bk-sdm-tiny-2m",
         label: "BK-SDM Tiny 2M",
         backend: "diffusers",
-        install: 'python .omnius/image-gen/diffusers_text2image.py --model nota-ai/bk-sdm-tiny-2m --prompt "..." --output .omnius/images/out.png',
+        install: 'python3 .omnius/image-gen/diffusers_text2image.py --model nota-ai/bk-sdm-tiny-2m --prompt "..." --output .omnius/images/out.png',
         category: "Lightweight smoke test",
         sizeClass: "Compressed SD-compatible",
         quality: "Very small and practical; quality is mainly for tests and rough drafts.",
@@ -250877,7 +250891,7 @@ var init_image_generate = __esm({
         id: "nota-ai/bk-sdm-small-2m",
         label: "BK-SDM Small 2M",
         backend: "diffusers",
-        install: 'python .omnius/image-gen/diffusers_text2image.py --model nota-ai/bk-sdm-small-2m --prompt "..." --output .omnius/images/out.png',
+        install: 'python3 .omnius/image-gen/diffusers_text2image.py --model nota-ai/bk-sdm-small-2m --prompt "..." --output .omnius/images/out.png',
         category: "Lightweight smoke test",
         sizeClass: "Compressed SD-compatible",
         quality: "Slightly better compressed-SD quality than tiny variants; still not a high-fidelity baseline.",
@@ -250894,7 +250908,7 @@ var init_image_generate = __esm({
         id: "SimianLuo/LCM_Dreamshaper_v7",
         label: "LCM DreamShaper v7",
         backend: "diffusers",
-        install: 'python .omnius/image-gen/diffusers_text2image.py --model SimianLuo/LCM_Dreamshaper_v7 --steps 4 --prompt "..." --output .omnius/images/out.png',
+        install: 'python3 .omnius/image-gen/diffusers_text2image.py --model SimianLuo/LCM_Dreamshaper_v7 --steps 4 --prompt "..." --output .omnius/images/out.png',
         category: "Fast iteration",
         sizeClass: "Few-step SD-compatible",
         quality: "Good for low-latency concepting; below SDXL/SD3.5/FLUX for photoreal detail.",
@@ -250911,7 +250925,7 @@ var init_image_generate = __esm({
         id: "stabilityai/sd-turbo",
         label: "SD-Turbo",
         backend: "diffusers",
-        install: 'python .omnius/image-gen/diffusers_text2image.py --model stabilityai/sd-turbo --steps 1 --guidance 0 --prompt "..." --output .omnius/images/out.png',
+        install: 'python3 .omnius/image-gen/diffusers_text2image.py --model stabilityai/sd-turbo --steps 1 --guidance 0 --prompt "..." --output .omnius/images/out.png',
         category: "Fast iteration",
         sizeClass: "One-to-four-step SD",
         quality: "Fast SD-family output; useful for iteration but lower ceiling than SDXL Turbo and large baselines.",
@@ -250928,7 +250942,7 @@ var init_image_generate = __esm({
         id: DEFAULT_DIFFUSERS_IMAGE_MODEL,
         label: "SDXL-Turbo",
         backend: "diffusers",
-        install: 'python .omnius/image-gen/diffusers_text2image.py --model stabilityai/sdxl-turbo --steps 1 --guidance 0 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
+        install: 'python3 .omnius/image-gen/diffusers_text2image.py --model stabilityai/sdxl-turbo --steps 1 --guidance 0 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
         category: "Default local generation",
         sizeClass: "Few-step SDXL",
         quality: "Strong fast default for local image generation; not as realistic as FLUX.1 dev or SD3.5 Large, but much more practical.",
@@ -250945,7 +250959,7 @@ var init_image_generate = __esm({
         id: "Efficient-Large-Model/Sana_Sprint_0.6B_1024px_diffusers",
         label: "Sana Sprint 0.6B",
         backend: "diffusers",
-        install: 'python .omnius/image-gen/diffusers_text2image.py --model Efficient-Large-Model/Sana_Sprint_0.6B_1024px_diffusers --steps 4 --guidance 0 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
+        install: 'python3 .omnius/image-gen/diffusers_text2image.py --model Efficient-Large-Model/Sana_Sprint_0.6B_1024px_diffusers --steps 4 --guidance 0 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
         category: "Modern efficient",
         sizeClass: "0.6B efficient diffusion",
         quality: "Modern efficient output under smaller compute budgets; below primary large baselines.",
@@ -250962,7 +250976,7 @@ var init_image_generate = __esm({
         id: "sdcpp:local",
         label: "stable-diffusion.cpp local checkpoint",
         backend: "sdcpp",
-        install: 'python .omnius/image-gen/sdcpp_text2image.py --model-path /path/to/model.gguf --prompt "..." --output .omnius/images/out.png',
+        install: 'python3 .omnius/image-gen/sdcpp_text2image.py --model-path /path/to/model.gguf --prompt "..." --output .omnius/images/out.png',
         category: "Local checkpoint/GGUF",
         sizeClass: "Depends on checkpoint",
         quality: "Quality depends entirely on the local checkpoint or GGUF variant.",
@@ -251417,15 +251431,17 @@ ${errText.slice(0, 1200)}`,
         });
         if (result.code !== 0) {
           const plan = imageGenerationSetupPlan("diffusers", this.cwd, args.model);
+          const output = [
+            `Diffusers model prewarm failed with exit code ${result.code ?? "unknown"}.`,
+            formatDiffusersFailure(result.stderr || result.stdout),
+            "",
+            "Setup path:",
+            ...plan.commands.map((cmd) => `  ${cmd}`)
+          ].filter(Boolean).join("\n");
           return {
             success: false,
-            output: [
-              `Diffusers model prewarm failed with exit code ${result.code ?? "unknown"}.`,
-              trimProcessText(result.stderr || result.stdout),
-              "",
-              "Setup path:",
-              ...plan.commands.map((cmd) => `  ${cmd}`)
-            ].filter(Boolean).join("\n"),
+            output,
+            error: output,
             durationMs: performance.now() - args.start
           };
         }
@@ -251600,15 +251616,17 @@ ${errText.slice(0, 800)}`,
         });
         if (result.code !== 0 || !existsSync23(filepath)) {
           const plan = imageGenerationSetupPlan("diffusers", this.cwd, args.model);
+          const output2 = [
+            `Diffusers image generation failed with exit code ${result.code ?? "unknown"}.`,
+            formatDiffusersFailure(result.stderr || result.stdout),
+            "",
+            "Setup path:",
+            ...plan.commands.map((cmd) => `  ${cmd}`)
+          ].filter(Boolean).join("\n");
           return {
             success: false,
-            output: [
-              `Diffusers image generation failed with exit code ${result.code ?? "unknown"}.`,
-              trimProcessText(result.stderr || result.stdout),
-              "",
-              "Setup path:",
-              ...plan.commands.map((cmd) => `  ${cmd}`)
-            ].filter(Boolean).join("\n"),
+            output: output2,
+            error: output2,
             durationMs: performance.now() - args.start
           };
         }
@@ -251796,6 +251814,8 @@ function audioOutputDir(repoRoot = ".") {
   return join37(repoRoot, ".omnius", "audio");
 }
 function backendPackages(backend) {
+  if (backend === "transformers")
+    return TRANSFORMERS_AUDIO_PACKAGES;
   if (backend === "audiocraft")
     return AUDIOCRAFT_PACKAGES;
   if (backend === "stable-audio")
@@ -251805,6 +251825,8 @@ function backendPackages(backend) {
   return DIFFUSERS_AUDIO_PACKAGES;
 }
 function backendImportCheck(backend) {
+  if (backend === "transformers")
+    return "import torch, torchaudio, transformers, scipy\nfrom transformers import AutoProcessor, MusicgenForConditionalGeneration\n";
   if (backend === "audiocraft")
     return "import torch, torchaudio, audiocraft\nfrom audiocraft.models import MusicGen, AudioGen\n";
   if (backend === "stable-audio")
@@ -251933,8 +251955,23 @@ function trimProcessText2(text, max = 1800) {
   return clean3.slice(0, max - 20) + "\n... (truncated)";
 }
 async function pythonCanImport2(command, code8, repoRoot, env2) {
-  const result = await runProcess3(command, ["-c", code8], { cwd: repoRoot, timeoutMs: 6e4, env: env2 });
-  return result.code === 0;
+  return (await pythonImportResult(command, code8, repoRoot, env2)).code === 0;
+}
+async function pythonImportResult(command, code8, repoRoot, env2) {
+  return await runProcess3(command, ["-c", code8], { cwd: repoRoot, timeoutMs: 6e4, env: env2 });
+}
+function formatAudioSetupFailure(backend, text) {
+  const body = trimProcessText2(text);
+  const lowered = text.toLowerCase();
+  const notes2 = [];
+  if (backend === "audiocraft" && (lowered.includes("libavformat") || lowered.includes("pkg-config") || lowered.includes("pyav") || lowered.includes(" av"))) {
+    notes2.push("AudioCraft pulls PyAV. If a matching PyAV wheel is unavailable, the system must provide FFmpeg development headers.", "Ubuntu/Debian fix: sudo apt install -y pkg-config ffmpeg libavformat-dev libavcodec-dev libavdevice-dev libavutil-dev libavfilter-dev libswscale-dev libswresample-dev", "For MusicGen, prefer the default Transformers backend: /music setup transformers or /music --backend transformers <prompt>.");
+  }
+  if (lowered.includes("cuda") && lowered.includes("not available")) {
+    notes2.push("CUDA was not available to the selected Python environment; install a Torch build matching this machine's CUDA runtime or use CPU-compatible settings.");
+  }
+  return [body, ...notes2.map((note) => `
+${note}`)].filter(Boolean).join("");
 }
 async function ensurePythonForAudio(repoRoot, backend, explicit, onProgress) {
   const pythonEnv = audioGenerationPythonEnv(repoRoot);
@@ -251965,7 +252002,19 @@ ${trimProcessText2(created.stderr || created.stdout)}`);
   }
   const packages = backendPackages(backend);
   onProgress?.({ stage: "setup", message: `Installing ${backend} audio-generation Python packages` });
-  const pip = await runProcess3(command, ["-m", "pip", "install", "--progress-bar", "on", "-U", "pip", ...packages], {
+  const pipArgs = [
+    "-m",
+    "pip",
+    "install",
+    "--progress-bar",
+    "on",
+    "--prefer-binary",
+    ...backend === "audiocraft" ? ["--only-binary", "av"] : [],
+    "-U",
+    "pip",
+    ...packages
+  ];
+  const pip = await runProcess3(command, pipArgs, {
     cwd: repoRoot,
     timeoutMs: 18e5,
     env: pythonEnv,
@@ -251974,18 +252023,20 @@ ${trimProcessText2(created.stderr || created.stdout)}`);
   });
   if (pip.code !== 0) {
     throw new Error(`Failed to install ${backend} audio-generation packages into ${venvDir}.
-${trimProcessText2(pip.stderr || pip.stdout)}`);
+${formatAudioSetupFailure(backend, pip.stderr || pip.stdout)}`);
   }
-  if (!await pythonCanImport2(command, backendImportCheck(backend), repoRoot, pythonEnv)) {
-    throw new Error(`Audio-generation Python environment at ${venvDir} was created, but required ${backend} imports still fail.`);
+  const importCheck = await pythonImportResult(command, backendImportCheck(backend), repoRoot, pythonEnv);
+  if (importCheck.code !== 0) {
+    throw new Error(`Audio-generation Python environment at ${venvDir} was created, but required ${backend} imports still fail.
+${formatAudioSetupFailure(backend, importCheck.stderr || importCheck.stdout)}`);
   }
   return { command, env: pythonEnv };
 }
 async function ensureAudioRunner(repoRoot, backend) {
   const dir = audioGenerationDir(repoRoot);
   await mkdir12(dir, { recursive: true });
-  const script = backend === "audiocraft" ? join37(dir, "audiocraft_audio.py") : backend === "stable-audio" ? join37(dir, "stable_audio.py") : backend === "tangoflux" ? join37(dir, "tangoflux_audio.py") : join37(dir, "diffusers_audio.py");
-  const body = backend === "audiocraft" ? AUDIOCRAFT_RUNNER : DIFFUSERS_AUDIO_RUNNER;
+  const script = backend === "transformers" ? join37(dir, "transformers_audio.py") : backend === "audiocraft" ? join37(dir, "audiocraft_audio.py") : backend === "stable-audio" ? join37(dir, "stable_audio.py") : backend === "tangoflux" ? join37(dir, "tangoflux_audio.py") : join37(dir, "diffusers_audio.py");
+  const body = backend === "transformers" ? TRANSFORMERS_AUDIO_RUNNER : backend === "audiocraft" ? AUDIOCRAFT_RUNNER : DIFFUSERS_AUDIO_RUNNER;
   await writeFile17(script, body, "utf8");
   await chmod4(script, 493).catch(() => {
   });
@@ -252040,7 +252091,7 @@ function getAudioGenerationPreset(model, kind) {
 }
 function inferAudioGenerationBackend(model, requested) {
   if (requested && requested !== "auto") {
-    if (requested === "diffusers" || requested === "audiocraft" || requested === "stable-audio" || requested === "tangoflux" || requested === "project")
+    if (requested === "diffusers" || requested === "transformers" || requested === "audiocraft" || requested === "stable-audio" || requested === "tangoflux" || requested === "project")
       return requested;
   }
   const preset = getAudioGenerationPreset(model);
@@ -252049,7 +252100,9 @@ function inferAudioGenerationBackend(model, requested) {
   if (!model)
     return "auto";
   const lowered = model.toLowerCase();
-  if (lowered.includes("musicgen") || lowered.includes("audiogen"))
+  if (lowered.includes("musicgen"))
+    return "transformers";
+  if (lowered.includes("audiogen"))
     return "audiocraft";
   if (lowered.includes("stable-audio"))
     return "stable-audio";
@@ -252071,7 +252124,7 @@ function audioGenerationSetupPlan(kind, backend, repoRoot = ".", model) {
       title: `Diffusers ${kind} generation`,
       commands: [
         `python3 -m venv ${venvDir}`,
-        `${venvPython(venvDir)} -m pip install -U pip ${DIFFUSERS_AUDIO_PACKAGES.join(" ")}`,
+        `${venvPython(venvDir)} -m pip install --prefer-binary -U pip ${DIFFUSERS_AUDIO_PACKAGES.join(" ")}`,
         `omnius /${commandName} "cinematic rain on a neon street" --backend diffusers --model ${chosen}`
       ],
       notes: [
@@ -252081,6 +252134,24 @@ function audioGenerationSetupPlan(kind, backend, repoRoot = ".", model) {
       ]
     };
   }
+  if (backend === "transformers") {
+    const venvDir = audioGenerationVenvDir(repoRoot, "transformers");
+    return {
+      kind,
+      backend,
+      title: `Transformers ${kind} generation`,
+      commands: [
+        `python3 -m venv ${venvDir}`,
+        `${venvPython(venvDir)} -m pip install --prefer-binary -U pip ${TRANSFORMERS_AUDIO_PACKAGES.join(" ")}`,
+        `omnius /${commandName} "warm analog synth arpeggio, slow drums" --backend transformers --model ${chosen}`
+      ],
+      notes: [
+        "Use this path for MusicGen without requiring AudioCraft/PyAV/libavformat system headers.",
+        "The venv, Hugging Face cache, Torch cache, and pip cache stay under .omnius/audio-gen.",
+        "First generation downloads model weights and then immediately generates the requested audio."
+      ]
+    };
+  }
   if (backend === "audiocraft") {
     const venvDir = audioGenerationVenvDir(repoRoot, "audiocraft");
     return {
@@ -252089,12 +252160,14 @@ function audioGenerationSetupPlan(kind, backend, repoRoot = ".", model) {
       title: `AudioCraft ${kind} generation`,
       commands: [
         `python3 -m venv ${venvDir}`,
-        `${venvPython(venvDir)} -m pip install -U pip ${AUDIOCRAFT_PACKAGES.join(" ")}`,
+        `${venvPython(venvDir)} -m pip install --prefer-binary --only-binary av -U pip ${AUDIOCRAFT_PACKAGES.join(" ")}`,
         `omnius /${commandName} "warm analog synth arpeggio, slow drums" --backend audiocraft --model ${chosen}`
       ],
       notes: [
-        "Use this path for MusicGen and AudioGen models.",
+        "Use this path for AudioCraft AudioGen models or explicit upstream AudioCraft testing.",
         "AudioCraft is a larger install; prefer CUDA and enough VRAM for the selected checkpoint.",
+        "If PyAV has no wheel for this Python version, install FFmpeg development headers first: sudo apt install -y pkg-config ffmpeg libavformat-dev libavcodec-dev libavdevice-dev libavutil-dev libavfilter-dev libswscale-dev libswresample-dev.",
+        "For MusicGen, the default Transformers backend avoids the PyAV/libavformat system-header path.",
         "First generation downloads model weights and saves WAV files under .omnius/audio."
       ]
     };
@@ -252107,7 +252180,7 @@ function audioGenerationSetupPlan(kind, backend, repoRoot = ".", model) {
       title: `Stable Audio ${kind} generation`,
       commands: [
         `python3 -m venv ${venvDir}`,
-        `${venvPython(venvDir)} -m pip install -U pip ${STABLE_AUDIO_PACKAGES.join(" ")}`,
+        `${venvPython(venvDir)} -m pip install --prefer-binary -U pip ${STABLE_AUDIO_PACKAGES.join(" ")}`,
         `omnius /${commandName} "high fidelity stereo ${kind} bed, detailed and clean" --backend stable-audio --model ${chosen}`
       ],
       notes: [
@@ -252124,7 +252197,7 @@ function audioGenerationSetupPlan(kind, backend, repoRoot = ".", model) {
       title: "TangoFlux sound generation",
       commands: [
         `python3 -m venv ${venvDir}`,
-        `${venvPython(venvDir)} -m pip install -U pip ${TANGOFLUX_PACKAGES.join(" ")}`,
+        `${venvPython(venvDir)} -m pip install --prefer-binary -U pip ${TANGOFLUX_PACKAGES.join(" ")}`,
         `omnius /sound "fast whoosh impact with metallic tail" --backend tangoflux --model ${chosen}`
       ],
       notes: [
@@ -252147,7 +252220,7 @@ function audioGenerationSetupPlan(kind, backend, repoRoot = ".", model) {
     ]
   };
 }
-var DEFAULT_SOUND_MODEL, DEFAULT_MUSIC_MODEL, DIFFUSERS_AUDIO_PACKAGES, AUDIOCRAFT_PACKAGES, STABLE_AUDIO_PACKAGES, TANGOFLUX_PACKAGES, AUDIO_GENERATION_MODEL_PRESETS, DIFFUSERS_AUDIO_RUNNER, AUDIOCRAFT_RUNNER, AudioGenerateTool;
+var DEFAULT_SOUND_MODEL, DEFAULT_MUSIC_MODEL, DIFFUSERS_AUDIO_PACKAGES, TRANSFORMERS_AUDIO_PACKAGES, AUDIOCRAFT_PACKAGES, STABLE_AUDIO_PACKAGES, TANGOFLUX_PACKAGES, AUDIO_GENERATION_MODEL_PRESETS, DIFFUSERS_AUDIO_RUNNER, AUDIOCRAFT_RUNNER, TRANSFORMERS_AUDIO_RUNNER, AudioGenerateTool;
 var init_audio_generate = __esm({
   "packages/execution/dist/tools/audio-generate.js"() {
     "use strict";
@@ -252163,6 +252236,16 @@ var init_audio_generate = __esm({
       "soundfile",
       "librosa"
     ];
+    TRANSFORMERS_AUDIO_PACKAGES = [
+      "torch",
+      "torchaudio",
+      "transformers",
+      "accelerate",
+      "scipy",
+      "soundfile",
+      "sentencepiece",
+      "protobuf"
+    ];
     AUDIOCRAFT_PACKAGES = [
       "torch",
       "torchaudio",
@@ -252190,7 +252273,7 @@ var init_audio_generate = __esm({
         label: "AudioLDM S-Full v2",
         kind: "sound",
         backend: "diffusers",
-        install: 'python .omnius/audio-gen/diffusers_audio.py --kind sound --model cvssp/audioldm-s-full-v2 --duration 8 --prompt "..." --output .omnius/audio/out.wav',
+        install: 'python3 .omnius/audio-gen/diffusers_audio.py --kind sound --model cvssp/audioldm-s-full-v2 --duration 8 --prompt "..." --output .omnius/audio/out.wav',
         category: "Default practical sound effects",
         sizeClass: "Small text-to-audio latent diffusion",
         quality: "Good practical baseline for SFX, ambience, foley, and quick local validation.",
@@ -252208,7 +252291,7 @@ var init_audio_generate = __esm({
         label: "AudioLDM 2",
         kind: "sound",
         backend: "diffusers",
-        install: 'python .omnius/audio-gen/diffusers_audio.py --kind sound --model cvssp/audioldm2 --duration 10 --prompt "..." --output .omnius/audio/out.wav',
+        install: 'python3 .omnius/audio-gen/diffusers_audio.py --kind sound --model cvssp/audioldm2 --duration 10 --prompt "..." --output .omnius/audio/out.wav',
         category: "General sound effects",
         sizeClass: "Text-to-audio latent diffusion",
         quality: "More capable general sound synthesis than AudioLDM S; slower at high step counts.",
@@ -252226,7 +252309,7 @@ var init_audio_generate = __esm({
         label: "AudioLDM 2 Large",
         kind: "sound",
         backend: "diffusers",
-        install: 'python .omnius/audio-gen/diffusers_audio.py --kind sound --model cvssp/audioldm2-large --duration 10 --prompt "..." --output .omnius/audio/out.wav',
+        install: 'python3 .omnius/audio-gen/diffusers_audio.py --kind sound --model cvssp/audioldm2-large --duration 10 --prompt "..." --output .omnius/audio/out.wav',
         category: "High quality sound effects",
         sizeClass: "Large text-to-audio latent diffusion",
         quality: "Higher ceiling than AudioLDM 2 base, with slower sampling and more VRAM pressure.",
@@ -252244,7 +252327,7 @@ var init_audio_generate = __esm({
         label: "AudioGen Medium",
         kind: "sound",
         backend: "audiocraft",
-        install: 'python .omnius/audio-gen/audiocraft_audio.py --kind sound --model facebook/audiogen-medium --duration 8 --prompt "..." --output .omnius/audio/out.wav',
+        install: 'python3 .omnius/audio-gen/audiocraft_audio.py --kind sound --model facebook/audiogen-medium --duration 8 --prompt "..." --output .omnius/audio/out.wav',
         category: "AudioCraft sound effects",
         sizeClass: "AudioCraft text-to-sound",
         quality: "Strong text-conditioned non-music sound effects; good for foley and environments.",
@@ -252261,7 +252344,7 @@ var init_audio_generate = __esm({
         label: "TANGO 2",
         kind: "sound",
         backend: "project",
-        install: "git clone https://github.com/declare-lab/tango .omnius/audio-gen/projects/tango && cd .omnius/audio-gen/projects/tango && python -m pip install -r requirements.txt",
+        install: "git clone https://github.com/declare-lab/tango .omnius/audio-gen/projects/tango && cd .omnius/audio-gen/projects/tango && python3 -m pip install -r requirements.txt",
         category: "Research sound effects",
         sizeClass: "DPO-aligned text-to-audio research stack",
         quality: "Good prompt alignment target, but less turnkey than Diffusers or AudioCraft.",
@@ -252278,7 +252361,7 @@ var init_audio_generate = __esm({
         label: "TangoFlux",
         kind: "sound",
         backend: "tangoflux",
-        install: 'python .omnius/audio-gen/tangoflux_audio.py --model declare-lab/TangoFlux --duration 8 --prompt "..." --output .omnius/audio/out.wav',
+        install: 'python3 .omnius/audio-gen/tangoflux_audio.py --model declare-lab/TangoFlux --duration 8 --prompt "..." --output .omnius/audio/out.wav',
         category: "Fast sound effects",
         sizeClass: "Fast text-to-audio generation",
         quality: "Fast SFX and ambience iteration, with newer-stack stability risks.",
@@ -252295,7 +252378,7 @@ var init_audio_generate = __esm({
         label: "Stable Audio Open 1.0",
         kind: "sound",
         backend: "stable-audio",
-        install: 'python .omnius/audio-gen/stable_audio.py --kind sound --model stabilityai/stable-audio-open-1.0 --duration 20 --prompt "..." --output .omnius/audio/out.wav',
+        install: 'python3 .omnius/audio-gen/stable_audio.py --kind sound --model stabilityai/stable-audio-open-1.0 --duration 20 --prompt "..." --output .omnius/audio/out.wav',
         category: "High-quality long audio",
         sizeClass: "Stereo 44.1 kHz diffusion audio",
         quality: "High-fidelity stereo generation, useful for longer designed sounds and music-like clips.",
@@ -252312,16 +252395,16 @@ var init_audio_generate = __esm({
         id: DEFAULT_MUSIC_MODEL,
         label: "MusicGen Small",
         kind: "music",
-        backend: "audiocraft",
-        install: 'python .omnius/audio-gen/audiocraft_audio.py --kind music --model facebook/musicgen-small --duration 20 --prompt "..." --output .omnius/audio/out.wav',
+        backend: "transformers",
+        install: 'python3 .omnius/audio-gen/transformers_audio.py --kind music --model facebook/musicgen-small --duration 20 --prompt "..." --output .omnius/audio/out.wav',
         category: "Default practical music",
-        sizeClass: "Small AudioCraft music model",
+        sizeClass: "Small Transformers MusicGen model",
         quality: "Fast, practical music sketches with lower fidelity and complexity than medium/large models.",
         output: "Short mono/stereo music sketches depending on checkpoint.",
         bestUse: "Default /music smoke test and quick ideas.",
         minVramGB: 6,
         recommendedVramGB: 8,
-        deployment: "AudioCraft path; fastest usable MusicGen option.",
+        deployment: "Transformers path; fastest usable MusicGen option.",
         defaultDurationSec: 20,
         note: "Practical default music generation model."
       },
@@ -252329,16 +252412,16 @@ var init_audio_generate = __esm({
         id: "facebook/musicgen-medium",
         label: "MusicGen Medium",
         kind: "music",
-        backend: "audiocraft",
-        install: 'python .omnius/audio-gen/audiocraft_audio.py --kind music --model facebook/musicgen-medium --duration 20 --prompt "..." --output .omnius/audio/out.wav',
+        backend: "transformers",
+        install: 'python3 .omnius/audio-gen/transformers_audio.py --kind music --model facebook/musicgen-medium --duration 20 --prompt "..." --output .omnius/audio/out.wav',
         category: "Higher quality MusicGen",
-        sizeClass: "Medium AudioCraft music model",
+        sizeClass: "Medium Transformers MusicGen model",
         quality: "Better arrangement and texture than small; more VRAM and latency.",
         output: "Short music clips.",
         bestUse: "Local music generation when 12-16GB VRAM is available.",
         minVramGB: 10,
         recommendedVramGB: 16,
-        deployment: "AudioCraft path; good quality/footprint balance.",
+        deployment: "Transformers path; good quality/footprint balance.",
         defaultDurationSec: 20,
         note: "Balanced MusicGen quality target."
       },
@@ -252346,16 +252429,16 @@ var init_audio_generate = __esm({
         id: "facebook/musicgen-large",
         label: "MusicGen Large",
         kind: "music",
-        backend: "audiocraft",
-        install: 'python .omnius/audio-gen/audiocraft_audio.py --kind music --model facebook/musicgen-large --duration 20 --prompt "..." --output .omnius/audio/out.wav',
+        backend: "transformers",
+        install: 'python3 .omnius/audio-gen/transformers_audio.py --kind music --model facebook/musicgen-large --duration 20 --prompt "..." --output .omnius/audio/out.wav',
         category: "Higher quality MusicGen",
-        sizeClass: "Large AudioCraft music model",
+        sizeClass: "Large Transformers MusicGen model",
         quality: "Stronger musical structure and detail, with high memory pressure.",
         output: "Higher-quality short music clips.",
         bestUse: "High-VRAM local GPU or remote GPU music generation.",
         minVramGB: 16,
         recommendedVramGB: 24,
-        deployment: "AudioCraft path; expect large downloads and slower inference.",
+        deployment: "Transformers path; expect large downloads and slower inference.",
         defaultDurationSec: 20,
         note: "Large MusicGen baseline."
       },
@@ -252363,16 +252446,16 @@ var init_audio_generate = __esm({
         id: "facebook/musicgen-melody",
         label: "MusicGen Melody",
         kind: "music",
-        backend: "audiocraft",
-        install: 'python .omnius/audio-gen/audiocraft_audio.py --kind music --model facebook/musicgen-melody --duration 20 --prompt "..." --output .omnius/audio/out.wav',
+        backend: "transformers",
+        install: 'python3 .omnius/audio-gen/transformers_audio.py --kind music --model facebook/musicgen-melody --duration 20 --prompt "..." --output .omnius/audio/out.wav',
         category: "Conditioned music",
-        sizeClass: "Melody-conditioned AudioCraft music model",
+        sizeClass: "Melody-conditioned Transformers MusicGen model",
         quality: "Useful when a reference melody path is added to the generation flow.",
         output: "Music clips guided by text and optional melody conditioning.",
         bestUse: "Future melody-conditioned workflows; useful to pre-deploy now.",
         minVramGB: 10,
         recommendedVramGB: 16,
-        deployment: "AudioCraft path; melody conditioning needs an additional reference audio argument.",
+        deployment: "Transformers path; melody conditioning needs an additional reference audio argument.",
         defaultDurationSec: 20,
         note: "MusicGen variant for melody-conditioned generation."
       },
@@ -252380,16 +252463,16 @@ var init_audio_generate = __esm({
         id: "facebook/musicgen-stereo-large",
         label: "MusicGen Stereo Large",
         kind: "music",
-        backend: "audiocraft",
-        install: 'python .omnius/audio-gen/audiocraft_audio.py --kind music --model facebook/musicgen-stereo-large --duration 20 --prompt "..." --output .omnius/audio/out.wav',
+        backend: "transformers",
+        install: 'python3 .omnius/audio-gen/transformers_audio.py --kind music --model facebook/musicgen-stereo-large --duration 20 --prompt "..." --output .omnius/audio/out.wav',
         category: "Higher quality MusicGen",
-        sizeClass: "Large stereo AudioCraft music model",
+        sizeClass: "Large stereo Transformers MusicGen model",
         quality: "Stereo large MusicGen path; best MusicGen quality listed here but heavy.",
         output: "Stereo music clips.",
         bestUse: "High-VRAM machines where stereo output matters.",
         minVramGB: 20,
         recommendedVramGB: 32,
-        deployment: "AudioCraft path; prefer high-VRAM local GPUs or remote workers.",
+        deployment: "Transformers path; prefer high-VRAM local GPUs or remote workers.",
         defaultDurationSec: 20,
         note: "Heavy stereo MusicGen option."
       },
@@ -252398,7 +252481,7 @@ var init_audio_generate = __esm({
         label: "Stable Audio Open 1.0",
         kind: "music",
         backend: "stable-audio",
-        install: 'python .omnius/audio-gen/stable_audio.py --kind music --model stabilityai/stable-audio-open-1.0 --duration 30 --prompt "..." --output .omnius/audio/out.wav',
+        install: 'python3 .omnius/audio-gen/stable_audio.py --kind music --model stabilityai/stable-audio-open-1.0 --duration 30 --prompt "..." --output .omnius/audio/out.wav',
         category: "Primary serious music baseline",
         sizeClass: "Stereo 44.1 kHz diffusion audio",
         quality: "Primary serious open music/audio baseline for higher-fidelity stereo clips.",
@@ -252416,7 +252499,7 @@ var init_audio_generate = __esm({
         label: "MAGNeT AudioCraft Profile",
         kind: "music",
         backend: "project",
-        install: "python -m pip install -U audiocraft && inspect the AudioCraft MAGNeT examples before enabling generation",
+        install: "python3 -m pip install -U audiocraft && inspect the AudioCraft MAGNeT examples before enabling generation",
         category: "Research music",
         sizeClass: "Non-autoregressive AudioCraft research model",
         quality: "Interesting fast music research path; less standard than MusicGen for production.",
@@ -252433,7 +252516,7 @@ var init_audio_generate = __esm({
         label: "JASCO AudioCraft Profile",
         kind: "music",
         backend: "project",
-        install: "python -m pip install -U audiocraft && inspect the AudioCraft JASCO examples before enabling generation",
+        install: "python3 -m pip install -U audiocraft && inspect the AudioCraft JASCO examples before enabling generation",
         category: "Conditioned music",
         sizeClass: "Chord/melody/drum conditioned research model",
         quality: "Promising controlled music path when conditioning inputs are available.",
@@ -252467,7 +252550,7 @@ var init_audio_generate = __esm({
         label: "Riffusion v1",
         kind: "music",
         backend: "diffusers",
-        install: 'python .omnius/audio-gen/diffusers_audio.py --kind music --model riffusion/riffusion-model-v1 --duration 8 --prompt "..." --output .omnius/audio/out.wav',
+        install: 'python3 .omnius/audio-gen/diffusers_audio.py --kind music --model riffusion/riffusion-model-v1 --duration 8 --prompt "..." --output .omnius/audio/out.wav',
         category: "Legacy/specialized music",
         sizeClass: "Spectrogram diffusion",
         quality: "Historically important and fun, but below MusicGen and Stable Audio Open for general quality.",
@@ -252641,19 +252724,100 @@ def main():
     torchaudio.save(str(out), wav, model.sample_rate)
     print(json.dumps({"ok": True, "path": str(out), "model": args.model, "backend": "audiocraft", "sample_rate": model.sample_rate, "seconds": round(time.perf_counter() - t0, 3)}))
+if __name__ == "__main__":
+    main()
+`;
+    TRANSFORMERS_AUDIO_RUNNER = String.raw`#!/usr/bin/env python3
+import argparse, json, sys, time
+from pathlib import Path
+def _progress(stage, message, percent=None):
+    payload = {"omnius_progress": True, "stage": stage, "message": message}
+    if percent is not None:
+        payload["percent"] = percent
+    print(json.dumps(payload), file=sys.stderr, flush=True)
+def _device():
+    import torch
+    if torch.cuda.is_available():
+        return "cuda"
+    if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
+        return "mps"
+    return "cpu"
+def _write_wav(path, sample_rate, audio):
+    import numpy as np
+    from scipy.io.wavfile import write
+    arr = np.asarray(audio)
+    if arr.ndim > 2:
+        arr = arr.squeeze()
+    if arr.ndim == 2 and arr.shape[0] < arr.shape[1]:
+        arr = arr.T
+    arr = np.nan_to_num(arr)
+    peak = float(np.max(np.abs(arr))) if arr.size else 0.0
+    if peak > 1.0:
+        arr = arr / peak
+    arr_i16 = (np.clip(arr, -1.0, 1.0) * 32767).astype(np.int16)
+    write(str(path), sample_rate, arr_i16)
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--kind", choices=["sound", "music"], required=True)
+    parser.add_argument("--model", required=True)
+    parser.add_argument("--prompt", required=True)
+    parser.add_argument("--output", required=True)
+    parser.add_argument("--duration", type=float, default=20)
+    parser.add_argument("--seed", type=int, default=None)
+    parser.add_argument("--prewarm", action="store_true")
+    args = parser.parse_args()
+    t0 = time.perf_counter()
+    import torch
+    from transformers import AutoProcessor, MusicgenForConditionalGeneration
+    device = _device()
+    dtype = torch.float16 if device == "cuda" else torch.float32
+    _progress("load", f"loading MusicGen model {args.model}")
+    processor = AutoProcessor.from_pretrained(args.model)
+    model = MusicgenForConditionalGeneration.from_pretrained(args.model, torch_dtype=dtype)
+    model = model.to(device)
+    sample_rate = int(getattr(model.config.audio_encoder, "sampling_rate", 32000))
+    _progress("load", f"model loaded on {device}")
+    if args.prewarm:
+        _progress("load", f"prewarmed {args.model} on {device}", 100)
+        print(json.dumps({"ok": True, "path": "", "model": args.model, "backend": "transformers", "sample_rate": sample_rate, "prewarm": True, "seconds": round(time.perf_counter() - t0, 3)}))
+        return
+    if args.seed is not None:
+        torch.manual_seed(args.seed)
+    inputs = processor(text=[args.prompt], padding=True, return_tensors="pt")
+    inputs = {k: v.to(device) for k, v in inputs.items()}
+    # MusicGen uses roughly 50 generated tokens per second of audio.
+    max_new_tokens = max(32, min(1536, int(args.duration * 50)))
+    _progress("generate", f"generating about {args.duration}s audio with {max_new_tokens} tokens")
+    with torch.no_grad():
+        audio_values = model.generate(**inputs, max_new_tokens=max_new_tokens)
+    audio = audio_values[0, 0].detach().cpu().float().numpy()
+    out = Path(args.output)
+    out.parent.mkdir(parents=True, exist_ok=True)
+    _progress("save", f"saving audio to {out}")
+    _write_wav(out, sample_rate, audio)
+    print(json.dumps({"ok": True, "path": str(out), "model": args.model, "backend": "transformers", "sample_rate": sample_rate, "seconds": round(time.perf_counter() - t0, 3)}))
 if __name__ == "__main__":
     main()
 `;
     AudioGenerateTool = class {
       name = "generate_audio";
-      description = "Generate a sound effect or music clip from a text prompt using local audio-generation backends. Supports Diffusers AudioLDM/AudioLDM2, AudioCraft MusicGen/AudioGen, Stable Audio Open deployment paths, and explicit research-project profiles. Saves WAV files under .omnius/audio and returns the file path.";
+      description = "Generate a sound effect or music clip from a text prompt using local audio-generation backends. Supports Diffusers AudioLDM/AudioLDM2, Transformers MusicGen, AudioCraft AudioGen, Stable Audio Open deployment paths, and explicit research-project profiles. Saves WAV files under .omnius/audio and returns the file path.";
       parameters = {
         type: "object",
         properties: {
           kind: { type: "string", enum: ["sound", "music"], description: "Generate a sound effect/ambience clip or a music clip" },
           prompt: { type: "string", description: "Text description of the audio to generate" },
           model: { type: "string", description: "Audio model id, e.g. cvssp/audioldm2 or facebook/musicgen-small" },
-          backend: { type: "string", enum: ["auto", "diffusers", "audiocraft", "stable-audio", "tangoflux", "project"] },
+          backend: { type: "string", enum: ["auto", "diffusers", "transformers", "audiocraft", "stable-audio", "tangoflux", "project"] },
           duration: { type: "number", description: "Clip length in seconds" },
           steps: { type: "number", description: "Diffusion sampling steps when supported" },
           seed: { type: "number", description: "Optional random seed" }
@@ -252764,9 +252928,9 @@ if __name__ == "__main__":
           };
         }
         if (action === "setup") {
-          const requested = String(args["backend"] ?? (kind === "music" ? this.defaults.musicBackend : this.defaults.soundBackend) ?? "diffusers");
+          const requested = String(args["backend"] ?? (kind === "music" ? this.defaults.musicBackend : this.defaults.soundBackend) ?? (kind === "music" ? "transformers" : "diffusers"));
           const backend2 = inferAudioGenerationBackend(typeof args["model"] === "string" ? args["model"] : void 0, requested);
-          const resolvedBackend = backend2 === "auto" ? "diffusers" : backend2;
+          const resolvedBackend = backend2 === "auto" ? kind === "music" ? "transformers" : "diffusers" : backend2;
           const plan = audioGenerationSetupPlan(kind, resolvedBackend, this.cwd, typeof args["model"] === "string" ? args["model"] : void 0);
           return {
             success: true,
@@ -252787,7 +252951,7 @@ if __name__ == "__main__":
           const requestedModel2 = rawModel2 === "auto" ? void 0 : rawModel2;
           let backend2 = inferAudioGenerationBackend(requestedModel2, args["backend"] ? String(args["backend"]) : defaultBackend2);
           if (backend2 === "auto")
-            backend2 = kind === "music" ? "audiocraft" : "diffusers";
+            backend2 = kind === "music" ? "transformers" : "diffusers";
           const model2 = requestedModel2 ?? (kind === "music" ? DEFAULT_MUSIC_MODEL : DEFAULT_SOUND_MODEL);
           const preset2 = getAudioGenerationPreset(model2, kind);
           const duration2 = numberArg2(args["duration"], preset2?.defaultDurationSec ?? (kind === "music" ? 20 : 8));
@@ -252810,7 +252974,7 @@ if __name__ == "__main__":
           return await this.prewarmPythonBackend({
             kind,
             backend: backend2,
-            runnerBackend: backend2 === "audiocraft" ? "audiocraft" : backend2 === "stable-audio" ? "stable-audio" : backend2 === "tangoflux" ? "tangoflux" : "diffusers",
+            runnerBackend: backend2,
             model: model2,
             duration: duration2,
             start: start2,
@@ -252827,7 +252991,7 @@ if __name__ == "__main__":
         const requestedModel = rawModel === "auto" ? void 0 : rawModel;
         let backend = inferAudioGenerationBackend(requestedModel, args["backend"] ? String(args["backend"]) : defaultBackend);
         if (backend === "auto")
-          backend = kind === "music" ? "audiocraft" : "diffusers";
+          backend = kind === "music" ? "transformers" : "diffusers";
         const model = requestedModel ?? (kind === "music" ? DEFAULT_MUSIC_MODEL : DEFAULT_SOUND_MODEL);
         const preset = getAudioGenerationPreset(model, kind);
         const duration = numberArg2(args["duration"], preset?.defaultDurationSec ?? (kind === "music" ? 20 : 8));
@@ -252853,6 +253017,9 @@ if __name__ == "__main__":
           if (backend === "tangoflux") {
             return await this.generateWithPythonBackend({ kind, backend, runnerBackend: "tangoflux", prompt, model, duration, steps, seed, start: start2, python: args["python"] });
           }
+          if (backend === "transformers") {
+            return await this.generateWithPythonBackend({ kind, backend, runnerBackend: "transformers", prompt, model, duration, steps, seed, start: start2, python: args["python"] });
+          }
           if (backend === "audiocraft") {
             return await this.generateWithPythonBackend({ kind, backend, runnerBackend: "audiocraft", prompt, model, duration, steps, seed, start: start2, python: args["python"] });
           }
@@ -252902,8 +253069,9 @@ if __name__ == "__main__":
           "--duration",
           String(args.duration)
         ];
-        if (args.steps !== void 0 && args.runnerBackend !== "audiocraft")
+        if (args.steps !== void 0 && (args.runnerBackend === "diffusers" || args.runnerBackend === "stable-audio" || args.runnerBackend === "tangoflux")) {
           argv.push("--steps", String(args.steps));
+        }
         if (args.seed !== void 0)
           argv.push("--seed", String(args.seed));
         this.emitProgress({ stage: "load", message: `Starting ${args.kind} generation with ${args.model}` });
@@ -476611,7 +476779,7 @@ var require_path_browserify = __commonJS({
           return path11.slice(start2, end);
         }
       },
-      extname: function extname14(path11) {
+      extname: function extname15(path11) {
         assertPath(path11);
         var startDot = -1;
         var startPart = 0;
@@ -506772,10 +506940,82 @@ Saved to: ${tempFile}`,
 });
 // packages/execution/dist/tools/audio-playback.js
-import { execSync as execSync29, spawn as spawn16 } from "node:child_process";
+import { execFileSync as execFileSync2, execSync as execSync29, spawn as spawn16 } from "node:child_process";
 import { existsSync as existsSync40, statSync as statSync18, writeFileSync as writeFileSync16, readFileSync as readFileSync31, unlinkSync as unlinkSync8, mkdirSync as mkdirSync16 } from "node:fs";
-import { join as join58 } from "node:path";
+import { extname as extname10, join as join58 } from "node:path";
 import { homedir as homedir14, tmpdir as tmpdir11 } from "node:os";
+function hasCommand3(command) {
+  try {
+    if (process.platform === "win32") {
+      execFileSync2("where", [command], { stdio: "ignore", timeout: 2e3 });
+    } else {
+      execFileSync2("command", ["-v", command], { stdio: "ignore", timeout: 2e3 });
+    }
+    return true;
+  } catch {
+    if (process.platform !== "win32") {
+      try {
+        execFileSync2("which", [command], { stdio: "ignore", timeout: 2e3 });
+        return true;
+      } catch {
+        return false;
+      }
+    }
+    return false;
+  }
+}
+function playbackCommandFor(file, device = "default") {
+  const ext = extname10(file).toLowerCase();
+  if (process.platform === "darwin" && hasCommand3("afplay")) {
+    return { command: "afplay", args: [file], label: "afplay" };
+  }
+  if (hasCommand3("ffplay")) {
+    return { command: "ffplay", args: ["-nodisp", "-autoexit", "-loglevel", "error", file], label: "ffplay" };
+  }
+  if (hasCommand3("mpv")) {
+    return { command: "mpv", args: ["--no-video", "--really-quiet", file], label: "mpv" };
+  }
+  if (process.platform === "win32") {
+    return {
+      command: "powershell.exe",
+      args: [
+        "-NoProfile",
+        "-Command",
+        `Add-Type -AssemblyName presentationCore; $p=New-Object System.Windows.Media.MediaPlayer; $p.Open([Uri]::new(${JSON.stringify(file)})); Start-Sleep -Milliseconds 200; while($p.NaturalDuration.HasTimeSpan -eq $false){Start-Sleep -Milliseconds 100}; $p.Play(); Start-Sleep -Milliseconds ([int]$p.NaturalDuration.TimeSpan.TotalMilliseconds + 250)`
+      ],
+      label: "powershell MediaPlayer"
+    };
+  }
+  if ((ext === ".wav" || ext === ".flac" || ext === ".oga" || ext === ".ogg") && hasCommand3("pw-play")) {
+    return { command: "pw-play", args: [file], label: "pw-play" };
+  }
+  if ((ext === ".wav" || ext === ".flac" || ext === ".oga" || ext === ".ogg") && hasCommand3("paplay")) {
+    return { command: "paplay", args: [file], label: "paplay" };
+  }
+  if (ext === ".wav" && hasCommand3("aplay")) {
+    return { command: "aplay", args: ["-D", device, "-q", file], label: "aplay" };
+  }
+  return null;
+}
+function playSoundFile(file, opts = {}) {
+  if (!file)
+    return { ok: false, error: "Missing file path." };
+  if (!existsSync40(file))
+    return { ok: false, error: `Audio file not found: ${file}` };
+  const command = playbackCommandFor(file, opts.device || "default");
+  if (!command) {
+    return {
+      ok: false,
+      error: "No terminal audio player found. Install ffmpeg/ffplay or mpv; WAV fallback can use pipewire (pw-play), PulseAudio (paplay), or ALSA (aplay)."
+    };
+  }
+  try {
+    execFileSync2(command.command, command.args, { timeout: opts.timeoutMs ?? 3e5, stdio: "pipe" });
+    return { ok: true, player: command.label };
+  } catch (err) {
+    return { ok: false, error: `Playback via ${command.label} failed: ${err instanceof Error ? err.message.slice(0, 300) : String(err).slice(0, 300)}` };
+  }
+}
 function ensureLuxttsDaemon() {
   if (_luxttsDaemon && !_luxttsDaemon.killed && _luxttsReady)
     return Promise.resolve(true);
@@ -506886,7 +507126,7 @@ function luxttsSynthesize(text, cloneRef) {
     _luxttsDaemon.stdin.write(req2 + "\n");
   });
 }
-var _luxttsDaemon, _luxttsReady, _luxttsRequestId, _luxttsPending, _luxttsBuffer, _luxttsStarting, AudioPlaybackTool;
+var _luxttsDaemon, _luxttsReady, _luxttsRequestId, _luxttsPending, _luxttsBuffer, _luxttsStarting, AudioPlaybackTool, SoundPlaybackTool;
 var init_audio_playback = __esm({
   "packages/execution/dist/tools/audio-playback.js"() {
     "use strict";
@@ -506965,21 +507205,13 @@ var init_audio_playback = __esm({
         const device = args["device"] || "default";
         const size = statSync18(file).size;
         const ext = file.split(".").pop()?.toLowerCase() || "";
-        let cmd;
-        if (ext === "wav") {
-          cmd = `aplay -D ${device} -q "${file}"`;
-        } else {
-          cmd = `ffplay -nodisp -autoexit -loglevel error "${file}"`;
-        }
-        try {
-          execSync29(cmd, { timeout: 3e5, stdio: "pipe" });
-        } catch (err) {
-          const msg = err instanceof Error ? err.message : String(err);
-          return { success: false, output: "", error: `Playback failed: ${msg.slice(0, 300)}`, durationMs: performance.now() - start2 };
+        const played = playSoundFile(file, { device, timeoutMs: 3e5 });
+        if (!played.ok) {
+          return { success: false, output: "", error: played.error, durationMs: performance.now() - start2 };
         }
         return {
           success: true,
-          output: `Played ${file} (${Math.round(size / 1024)}KB ${ext.toUpperCase()}) on ${device}`,
+          output: `Played ${file} (${Math.round(size / 1024)}KB ${ext.toUpperCase()}) via ${played.player}`,
           durationMs: performance.now() - start2
         };
       }
@@ -507146,6 +507378,51 @@ ${devices.join("\n")}`,
         };
       }
     };
+    SoundPlaybackTool = class {
+      name = "play_sound";
+      description = "Play any local sound/audio file through the system speakers from the terminal. Supports generated WAV files and common audio formats such as MP3, OGG, OPUS, M4A, FLAC, and AAC when ffplay/mpv or system audio players are available. Use this immediately after generate_audio, /sound, or /music when the user wants to hear the generated file.";
+      parameters = {
+        type: "object",
+        properties: {
+          file: {
+            type: "string",
+            description: "Path to the local audio file to play"
+          },
+          device: {
+            type: "string",
+            description: "Optional output device. ALSA aplay accepts values like default or hw:0,0; other players ignore this."
+          },
+          timeout_ms: {
+            type: "number",
+            description: "Optional playback timeout in milliseconds; default 300000."
+          }
+        },
+        required: ["file"]
+      };
+      async execute(args) {
+        const start2 = performance.now();
+        const file = typeof args["file"] === "string" ? args["file"] : "";
+        const device = typeof args["device"] === "string" ? args["device"] : "default";
+        const timeoutMs = typeof args["timeout_ms"] === "number" && Number.isFinite(args["timeout_ms"]) ? Math.max(1e3, Math.min(36e5, Math.round(args["timeout_ms"]))) : 3e5;
+        if (!file) {
+          return { success: false, output: "", error: "Missing 'file' parameter. Provide a local sound/audio file path.", durationMs: performance.now() - start2 };
+        }
+        if (!existsSync40(file)) {
+          return { success: false, output: "", error: `Audio file not found: ${file}`, durationMs: performance.now() - start2 };
+        }
+        const size = statSync18(file).size;
+        const ext = extname10(file).replace(/^\./, "").toUpperCase() || "audio";
+        const played = playSoundFile(file, { device, timeoutMs });
+        if (!played.ok) {
+          return { success: false, output: "", error: played.error, durationMs: performance.now() - start2 };
+        }
+        return {
+          success: true,
+          output: `Played sound: ${file} (${Math.round(size / 1024)}KB ${ext}) via ${played.player}`,
+          durationMs: performance.now() - start2
+        };
+      }
+    };
   }
 });
@@ -514787,6 +515064,7 @@ __export(dist_exports, {
   SkillBuildTool: () => SkillBuildTool,
   SkillExecuteTool: () => SkillExecuteTool,
   SkillListTool: () => SkillListTool,
+  SoundPlaybackTool: () => SoundPlaybackTool,
   StdioTransport: () => StdioTransport,
   StructuredFileTool: () => StructuredFileTool,
   StructuredReadTool: () => StructuredReadTool,
@@ -514906,6 +515184,7 @@ __export(dist_exports, {
   packetPath: () => packetPath,
   parseMcpMarkdown: () => parseMcpMarkdown,
   parseMcpToolName: () => parseMcpToolName,
+  playSoundFile: () => playSoundFile,
   promoteWorkingNotes: () => promoteWorkingNotes,
   readPacket: () => readPacket,
   readProvenanceFile: () => readProvenanceFile,
@@ -530271,6 +530550,8 @@ var init_agenticRunner = __esm({
       "transcribe_file",
       "transcribe_url",
       "audio_playback",
+      "play_sound",
+      "generate_audio",
       "youtube_download"
     ]);
     SOCIAL_TOOLS = /* @__PURE__ */ new Set([
@@ -552084,7 +552365,7 @@ var init_command_registry = __esm({
       ["/music", "Open music-generation model/setup menu"],
       ["/music <prompt>", "Generate a music clip from a prompt"],
       ["/music --model <model> <prompt>", "Generate music with an explicit music model"],
-      ["/music setup <audiocraft|stable-audio|diffusers>", "Show setup commands for a music-generation backend"],
+      ["/music setup <transformers|audiocraft|stable-audio|diffusers>", "Show setup commands for a music-generation backend"],
       ["/music list", "List music models by category, quality, size, and hardware fit"],
       ["/call", "Start voice call session (cloudflared tunnel + ASR/TTS)"],
       ["/hangup", "End active call session"],
@@ -569380,7 +569661,7 @@ var init_platforms = __esm({
 // packages/cli/src/tui/workspace-explorer.ts
 import { existsSync as existsSync88, readdirSync as readdirSync26, readFileSync as readFileSync72, statSync as statSync31 } from "node:fs";
-import { basename as basename15, extname as extname11, join as join104, relative as relative10, resolve as resolve35 } from "node:path";
+import { basename as basename15, extname as extname12, join as join104, relative as relative10, resolve as resolve35 } from "node:path";
 function exploreWorkspace(root, options2 = {}) {
   const query = (options2.query ?? "").trim().toLowerCase();
   const maxResults = options2.maxResults ?? 80;
@@ -569493,7 +569774,7 @@ function previewWorkspaceFile(root, relPath, options2 = {}) {
 }
 function classifyWorkspaceFile(path11) {
   const lower = path11.toLowerCase();
-  const ext = extname11(lower);
+  const ext = extname12(lower);
   if (lower.includes(".test.") || lower.includes(".spec.") || lower.includes("/tests/")) return "test";
   if (SOURCE_EXT.has(ext)) return "source";
   if (DOC_EXT2.has(ext)) return "doc";
@@ -569569,7 +569850,7 @@ var init_workspace_explorer = __esm({
 // packages/cli/src/tui/drop-panel.ts
 import { existsSync as existsSync89 } from "node:fs";
-import { extname as extname12, resolve as resolve36 } from "node:path";
+import { extname as extname13, resolve as resolve36 } from "node:path";
 function ansi4(code8, text) {
   return isTTY4 ? `\x1B[${code8}m${text}\x1B[0m` : text;
 }
@@ -569695,7 +569976,7 @@ function showDropPanel(opts) {
         return;
       }
       if (allowedExtensions.length > 0) {
-        const ext = extname12(filePath).toLowerCase();
+        const ext = extname13(filePath).toLowerCase();
         if (!allowedExtensions.includes(ext)) {
           errorMsg = `Invalid file type: ${ext}. Expected: ${allowedExtensions.join(", ")}`;
           render2();
@@ -574002,7 +574283,7 @@ __export(image_ascii_preview_exports, {
   extractSavedImagePath: () => extractSavedImagePath,
   formatImageAsciiContext: () => formatImageAsciiContext
 });
-import { execFileSync as execFileSync2 } from "node:child_process";
+import { execFileSync as execFileSync3 } from "node:child_process";
 import { createRequire as createRequire4 } from "node:module";
 import { existsSync as existsSync94, readFileSync as readFileSync75, statSync as statSync32 } from "node:fs";
 import { resolve as resolve37 } from "node:path";
@@ -574139,7 +574420,7 @@ function convertWithFfmpeg(imagePath, width, height, timeoutMs) {
       `scale=${width}:${height}`,
       "format=gray"
     ].join(",");
-    const raw = execFileSync2(
+    const raw = execFileSync3(
       "ffmpeg",
       [
         "-hide_banner",
@@ -584829,10 +585110,10 @@ function defaultAudioModel(kind) {
   return kind === "music" ? DEFAULT_MUSIC_MODEL : DEFAULT_SOUND_MODEL;
 }
 function defaultAudioBackend(kind) {
-  return kind === "music" ? "audiocraft" : "diffusers";
+  return kind === "music" ? "transformers" : "diffusers";
 }
 function normalizeAudioBackend(value2, kind) {
-  if (value2 === "diffusers" || value2 === "audiocraft" || value2 === "stable-audio" || value2 === "tangoflux" || value2 === "project") return value2;
+  if (value2 === "diffusers" || value2 === "transformers" || value2 === "audiocraft" || value2 === "stable-audio" || value2 === "tangoflux" || value2 === "project") return value2;
   return defaultAudioBackend(kind);
 }
 function audioBackendForModel(model, requested, kind) {
@@ -584920,7 +585201,8 @@ async function showAudioGenerationMenu(ctx3, hasLocal, kind) {
   const activeModel = activeAudioModel(settings, kind);
   const title = kind === "music" ? "Music Generation" : "Sound Generation";
   const setupItems = kind === "music" ? [
-    { key: "setup:audiocraft", label: "Setup AudioCraft", detail: "MusicGen small/medium/large" },
+    { key: "setup:transformers", label: "Setup Transformers", detail: "Default MusicGen path; avoids AudioCraft/PyAV headers" },
+    { key: "setup:audiocraft", label: "Setup AudioCraft", detail: "Optional MusicGen/AudioGen upstream runtime" },
     { key: "setup:stable-audio", label: "Setup Stable Audio", detail: "Stable Audio Open 1.0 serious stereo baseline" },
     { key: "setup:diffusers", label: "Setup Diffusers", detail: "AudioLDM/Riffusion-style paths" }
   ] : [
@@ -585050,7 +585332,17 @@ async function handleAudioGenerationCommand(ctx3, arg, hasLocal, kind) {
   }
   renderInfo(result.output);
   const fileMatch = result.output.match(/(?:Sound|Music) generated:\s+(.+)/);
-  if (fileMatch?.[1]) renderInfo(`File: ${fileMatch[1].trim()}`);
+  if (fileMatch?.[1]) {
+    const audioPath = fileMatch[1].trim();
+    renderInfo(`File: ${audioPath}`);
+    renderInfo(`Playing generated ${kind}...`);
+    const playback = playSoundFile(audioPath);
+    if (playback.ok) {
+      renderInfo(`Audio playback complete via ${playback.player}.`);
+    } else {
+      renderWarning(`Audio playback failed: ${playback.error}`);
+    }
+  }
   return "handled";
 }
 function formatAudioGenerationProgress(event) {
@@ -595599,12 +595891,12 @@ __export(vision_ingress_exports, {
   queryVisionModel: () => queryVisionModel,
   runVisionIngress: () => runVisionIngress
 });
-import { execFileSync as execFileSync3 } from "node:child_process";
+import { execFileSync as execFileSync4 } from "node:child_process";
 import { existsSync as existsSync105, readFileSync as readFileSync86, unlinkSync as unlinkSync20 } from "node:fs";
 import { join as join120 } from "node:path";
 function isTesseractAvailable() {
   try {
-    execFileSync3("tesseract", ["--version"], { stdio: "ignore", timeout: 3e3 });
+    execFileSync4("tesseract", ["--version"], { stdio: "ignore", timeout: 3e3 });
     return true;
   } catch {
     return false;
@@ -595645,7 +595937,7 @@ function advancedOcr(imagePath) {
   for (const psm of psmModes) {
     const outFile = `${tmpBase}_psm${psm}`;
     try {
-      execFileSync3("tesseract", [
+      execFileSync4("tesseract", [
         imagePath,
         outFile,
         "--psm",
@@ -623381,7 +623673,7 @@ var clipboard_media_exports = {};
 __export(clipboard_media_exports, {
   pasteClipboardImageToFile: () => pasteClipboardImageToFile
 });
-import { execFileSync as execFileSync4, execSync as execSync58 } from "node:child_process";
+import { execFileSync as execFileSync5, execSync as execSync58 } from "node:child_process";
 import { mkdirSync as mkdirSync72, readFileSync as readFileSync99, rmSync as rmSync5, writeFileSync as writeFileSync67 } from "node:fs";
 import { join as join136 } from "node:path";
 function pasteClipboardImageToFile(repoRoot) {
@@ -623398,7 +623690,7 @@ function readClipboardImage() {
     try {
       execSync58("command -v pngpaste", { stdio: "ignore", timeout: 1e3 });
       const tmp = `/tmp/omnius-clipboard-${Date.now()}.png`;
-      execFileSync4("pngpaste", [tmp], { timeout: 3e3 });
+      execFileSync5("pngpaste", [tmp], { timeout: 3e3 });
       const buffer2 = readFileSync99(tmp);
       try {
         rmSync5(tmp);
@@ -623418,7 +623710,7 @@ function readClipboardImage() {
     ];
     for (const attempt of attempts) {
       try {
-        const buffer2 = execFileSync4(attempt.cmd, attempt.args, { timeout: 3e3, maxBuffer: 25 * 1024 * 1024 });
+        const buffer2 = execFileSync5(attempt.cmd, attempt.args, { timeout: 3e3, maxBuffer: 25 * 1024 * 1024 });
         if (buffer2.length > 0) return { buffer: buffer2, mime: attempt.mime, ext: attempt.ext };
       } catch {
         continue;
@@ -623435,7 +623727,7 @@ function readClipboardImage() {
         "$img.Save($ms,[Drawing.Imaging.ImageFormat]::Png);",
         "[Console]::OpenStandardOutput().Write($ms.ToArray(),0,$ms.Length)"
       ].join("");
-      const buffer2 = execFileSync4("powershell.exe", ["-NoProfile", "-Command", ps], {
+      const buffer2 = execFileSync5("powershell.exe", ["-NoProfile", "-Command", ps], {
         timeout: 5e3,
         maxBuffer: 25 * 1024 * 1024
       });
@@ -623454,7 +623746,7 @@ var init_clipboard_media = __esm({
 // packages/cli/src/tui/interactive.ts
 import { cwd } from "node:process";
-import { resolve as resolve44, join as join137, dirname as dirname38, extname as extname13, relative as relative14 } from "node:path";
+import { resolve as resolve44, join as join137, dirname as dirname38, extname as extname14, relative as relative14 } from "node:path";
 import { createRequire as createRequire7 } from "node:module";
 import { fileURLToPath as fileURLToPath18 } from "node:url";
 import {
@@ -623839,6 +624131,7 @@ function buildSubAgentTools(repoRoot, config) {
     new StructuredFileTool(repoRoot),
     // Audio
     new AudioPlaybackTool(),
+    new SoundPlaybackTool(),
     new AudioCaptureTool(),
     new AudioAnalyzeTool(),
     new AsrListenTool(),
@@ -623993,6 +624286,7 @@ function buildTools(repoRoot, config, contextWindowSize, modelTier) {
     new CameraCaptureTool(),
     new AudioCaptureTool(),
     new AudioPlaybackTool(),
+    new SoundPlaybackTool(),
     new WifiControlTool(),
     new BluetoothScanTool(),
     new SdrScanTool(),
@@ -624542,6 +624836,24 @@ async function renderAsciiPreviewForToolResult(toolName, output, repoRoot, write
     }
   }
 }
+function extractGeneratedAudioPath(output, repoRoot) {
+  const match = output.match(/(?:Sound|Music) generated:\s+([^\n\r]+)/i);
+  const raw = match?.[1]?.trim().replace(/^["']|["']$/g, "");
+  if (!raw) return null;
+  return raw.startsWith("/") || raw.startsWith("~") ? raw.replace(/^~(?=\/)/, homedir46()) : join137(repoRoot, raw);
+}
+async function playGeneratedAudioForToolResult(toolName, output, repoRoot, writer) {
+  if (toolName !== "generate_audio" || !output) return;
+  const audioPath = extractGeneratedAudioPath(output, repoRoot);
+  if (!audioPath) return;
+  writer(() => renderInfo(`Playing generated audio: ${relative14(repoRoot, audioPath).startsWith("..") ? audioPath : relative14(repoRoot, audioPath)}`));
+  const result = playSoundFile(audioPath);
+  if (result.ok) {
+    writer(() => renderInfo(`Audio playback complete via ${result.player}.`));
+  } else {
+    writer(() => renderWarning(`Audio playback failed: ${result.error}`));
+  }
+}
 async function runSelfImprovementCycle(repoRoot) {
   try {
     const {
@@ -625697,8 +626009,9 @@ ${entry.fullContent}`
             }
           });
         }
-        if (event.success || event.toolName === "generate_image") {
+        if (event.success) {
           void renderAsciiPreviewForToolResult(event.toolName, event.content ?? "", repoRoot, contentWrite);
+          void playGeneratedAudioForToolResult(event.toolName, event.content ?? "", repoRoot, contentWrite);
         }
         if (voice?.enabled && voice.voiceMode === "voicechat" && _voiceChatSession2?.isActive && event.toolName === "task_complete") {
           const emoStateFinal = emotionEngine?.getState();
@@ -630719,7 +631032,7 @@ Execute this skill now. Follow the behavioral guidance above.`;
           const imgPath = resolve44(repoRoot, cleanPath);
           const imgBuffer = readFileSync100(imgPath);
           const base642 = imgBuffer.toString("base64");
-          const ext = extname13(cleanPath).toLowerCase();
+          const ext = extname14(cleanPath).toLowerCase();
           const mime = ext === ".png" ? "image/png" : ext === ".gif" ? "image/gif" : ext === ".webp" ? "image/webp" : "image/jpeg";
           const asciiContext = await renderAsciiPreviewForImage(
             imgPath,

package/npm-shrinkwrap.json CHANGED Viewed

@@ -1,12 +1,12 @@
 {
   "name": "omnius",
-  "version": "1.0.14",
+  "version": "1.0.15",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "omnius",
-      "version": "1.0.14",
+      "version": "1.0.15",
       "bundleDependencies": [
         "image-to-ascii"
       ],

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "omnius",
-  "version": "1.0.14",
+  "version": "1.0.15",
   "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
   "type": "module",
   "main": "./dist/index.js",