omnius 1.0.14 → 1.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1451,7 +1451,7 @@ var init_security_classifier = __esm({
1451
1451
  { match: /^(aiwg_setup|aiwg_health|aiwg_workflow)$/, info: CRITICAL_SENSITIVE },
1452
1452
  { match: /^(expose|sponsor|nexus_register|wallet_|x402|payment|spend)/, info: CRITICAL_SENSITIVE },
1453
1453
  // ── Hardware peripherals
1454
- { match: /^(camera_capture|audio_capture|audio_playback|audio_analyze|asr_listen)$/, info: HARDWARE_DEVICE },
1454
+ { match: /^(camera_capture|audio_capture|audio_playback|play_sound|audio_analyze|asr_listen)$/, info: HARDWARE_DEVICE },
1455
1455
  { match: /^(wifi_control|bluetooth_scan|sdr_scan|flipper_zero|meshtastic|gps_location)$/, info: HARDWARE_DEVICE },
1456
1456
  { match: /^(desktop_click|desktop_describe|screenshot)$/, info: HARDWARE_DEVICE },
1457
1457
  { match: /^(jibberlink)$/, info: HARDWARE_DEVICE },
@@ -84452,7 +84452,7 @@ var require_mime_types = __commonJS({
84452
84452
  "../node_modules/mime-types/index.js"(exports) {
84453
84453
  "use strict";
84454
84454
  var db = require_mime_db();
84455
- var extname14 = __require("path").extname;
84455
+ var extname15 = __require("path").extname;
84456
84456
  var EXTRACT_TYPE_REGEXP = /^\s*([^;\s]*)(?:;|\s|$)/;
84457
84457
  var TEXT_TYPE_REGEXP = /^text\//i;
84458
84458
  exports.charset = charset;
@@ -84506,7 +84506,7 @@ var require_mime_types = __commonJS({
84506
84506
  if (!path11 || typeof path11 !== "string") {
84507
84507
  return false;
84508
84508
  }
84509
- var extension4 = extname14("x." + path11).toLowerCase().substr(1);
84509
+ var extension4 = extname15("x." + path11).toLowerCase().substr(1);
84510
84510
  if (!extension4) {
84511
84511
  return false;
84512
84512
  }
@@ -250538,6 +250538,19 @@ function trimProcessText(text, max = 1800) {
250538
250538
  return clean3;
250539
250539
  return clean3.slice(0, max - 20) + "\n... (truncated)";
250540
250540
  }
250541
+ function formatDiffusersFailure(stderrOrStdout) {
250542
+ const text = trimProcessText(stderrOrStdout);
250543
+ const lower = stderrOrStdout.toLowerCase();
250544
+ const notes2 = [];
250545
+ if (lower.includes("torchvision") && (lower.includes("not installed") || lower.includes("no module named"))) {
250546
+ notes2.push("Missing torchvision was detected. The image-generation dependency set now includes torchvision; run /image prewarm again to repair the existing .omnius/image-gen/.venv.");
250547
+ }
250548
+ if (lower.includes("hf_token") || lower.includes("gated repo") || lower.includes("401") || lower.includes("unauthorized")) {
250549
+ notes2.push("This model may require Hugging Face authentication or license acceptance. Set HF_TOKEN in the environment and accept the model license on Hugging Face, then prewarm again.");
250550
+ }
250551
+ return [text, ...notes2.map((note) => `
250552
+ Note: ${note}`)].filter(Boolean).join("");
250553
+ }
250541
250554
  function imageGenerationPythonEnv(repoRoot) {
250542
250555
  const root = imageGenerationDir(repoRoot);
250543
250556
  const hf = join36(root, "huggingface");
@@ -250595,7 +250608,7 @@ async function ensurePythonFor(repoRoot, kind, explicit, onProgress) {
250595
250608
  ${trimProcessText(created.stderr || created.stdout)}`);
250596
250609
  }
250597
250610
  }
250598
- const importCheck = kind === "diffusers" ? "import torch, diffusers, PIL\nfrom diffusers import AutoPipelineForText2Image\n" : "import stable_diffusion_cpp, PIL\n";
250611
+ const importCheck = kind === "diffusers" ? "import torch, torchvision, diffusers, PIL\nfrom diffusers import AutoPipelineForText2Image\n" : "import stable_diffusion_cpp, PIL\n";
250599
250612
  if (await pythonCanImport(command, importCheck, repoRoot, pythonEnv)) {
250600
250613
  return { command, env: pythonEnv };
250601
250614
  }
@@ -250661,6 +250674,7 @@ var init_image_generate = __esm({
250661
250674
  DEFAULT_OLLAMA_IMAGE_MODEL = "x/z-image-turbo";
250662
250675
  DIFFUSERS_PYTHON_PACKAGES = [
250663
250676
  "torch",
250677
+ "torchvision",
250664
250678
  "diffusers",
250665
250679
  "transformers",
250666
250680
  "accelerate",
@@ -250710,7 +250724,7 @@ var init_image_generate = __esm({
250710
250724
  id: "black-forest-labs/FLUX.1-dev",
250711
250725
  label: "FLUX.1 dev",
250712
250726
  backend: "diffusers",
250713
- install: 'python .omnius/image-gen/diffusers_text2image.py --model black-forest-labs/FLUX.1-dev --steps 28 --guidance 3.5 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
250727
+ install: 'python3 .omnius/image-gen/diffusers_text2image.py --model black-forest-labs/FLUX.1-dev --steps 28 --guidance 3.5 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
250714
250728
  category: "Primary hyper-realistic baseline",
250715
250729
  sizeClass: "12B rectified-flow transformer",
250716
250730
  quality: "Top-tier open-weight photorealism, prompt adherence, texture detail, composition, and typography.",
@@ -250727,7 +250741,7 @@ var init_image_generate = __esm({
250727
250741
  id: "stabilityai/stable-diffusion-3.5-large",
250728
250742
  label: "Stable Diffusion 3.5 Large",
250729
250743
  backend: "diffusers",
250730
- install: 'python .omnius/image-gen/diffusers_text2image.py --model stabilityai/stable-diffusion-3.5-large --steps 28 --guidance 4.5 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
250744
+ install: 'python3 .omnius/image-gen/diffusers_text2image.py --model stabilityai/stable-diffusion-3.5-large --steps 28 --guidance 4.5 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
250731
250745
  category: "Primary hyper-realistic baseline",
250732
250746
  sizeClass: "8B MMDiT",
250733
250747
  quality: "Serious open Stable Diffusion ecosystem baseline with strong realism, complex prompt understanding, typography, and controllability.",
@@ -250744,7 +250758,7 @@ var init_image_generate = __esm({
250744
250758
  id: "black-forest-labs/FLUX.1-schnell",
250745
250759
  label: "FLUX.1 schnell",
250746
250760
  backend: "diffusers",
250747
- install: 'python .omnius/image-gen/diffusers_text2image.py --model black-forest-labs/FLUX.1-schnell --steps 4 --guidance 0 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
250761
+ install: 'python3 .omnius/image-gen/diffusers_text2image.py --model black-forest-labs/FLUX.1-schnell --steps 4 --guidance 0 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
250748
250762
  category: "Fast large-model iteration",
250749
250763
  sizeClass: "12B rectified-flow transformer",
250750
250764
  quality: "FLUX-style output with fewer steps; better for rapid iteration than absolute peak quality.",
@@ -250761,7 +250775,7 @@ var init_image_generate = __esm({
250761
250775
  id: "stabilityai/stable-diffusion-3.5-large-turbo",
250762
250776
  label: "SD3.5 Large Turbo",
250763
250777
  backend: "diffusers",
250764
- install: 'python .omnius/image-gen/diffusers_text2image.py --model stabilityai/stable-diffusion-3.5-large-turbo --steps 4 --guidance 0 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
250778
+ install: 'python3 .omnius/image-gen/diffusers_text2image.py --model stabilityai/stable-diffusion-3.5-large-turbo --steps 4 --guidance 0 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
250765
250779
  category: "Fast large-model iteration",
250766
250780
  sizeClass: "8B distilled MMDiT",
250767
250781
  quality: "SD3.5-family quality optimized for fewer inference steps; throughput over peak fidelity.",
@@ -250778,7 +250792,7 @@ var init_image_generate = __esm({
250778
250792
  id: "Tencent-Hunyuan/HunyuanDiT-v1.2-Diffusers",
250779
250793
  label: "HunyuanDiT v1.2",
250780
250794
  backend: "diffusers",
250781
- install: 'python .omnius/image-gen/diffusers_text2image.py --model Tencent-Hunyuan/HunyuanDiT-v1.2-Diffusers --steps 30 --guidance 7.5 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
250795
+ install: 'python3 .omnius/image-gen/diffusers_text2image.py --model Tencent-Hunyuan/HunyuanDiT-v1.2-Diffusers --steps 30 --guidance 7.5 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
250782
250796
  category: "Large multilingual diffusion",
250783
250797
  sizeClass: "Large DiT text-to-image",
250784
250798
  quality: "Strong bilingual English/Chinese prompt understanding with detailed, realistic multi-resolution output.",
@@ -250795,7 +250809,7 @@ var init_image_generate = __esm({
250795
250809
  id: "Tongyi-MAI/Z-Image-Turbo",
250796
250810
  label: "Z-Image-Turbo",
250797
250811
  backend: "diffusers",
250798
- install: 'python .omnius/image-gen/diffusers_text2image.py --model Tongyi-MAI/Z-Image-Turbo --steps 8 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
250812
+ install: 'python3 .omnius/image-gen/diffusers_text2image.py --model Tongyi-MAI/Z-Image-Turbo --steps 8 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
250799
250813
  category: "Modern deployable",
250800
250814
  sizeClass: "6B image generation model",
250801
250815
  quality: "Efficient newer large-model quality; useful below full FLUX/SD3.5 hardware budgets.",
@@ -250811,7 +250825,7 @@ var init_image_generate = __esm({
250811
250825
  id: "black-forest-labs/FLUX.2-klein-4B",
250812
250826
  label: "FLUX.2 Klein 4B",
250813
250827
  backend: "diffusers",
250814
- install: 'python .omnius/image-gen/diffusers_text2image.py --model black-forest-labs/FLUX.2-klein-4B --steps 8 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
250828
+ install: 'python3 .omnius/image-gen/diffusers_text2image.py --model black-forest-labs/FLUX.2-klein-4B --steps 8 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
250815
250829
  category: "Modern deployable",
250816
250830
  sizeClass: "4B compact FLUX-family",
250817
250831
  quality: "Bridge between practical deployment and modern FLUX-family visual quality.",
@@ -250843,7 +250857,7 @@ var init_image_generate = __esm({
250843
250857
  id: "segmind/tiny-sd",
250844
250858
  label: "Segmind Tiny-SD",
250845
250859
  backend: "diffusers",
250846
- install: 'python .omnius/image-gen/diffusers_text2image.py --model segmind/tiny-sd --prompt "..." --output .omnius/images/out.png',
250860
+ install: 'python3 .omnius/image-gen/diffusers_text2image.py --model segmind/tiny-sd --prompt "..." --output .omnius/images/out.png',
250847
250861
  category: "Lightweight smoke test",
250848
250862
  sizeClass: "Small SD-compatible",
250849
250863
  quality: "Fast validation model; not a serious photorealism baseline.",
@@ -250860,7 +250874,7 @@ var init_image_generate = __esm({
250860
250874
  id: "nota-ai/bk-sdm-tiny-2m",
250861
250875
  label: "BK-SDM Tiny 2M",
250862
250876
  backend: "diffusers",
250863
- install: 'python .omnius/image-gen/diffusers_text2image.py --model nota-ai/bk-sdm-tiny-2m --prompt "..." --output .omnius/images/out.png',
250877
+ install: 'python3 .omnius/image-gen/diffusers_text2image.py --model nota-ai/bk-sdm-tiny-2m --prompt "..." --output .omnius/images/out.png',
250864
250878
  category: "Lightweight smoke test",
250865
250879
  sizeClass: "Compressed SD-compatible",
250866
250880
  quality: "Very small and practical; quality is mainly for tests and rough drafts.",
@@ -250877,7 +250891,7 @@ var init_image_generate = __esm({
250877
250891
  id: "nota-ai/bk-sdm-small-2m",
250878
250892
  label: "BK-SDM Small 2M",
250879
250893
  backend: "diffusers",
250880
- install: 'python .omnius/image-gen/diffusers_text2image.py --model nota-ai/bk-sdm-small-2m --prompt "..." --output .omnius/images/out.png',
250894
+ install: 'python3 .omnius/image-gen/diffusers_text2image.py --model nota-ai/bk-sdm-small-2m --prompt "..." --output .omnius/images/out.png',
250881
250895
  category: "Lightweight smoke test",
250882
250896
  sizeClass: "Compressed SD-compatible",
250883
250897
  quality: "Slightly better compressed-SD quality than tiny variants; still not a high-fidelity baseline.",
@@ -250894,7 +250908,7 @@ var init_image_generate = __esm({
250894
250908
  id: "SimianLuo/LCM_Dreamshaper_v7",
250895
250909
  label: "LCM DreamShaper v7",
250896
250910
  backend: "diffusers",
250897
- install: 'python .omnius/image-gen/diffusers_text2image.py --model SimianLuo/LCM_Dreamshaper_v7 --steps 4 --prompt "..." --output .omnius/images/out.png',
250911
+ install: 'python3 .omnius/image-gen/diffusers_text2image.py --model SimianLuo/LCM_Dreamshaper_v7 --steps 4 --prompt "..." --output .omnius/images/out.png',
250898
250912
  category: "Fast iteration",
250899
250913
  sizeClass: "Few-step SD-compatible",
250900
250914
  quality: "Good for low-latency concepting; below SDXL/SD3.5/FLUX for photoreal detail.",
@@ -250911,7 +250925,7 @@ var init_image_generate = __esm({
250911
250925
  id: "stabilityai/sd-turbo",
250912
250926
  label: "SD-Turbo",
250913
250927
  backend: "diffusers",
250914
- install: 'python .omnius/image-gen/diffusers_text2image.py --model stabilityai/sd-turbo --steps 1 --guidance 0 --prompt "..." --output .omnius/images/out.png',
250928
+ install: 'python3 .omnius/image-gen/diffusers_text2image.py --model stabilityai/sd-turbo --steps 1 --guidance 0 --prompt "..." --output .omnius/images/out.png',
250915
250929
  category: "Fast iteration",
250916
250930
  sizeClass: "One-to-four-step SD",
250917
250931
  quality: "Fast SD-family output; useful for iteration but lower ceiling than SDXL Turbo and large baselines.",
@@ -250928,7 +250942,7 @@ var init_image_generate = __esm({
250928
250942
  id: DEFAULT_DIFFUSERS_IMAGE_MODEL,
250929
250943
  label: "SDXL-Turbo",
250930
250944
  backend: "diffusers",
250931
- install: 'python .omnius/image-gen/diffusers_text2image.py --model stabilityai/sdxl-turbo --steps 1 --guidance 0 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
250945
+ install: 'python3 .omnius/image-gen/diffusers_text2image.py --model stabilityai/sdxl-turbo --steps 1 --guidance 0 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
250932
250946
  category: "Default local generation",
250933
250947
  sizeClass: "Few-step SDXL",
250934
250948
  quality: "Strong fast default for local image generation; not as realistic as FLUX.1 dev or SD3.5 Large, but much more practical.",
@@ -250945,7 +250959,7 @@ var init_image_generate = __esm({
250945
250959
  id: "Efficient-Large-Model/Sana_Sprint_0.6B_1024px_diffusers",
250946
250960
  label: "Sana Sprint 0.6B",
250947
250961
  backend: "diffusers",
250948
- install: 'python .omnius/image-gen/diffusers_text2image.py --model Efficient-Large-Model/Sana_Sprint_0.6B_1024px_diffusers --steps 4 --guidance 0 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
250962
+ install: 'python3 .omnius/image-gen/diffusers_text2image.py --model Efficient-Large-Model/Sana_Sprint_0.6B_1024px_diffusers --steps 4 --guidance 0 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
250949
250963
  category: "Modern efficient",
250950
250964
  sizeClass: "0.6B efficient diffusion",
250951
250965
  quality: "Modern efficient output under smaller compute budgets; below primary large baselines.",
@@ -250962,7 +250976,7 @@ var init_image_generate = __esm({
250962
250976
  id: "sdcpp:local",
250963
250977
  label: "stable-diffusion.cpp local checkpoint",
250964
250978
  backend: "sdcpp",
250965
- install: 'python .omnius/image-gen/sdcpp_text2image.py --model-path /path/to/model.gguf --prompt "..." --output .omnius/images/out.png',
250979
+ install: 'python3 .omnius/image-gen/sdcpp_text2image.py --model-path /path/to/model.gguf --prompt "..." --output .omnius/images/out.png',
250966
250980
  category: "Local checkpoint/GGUF",
250967
250981
  sizeClass: "Depends on checkpoint",
250968
250982
  quality: "Quality depends entirely on the local checkpoint or GGUF variant.",
@@ -251417,15 +251431,17 @@ ${errText.slice(0, 1200)}`,
251417
251431
  });
251418
251432
  if (result.code !== 0) {
251419
251433
  const plan = imageGenerationSetupPlan("diffusers", this.cwd, args.model);
251434
+ const output = [
251435
+ `Diffusers model prewarm failed with exit code ${result.code ?? "unknown"}.`,
251436
+ formatDiffusersFailure(result.stderr || result.stdout),
251437
+ "",
251438
+ "Setup path:",
251439
+ ...plan.commands.map((cmd) => ` ${cmd}`)
251440
+ ].filter(Boolean).join("\n");
251420
251441
  return {
251421
251442
  success: false,
251422
- output: [
251423
- `Diffusers model prewarm failed with exit code ${result.code ?? "unknown"}.`,
251424
- trimProcessText(result.stderr || result.stdout),
251425
- "",
251426
- "Setup path:",
251427
- ...plan.commands.map((cmd) => ` ${cmd}`)
251428
- ].filter(Boolean).join("\n"),
251443
+ output,
251444
+ error: output,
251429
251445
  durationMs: performance.now() - args.start
251430
251446
  };
251431
251447
  }
@@ -251600,15 +251616,17 @@ ${errText.slice(0, 800)}`,
251600
251616
  });
251601
251617
  if (result.code !== 0 || !existsSync23(filepath)) {
251602
251618
  const plan = imageGenerationSetupPlan("diffusers", this.cwd, args.model);
251619
+ const output2 = [
251620
+ `Diffusers image generation failed with exit code ${result.code ?? "unknown"}.`,
251621
+ formatDiffusersFailure(result.stderr || result.stdout),
251622
+ "",
251623
+ "Setup path:",
251624
+ ...plan.commands.map((cmd) => ` ${cmd}`)
251625
+ ].filter(Boolean).join("\n");
251603
251626
  return {
251604
251627
  success: false,
251605
- output: [
251606
- `Diffusers image generation failed with exit code ${result.code ?? "unknown"}.`,
251607
- trimProcessText(result.stderr || result.stdout),
251608
- "",
251609
- "Setup path:",
251610
- ...plan.commands.map((cmd) => ` ${cmd}`)
251611
- ].filter(Boolean).join("\n"),
251628
+ output: output2,
251629
+ error: output2,
251612
251630
  durationMs: performance.now() - args.start
251613
251631
  };
251614
251632
  }
@@ -251796,6 +251814,8 @@ function audioOutputDir(repoRoot = ".") {
251796
251814
  return join37(repoRoot, ".omnius", "audio");
251797
251815
  }
251798
251816
  function backendPackages(backend) {
251817
+ if (backend === "transformers")
251818
+ return TRANSFORMERS_AUDIO_PACKAGES;
251799
251819
  if (backend === "audiocraft")
251800
251820
  return AUDIOCRAFT_PACKAGES;
251801
251821
  if (backend === "stable-audio")
@@ -251805,6 +251825,8 @@ function backendPackages(backend) {
251805
251825
  return DIFFUSERS_AUDIO_PACKAGES;
251806
251826
  }
251807
251827
  function backendImportCheck(backend) {
251828
+ if (backend === "transformers")
251829
+ return "import torch, torchaudio, transformers, scipy\nfrom transformers import AutoProcessor, MusicgenForConditionalGeneration\n";
251808
251830
  if (backend === "audiocraft")
251809
251831
  return "import torch, torchaudio, audiocraft\nfrom audiocraft.models import MusicGen, AudioGen\n";
251810
251832
  if (backend === "stable-audio")
@@ -251933,8 +251955,23 @@ function trimProcessText2(text, max = 1800) {
251933
251955
  return clean3.slice(0, max - 20) + "\n... (truncated)";
251934
251956
  }
251935
251957
  async function pythonCanImport2(command, code8, repoRoot, env2) {
251936
- const result = await runProcess3(command, ["-c", code8], { cwd: repoRoot, timeoutMs: 6e4, env: env2 });
251937
- return result.code === 0;
251958
+ return (await pythonImportResult(command, code8, repoRoot, env2)).code === 0;
251959
+ }
251960
+ async function pythonImportResult(command, code8, repoRoot, env2) {
251961
+ return await runProcess3(command, ["-c", code8], { cwd: repoRoot, timeoutMs: 6e4, env: env2 });
251962
+ }
251963
+ function formatAudioSetupFailure(backend, text) {
251964
+ const body = trimProcessText2(text);
251965
+ const lowered = text.toLowerCase();
251966
+ const notes2 = [];
251967
+ if (backend === "audiocraft" && (lowered.includes("libavformat") || lowered.includes("pkg-config") || lowered.includes("pyav") || lowered.includes(" av"))) {
251968
+ notes2.push("AudioCraft pulls PyAV. If a matching PyAV wheel is unavailable, the system must provide FFmpeg development headers.", "Ubuntu/Debian fix: sudo apt install -y pkg-config ffmpeg libavformat-dev libavcodec-dev libavdevice-dev libavutil-dev libavfilter-dev libswscale-dev libswresample-dev", "For MusicGen, prefer the default Transformers backend: /music setup transformers or /music --backend transformers <prompt>.");
251969
+ }
251970
+ if (lowered.includes("cuda") && lowered.includes("not available")) {
251971
+ notes2.push("CUDA was not available to the selected Python environment; install a Torch build matching this machine's CUDA runtime or use CPU-compatible settings.");
251972
+ }
251973
+ return [body, ...notes2.map((note) => `
251974
+ ${note}`)].filter(Boolean).join("");
251938
251975
  }
251939
251976
  async function ensurePythonForAudio(repoRoot, backend, explicit, onProgress) {
251940
251977
  const pythonEnv = audioGenerationPythonEnv(repoRoot);
@@ -251965,7 +252002,19 @@ ${trimProcessText2(created.stderr || created.stdout)}`);
251965
252002
  }
251966
252003
  const packages = backendPackages(backend);
251967
252004
  onProgress?.({ stage: "setup", message: `Installing ${backend} audio-generation Python packages` });
251968
- const pip = await runProcess3(command, ["-m", "pip", "install", "--progress-bar", "on", "-U", "pip", ...packages], {
252005
+ const pipArgs = [
252006
+ "-m",
252007
+ "pip",
252008
+ "install",
252009
+ "--progress-bar",
252010
+ "on",
252011
+ "--prefer-binary",
252012
+ ...backend === "audiocraft" ? ["--only-binary", "av"] : [],
252013
+ "-U",
252014
+ "pip",
252015
+ ...packages
252016
+ ];
252017
+ const pip = await runProcess3(command, pipArgs, {
251969
252018
  cwd: repoRoot,
251970
252019
  timeoutMs: 18e5,
251971
252020
  env: pythonEnv,
@@ -251974,18 +252023,20 @@ ${trimProcessText2(created.stderr || created.stdout)}`);
251974
252023
  });
251975
252024
  if (pip.code !== 0) {
251976
252025
  throw new Error(`Failed to install ${backend} audio-generation packages into ${venvDir}.
251977
- ${trimProcessText2(pip.stderr || pip.stdout)}`);
252026
+ ${formatAudioSetupFailure(backend, pip.stderr || pip.stdout)}`);
251978
252027
  }
251979
- if (!await pythonCanImport2(command, backendImportCheck(backend), repoRoot, pythonEnv)) {
251980
- throw new Error(`Audio-generation Python environment at ${venvDir} was created, but required ${backend} imports still fail.`);
252028
+ const importCheck = await pythonImportResult(command, backendImportCheck(backend), repoRoot, pythonEnv);
252029
+ if (importCheck.code !== 0) {
252030
+ throw new Error(`Audio-generation Python environment at ${venvDir} was created, but required ${backend} imports still fail.
252031
+ ${formatAudioSetupFailure(backend, importCheck.stderr || importCheck.stdout)}`);
251981
252032
  }
251982
252033
  return { command, env: pythonEnv };
251983
252034
  }
251984
252035
  async function ensureAudioRunner(repoRoot, backend) {
251985
252036
  const dir = audioGenerationDir(repoRoot);
251986
252037
  await mkdir12(dir, { recursive: true });
251987
- const script = backend === "audiocraft" ? join37(dir, "audiocraft_audio.py") : backend === "stable-audio" ? join37(dir, "stable_audio.py") : backend === "tangoflux" ? join37(dir, "tangoflux_audio.py") : join37(dir, "diffusers_audio.py");
251988
- const body = backend === "audiocraft" ? AUDIOCRAFT_RUNNER : DIFFUSERS_AUDIO_RUNNER;
252038
+ const script = backend === "transformers" ? join37(dir, "transformers_audio.py") : backend === "audiocraft" ? join37(dir, "audiocraft_audio.py") : backend === "stable-audio" ? join37(dir, "stable_audio.py") : backend === "tangoflux" ? join37(dir, "tangoflux_audio.py") : join37(dir, "diffusers_audio.py");
252039
+ const body = backend === "transformers" ? TRANSFORMERS_AUDIO_RUNNER : backend === "audiocraft" ? AUDIOCRAFT_RUNNER : DIFFUSERS_AUDIO_RUNNER;
251989
252040
  await writeFile17(script, body, "utf8");
251990
252041
  await chmod4(script, 493).catch(() => {
251991
252042
  });
@@ -252040,7 +252091,7 @@ function getAudioGenerationPreset(model, kind) {
252040
252091
  }
252041
252092
  function inferAudioGenerationBackend(model, requested) {
252042
252093
  if (requested && requested !== "auto") {
252043
- if (requested === "diffusers" || requested === "audiocraft" || requested === "stable-audio" || requested === "tangoflux" || requested === "project")
252094
+ if (requested === "diffusers" || requested === "transformers" || requested === "audiocraft" || requested === "stable-audio" || requested === "tangoflux" || requested === "project")
252044
252095
  return requested;
252045
252096
  }
252046
252097
  const preset = getAudioGenerationPreset(model);
@@ -252049,7 +252100,9 @@ function inferAudioGenerationBackend(model, requested) {
252049
252100
  if (!model)
252050
252101
  return "auto";
252051
252102
  const lowered = model.toLowerCase();
252052
- if (lowered.includes("musicgen") || lowered.includes("audiogen"))
252103
+ if (lowered.includes("musicgen"))
252104
+ return "transformers";
252105
+ if (lowered.includes("audiogen"))
252053
252106
  return "audiocraft";
252054
252107
  if (lowered.includes("stable-audio"))
252055
252108
  return "stable-audio";
@@ -252071,7 +252124,7 @@ function audioGenerationSetupPlan(kind, backend, repoRoot = ".", model) {
252071
252124
  title: `Diffusers ${kind} generation`,
252072
252125
  commands: [
252073
252126
  `python3 -m venv ${venvDir}`,
252074
- `${venvPython(venvDir)} -m pip install -U pip ${DIFFUSERS_AUDIO_PACKAGES.join(" ")}`,
252127
+ `${venvPython(venvDir)} -m pip install --prefer-binary -U pip ${DIFFUSERS_AUDIO_PACKAGES.join(" ")}`,
252075
252128
  `omnius /${commandName} "cinematic rain on a neon street" --backend diffusers --model ${chosen}`
252076
252129
  ],
252077
252130
  notes: [
@@ -252081,6 +252134,24 @@ function audioGenerationSetupPlan(kind, backend, repoRoot = ".", model) {
252081
252134
  ]
252082
252135
  };
252083
252136
  }
252137
+ if (backend === "transformers") {
252138
+ const venvDir = audioGenerationVenvDir(repoRoot, "transformers");
252139
+ return {
252140
+ kind,
252141
+ backend,
252142
+ title: `Transformers ${kind} generation`,
252143
+ commands: [
252144
+ `python3 -m venv ${venvDir}`,
252145
+ `${venvPython(venvDir)} -m pip install --prefer-binary -U pip ${TRANSFORMERS_AUDIO_PACKAGES.join(" ")}`,
252146
+ `omnius /${commandName} "warm analog synth arpeggio, slow drums" --backend transformers --model ${chosen}`
252147
+ ],
252148
+ notes: [
252149
+ "Use this path for MusicGen without requiring AudioCraft/PyAV/libavformat system headers.",
252150
+ "The venv, Hugging Face cache, Torch cache, and pip cache stay under .omnius/audio-gen.",
252151
+ "First generation downloads model weights and then immediately generates the requested audio."
252152
+ ]
252153
+ };
252154
+ }
252084
252155
  if (backend === "audiocraft") {
252085
252156
  const venvDir = audioGenerationVenvDir(repoRoot, "audiocraft");
252086
252157
  return {
@@ -252089,12 +252160,14 @@ function audioGenerationSetupPlan(kind, backend, repoRoot = ".", model) {
252089
252160
  title: `AudioCraft ${kind} generation`,
252090
252161
  commands: [
252091
252162
  `python3 -m venv ${venvDir}`,
252092
- `${venvPython(venvDir)} -m pip install -U pip ${AUDIOCRAFT_PACKAGES.join(" ")}`,
252163
+ `${venvPython(venvDir)} -m pip install --prefer-binary --only-binary av -U pip ${AUDIOCRAFT_PACKAGES.join(" ")}`,
252093
252164
  `omnius /${commandName} "warm analog synth arpeggio, slow drums" --backend audiocraft --model ${chosen}`
252094
252165
  ],
252095
252166
  notes: [
252096
- "Use this path for MusicGen and AudioGen models.",
252167
+ "Use this path for AudioCraft AudioGen models or explicit upstream AudioCraft testing.",
252097
252168
  "AudioCraft is a larger install; prefer CUDA and enough VRAM for the selected checkpoint.",
252169
+ "If PyAV has no wheel for this Python version, install FFmpeg development headers first: sudo apt install -y pkg-config ffmpeg libavformat-dev libavcodec-dev libavdevice-dev libavutil-dev libavfilter-dev libswscale-dev libswresample-dev.",
252170
+ "For MusicGen, the default Transformers backend avoids the PyAV/libavformat system-header path.",
252098
252171
  "First generation downloads model weights and saves WAV files under .omnius/audio."
252099
252172
  ]
252100
252173
  };
@@ -252107,7 +252180,7 @@ function audioGenerationSetupPlan(kind, backend, repoRoot = ".", model) {
252107
252180
  title: `Stable Audio ${kind} generation`,
252108
252181
  commands: [
252109
252182
  `python3 -m venv ${venvDir}`,
252110
- `${venvPython(venvDir)} -m pip install -U pip ${STABLE_AUDIO_PACKAGES.join(" ")}`,
252183
+ `${venvPython(venvDir)} -m pip install --prefer-binary -U pip ${STABLE_AUDIO_PACKAGES.join(" ")}`,
252111
252184
  `omnius /${commandName} "high fidelity stereo ${kind} bed, detailed and clean" --backend stable-audio --model ${chosen}`
252112
252185
  ],
252113
252186
  notes: [
@@ -252124,7 +252197,7 @@ function audioGenerationSetupPlan(kind, backend, repoRoot = ".", model) {
252124
252197
  title: "TangoFlux sound generation",
252125
252198
  commands: [
252126
252199
  `python3 -m venv ${venvDir}`,
252127
- `${venvPython(venvDir)} -m pip install -U pip ${TANGOFLUX_PACKAGES.join(" ")}`,
252200
+ `${venvPython(venvDir)} -m pip install --prefer-binary -U pip ${TANGOFLUX_PACKAGES.join(" ")}`,
252128
252201
  `omnius /sound "fast whoosh impact with metallic tail" --backend tangoflux --model ${chosen}`
252129
252202
  ],
252130
252203
  notes: [
@@ -252147,7 +252220,7 @@ function audioGenerationSetupPlan(kind, backend, repoRoot = ".", model) {
252147
252220
  ]
252148
252221
  };
252149
252222
  }
252150
- var DEFAULT_SOUND_MODEL, DEFAULT_MUSIC_MODEL, DIFFUSERS_AUDIO_PACKAGES, AUDIOCRAFT_PACKAGES, STABLE_AUDIO_PACKAGES, TANGOFLUX_PACKAGES, AUDIO_GENERATION_MODEL_PRESETS, DIFFUSERS_AUDIO_RUNNER, AUDIOCRAFT_RUNNER, AudioGenerateTool;
252223
+ var DEFAULT_SOUND_MODEL, DEFAULT_MUSIC_MODEL, DIFFUSERS_AUDIO_PACKAGES, TRANSFORMERS_AUDIO_PACKAGES, AUDIOCRAFT_PACKAGES, STABLE_AUDIO_PACKAGES, TANGOFLUX_PACKAGES, AUDIO_GENERATION_MODEL_PRESETS, DIFFUSERS_AUDIO_RUNNER, AUDIOCRAFT_RUNNER, TRANSFORMERS_AUDIO_RUNNER, AudioGenerateTool;
252151
252224
  var init_audio_generate = __esm({
252152
252225
  "packages/execution/dist/tools/audio-generate.js"() {
252153
252226
  "use strict";
@@ -252163,6 +252236,16 @@ var init_audio_generate = __esm({
252163
252236
  "soundfile",
252164
252237
  "librosa"
252165
252238
  ];
252239
+ TRANSFORMERS_AUDIO_PACKAGES = [
252240
+ "torch",
252241
+ "torchaudio",
252242
+ "transformers",
252243
+ "accelerate",
252244
+ "scipy",
252245
+ "soundfile",
252246
+ "sentencepiece",
252247
+ "protobuf"
252248
+ ];
252166
252249
  AUDIOCRAFT_PACKAGES = [
252167
252250
  "torch",
252168
252251
  "torchaudio",
@@ -252190,7 +252273,7 @@ var init_audio_generate = __esm({
252190
252273
  label: "AudioLDM S-Full v2",
252191
252274
  kind: "sound",
252192
252275
  backend: "diffusers",
252193
- install: 'python .omnius/audio-gen/diffusers_audio.py --kind sound --model cvssp/audioldm-s-full-v2 --duration 8 --prompt "..." --output .omnius/audio/out.wav',
252276
+ install: 'python3 .omnius/audio-gen/diffusers_audio.py --kind sound --model cvssp/audioldm-s-full-v2 --duration 8 --prompt "..." --output .omnius/audio/out.wav',
252194
252277
  category: "Default practical sound effects",
252195
252278
  sizeClass: "Small text-to-audio latent diffusion",
252196
252279
  quality: "Good practical baseline for SFX, ambience, foley, and quick local validation.",
@@ -252208,7 +252291,7 @@ var init_audio_generate = __esm({
252208
252291
  label: "AudioLDM 2",
252209
252292
  kind: "sound",
252210
252293
  backend: "diffusers",
252211
- install: 'python .omnius/audio-gen/diffusers_audio.py --kind sound --model cvssp/audioldm2 --duration 10 --prompt "..." --output .omnius/audio/out.wav',
252294
+ install: 'python3 .omnius/audio-gen/diffusers_audio.py --kind sound --model cvssp/audioldm2 --duration 10 --prompt "..." --output .omnius/audio/out.wav',
252212
252295
  category: "General sound effects",
252213
252296
  sizeClass: "Text-to-audio latent diffusion",
252214
252297
  quality: "More capable general sound synthesis than AudioLDM S; slower at high step counts.",
@@ -252226,7 +252309,7 @@ var init_audio_generate = __esm({
252226
252309
  label: "AudioLDM 2 Large",
252227
252310
  kind: "sound",
252228
252311
  backend: "diffusers",
252229
- install: 'python .omnius/audio-gen/diffusers_audio.py --kind sound --model cvssp/audioldm2-large --duration 10 --prompt "..." --output .omnius/audio/out.wav',
252312
+ install: 'python3 .omnius/audio-gen/diffusers_audio.py --kind sound --model cvssp/audioldm2-large --duration 10 --prompt "..." --output .omnius/audio/out.wav',
252230
252313
  category: "High quality sound effects",
252231
252314
  sizeClass: "Large text-to-audio latent diffusion",
252232
252315
  quality: "Higher ceiling than AudioLDM 2 base, with slower sampling and more VRAM pressure.",
@@ -252244,7 +252327,7 @@ var init_audio_generate = __esm({
252244
252327
  label: "AudioGen Medium",
252245
252328
  kind: "sound",
252246
252329
  backend: "audiocraft",
252247
- install: 'python .omnius/audio-gen/audiocraft_audio.py --kind sound --model facebook/audiogen-medium --duration 8 --prompt "..." --output .omnius/audio/out.wav',
252330
+ install: 'python3 .omnius/audio-gen/audiocraft_audio.py --kind sound --model facebook/audiogen-medium --duration 8 --prompt "..." --output .omnius/audio/out.wav',
252248
252331
  category: "AudioCraft sound effects",
252249
252332
  sizeClass: "AudioCraft text-to-sound",
252250
252333
  quality: "Strong text-conditioned non-music sound effects; good for foley and environments.",
@@ -252261,7 +252344,7 @@ var init_audio_generate = __esm({
252261
252344
  label: "TANGO 2",
252262
252345
  kind: "sound",
252263
252346
  backend: "project",
252264
- install: "git clone https://github.com/declare-lab/tango .omnius/audio-gen/projects/tango && cd .omnius/audio-gen/projects/tango && python -m pip install -r requirements.txt",
252347
+ install: "git clone https://github.com/declare-lab/tango .omnius/audio-gen/projects/tango && cd .omnius/audio-gen/projects/tango && python3 -m pip install -r requirements.txt",
252265
252348
  category: "Research sound effects",
252266
252349
  sizeClass: "DPO-aligned text-to-audio research stack",
252267
252350
  quality: "Good prompt alignment target, but less turnkey than Diffusers or AudioCraft.",
@@ -252278,7 +252361,7 @@ var init_audio_generate = __esm({
252278
252361
  label: "TangoFlux",
252279
252362
  kind: "sound",
252280
252363
  backend: "tangoflux",
252281
- install: 'python .omnius/audio-gen/tangoflux_audio.py --model declare-lab/TangoFlux --duration 8 --prompt "..." --output .omnius/audio/out.wav',
252364
+ install: 'python3 .omnius/audio-gen/tangoflux_audio.py --model declare-lab/TangoFlux --duration 8 --prompt "..." --output .omnius/audio/out.wav',
252282
252365
  category: "Fast sound effects",
252283
252366
  sizeClass: "Fast text-to-audio generation",
252284
252367
  quality: "Fast SFX and ambience iteration, with newer-stack stability risks.",
@@ -252295,7 +252378,7 @@ var init_audio_generate = __esm({
252295
252378
  label: "Stable Audio Open 1.0",
252296
252379
  kind: "sound",
252297
252380
  backend: "stable-audio",
252298
- install: 'python .omnius/audio-gen/stable_audio.py --kind sound --model stabilityai/stable-audio-open-1.0 --duration 20 --prompt "..." --output .omnius/audio/out.wav',
252381
+ install: 'python3 .omnius/audio-gen/stable_audio.py --kind sound --model stabilityai/stable-audio-open-1.0 --duration 20 --prompt "..." --output .omnius/audio/out.wav',
252299
252382
  category: "High-quality long audio",
252300
252383
  sizeClass: "Stereo 44.1 kHz diffusion audio",
252301
252384
  quality: "High-fidelity stereo generation, useful for longer designed sounds and music-like clips.",
@@ -252312,16 +252395,16 @@ var init_audio_generate = __esm({
252312
252395
  id: DEFAULT_MUSIC_MODEL,
252313
252396
  label: "MusicGen Small",
252314
252397
  kind: "music",
252315
- backend: "audiocraft",
252316
- install: 'python .omnius/audio-gen/audiocraft_audio.py --kind music --model facebook/musicgen-small --duration 20 --prompt "..." --output .omnius/audio/out.wav',
252398
+ backend: "transformers",
252399
+ install: 'python3 .omnius/audio-gen/transformers_audio.py --kind music --model facebook/musicgen-small --duration 20 --prompt "..." --output .omnius/audio/out.wav',
252317
252400
  category: "Default practical music",
252318
- sizeClass: "Small AudioCraft music model",
252401
+ sizeClass: "Small Transformers MusicGen model",
252319
252402
  quality: "Fast, practical music sketches with lower fidelity and complexity than medium/large models.",
252320
252403
  output: "Short mono/stereo music sketches depending on checkpoint.",
252321
252404
  bestUse: "Default /music smoke test and quick ideas.",
252322
252405
  minVramGB: 6,
252323
252406
  recommendedVramGB: 8,
252324
- deployment: "AudioCraft path; fastest usable MusicGen option.",
252407
+ deployment: "Transformers path; fastest usable MusicGen option.",
252325
252408
  defaultDurationSec: 20,
252326
252409
  note: "Practical default music generation model."
252327
252410
  },
@@ -252329,16 +252412,16 @@ var init_audio_generate = __esm({
252329
252412
  id: "facebook/musicgen-medium",
252330
252413
  label: "MusicGen Medium",
252331
252414
  kind: "music",
252332
- backend: "audiocraft",
252333
- install: 'python .omnius/audio-gen/audiocraft_audio.py --kind music --model facebook/musicgen-medium --duration 20 --prompt "..." --output .omnius/audio/out.wav',
252415
+ backend: "transformers",
252416
+ install: 'python3 .omnius/audio-gen/transformers_audio.py --kind music --model facebook/musicgen-medium --duration 20 --prompt "..." --output .omnius/audio/out.wav',
252334
252417
  category: "Higher quality MusicGen",
252335
- sizeClass: "Medium AudioCraft music model",
252418
+ sizeClass: "Medium Transformers MusicGen model",
252336
252419
  quality: "Better arrangement and texture than small; more VRAM and latency.",
252337
252420
  output: "Short music clips.",
252338
252421
  bestUse: "Local music generation when 12-16GB VRAM is available.",
252339
252422
  minVramGB: 10,
252340
252423
  recommendedVramGB: 16,
252341
- deployment: "AudioCraft path; good quality/footprint balance.",
252424
+ deployment: "Transformers path; good quality/footprint balance.",
252342
252425
  defaultDurationSec: 20,
252343
252426
  note: "Balanced MusicGen quality target."
252344
252427
  },
@@ -252346,16 +252429,16 @@ var init_audio_generate = __esm({
252346
252429
  id: "facebook/musicgen-large",
252347
252430
  label: "MusicGen Large",
252348
252431
  kind: "music",
252349
- backend: "audiocraft",
252350
- install: 'python .omnius/audio-gen/audiocraft_audio.py --kind music --model facebook/musicgen-large --duration 20 --prompt "..." --output .omnius/audio/out.wav',
252432
+ backend: "transformers",
252433
+ install: 'python3 .omnius/audio-gen/transformers_audio.py --kind music --model facebook/musicgen-large --duration 20 --prompt "..." --output .omnius/audio/out.wav',
252351
252434
  category: "Higher quality MusicGen",
252352
- sizeClass: "Large AudioCraft music model",
252435
+ sizeClass: "Large Transformers MusicGen model",
252353
252436
  quality: "Stronger musical structure and detail, with high memory pressure.",
252354
252437
  output: "Higher-quality short music clips.",
252355
252438
  bestUse: "High-VRAM local GPU or remote GPU music generation.",
252356
252439
  minVramGB: 16,
252357
252440
  recommendedVramGB: 24,
252358
- deployment: "AudioCraft path; expect large downloads and slower inference.",
252441
+ deployment: "Transformers path; expect large downloads and slower inference.",
252359
252442
  defaultDurationSec: 20,
252360
252443
  note: "Large MusicGen baseline."
252361
252444
  },
@@ -252363,16 +252446,16 @@ var init_audio_generate = __esm({
252363
252446
  id: "facebook/musicgen-melody",
252364
252447
  label: "MusicGen Melody",
252365
252448
  kind: "music",
252366
- backend: "audiocraft",
252367
- install: 'python .omnius/audio-gen/audiocraft_audio.py --kind music --model facebook/musicgen-melody --duration 20 --prompt "..." --output .omnius/audio/out.wav',
252449
+ backend: "transformers",
252450
+ install: 'python3 .omnius/audio-gen/transformers_audio.py --kind music --model facebook/musicgen-melody --duration 20 --prompt "..." --output .omnius/audio/out.wav',
252368
252451
  category: "Conditioned music",
252369
- sizeClass: "Melody-conditioned AudioCraft music model",
252452
+ sizeClass: "Melody-conditioned Transformers MusicGen model",
252370
252453
  quality: "Useful when a reference melody path is added to the generation flow.",
252371
252454
  output: "Music clips guided by text and optional melody conditioning.",
252372
252455
  bestUse: "Future melody-conditioned workflows; useful to pre-deploy now.",
252373
252456
  minVramGB: 10,
252374
252457
  recommendedVramGB: 16,
252375
- deployment: "AudioCraft path; melody conditioning needs an additional reference audio argument.",
252458
+ deployment: "Transformers path; melody conditioning needs an additional reference audio argument.",
252376
252459
  defaultDurationSec: 20,
252377
252460
  note: "MusicGen variant for melody-conditioned generation."
252378
252461
  },
@@ -252380,16 +252463,16 @@ var init_audio_generate = __esm({
252380
252463
  id: "facebook/musicgen-stereo-large",
252381
252464
  label: "MusicGen Stereo Large",
252382
252465
  kind: "music",
252383
- backend: "audiocraft",
252384
- install: 'python .omnius/audio-gen/audiocraft_audio.py --kind music --model facebook/musicgen-stereo-large --duration 20 --prompt "..." --output .omnius/audio/out.wav',
252466
+ backend: "transformers",
252467
+ install: 'python3 .omnius/audio-gen/transformers_audio.py --kind music --model facebook/musicgen-stereo-large --duration 20 --prompt "..." --output .omnius/audio/out.wav',
252385
252468
  category: "Higher quality MusicGen",
252386
- sizeClass: "Large stereo AudioCraft music model",
252469
+ sizeClass: "Large stereo Transformers MusicGen model",
252387
252470
  quality: "Stereo large MusicGen path; best MusicGen quality listed here but heavy.",
252388
252471
  output: "Stereo music clips.",
252389
252472
  bestUse: "High-VRAM machines where stereo output matters.",
252390
252473
  minVramGB: 20,
252391
252474
  recommendedVramGB: 32,
252392
- deployment: "AudioCraft path; prefer high-VRAM local GPUs or remote workers.",
252475
+ deployment: "Transformers path; prefer high-VRAM local GPUs or remote workers.",
252393
252476
  defaultDurationSec: 20,
252394
252477
  note: "Heavy stereo MusicGen option."
252395
252478
  },
@@ -252398,7 +252481,7 @@ var init_audio_generate = __esm({
252398
252481
  label: "Stable Audio Open 1.0",
252399
252482
  kind: "music",
252400
252483
  backend: "stable-audio",
252401
- install: 'python .omnius/audio-gen/stable_audio.py --kind music --model stabilityai/stable-audio-open-1.0 --duration 30 --prompt "..." --output .omnius/audio/out.wav',
252484
+ install: 'python3 .omnius/audio-gen/stable_audio.py --kind music --model stabilityai/stable-audio-open-1.0 --duration 30 --prompt "..." --output .omnius/audio/out.wav',
252402
252485
  category: "Primary serious music baseline",
252403
252486
  sizeClass: "Stereo 44.1 kHz diffusion audio",
252404
252487
  quality: "Primary serious open music/audio baseline for higher-fidelity stereo clips.",
@@ -252416,7 +252499,7 @@ var init_audio_generate = __esm({
252416
252499
  label: "MAGNeT AudioCraft Profile",
252417
252500
  kind: "music",
252418
252501
  backend: "project",
252419
- install: "python -m pip install -U audiocraft && inspect the AudioCraft MAGNeT examples before enabling generation",
252502
+ install: "python3 -m pip install -U audiocraft && inspect the AudioCraft MAGNeT examples before enabling generation",
252420
252503
  category: "Research music",
252421
252504
  sizeClass: "Non-autoregressive AudioCraft research model",
252422
252505
  quality: "Interesting fast music research path; less standard than MusicGen for production.",
@@ -252433,7 +252516,7 @@ var init_audio_generate = __esm({
252433
252516
  label: "JASCO AudioCraft Profile",
252434
252517
  kind: "music",
252435
252518
  backend: "project",
252436
- install: "python -m pip install -U audiocraft && inspect the AudioCraft JASCO examples before enabling generation",
252519
+ install: "python3 -m pip install -U audiocraft && inspect the AudioCraft JASCO examples before enabling generation",
252437
252520
  category: "Conditioned music",
252438
252521
  sizeClass: "Chord/melody/drum conditioned research model",
252439
252522
  quality: "Promising controlled music path when conditioning inputs are available.",
@@ -252467,7 +252550,7 @@ var init_audio_generate = __esm({
252467
252550
  label: "Riffusion v1",
252468
252551
  kind: "music",
252469
252552
  backend: "diffusers",
252470
- install: 'python .omnius/audio-gen/diffusers_audio.py --kind music --model riffusion/riffusion-model-v1 --duration 8 --prompt "..." --output .omnius/audio/out.wav',
252553
+ install: 'python3 .omnius/audio-gen/diffusers_audio.py --kind music --model riffusion/riffusion-model-v1 --duration 8 --prompt "..." --output .omnius/audio/out.wav',
252471
252554
  category: "Legacy/specialized music",
252472
252555
  sizeClass: "Spectrogram diffusion",
252473
252556
  quality: "Historically important and fun, but below MusicGen and Stable Audio Open for general quality.",
@@ -252641,19 +252724,100 @@ def main():
252641
252724
  torchaudio.save(str(out), wav, model.sample_rate)
252642
252725
  print(json.dumps({"ok": True, "path": str(out), "model": args.model, "backend": "audiocraft", "sample_rate": model.sample_rate, "seconds": round(time.perf_counter() - t0, 3)}))
252643
252726
 
252727
+ if __name__ == "__main__":
252728
+ main()
252729
+ `;
252730
// Python runner script for the "transformers" audio backend (Hugging Face MusicGen).
// CLI flags: --kind, --model, --prompt, --output, --duration, --seed, --prewarm.
// Progress events are printed to stderr as JSON objects carrying "omnius_progress": true;
// the final result is a single JSON object printed to stdout.
TRANSFORMERS_AUDIO_RUNNER = String.raw`#!/usr/bin/env python3
import argparse, json, sys, time
from pathlib import Path

def _progress(stage, message, percent=None):
    # Progress goes to stderr so stdout only ever carries the final JSON result.
    payload = {"omnius_progress": True, "stage": stage, "message": message}
    if percent is not None:
        payload["percent"] = percent
    print(json.dumps(payload), file=sys.stderr, flush=True)

def _device():
    import torch
    if torch.cuda.is_available():
        return "cuda"
    if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
        return "mps"
    return "cpu"

def _write_wav(path, sample_rate, audio):
    import numpy as np
    from scipy.io.wavfile import write
    arr = np.asarray(audio)
    if arr.ndim > 2:
        arr = arr.squeeze()
    # scipy expects (samples, channels); transpose channels-first input.
    if arr.ndim == 2 and arr.shape[0] < arr.shape[1]:
        arr = arr.T
    arr = np.nan_to_num(arr)
    peak = float(np.max(np.abs(arr))) if arr.size else 0.0
    if peak > 1.0:
        arr = arr / peak
    arr_i16 = (np.clip(arr, -1.0, 1.0) * 32767).astype(np.int16)
    write(str(path), sample_rate, arr_i16)

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--kind", choices=["sound", "music"], required=True)
    parser.add_argument("--model", required=True)
    parser.add_argument("--prompt", required=True)
    parser.add_argument("--output", required=True)
    parser.add_argument("--duration", type=float, default=20)
    parser.add_argument("--seed", type=int, default=None)
    parser.add_argument("--prewarm", action="store_true")
    args = parser.parse_args()

    t0 = time.perf_counter()
    import torch
    from transformers import AutoProcessor, MusicgenForConditionalGeneration

    device = _device()
    dtype = torch.float16 if device == "cuda" else torch.float32
    _progress("load", f"loading MusicGen model {args.model}")
    processor = AutoProcessor.from_pretrained(args.model)
    model = MusicgenForConditionalGeneration.from_pretrained(args.model, torch_dtype=dtype)
    model = model.to(device)
    sample_rate = int(getattr(model.config.audio_encoder, "sampling_rate", 32000))
    _progress("load", f"model loaded on {device}")

    if args.prewarm:
        _progress("load", f"prewarmed {args.model} on {device}", 100)
        print(json.dumps({"ok": True, "path": "", "model": args.model, "backend": "transformers", "sample_rate": sample_rate, "prewarm": True, "seconds": round(time.perf_counter() - t0, 3)}))
        return

    if args.seed is not None:
        torch.manual_seed(args.seed)
    inputs = processor(text=[args.prompt], padding=True, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}
    # MusicGen uses roughly 50 generated tokens per second of audio.
    max_new_tokens = max(32, min(1536, int(args.duration * 50)))
    _progress("generate", f"generating about {args.duration}s audio with {max_new_tokens} tokens")
    with torch.no_grad():
        audio_values = model.generate(**inputs, max_new_tokens=max_new_tokens)
    # Keep every channel of the first batch item (channels, samples) so stereo
    # checkpoints are saved as stereo; _write_wav transposes for scipy.
    audio = audio_values[0].detach().cpu().float().numpy()
    out = Path(args.output)
    out.parent.mkdir(parents=True, exist_ok=True)
    _progress("save", f"saving audio to {out}")
    _write_wav(out, sample_rate, audio)
    print(json.dumps({"ok": True, "path": str(out), "model": args.model, "backend": "transformers", "sample_rate": sample_rate, "seconds": round(time.perf_counter() - t0, 3)}))

if __name__ == "__main__":
    main()
`;
252647
252811
  AudioGenerateTool = class {
252648
252812
  name = "generate_audio";
252649
- description = "Generate a sound effect or music clip from a text prompt using local audio-generation backends. Supports Diffusers AudioLDM/AudioLDM2, AudioCraft MusicGen/AudioGen, Stable Audio Open deployment paths, and explicit research-project profiles. Saves WAV files under .omnius/audio and returns the file path.";
252813
+ description = "Generate a sound effect or music clip from a text prompt using local audio-generation backends. Supports Diffusers AudioLDM/AudioLDM2, Transformers MusicGen, AudioCraft AudioGen, Stable Audio Open deployment paths, and explicit research-project profiles. Saves WAV files under .omnius/audio and returns the file path.";
252650
252814
  parameters = {
252651
252815
  type: "object",
252652
252816
  properties: {
252653
252817
  kind: { type: "string", enum: ["sound", "music"], description: "Generate a sound effect/ambience clip or a music clip" },
252654
252818
  prompt: { type: "string", description: "Text description of the audio to generate" },
252655
252819
  model: { type: "string", description: "Audio model id, e.g. cvssp/audioldm2 or facebook/musicgen-small" },
252656
- backend: { type: "string", enum: ["auto", "diffusers", "audiocraft", "stable-audio", "tangoflux", "project"] },
252820
+ backend: { type: "string", enum: ["auto", "diffusers", "transformers", "audiocraft", "stable-audio", "tangoflux", "project"] },
252657
252821
  duration: { type: "number", description: "Clip length in seconds" },
252658
252822
  steps: { type: "number", description: "Diffusion sampling steps when supported" },
252659
252823
  seed: { type: "number", description: "Optional random seed" }
@@ -252764,9 +252928,9 @@ if __name__ == "__main__":
252764
252928
  };
252765
252929
  }
252766
252930
  if (action === "setup") {
252767
- const requested = String(args["backend"] ?? (kind === "music" ? this.defaults.musicBackend : this.defaults.soundBackend) ?? "diffusers");
252931
+ const requested = String(args["backend"] ?? (kind === "music" ? this.defaults.musicBackend : this.defaults.soundBackend) ?? (kind === "music" ? "transformers" : "diffusers"));
252768
252932
  const backend2 = inferAudioGenerationBackend(typeof args["model"] === "string" ? args["model"] : void 0, requested);
252769
- const resolvedBackend = backend2 === "auto" ? "diffusers" : backend2;
252933
+ const resolvedBackend = backend2 === "auto" ? kind === "music" ? "transformers" : "diffusers" : backend2;
252770
252934
  const plan = audioGenerationSetupPlan(kind, resolvedBackend, this.cwd, typeof args["model"] === "string" ? args["model"] : void 0);
252771
252935
  return {
252772
252936
  success: true,
@@ -252787,7 +252951,7 @@ if __name__ == "__main__":
252787
252951
  const requestedModel2 = rawModel2 === "auto" ? void 0 : rawModel2;
252788
252952
  let backend2 = inferAudioGenerationBackend(requestedModel2, args["backend"] ? String(args["backend"]) : defaultBackend2);
252789
252953
  if (backend2 === "auto")
252790
- backend2 = kind === "music" ? "audiocraft" : "diffusers";
252954
+ backend2 = kind === "music" ? "transformers" : "diffusers";
252791
252955
  const model2 = requestedModel2 ?? (kind === "music" ? DEFAULT_MUSIC_MODEL : DEFAULT_SOUND_MODEL);
252792
252956
  const preset2 = getAudioGenerationPreset(model2, kind);
252793
252957
  const duration2 = numberArg2(args["duration"], preset2?.defaultDurationSec ?? (kind === "music" ? 20 : 8));
@@ -252810,7 +252974,7 @@ if __name__ == "__main__":
252810
252974
  return await this.prewarmPythonBackend({
252811
252975
  kind,
252812
252976
  backend: backend2,
252813
- runnerBackend: backend2 === "audiocraft" ? "audiocraft" : backend2 === "stable-audio" ? "stable-audio" : backend2 === "tangoflux" ? "tangoflux" : "diffusers",
252977
+ runnerBackend: backend2,
252814
252978
  model: model2,
252815
252979
  duration: duration2,
252816
252980
  start: start2,
@@ -252827,7 +252991,7 @@ if __name__ == "__main__":
252827
252991
  const requestedModel = rawModel === "auto" ? void 0 : rawModel;
252828
252992
  let backend = inferAudioGenerationBackend(requestedModel, args["backend"] ? String(args["backend"]) : defaultBackend);
252829
252993
  if (backend === "auto")
252830
- backend = kind === "music" ? "audiocraft" : "diffusers";
252994
+ backend = kind === "music" ? "transformers" : "diffusers";
252831
252995
  const model = requestedModel ?? (kind === "music" ? DEFAULT_MUSIC_MODEL : DEFAULT_SOUND_MODEL);
252832
252996
  const preset = getAudioGenerationPreset(model, kind);
252833
252997
  const duration = numberArg2(args["duration"], preset?.defaultDurationSec ?? (kind === "music" ? 20 : 8));
@@ -252853,6 +253017,9 @@ if __name__ == "__main__":
252853
253017
  if (backend === "tangoflux") {
252854
253018
  return await this.generateWithPythonBackend({ kind, backend, runnerBackend: "tangoflux", prompt, model, duration, steps, seed, start: start2, python: args["python"] });
252855
253019
  }
253020
+ if (backend === "transformers") {
253021
+ return await this.generateWithPythonBackend({ kind, backend, runnerBackend: "transformers", prompt, model, duration, steps, seed, start: start2, python: args["python"] });
253022
+ }
252856
253023
  if (backend === "audiocraft") {
252857
253024
  return await this.generateWithPythonBackend({ kind, backend, runnerBackend: "audiocraft", prompt, model, duration, steps, seed, start: start2, python: args["python"] });
252858
253025
  }
@@ -252902,8 +253069,9 @@ if __name__ == "__main__":
252902
253069
  "--duration",
252903
253070
  String(args.duration)
252904
253071
  ];
252905
- if (args.steps !== void 0 && args.runnerBackend !== "audiocraft")
253072
+ if (args.steps !== void 0 && (args.runnerBackend === "diffusers" || args.runnerBackend === "stable-audio" || args.runnerBackend === "tangoflux")) {
252906
253073
  argv.push("--steps", String(args.steps));
253074
+ }
252907
253075
  if (args.seed !== void 0)
252908
253076
  argv.push("--seed", String(args.seed));
252909
253077
  this.emitProgress({ stage: "load", message: `Starting ${args.kind} generation with ${args.model}` });
@@ -476611,7 +476779,7 @@ var require_path_browserify = __commonJS({
476611
476779
  return path11.slice(start2, end);
476612
476780
  }
476613
476781
  },
476614
- extname: function extname14(path11) {
476782
+ extname: function extname15(path11) {
476615
476783
  assertPath(path11);
476616
476784
  var startDot = -1;
476617
476785
  var startPart = 0;
@@ -506772,10 +506940,82 @@ Saved to: ${tempFile}`,
506772
506940
  });
506773
506941
 
506774
506942
  // packages/execution/dist/tools/audio-playback.js
506775
- import { execSync as execSync29, spawn as spawn16 } from "node:child_process";
506943
+ import { execFileSync as execFileSync2, execSync as execSync29, spawn as spawn16 } from "node:child_process";
506776
506944
  import { existsSync as existsSync40, statSync as statSync18, writeFileSync as writeFileSync16, readFileSync as readFileSync31, unlinkSync as unlinkSync8, mkdirSync as mkdirSync16 } from "node:fs";
506777
- import { join as join58 } from "node:path";
506945
+ import { extname as extname10, join as join58 } from "node:path";
506778
506946
  import { homedir as homedir14, tmpdir as tmpdir11 } from "node:os";
506947
/**
 * Report whether a command with the given name can be resolved on this machine.
 *
 * On Windows the lookup is delegated to `where`. Elsewhere it first asks a
 * POSIX shell (`command -v`) and then falls back to `which`. Each probe is
 * capped at two seconds and produces no output.
 *
 * Note: `command -v` also succeeds for shell builtins and keywords, so a name
 * such as "echo" is reported as available even where no standalone binary exists.
 *
 * @param {string} command - Bare command name to look up, e.g. "ffplay".
 * @returns {boolean} true when a probe resolves the name; false otherwise,
 *   including for empty or non-string input.
 */
function hasCommand3(command) {
  if (typeof command !== "string" || command.length === 0) {
    return false;
  }
  const probeOptions = { stdio: "ignore", timeout: 2e3 };
  const probes = process.platform === "win32"
    ? [["where", [command]]]
    : [
        // `command` is a shell builtin rather than a program, so spawning it
        // directly fails with ENOENT on most systems. Run it inside `sh` and
        // pass the name as a positional argument so it is never parsed as shell code.
        ["sh", ["-c", 'command -v "$1"', "sh", command]],
        ["which", [command]]
      ];
  for (const [program, programArgs] of probes) {
    try {
      execFileSync2(program, programArgs, probeOptions);
      return true;
    } catch {
      // Probe failed (non-zero exit, timeout, or the probe tool is missing);
      // move on to the next lookup strategy.
    }
  }
  return false;
}
506967
/**
 * Choose a command-line player for an audio file.
 *
 * Candidates are tried in a fixed order: `afplay` (macOS only), `ffplay`,
 * `mpv`, a PowerShell MediaPlayer script (Windows only), then `pw-play` and
 * `paplay` for WAV/FLAC/OGA/OGG, and finally `aplay` for WAV.
 *
 * @param {string} file - Path of the audio file to play.
 * @param {string} [device="default"] - Output device; only passed to `aplay`.
 * @returns {{command: string, args: string[], label: string} | null}
 *   The program, its argument vector and a display label, or null when no
 *   suitable player is found.
 */
function playbackCommandFor(file, device = "default") {
  const ext = extname10(file).toLowerCase();
  if (process.platform === "darwin" && hasCommand3("afplay")) {
    return { command: "afplay", args: [file], label: "afplay" };
  }
  if (hasCommand3("ffplay")) {
    return { command: "ffplay", args: ["-nodisp", "-autoexit", "-loglevel", "error", file], label: "ffplay" };
  }
  if (hasCommand3("mpv")) {
    return { command: "mpv", args: ["--no-video", "--really-quiet", file], label: "mpv" };
  }
  if (process.platform === "win32") {
    // The path becomes part of PowerShell source text. Use a single-quoted
    // literal so `$name`, `$(...)` and backticks in a file name are not
    // expanded or executed. PowerShell also accepts the typographic quotes
    // U+2018..U+201B as single-quote delimiters, so double those as well.
    const quotedFile = `'${String(file).replace(/['\u2018\u2019\u201A\u201B]/g, "$&$&")}'`;
    return {
      command: "powershell.exe",
      args: [
        "-NoProfile",
        "-Command",
        `Add-Type -AssemblyName presentationCore; $p=New-Object System.Windows.Media.MediaPlayer; $p.Open([Uri]::new(${quotedFile})); Start-Sleep -Milliseconds 200; while($p.NaturalDuration.HasTimeSpan -eq $false){Start-Sleep -Milliseconds 100}; $p.Play(); Start-Sleep -Milliseconds ([int]$p.NaturalDuration.TimeSpan.TotalMilliseconds + 250)`
      ],
      label: "powershell MediaPlayer"
    };
  }
  // The remaining players are only tried for the formats listed here.
  const isLosslessOrOgg = ext === ".wav" || ext === ".flac" || ext === ".oga" || ext === ".ogg";
  if (isLosslessOrOgg && hasCommand3("pw-play")) {
    return { command: "pw-play", args: [file], label: "pw-play" };
  }
  if (isLosslessOrOgg && hasCommand3("paplay")) {
    return { command: "paplay", args: [file], label: "paplay" };
  }
  if (ext === ".wav" && hasCommand3("aplay")) {
    return { command: "aplay", args: ["-D", device, "-q", file], label: "aplay" };
  }
  return null;
}
507000
/**
 * Play a local audio file synchronously through whichever player
 * `playbackCommandFor` selects.
 *
 * @param {string} file - Path of the audio file to play.
 * @param {{device?: string, timeoutMs?: number}} [opts] - `device` is handed
 *   to the player selection (falls back to "default"); `timeoutMs` caps the
 *   playback process (defaults to 300000 ms).
 * @returns {{ok: true, player: string} | {ok: false, error: string}}
 *   A success record naming the player used, or a failure record with a
 *   message. Player failures are reported in the result rather than thrown.
 */
function playSoundFile(file, opts = {}) {
  if (!file) {
    return { ok: false, error: "Missing file path." };
  }
  if (!existsSync40(file)) {
    return { ok: false, error: `Audio file not found: ${file}` };
  }

  const player = playbackCommandFor(file, opts.device || "default");
  if (!player) {
    const error = "No terminal audio player found. Install ffmpeg/ffplay or mpv; WAV fallback can use pipewire (pw-play), PulseAudio (paplay), or ALSA (aplay).";
    return { ok: false, error };
  }

  try {
    // Blocks until the player exits or the timeout elapses.
    const spawnOptions = { timeout: opts.timeoutMs ?? 3e5, stdio: "pipe" };
    execFileSync2(player.command, player.args, spawnOptions);
  } catch (err) {
    const detail = err instanceof Error ? err.message : String(err);
    return { ok: false, error: `Playback via ${player.label} failed: ${detail.slice(0, 300)}` };
  }
  return { ok: true, player: player.label };
}
506779
507019
  function ensureLuxttsDaemon() {
506780
507020
  if (_luxttsDaemon && !_luxttsDaemon.killed && _luxttsReady)
506781
507021
  return Promise.resolve(true);
@@ -506886,7 +507126,7 @@ function luxttsSynthesize(text, cloneRef) {
506886
507126
  _luxttsDaemon.stdin.write(req2 + "\n");
506887
507127
  });
506888
507128
  }
506889
- var _luxttsDaemon, _luxttsReady, _luxttsRequestId, _luxttsPending, _luxttsBuffer, _luxttsStarting, AudioPlaybackTool;
507129
+ var _luxttsDaemon, _luxttsReady, _luxttsRequestId, _luxttsPending, _luxttsBuffer, _luxttsStarting, AudioPlaybackTool, SoundPlaybackTool;
506890
507130
  var init_audio_playback = __esm({
506891
507131
  "packages/execution/dist/tools/audio-playback.js"() {
506892
507132
  "use strict";
@@ -506965,21 +507205,13 @@ var init_audio_playback = __esm({
506965
507205
  const device = args["device"] || "default";
506966
507206
  const size = statSync18(file).size;
506967
507207
  const ext = file.split(".").pop()?.toLowerCase() || "";
506968
- let cmd;
506969
- if (ext === "wav") {
506970
- cmd = `aplay -D ${device} -q "${file}"`;
506971
- } else {
506972
- cmd = `ffplay -nodisp -autoexit -loglevel error "${file}"`;
506973
- }
506974
- try {
506975
- execSync29(cmd, { timeout: 3e5, stdio: "pipe" });
506976
- } catch (err) {
506977
- const msg = err instanceof Error ? err.message : String(err);
506978
- return { success: false, output: "", error: `Playback failed: ${msg.slice(0, 300)}`, durationMs: performance.now() - start2 };
507208
+ const played = playSoundFile(file, { device, timeoutMs: 3e5 });
507209
+ if (!played.ok) {
507210
+ return { success: false, output: "", error: played.error, durationMs: performance.now() - start2 };
506979
507211
  }
506980
507212
  return {
506981
507213
  success: true,
506982
- output: `Played ${file} (${Math.round(size / 1024)}KB ${ext.toUpperCase()}) on ${device}`,
507214
+ output: `Played ${file} (${Math.round(size / 1024)}KB ${ext.toUpperCase()}) via ${played.player}`,
506983
507215
  durationMs: performance.now() - start2
506984
507216
  };
506985
507217
  }
@@ -507146,6 +507378,51 @@ ${devices.join("\n")}`,
507146
507378
  };
507147
507379
  }
507148
507380
  };
507381
// Agent tool `play_sound`: validates its arguments, then hands a local audio
// file to playSoundFile() and reports which player handled it.
SoundPlaybackTool = class {
  name = "play_sound";
  description = "Play any local sound/audio file through the system speakers from the terminal. Supports generated WAV files and common audio formats such as MP3, OGG, OPUS, M4A, FLAC, and AAC when ffplay/mpv or system audio players are available. Use this immediately after generate_audio, /sound, or /music when the user wants to hear the generated file.";
  parameters = {
    type: "object",
    properties: {
      file: {
        type: "string",
        description: "Path to the local audio file to play"
      },
      device: {
        type: "string",
        description: "Optional output device. ALSA aplay accepts values like default or hw:0,0; other players ignore this."
      },
      timeout_ms: {
        type: "number",
        description: "Optional playback timeout in milliseconds; default 300000."
      }
    },
    required: ["file"]
  };
  /**
   * Play the requested file and describe the outcome.
   *
   * @param {object} args - Tool arguments: `file` (required string),
   *   `device` (optional string) and `timeout_ms` (optional number, clamped
   *   to the range 1000..3600000, default 300000).
   * @returns {Promise<{success: boolean, output: string, error?: string, durationMs: number}>}
   *   Never rejects for a missing/unreadable path; those are reported as
   *   `success: false` results.
   */
  async execute(args) {
    const start2 = performance.now();
    const input = args ?? {};
    const fail = (error) => ({ success: false, output: "", error, durationMs: performance.now() - start2 });
    const file = typeof input["file"] === "string" ? input["file"] : "";
    // An empty device string falls back to "default", like the
    // `args["device"] || "default"` fallback used by the other playback path
    // in this module.
    const device = typeof input["device"] === "string" && input["device"] ? input["device"] : "default";
    const requestedTimeout = input["timeout_ms"];
    const timeoutMs = typeof requestedTimeout === "number" && Number.isFinite(requestedTimeout) ? Math.max(1e3, Math.min(36e5, Math.round(requestedTimeout))) : 3e5;
    if (!file) {
      return fail("Missing 'file' parameter. Provide a local sound/audio file path.");
    }
    // A single stat replaces the former exists-then-stat pair: the file can no
    // longer disappear between the two calls and make execute() throw.
    let stats;
    try {
      stats = statSync18(file);
    } catch {
      return fail(`Audio file not found: ${file}`);
    }
    // A directory "exists" but can never be played; reject it here instead of
    // letting the external player produce an opaque error.
    if (stats.isDirectory()) {
      return fail(`Audio path is a directory, not a file: ${file}`);
    }
    const ext = extname10(file).replace(/^\./, "").toUpperCase() || "audio";
    const played = playSoundFile(file, { device, timeoutMs });
    if (!played.ok) {
      return fail(played.error);
    }
    return {
      success: true,
      output: `Played sound: ${file} (${Math.round(stats.size / 1024)}KB ${ext}) via ${played.player}`,
      durationMs: performance.now() - start2
    };
  }
};
507149
507426
  }
507150
507427
  });
507151
507428
 
@@ -514787,6 +515064,7 @@ __export(dist_exports, {
514787
515064
  SkillBuildTool: () => SkillBuildTool,
514788
515065
  SkillExecuteTool: () => SkillExecuteTool,
514789
515066
  SkillListTool: () => SkillListTool,
515067
+ SoundPlaybackTool: () => SoundPlaybackTool,
514790
515068
  StdioTransport: () => StdioTransport,
514791
515069
  StructuredFileTool: () => StructuredFileTool,
514792
515070
  StructuredReadTool: () => StructuredReadTool,
@@ -514906,6 +515184,7 @@ __export(dist_exports, {
514906
515184
  packetPath: () => packetPath,
514907
515185
  parseMcpMarkdown: () => parseMcpMarkdown,
514908
515186
  parseMcpToolName: () => parseMcpToolName,
515187
+ playSoundFile: () => playSoundFile,
514909
515188
  promoteWorkingNotes: () => promoteWorkingNotes,
514910
515189
  readPacket: () => readPacket,
514911
515190
  readProvenanceFile: () => readProvenanceFile,
@@ -530271,6 +530550,8 @@ var init_agenticRunner = __esm({
530271
530550
  "transcribe_file",
530272
530551
  "transcribe_url",
530273
530552
  "audio_playback",
530553
+ "play_sound",
530554
+ "generate_audio",
530274
530555
  "youtube_download"
530275
530556
  ]);
530276
530557
  SOCIAL_TOOLS = /* @__PURE__ */ new Set([
@@ -552084,7 +552365,7 @@ var init_command_registry = __esm({
552084
552365
  ["/music", "Open music-generation model/setup menu"],
552085
552366
  ["/music <prompt>", "Generate a music clip from a prompt"],
552086
552367
  ["/music --model <model> <prompt>", "Generate music with an explicit music model"],
552087
- ["/music setup <audiocraft|stable-audio|diffusers>", "Show setup commands for a music-generation backend"],
552368
+ ["/music setup <transformers|audiocraft|stable-audio|diffusers>", "Show setup commands for a music-generation backend"],
552088
552369
  ["/music list", "List music models by category, quality, size, and hardware fit"],
552089
552370
  ["/call", "Start voice call session (cloudflared tunnel + ASR/TTS)"],
552090
552371
  ["/hangup", "End active call session"],
@@ -569380,7 +569661,7 @@ var init_platforms = __esm({
569380
569661
 
569381
569662
  // packages/cli/src/tui/workspace-explorer.ts
569382
569663
  import { existsSync as existsSync88, readdirSync as readdirSync26, readFileSync as readFileSync72, statSync as statSync31 } from "node:fs";
569383
- import { basename as basename15, extname as extname11, join as join104, relative as relative10, resolve as resolve35 } from "node:path";
569664
+ import { basename as basename15, extname as extname12, join as join104, relative as relative10, resolve as resolve35 } from "node:path";
569384
569665
  function exploreWorkspace(root, options2 = {}) {
569385
569666
  const query = (options2.query ?? "").trim().toLowerCase();
569386
569667
  const maxResults = options2.maxResults ?? 80;
@@ -569493,7 +569774,7 @@ function previewWorkspaceFile(root, relPath, options2 = {}) {
569493
569774
  }
569494
569775
  function classifyWorkspaceFile(path11) {
569495
569776
  const lower = path11.toLowerCase();
569496
- const ext = extname11(lower);
569777
+ const ext = extname12(lower);
569497
569778
  if (lower.includes(".test.") || lower.includes(".spec.") || lower.includes("/tests/")) return "test";
569498
569779
  if (SOURCE_EXT.has(ext)) return "source";
569499
569780
  if (DOC_EXT2.has(ext)) return "doc";
@@ -569569,7 +569850,7 @@ var init_workspace_explorer = __esm({
569569
569850
 
569570
569851
  // packages/cli/src/tui/drop-panel.ts
569571
569852
  import { existsSync as existsSync89 } from "node:fs";
569572
- import { extname as extname12, resolve as resolve36 } from "node:path";
569853
+ import { extname as extname13, resolve as resolve36 } from "node:path";
569573
569854
  function ansi4(code8, text) {
569574
569855
  return isTTY4 ? `\x1B[${code8}m${text}\x1B[0m` : text;
569575
569856
  }
@@ -569695,7 +569976,7 @@ function showDropPanel(opts) {
569695
569976
  return;
569696
569977
  }
569697
569978
  if (allowedExtensions.length > 0) {
569698
- const ext = extname12(filePath).toLowerCase();
569979
+ const ext = extname13(filePath).toLowerCase();
569699
569980
  if (!allowedExtensions.includes(ext)) {
569700
569981
  errorMsg = `Invalid file type: ${ext}. Expected: ${allowedExtensions.join(", ")}`;
569701
569982
  render2();
@@ -574002,7 +574283,7 @@ __export(image_ascii_preview_exports, {
574002
574283
  extractSavedImagePath: () => extractSavedImagePath,
574003
574284
  formatImageAsciiContext: () => formatImageAsciiContext
574004
574285
  });
574005
- import { execFileSync as execFileSync2 } from "node:child_process";
574286
+ import { execFileSync as execFileSync3 } from "node:child_process";
574006
574287
  import { createRequire as createRequire4 } from "node:module";
574007
574288
  import { existsSync as existsSync94, readFileSync as readFileSync75, statSync as statSync32 } from "node:fs";
574008
574289
  import { resolve as resolve37 } from "node:path";
@@ -574139,7 +574420,7 @@ function convertWithFfmpeg(imagePath, width, height, timeoutMs) {
574139
574420
  `scale=${width}:${height}`,
574140
574421
  "format=gray"
574141
574422
  ].join(",");
574142
- const raw = execFileSync2(
574423
+ const raw = execFileSync3(
574143
574424
  "ffmpeg",
574144
574425
  [
574145
574426
  "-hide_banner",
@@ -584829,10 +585110,10 @@ function defaultAudioModel(kind) {
584829
585110
  return kind === "music" ? DEFAULT_MUSIC_MODEL : DEFAULT_SOUND_MODEL;
584830
585111
  }
584831
585112
  // Returns the backend name used when none is specified for an audio kind.
  // This hunk changes the "music" default from "audiocraft" (removed line) to
  // "transformers" (added line); every other kind still gets "diffusers".
  function defaultAudioBackend(kind) {
584832
- return kind === "music" ? "audiocraft" : "diffusers";
585113
+ return kind === "music" ? "transformers" : "diffusers";
584833
585114
  }
584834
585115
  // Returns `value2` unchanged when it is one of the recognized backend names;
  // any other value falls back to defaultAudioBackend(kind).
  // This hunk adds "transformers" to the recognized names (added line).
  function normalizeAudioBackend(value2, kind) {
584835
- if (value2 === "diffusers" || value2 === "audiocraft" || value2 === "stable-audio" || value2 === "tangoflux" || value2 === "project") return value2;
585116
+ if (value2 === "diffusers" || value2 === "transformers" || value2 === "audiocraft" || value2 === "stable-audio" || value2 === "tangoflux" || value2 === "project") return value2;
584836
585117
  return defaultAudioBackend(kind);
584837
585118
  }
584838
585119
  function audioBackendForModel(model, requested, kind) {
@@ -584920,7 +585201,8 @@ async function showAudioGenerationMenu(ctx3, hasLocal, kind) {
584920
585201
  const activeModel = activeAudioModel(settings, kind);
584921
585202
  const title = kind === "music" ? "Music Generation" : "Sound Generation";
584922
585203
  const setupItems = kind === "music" ? [
584923
- { key: "setup:audiocraft", label: "Setup AudioCraft", detail: "MusicGen small/medium/large" },
585204
+ { key: "setup:transformers", label: "Setup Transformers", detail: "Default MusicGen path; avoids AudioCraft/PyAV headers" },
585205
+ { key: "setup:audiocraft", label: "Setup AudioCraft", detail: "Optional MusicGen/AudioGen upstream runtime" },
584924
585206
  { key: "setup:stable-audio", label: "Setup Stable Audio", detail: "Stable Audio Open 1.0 serious stereo baseline" },
584925
585207
  { key: "setup:diffusers", label: "Setup Diffusers", detail: "AudioLDM/Riffusion-style paths" }
584926
585208
  ] : [
@@ -585050,7 +585332,17 @@ async function handleAudioGenerationCommand(ctx3, arg, hasLocal, kind) {
585050
585332
  }
585051
585333
  renderInfo(result.output);
585052
585334
  const fileMatch = result.output.match(/(?:Sound|Music) generated:\s+(.+)/);
585053
- if (fileMatch?.[1]) renderInfo(`File: ${fileMatch[1].trim()}`);
585335
+ if (fileMatch?.[1]) {
585336
+ const audioPath = fileMatch[1].trim();
585337
+ renderInfo(`File: ${audioPath}`);
585338
+ renderInfo(`Playing generated ${kind}...`);
585339
+ const playback = playSoundFile(audioPath);
585340
+ if (playback.ok) {
585341
+ renderInfo(`Audio playback complete via ${playback.player}.`);
585342
+ } else {
585343
+ renderWarning(`Audio playback failed: ${playback.error}`);
585344
+ }
585345
+ }
585054
585346
  return "handled";
585055
585347
  }
585056
585348
  function formatAudioGenerationProgress(event) {
@@ -595599,12 +595891,12 @@ __export(vision_ingress_exports, {
595599
595891
  queryVisionModel: () => queryVisionModel,
595600
595892
  runVisionIngress: () => runVisionIngress
595601
595893
  });
595602
- import { execFileSync as execFileSync3 } from "node:child_process";
595894
+ import { execFileSync as execFileSync4 } from "node:child_process";
595603
595895
  import { existsSync as existsSync105, readFileSync as readFileSync86, unlinkSync as unlinkSync20 } from "node:fs";
595604
595896
  import { join as join120 } from "node:path";
595605
595897
  function isTesseractAvailable() {
595606
595898
  try {
595607
- execFileSync3("tesseract", ["--version"], { stdio: "ignore", timeout: 3e3 });
595899
+ execFileSync4("tesseract", ["--version"], { stdio: "ignore", timeout: 3e3 });
595608
595900
  return true;
595609
595901
  } catch {
595610
595902
  return false;
@@ -595645,7 +595937,7 @@ function advancedOcr(imagePath) {
595645
595937
  for (const psm of psmModes) {
595646
595938
  const outFile = `${tmpBase}_psm${psm}`;
595647
595939
  try {
595648
- execFileSync3("tesseract", [
595940
+ execFileSync4("tesseract", [
595649
595941
  imagePath,
595650
595942
  outFile,
595651
595943
  "--psm",
@@ -623381,7 +623673,7 @@ var clipboard_media_exports = {};
623381
623673
  __export(clipboard_media_exports, {
623382
623674
  pasteClipboardImageToFile: () => pasteClipboardImageToFile
623383
623675
  });
623384
- import { execFileSync as execFileSync4, execSync as execSync58 } from "node:child_process";
623676
+ import { execFileSync as execFileSync5, execSync as execSync58 } from "node:child_process";
623385
623677
  import { mkdirSync as mkdirSync72, readFileSync as readFileSync99, rmSync as rmSync5, writeFileSync as writeFileSync67 } from "node:fs";
623386
623678
  import { join as join136 } from "node:path";
623387
623679
  function pasteClipboardImageToFile(repoRoot) {
@@ -623398,7 +623690,7 @@ function readClipboardImage() {
623398
623690
  try {
623399
623691
  execSync58("command -v pngpaste", { stdio: "ignore", timeout: 1e3 });
623400
623692
  const tmp = `/tmp/omnius-clipboard-${Date.now()}.png`;
623401
- execFileSync4("pngpaste", [tmp], { timeout: 3e3 });
623693
+ execFileSync5("pngpaste", [tmp], { timeout: 3e3 });
623402
623694
  const buffer2 = readFileSync99(tmp);
623403
623695
  try {
623404
623696
  rmSync5(tmp);
@@ -623418,7 +623710,7 @@ function readClipboardImage() {
623418
623710
  ];
623419
623711
  for (const attempt of attempts) {
623420
623712
  try {
623421
- const buffer2 = execFileSync4(attempt.cmd, attempt.args, { timeout: 3e3, maxBuffer: 25 * 1024 * 1024 });
623713
+ const buffer2 = execFileSync5(attempt.cmd, attempt.args, { timeout: 3e3, maxBuffer: 25 * 1024 * 1024 });
623422
623714
  if (buffer2.length > 0) return { buffer: buffer2, mime: attempt.mime, ext: attempt.ext };
623423
623715
  } catch {
623424
623716
  continue;
@@ -623435,7 +623727,7 @@ function readClipboardImage() {
623435
623727
  "$img.Save($ms,[Drawing.Imaging.ImageFormat]::Png);",
623436
623728
  "[Console]::OpenStandardOutput().Write($ms.ToArray(),0,$ms.Length)"
623437
623729
  ].join("");
623438
- const buffer2 = execFileSync4("powershell.exe", ["-NoProfile", "-Command", ps], {
623730
+ const buffer2 = execFileSync5("powershell.exe", ["-NoProfile", "-Command", ps], {
623439
623731
  timeout: 5e3,
623440
623732
  maxBuffer: 25 * 1024 * 1024
623441
623733
  });
@@ -623454,7 +623746,7 @@ var init_clipboard_media = __esm({
623454
623746
 
623455
623747
  // packages/cli/src/tui/interactive.ts
623456
623748
  import { cwd } from "node:process";
623457
- import { resolve as resolve44, join as join137, dirname as dirname38, extname as extname13, relative as relative14 } from "node:path";
623749
+ import { resolve as resolve44, join as join137, dirname as dirname38, extname as extname14, relative as relative14 } from "node:path";
623458
623750
  import { createRequire as createRequire7 } from "node:module";
623459
623751
  import { fileURLToPath as fileURLToPath18 } from "node:url";
623460
623752
  import {
@@ -623839,6 +624131,7 @@ function buildSubAgentTools(repoRoot, config) {
623839
624131
  new StructuredFileTool(repoRoot),
623840
624132
  // Audio
623841
624133
  new AudioPlaybackTool(),
624134
+ new SoundPlaybackTool(),
623842
624135
  new AudioCaptureTool(),
623843
624136
  new AudioAnalyzeTool(),
623844
624137
  new AsrListenTool(),
@@ -623993,6 +624286,7 @@ function buildTools(repoRoot, config, contextWindowSize, modelTier) {
623993
624286
  new CameraCaptureTool(),
623994
624287
  new AudioCaptureTool(),
623995
624288
  new AudioPlaybackTool(),
624289
+ new SoundPlaybackTool(),
623996
624290
  new WifiControlTool(),
623997
624291
  new BluetoothScanTool(),
623998
624292
  new SdrScanTool(),
@@ -624542,6 +624836,24 @@ async function renderAsciiPreviewForToolResult(toolName, output, repoRoot, write
624542
624836
  }
624543
624837
  }
624544
624838
  }
624839
/**
 * Extract the saved file path from audio-generation tool output.
 *
 * Looks for a "Sound generated: <path>" or "Music generated: <path>" line
 * (case-insensitive), trims it and strips one surrounding quote character on
 * each side.
 *
 * @param {string} output - Raw tool output text.
 * @param {string} repoRoot - Directory that relative paths are resolved against.
 * @returns {string|null} The resolved path, or null when the output is not a
 *   string or contains no such line.
 */
function extractGeneratedAudioPath(output, repoRoot) {
  if (typeof output !== "string") return null;
  const match = output.match(/(?:Sound|Music) generated:\s+([^\n\r]+)/i);
  const raw = match?.[1]?.trim().replace(/^["']|["']$/g, "");
  if (!raw) return null;
  // Only "~/" means the current user's home directory. Joining (rather than
  // String.replace) keeps "$" sequences in the home path from being treated
  // as replacement patterns.
  if (raw.startsWith("~/")) return join137(homedir46(), raw.slice(1));
  // resolve() returns an absolute `raw` as-is on every platform (including
  // drive-letter paths on Windows) and anchors everything else at repoRoot,
  // so absolute paths are never re-rooted under the repository by mistake.
  return resolve44(repoRoot, raw);
}
624845
/**
 * After a successful `generate_audio` tool call, play the generated file and
 * report progress through `writer`.
 *
 * Callers invoke this fire-and-forget, so every failure is turned into a
 * rendered warning instead of a rejected promise.
 *
 * @param {string} toolName - Name of the tool that produced `output`; anything
 *   other than "generate_audio" is ignored.
 * @param {string} output - Tool output expected to contain the generated path.
 * @param {string} repoRoot - Repository root used to resolve and shorten the path.
 * @param {(render: () => void) => void} writer - Receives a callback that
 *   renders one status line.
 * @returns {Promise<void>}
 */
async function playGeneratedAudioForToolResult(toolName, output, repoRoot, writer) {
  if (toolName !== "generate_audio" || !output) return;
  try {
    const audioPath = extractGeneratedAudioPath(output, repoRoot);
    if (!audioPath) return;
    // Show the path relative to the repo when it lives inside it, otherwise
    // show the full path.
    const relPath = relative14(repoRoot, audioPath);
    const shownPath = relPath.startsWith("..") ? audioPath : relPath;
    writer(() => renderInfo(`Playing generated audio: ${shownPath}`));
    // NOTE(review): the result is used without await, so playback runs
    // synchronously here and holds this turn of the event loop until it ends.
    const result = playSoundFile(audioPath);
    if (result.ok) {
      writer(() => renderInfo(`Audio playback complete via ${result.player}.`));
    } else {
      writer(() => renderWarning(`Audio playback failed: ${result.error}`));
    }
  } catch (err) {
    // Without this, an exception would surface as an unhandled promise
    // rejection because the caller discards the returned promise.
    const reason = err instanceof Error ? err.message : String(err);
    writer(() => renderWarning(`Audio playback failed: ${reason}`));
  }
}
624545
624857
  async function runSelfImprovementCycle(repoRoot) {
624546
624858
  try {
624547
624859
  const {
@@ -625697,8 +626009,9 @@ ${entry.fullContent}`
625697
626009
  }
625698
626010
  });
625699
626011
  }
625700
- if (event.success || event.toolName === "generate_image") {
626012
+ if (event.success) {
625701
626013
  void renderAsciiPreviewForToolResult(event.toolName, event.content ?? "", repoRoot, contentWrite);
626014
+ void playGeneratedAudioForToolResult(event.toolName, event.content ?? "", repoRoot, contentWrite);
625702
626015
  }
625703
626016
  if (voice?.enabled && voice.voiceMode === "voicechat" && _voiceChatSession2?.isActive && event.toolName === "task_complete") {
625704
626017
  const emoStateFinal = emotionEngine?.getState();
@@ -630719,7 +631032,7 @@ Execute this skill now. Follow the behavioral guidance above.`;
630719
631032
  const imgPath = resolve44(repoRoot, cleanPath);
630720
631033
  const imgBuffer = readFileSync100(imgPath);
630721
631034
  const base642 = imgBuffer.toString("base64");
630722
- const ext = extname13(cleanPath).toLowerCase();
631035
+ const ext = extname14(cleanPath).toLowerCase();
630723
631036
  const mime = ext === ".png" ? "image/png" : ext === ".gif" ? "image/gif" : ext === ".webp" ? "image/webp" : "image/jpeg";
630724
631037
  const asciiContext = await renderAsciiPreviewForImage(
630725
631038
  imgPath,
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.14",
3
+ "version": "1.0.15",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "omnius",
9
- "version": "1.0.14",
9
+ "version": "1.0.15",
10
10
  "bundleDependencies": [
11
11
  "image-to-ascii"
12
12
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.14",
3
+ "version": "1.0.15",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",