omnius 1.0.20 → 1.0.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -250375,6 +250375,22 @@ function optionalNumberArg(value2) {
250375
250375
  const n2 = Number(value2);
250376
250376
  return Number.isFinite(n2) ? n2 : void 0;
250377
250377
  }
250378
+ function booleanArg(value2, fallback) {
250379
+ if (typeof value2 === "boolean")
250380
+ return value2;
250381
+ if (typeof value2 === "string") {
250382
+ if (/^(1|true|yes|on)$/i.test(value2.trim()))
250383
+ return true;
250384
+ if (/^(0|false|no|off)$/i.test(value2.trim()))
250385
+ return false;
250386
+ }
250387
+ return fallback;
250388
+ }
250389
+ function generationFallbackEnabled(args) {
250390
+ if (booleanArg(args["strict_model"] ?? args["strictModel"] ?? args["strict"], false))
250391
+ return false;
250392
+ return booleanArg(args["fallback"] ?? args["allow_fallback"] ?? args["allowFallback"], true);
250393
+ }
250378
250394
  function isBackend(value2) {
250379
250395
  return value2 === "auto" || value2 === "ollama" || value2 === "diffusers" || value2 === "sdcpp";
250380
250396
  }
@@ -250383,6 +250399,9 @@ function getImageGenerationPreset(model) {
250383
250399
  return void 0;
250384
250400
  return IMAGE_GENERATION_MODEL_PRESETS.find((preset) => preset.id === model);
250385
250401
  }
250402
+ function imageGenerationQualityLadder() {
250403
+ return IMAGE_GENERATION_QUALITY_LADDER.map((id) => getImageGenerationPreset(id)).filter((preset) => Boolean(preset));
250404
+ }
250386
250405
  function inferImageGenerationBackend(model, requested) {
250387
250406
  if (requested && isBackend(requested))
250388
250407
  return requested;
@@ -250399,6 +250418,40 @@ function inferImageGenerationBackend(model, requested) {
250399
250418
  return "sdcpp";
250400
250419
  return "diffusers";
250401
250420
  }
250421
+ function imageCandidateFor(model, requestedBackend) {
250422
+ let backend = inferImageGenerationBackend(model, requestedBackend);
250423
+ if (backend === "auto")
250424
+ backend = "diffusers";
250425
+ return {
250426
+ model,
250427
+ backend,
250428
+ preset: getImageGenerationPreset(model)
250429
+ };
250430
+ }
250431
+ function imageGenerationFallbackCandidates(requestedModel, requestedBackend, allowFallback = true) {
250432
+ const ladder = imageGenerationQualityLadder();
250433
+ const candidates = [];
250434
+ const add2 = (candidate) => {
250435
+ const key = `${candidate.backend}:${candidate.model}`;
250436
+ if (!candidates.some((existing) => `${existing.backend}:${existing.model}` === key))
250437
+ candidates.push(candidate);
250438
+ };
250439
+ if (requestedModel) {
250440
+ add2(imageCandidateFor(requestedModel, requestedBackend));
250441
+ } else if (requestedBackend && requestedBackend !== "auto") {
250442
+ const firstForBackend = ladder.find((preset) => preset.backend === requestedBackend);
250443
+ add2(imageCandidateFor(firstForBackend?.id ?? (requestedBackend === "ollama" ? DEFAULT_OLLAMA_IMAGE_MODEL : DEFAULT_DIFFUSERS_IMAGE_MODEL), requestedBackend));
250444
+ } else if (!allowFallback) {
250445
+ add2(imageCandidateFor(DEFAULT_DIFFUSERS_IMAGE_MODEL, requestedBackend));
250446
+ }
250447
+ if (!allowFallback)
250448
+ return candidates.length ? candidates : [imageCandidateFor(DEFAULT_DIFFUSERS_IMAGE_MODEL, requestedBackend)];
250449
+ const primaryIndex = requestedModel ? ladder.findIndex((preset) => preset.id === requestedModel) : requestedBackend && requestedBackend !== "auto" ? ladder.findIndex((preset) => preset.backend === requestedBackend) : 0;
250450
+ const fallbackTail = primaryIndex >= 0 ? ladder.slice(primaryIndex) : ladder;
250451
+ for (const preset of fallbackTail)
250452
+ add2(imageCandidateFor(preset.id));
250453
+ return candidates;
250454
+ }
250402
250455
  function imageGenerationDir(repoRoot = ".") {
250403
250456
  return join36(repoRoot, ".omnius", "image-gen");
250404
250457
  }
@@ -250653,6 +250706,33 @@ function formatSuccessOutput(args) {
250653
250706
  ` Prompt: "${prompt}"`
250654
250707
  ].filter(Boolean).join("\n");
250655
250708
  }
250709
+ function summarizeToolResult(result) {
250710
+ return trimProcessText(String(result.error || result.output || "unknown error"), 700).replace(/\s+/g, " ").trim();
250711
+ }
250712
+ function formatImageAttempt(candidate, reason, index) {
250713
+ return `${index + 1}. ${candidate.model} [${candidate.backend}] - ${reason}`;
250714
+ }
250715
+ function formatImageFallbackFailure(failed) {
250716
+ return [
250717
+ "No image generation model in the fallback ladder completed successfully.",
250718
+ "Attempted, highest quality to lowest:",
250719
+ ...failed.map((attempt, index) => ` ${formatImageAttempt(attempt.candidate, attempt.reason, index)}`)
250720
+ ].join("\n");
250721
+ }
250722
+ function annotateImageFallbackSuccess(result, failed, winner) {
250723
+ if (failed.length === 0)
250724
+ return result;
250725
+ const prefix = [
250726
+ `Fallback ladder succeeded with ${winner.model} [${winner.backend}] after ${failed.length} failed attempt(s).`,
250727
+ "Failed attempts:",
250728
+ ...failed.map((attempt, index) => ` ${formatImageAttempt(attempt.candidate, attempt.reason, index)}`),
250729
+ ""
250730
+ ].join("\n");
250731
+ return {
250732
+ ...result,
250733
+ output: prefix + result.output
250734
+ };
250735
+ }
250656
250736
  function parseRunnerJson(stdout) {
250657
250737
  const lines = stdout.trim().split(/\r?\n/).reverse();
250658
250738
  for (const line of lines) {
@@ -250665,7 +250745,7 @@ function parseRunnerJson(stdout) {
250665
250745
  }
250666
250746
  return null;
250667
250747
  }
250668
- var DEFAULT_DIFFUSERS_IMAGE_MODEL, DEFAULT_OLLAMA_IMAGE_MODEL, DIFFUSERS_PYTHON_PACKAGES, SDCPP_PYTHON_PACKAGES, IMAGE_GENERATION_MODEL_PRESETS, OLLAMA_IMAGE_MODELS, DIFFUSERS_RUNNER, SDCPP_RUNNER, ImageGenerateTool;
250748
+ var DEFAULT_DIFFUSERS_IMAGE_MODEL, DEFAULT_OLLAMA_IMAGE_MODEL, DIFFUSERS_PYTHON_PACKAGES, SDCPP_PYTHON_PACKAGES, IMAGE_GENERATION_MODEL_PRESETS, IMAGE_GENERATION_QUALITY_LADDER, OLLAMA_IMAGE_MODELS, DIFFUSERS_RUNNER, SDCPP_RUNNER, ImageGenerateTool;
250669
250749
  var init_image_generate = __esm({
250670
250750
  "packages/execution/dist/tools/image-generate.js"() {
250671
250751
  "use strict";
@@ -250989,6 +251069,21 @@ var init_image_generate = __esm({
250989
251069
  note: "CPU/GGUF/checkpoint route; requires a local model path."
250990
251070
  }
250991
251071
  ];
251072
+ IMAGE_GENERATION_QUALITY_LADDER = [
251073
+ "black-forest-labs/FLUX.1-dev",
251074
+ "stabilityai/stable-diffusion-3.5-large",
251075
+ DEFAULT_OLLAMA_IMAGE_MODEL,
251076
+ "black-forest-labs/FLUX.1-schnell",
251077
+ "stabilityai/stable-diffusion-3.5-large-turbo",
251078
+ "Tongyi-MAI/Z-Image-Turbo",
251079
+ "black-forest-labs/FLUX.2-klein-4B",
251080
+ DEFAULT_DIFFUSERS_IMAGE_MODEL,
251081
+ "Efficient-Large-Model/Sana_Sprint_0.6B_1024px_diffusers",
251082
+ "SimianLuo/LCM_Dreamshaper_v7",
251083
+ "stabilityai/sd-turbo",
251084
+ "segmind/tiny-sd",
251085
+ "nota-ai/bk-sdm-tiny-2m"
251086
+ ];
250992
251087
  OLLAMA_IMAGE_MODELS = IMAGE_GENERATION_MODEL_PRESETS.filter((preset) => preset.backend === "ollama").map((preset) => preset.id);
250993
251088
  DIFFUSERS_RUNNER = String.raw`#!/usr/bin/env python3
250994
251089
  import argparse
@@ -251170,7 +251265,7 @@ if __name__ == "__main__":
251170
251265
  `;
251171
251266
  ImageGenerateTool = class {
251172
251267
  name = "generate_image";
251173
- description = "Generate an image from a text prompt using a local image-generation backend. Supports Ollama image models (x/z-image-turbo, x/flux2-klein), Python Diffusers models (SDXL Turbo default, FLUX.1 dev, SD3.5 Large, Tiny-SD, LCM, Sana Sprint), and stable-diffusion.cpp local checkpoints/GGUF. Saves a PNG under .omnius/images and returns the file path.";
251268
+ description = "Generate an image from a text prompt using a local image-generation backend. Supports Ollama image models (x/z-image-turbo, x/flux2-klein), Python Diffusers models (SDXL Turbo default, FLUX.1 dev, SD3.5 Large, Tiny-SD, LCM, Sana Sprint), and stable-diffusion.cpp local checkpoints/GGUF. When fallback is enabled, auto generation tries ranked high-quality candidates first and falls back to smaller models if setup, download, or generation fails. Saves a PNG under .omnius/images and returns the file path.";
251174
251269
  parameters = {
251175
251270
  type: "object",
251176
251271
  properties: {
@@ -251215,6 +251310,14 @@ if __name__ == "__main__":
251215
251310
  type: "string",
251216
251311
  enum: ["generate", "list_models", "setup"],
251217
251312
  description: "Optional utility action. Default is generate."
251313
+ },
251314
+ fallback: {
251315
+ type: "boolean",
251316
+ description: "Whether to try the ranked quality ladder if the selected model/backend fails. Defaults true."
251317
+ },
251318
+ strict_model: {
251319
+ type: "boolean",
251320
+ description: "When true, use only the requested model/backend and do not fall back. Defaults false."
251218
251321
  }
251219
251322
  },
251220
251323
  required: ["prompt"]
@@ -251257,7 +251360,7 @@ if __name__ == "__main__":
251257
251360
  if (action === "list_models") {
251258
251361
  return {
251259
251362
  success: true,
251260
- output: IMAGE_GENERATION_MODEL_PRESETS.map((preset2) => `${preset2.id} [${preset2.backend}] - ${preset2.note}`).join("\n"),
251363
+ output: IMAGE_GENERATION_MODEL_PRESETS.map((preset) => `${preset.id} [${preset.backend}] - ${preset.note}`).join("\n"),
251261
251364
  durationMs: performance.now() - start2
251262
251365
  };
251263
251366
  }
@@ -251281,19 +251384,8 @@ if __name__ == "__main__":
251281
251384
  const rawModel2 = args["model_path"] ? String(args["model_path"]) : args["model"] ? String(args["model"]) : this.defaultModel;
251282
251385
  const requestedModel2 = rawModel2 === "auto" ? void 0 : rawModel2;
251283
251386
  const requestedBackend2 = args["backend"] ? String(args["backend"]) : this.defaultBackend;
251284
- let backend = inferImageGenerationBackend(requestedModel2, requestedBackend2);
251285
- if (backend === "auto") {
251286
- backend = inferImageGenerationBackend(requestedModel2, void 0);
251287
- if (backend === "auto")
251288
- backend = "diffusers";
251289
- }
251290
- const model = requestedModel2 ?? (backend === "diffusers" ? DEFAULT_DIFFUSERS_IMAGE_MODEL : DEFAULT_OLLAMA_IMAGE_MODEL);
251291
- this.emitProgress({ stage: "setup", message: `Preparing image model ${model} (${backend})` });
251292
- if (backend === "ollama")
251293
- return await this.prewarmOllama({ model, start: start2 });
251294
- if (backend === "sdcpp")
251295
- return await this.prewarmSdCpp({ model, start: start2, python: args["python"] });
251296
- return await this.prewarmDiffusers({ model, start: start2, python: args["python"] });
251387
+ const candidates2 = imageGenerationFallbackCandidates(requestedModel2, requestedBackend2, generationFallbackEnabled(args));
251388
+ return await this.prewarmCandidateLadder({ candidates: candidates2, args, start: start2 });
251297
251389
  }
251298
251390
  const prompt = String(args["prompt"] ?? "").trim();
251299
251391
  if (!prompt) {
@@ -251302,31 +251394,10 @@ if __name__ == "__main__":
251302
251394
  const rawModel = args["model_path"] ? String(args["model_path"]) : args["model"] ? String(args["model"]) : this.defaultModel;
251303
251395
  const requestedModel = rawModel === "auto" ? void 0 : rawModel;
251304
251396
  const requestedBackend = args["backend"] ? String(args["backend"]) : this.defaultBackend;
251305
- const preset = getImageGenerationPreset(requestedModel);
251306
- const width = numberArg(args["width"], preset?.width ?? 1024);
251307
- const height = numberArg(args["height"], preset?.height ?? 1024);
251308
- const steps = optionalNumberArg(args["steps"]) ?? preset?.steps;
251309
- const guidance = optionalNumberArg(args["guidance"]) ?? preset?.guidance;
251310
251397
  const seed = optionalNumberArg(args["seed"]);
251398
+ const candidates = imageGenerationFallbackCandidates(requestedModel, requestedBackend, generationFallbackEnabled(args));
251311
251399
  try {
251312
- let backend = inferImageGenerationBackend(requestedModel, requestedBackend);
251313
- let model = requestedModel;
251314
- if (backend === "auto") {
251315
- backend = inferImageGenerationBackend(model, void 0);
251316
- if (backend === "auto")
251317
- backend = "diffusers";
251318
- }
251319
- if (!model) {
251320
- model = backend === "diffusers" ? DEFAULT_DIFFUSERS_IMAGE_MODEL : DEFAULT_OLLAMA_IMAGE_MODEL;
251321
- }
251322
- this.emitProgress({ stage: "setup", message: `Using image model ${model} (${backend})` });
251323
- if (backend === "ollama") {
251324
- return await this.generateWithOllama({ prompt, model, width, height, steps, start: start2 });
251325
- }
251326
- if (backend === "sdcpp") {
251327
- return await this.generateWithSdCpp({ prompt, model, width, height, steps, seed, start: start2, python: args["python"] });
251328
- }
251329
- return await this.generateWithDiffusers({ prompt, model, width, height, steps, guidance, seed, start: start2, python: args["python"] });
251400
+ return await this.generateCandidateLadder({ candidates, prompt, args, seed, start: start2 });
251330
251401
  } catch (err) {
251331
251402
  return {
251332
251403
  success: false,
@@ -251335,6 +251406,64 @@ if __name__ == "__main__":
251335
251406
  };
251336
251407
  }
251337
251408
  }
251409
+ async prewarmCandidateLadder(args) {
251410
+ const failed = [];
251411
+ for (let index = 0; index < args.candidates.length; index++) {
251412
+ const candidate = args.candidates[index];
251413
+ this.emitProgress({
251414
+ stage: "setup",
251415
+ message: `Preparing image model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
251416
+ });
251417
+ const result = candidate.backend === "ollama" ? await this.prewarmOllama({ model: candidate.model, start: args.start }) : candidate.backend === "sdcpp" ? await this.prewarmSdCpp({ model: candidate.model, start: args.start, python: args.args["python"] }) : await this.prewarmDiffusers({ model: candidate.model, start: args.start, python: args.args["python"] });
251418
+ if (result.success)
251419
+ return annotateImageFallbackSuccess(result, failed, candidate);
251420
+ failed.push({ candidate, reason: summarizeToolResult(result) });
251421
+ if (index < args.candidates.length - 1) {
251422
+ this.emitProgress({
251423
+ stage: "setup",
251424
+ message: `${candidate.model} failed; trying ${args.candidates[index + 1].model}`
251425
+ });
251426
+ }
251427
+ }
251428
+ const output = formatImageFallbackFailure(failed);
251429
+ return {
251430
+ success: false,
251431
+ output,
251432
+ error: output,
251433
+ durationMs: performance.now() - args.start
251434
+ };
251435
+ }
251436
+ async generateCandidateLadder(args) {
251437
+ const failed = [];
251438
+ for (let index = 0; index < args.candidates.length; index++) {
251439
+ const candidate = args.candidates[index];
251440
+ const width = numberArg(args.args["width"], candidate.preset?.width ?? 1024);
251441
+ const height = numberArg(args.args["height"], candidate.preset?.height ?? 1024);
251442
+ const steps = optionalNumberArg(args.args["steps"]) ?? candidate.preset?.steps;
251443
+ const guidance = optionalNumberArg(args.args["guidance"]) ?? candidate.preset?.guidance;
251444
+ this.emitProgress({
251445
+ stage: "setup",
251446
+ message: `Using image model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
251447
+ });
251448
+ const result = candidate.backend === "ollama" ? await this.generateWithOllama({ prompt: args.prompt, model: candidate.model, width, height, steps, start: args.start }) : candidate.backend === "sdcpp" ? await this.generateWithSdCpp({ prompt: args.prompt, model: candidate.model, width, height, steps, seed: args.seed, start: args.start, python: args.args["python"] }) : await this.generateWithDiffusers({ prompt: args.prompt, model: candidate.model, width, height, steps, guidance, seed: args.seed, start: args.start, python: args.args["python"] });
251449
+ if (result.success)
251450
+ return annotateImageFallbackSuccess(result, failed, candidate);
251451
+ failed.push({ candidate, reason: summarizeToolResult(result) });
251452
+ if (index < args.candidates.length - 1) {
251453
+ this.emitProgress({
251454
+ stage: "setup",
251455
+ message: `${candidate.model} failed; falling back to ${args.candidates[index + 1].model}`
251456
+ });
251457
+ }
251458
+ }
251459
+ const output = formatImageFallbackFailure(failed);
251460
+ return {
251461
+ success: false,
251462
+ output,
251463
+ error: output,
251464
+ durationMs: performance.now() - args.start
251465
+ };
251466
+ }
251338
251467
  async prewarmOllama(args) {
251339
251468
  const model = args.model || DEFAULT_OLLAMA_IMAGE_MODEL;
251340
251469
  if (await this.ollamaHasModel(model)) {
@@ -251830,7 +251959,7 @@ function backendImportCheck(backend) {
251830
251959
  if (backend === "audiocraft")
251831
251960
  return "import torch, torchaudio, audiocraft\nfrom audiocraft.models import MusicGen, AudioGen\n";
251832
251961
  if (backend === "stable-audio")
251833
- return "import torch, torchaudio, stable_audio_tools\n";
251962
+ return "import torch, torchaudio, diffusers, scipy\nfrom diffusers import StableAudioPipeline\n";
251834
251963
  if (backend === "tangoflux")
251835
251964
  return "import torch, torchaudio\nfrom tangoflux import TangoFluxInference\n";
251836
251965
  return "import torch, diffusers, scipy\nfrom diffusers import AudioLDMPipeline\n";
@@ -252160,11 +252289,31 @@ function playbackRequested(args) {
252160
252289
  return false;
252161
252290
  return true;
252162
252291
  }
252292
+ function booleanArg2(value2, fallback) {
252293
+ if (typeof value2 === "boolean")
252294
+ return value2;
252295
+ if (typeof value2 === "string") {
252296
+ if (/^(1|true|yes|on)$/i.test(value2.trim()))
252297
+ return true;
252298
+ if (/^(0|false|no|off)$/i.test(value2.trim()))
252299
+ return false;
252300
+ }
252301
+ return fallback;
252302
+ }
252303
+ function generationFallbackEnabled2(args) {
252304
+ if (booleanArg2(args["strict_model"] ?? args["strictModel"] ?? args["strict"], false))
252305
+ return false;
252306
+ return booleanArg2(args["fallback"] ?? args["allow_fallback"] ?? args["allowFallback"], true);
252307
+ }
252163
252308
  function getAudioGenerationPreset(model, kind) {
252164
252309
  if (!model)
252165
252310
  return void 0;
252166
252311
  return AUDIO_GENERATION_MODEL_PRESETS.find((preset) => preset.id === model && (!kind || preset.kind === kind)) ?? AUDIO_GENERATION_MODEL_PRESETS.find((preset) => preset.id === model);
252167
252312
  }
252313
+ function audioGenerationQualityLadder(kind) {
252314
+ const ids = kind === "music" ? MUSIC_GENERATION_QUALITY_LADDER : SOUND_GENERATION_QUALITY_LADDER;
252315
+ return ids.map((id) => getAudioGenerationPreset(id, kind)).filter((preset) => Boolean(preset));
252316
+ }
252168
252317
  function inferAudioGenerationBackend(model, requested) {
252169
252318
  if (requested && requested !== "auto") {
252170
252319
  if (requested === "diffusers" || requested === "transformers" || requested === "audiocraft" || requested === "stable-audio" || requested === "tangoflux" || requested === "project")
@@ -252188,6 +252337,41 @@ function inferAudioGenerationBackend(model, requested) {
252188
252337
  return "project";
252189
252338
  return "diffusers";
252190
252339
  }
252340
+ function audioCandidateFor(kind, model, requestedBackend) {
252341
+ const backend = inferAudioGenerationBackend(model, requestedBackend);
252342
+ const resolvedBackend = backend === "auto" ? kind === "music" ? "transformers" : "diffusers" : backend;
252343
+ return {
252344
+ kind,
252345
+ model,
252346
+ backend: resolvedBackend,
252347
+ preset: getAudioGenerationPreset(model, kind)
252348
+ };
252349
+ }
252350
+ function audioGenerationFallbackCandidates(kind, requestedModel, requestedBackend, allowFallback = true) {
252351
+ const ladder = audioGenerationQualityLadder(kind);
252352
+ const candidates = [];
252353
+ const add2 = (candidate) => {
252354
+ const key = `${candidate.kind}:${candidate.backend}:${candidate.model}`;
252355
+ if (!candidates.some((existing) => `${existing.kind}:${existing.backend}:${existing.model}` === key)) {
252356
+ candidates.push(candidate);
252357
+ }
252358
+ };
252359
+ if (requestedModel) {
252360
+ add2(audioCandidateFor(kind, requestedModel, requestedBackend));
252361
+ } else if (requestedBackend && requestedBackend !== "auto") {
252362
+ const firstForBackend = ladder.find((preset) => preset.backend === requestedBackend);
252363
+ add2(audioCandidateFor(kind, firstForBackend?.id ?? (kind === "music" ? DEFAULT_MUSIC_MODEL : DEFAULT_SOUND_MODEL), requestedBackend));
252364
+ } else if (!allowFallback) {
252365
+ add2(audioCandidateFor(kind, kind === "music" ? DEFAULT_MUSIC_MODEL : DEFAULT_SOUND_MODEL, requestedBackend));
252366
+ }
252367
+ if (!allowFallback)
252368
+ return candidates.length ? candidates : [audioCandidateFor(kind, kind === "music" ? DEFAULT_MUSIC_MODEL : DEFAULT_SOUND_MODEL, requestedBackend)];
252369
+ const primaryIndex = requestedModel ? ladder.findIndex((preset) => preset.id === requestedModel) : requestedBackend && requestedBackend !== "auto" ? ladder.findIndex((preset) => preset.backend === requestedBackend) : 0;
252370
+ const fallbackTail = primaryIndex >= 0 ? ladder.slice(primaryIndex) : ladder;
252371
+ for (const preset of fallbackTail)
252372
+ add2(audioCandidateFor(kind, preset.id));
252373
+ return candidates;
252374
+ }
252191
252375
  function audioGenerationSetupPlan(kind, backend, repoRoot = ".", model) {
252192
252376
  const commandName = kind === "music" ? "music" : "sound";
252193
252377
  const fallback = kind === "music" ? DEFAULT_MUSIC_MODEL : DEFAULT_SOUND_MODEL;
@@ -252261,6 +252445,7 @@ function audioGenerationSetupPlan(kind, backend, repoRoot = ".", model) {
252261
252445
  ],
252262
252446
  notes: [
252263
252447
  "Use this path for Stable Audio Open 1.0, the serious stereo audio/music baseline.",
252448
+ "Omnius uses Diffusers StableAudioPipeline here; stable-audio-tools is intentionally not installed because it often pulls build-from-source dependencies.",
252264
252449
  "Expect larger model downloads and higher VRAM pressure than AudioLDM or MusicGen small."
252265
252450
  ]
252266
252451
  };
@@ -252296,7 +252481,34 @@ function audioGenerationSetupPlan(kind, backend, repoRoot = ".", model) {
252296
252481
  ]
252297
252482
  };
252298
252483
  }
252299
- var DEFAULT_SOUND_MODEL, DEFAULT_MUSIC_MODEL, DIFFUSERS_AUDIO_PACKAGES, TRANSFORMERS_AUDIO_PACKAGES, AUDIOCRAFT_PACKAGES, STABLE_AUDIO_PACKAGES, TANGOFLUX_PACKAGES, AUDIO_GENERATION_MODEL_PRESETS, DIFFUSERS_AUDIO_RUNNER, AUDIOCRAFT_RUNNER, TRANSFORMERS_AUDIO_RUNNER, TANGOFLUX_RUNNER, AudioGenerateTool;
252484
+ function summarizeToolResult2(result) {
252485
+ return trimProcessText2(String(result.error || result.output || "unknown error"), 700).replace(/\s+/g, " ").trim();
252486
+ }
252487
+ function formatAudioAttempt(candidate, reason, index) {
252488
+ return `${index + 1}. ${candidate.model} [${candidate.backend}] - ${reason}`;
252489
+ }
252490
+ function formatAudioFallbackFailure(kind, failed) {
252491
+ return [
252492
+ `No ${kind} generation model in the fallback ladder completed successfully.`,
252493
+ "Attempted, highest quality to lowest:",
252494
+ ...failed.map((attempt, index) => ` ${formatAudioAttempt(attempt.candidate, attempt.reason, index)}`)
252495
+ ].join("\n");
252496
+ }
252497
+ function annotateAudioFallbackSuccess(result, failed, winner) {
252498
+ if (failed.length === 0)
252499
+ return result;
252500
+ const prefix = [
252501
+ `Fallback ladder succeeded with ${winner.model} [${winner.backend}] after ${failed.length} failed attempt(s).`,
252502
+ "Failed attempts:",
252503
+ ...failed.map((attempt, index) => ` ${formatAudioAttempt(attempt.candidate, attempt.reason, index)}`),
252504
+ ""
252505
+ ].join("\n");
252506
+ return {
252507
+ ...result,
252508
+ output: prefix + result.output
252509
+ };
252510
+ }
252511
+ var DEFAULT_SOUND_MODEL, DEFAULT_MUSIC_MODEL, DIFFUSERS_AUDIO_PACKAGES, TRANSFORMERS_AUDIO_PACKAGES, AUDIOCRAFT_PACKAGES, STABLE_AUDIO_PACKAGES, TANGOFLUX_PACKAGES, AUDIO_GENERATION_MODEL_PRESETS, SOUND_GENERATION_QUALITY_LADDER, MUSIC_GENERATION_QUALITY_LADDER, DIFFUSERS_AUDIO_RUNNER, AUDIOCRAFT_RUNNER, TRANSFORMERS_AUDIO_RUNNER, TANGOFLUX_RUNNER, AudioGenerateTool;
252300
252512
  var init_audio_generate = __esm({
252301
252513
  "packages/execution/dist/tools/audio-generate.js"() {
252302
252514
  "use strict";
@@ -252338,7 +252550,6 @@ var init_audio_generate = __esm({
252338
252550
  "accelerate",
252339
252551
  "scipy",
252340
252552
  "soundfile",
252341
- "stable-audio-tools",
252342
252553
  "einops"
252343
252554
  ];
252344
252555
  TANGOFLUX_PACKAGES = [
@@ -252644,6 +252855,21 @@ var init_audio_generate = __esm({
252644
252855
  note: "Legacy specialized music-generation path."
252645
252856
  }
252646
252857
  ];
252858
+ SOUND_GENERATION_QUALITY_LADDER = [
252859
+ "stabilityai/stable-audio-open-1.0",
252860
+ "cvssp/audioldm2-large",
252861
+ "cvssp/audioldm2",
252862
+ "facebook/audiogen-medium",
252863
+ "declare-lab/TangoFlux",
252864
+ DEFAULT_SOUND_MODEL
252865
+ ];
252866
+ MUSIC_GENERATION_QUALITY_LADDER = [
252867
+ "stabilityai/stable-audio-open-1.0",
252868
+ "facebook/musicgen-stereo-large",
252869
+ "facebook/musicgen-large",
252870
+ "facebook/musicgen-medium",
252871
+ DEFAULT_MUSIC_MODEL
252872
+ ];
252647
252873
  DIFFUSERS_AUDIO_RUNNER = String.raw`#!/usr/bin/env python3
252648
252874
  import argparse, json, sys, time
252649
252875
  from pathlib import Path
@@ -253030,7 +253256,7 @@ if __name__ == "__main__":
253030
253256
  `;
253031
253257
  AudioGenerateTool = class {
253032
253258
  name = "generate_audio";
253033
- description = "Generate a sound effect or music clip from a text prompt using local audio-generation backends. Supports Diffusers AudioLDM/AudioLDM2, Transformers MusicGen, AudioCraft AudioGen, Stable Audio Open deployment paths, and explicit research-project profiles. Saves WAV files under .omnius/audio and returns the file path.";
253259
+ description = "Generate a sound effect or music clip from a text prompt using local audio-generation backends. Supports Diffusers AudioLDM/AudioLDM2, Transformers MusicGen, AudioCraft AudioGen, Stable Audio Open deployment paths, and explicit research-project profiles. When fallback is enabled, auto generation tries ranked high-quality candidates first and gracefully falls back to smaller models if setup, download, or generation fails. Saves WAV files under .omnius/audio and returns the file path.";
253034
253260
  parameters = {
253035
253261
  type: "object",
253036
253262
  properties: {
@@ -253044,6 +253270,14 @@ if __name__ == "__main__":
253044
253270
  playback: {
253045
253271
  type: "boolean",
253046
253272
  description: "Whether the TUI should play generated audio after saving it. Defaults true; set false for silent generation."
253273
+ },
253274
+ fallback: {
253275
+ type: "boolean",
253276
+ description: "Whether to try the ranked quality ladder if the selected model/backend fails. Defaults true."
253277
+ },
253278
+ strict_model: {
253279
+ type: "boolean",
253280
+ description: "When true, use only the requested model/backend and do not fall back. Defaults false."
253047
253281
  }
253048
253282
  },
253049
253283
  required: ["prompt"]
@@ -253147,14 +253381,14 @@ if __name__ == "__main__":
253147
253381
  if (action === "list_models") {
253148
253382
  return {
253149
253383
  success: true,
253150
- output: AUDIO_GENERATION_MODEL_PRESETS.filter((preset2) => preset2.kind === kind).map((preset2) => `${preset2.id} [${preset2.backend}] - ${preset2.note}`).join("\n"),
253384
+ output: AUDIO_GENERATION_MODEL_PRESETS.filter((preset) => preset.kind === kind).map((preset) => `${preset.id} [${preset.backend}] - ${preset.note}`).join("\n"),
253151
253385
  durationMs: performance.now() - start2
253152
253386
  };
253153
253387
  }
253154
253388
  if (action === "setup") {
253155
253389
  const requested = String(args["backend"] ?? (kind === "music" ? this.defaults.musicBackend : this.defaults.soundBackend) ?? (kind === "music" ? "transformers" : "diffusers"));
253156
- const backend2 = inferAudioGenerationBackend(typeof args["model"] === "string" ? args["model"] : void 0, requested);
253157
- const resolvedBackend = backend2 === "auto" ? kind === "music" ? "transformers" : "diffusers" : backend2;
253390
+ const backend = inferAudioGenerationBackend(typeof args["model"] === "string" ? args["model"] : void 0, requested);
253391
+ const resolvedBackend = backend === "auto" ? kind === "music" ? "transformers" : "diffusers" : backend;
253158
253392
  const plan = audioGenerationSetupPlan(kind, resolvedBackend, this.cwd, typeof args["model"] === "string" ? args["model"] : void 0);
253159
253393
  return {
253160
253394
  success: true,
@@ -253173,37 +253407,9 @@ if __name__ == "__main__":
253173
253407
  const defaultBackend2 = kind === "music" ? this.defaults.musicBackend : this.defaults.soundBackend;
253174
253408
  const rawModel2 = args["model"] ? String(args["model"]) : defaultModel2;
253175
253409
  const requestedModel2 = rawModel2 === "auto" ? void 0 : rawModel2;
253176
- let backend2 = inferAudioGenerationBackend(requestedModel2, args["backend"] ? String(args["backend"]) : defaultBackend2);
253177
- if (backend2 === "auto")
253178
- backend2 = kind === "music" ? "transformers" : "diffusers";
253179
- const model2 = requestedModel2 ?? (kind === "music" ? DEFAULT_MUSIC_MODEL : DEFAULT_SOUND_MODEL);
253180
- const preset2 = getAudioGenerationPreset(model2, kind);
253181
- const duration2 = numberArg2(args["duration"], preset2?.defaultDurationSec ?? (kind === "music" ? 20 : 8));
253182
- if (backend2 === "project") {
253183
- const plan = audioGenerationSetupPlan(kind, "project", this.cwd, model2);
253184
- return {
253185
- success: false,
253186
- output: [
253187
- `${preset2?.label ?? model2} is a project deployment profile, not an automatic generic runner.`,
253188
- "",
253189
- "Setup path:",
253190
- ...plan.commands.map((cmd) => ` ${cmd}`),
253191
- "",
253192
- ...plan.notes.map((note) => `- ${note}`)
253193
- ].join("\n"),
253194
- durationMs: performance.now() - start2
253195
- };
253196
- }
253197
- this.emitProgress({ stage: "setup", message: `Preparing ${kind} model ${model2} (${backend2})` });
253198
- return await this.prewarmPythonBackend({
253199
- kind,
253200
- backend: backend2,
253201
- runnerBackend: backend2,
253202
- model: model2,
253203
- duration: duration2,
253204
- start: start2,
253205
- python: args["python"]
253206
- });
253410
+ const requestedBackend2 = args["backend"] ? String(args["backend"]) : defaultBackend2;
253411
+ const candidates2 = audioGenerationFallbackCandidates(kind, requestedModel2, requestedBackend2, generationFallbackEnabled2(args));
253412
+ return await this.prewarmCandidateLadder({ kind, candidates: candidates2, args, start: start2 });
253207
253413
  }
253208
253414
  const prompt = String(args["prompt"] ?? "").trim();
253209
253415
  if (!prompt) {
@@ -253213,45 +253419,12 @@ if __name__ == "__main__":
253213
253419
  const defaultBackend = kind === "music" ? this.defaults.musicBackend : this.defaults.soundBackend;
253214
253420
  const rawModel = args["model"] ? String(args["model"]) : defaultModel;
253215
253421
  const requestedModel = rawModel === "auto" ? void 0 : rawModel;
253216
- let backend = inferAudioGenerationBackend(requestedModel, args["backend"] ? String(args["backend"]) : defaultBackend);
253217
- if (backend === "auto")
253218
- backend = kind === "music" ? "transformers" : "diffusers";
253219
- const model = requestedModel ?? (kind === "music" ? DEFAULT_MUSIC_MODEL : DEFAULT_SOUND_MODEL);
253220
- const preset = getAudioGenerationPreset(model, kind);
253221
- const duration = numberArg2(args["duration"], preset?.defaultDurationSec ?? (kind === "music" ? 20 : 8));
253222
- const steps = optionalNumberArg2(args["steps"]) ?? preset?.defaultSteps;
253422
+ const requestedBackend = args["backend"] ? String(args["backend"]) : defaultBackend;
253423
+ const candidates = audioGenerationFallbackCandidates(kind, requestedModel, requestedBackend, generationFallbackEnabled2(args));
253223
253424
  const seed = optionalNumberArg2(args["seed"]);
253224
253425
  const playback = playbackRequested(args);
253225
253426
  try {
253226
- this.emitProgress({ stage: "setup", message: `Using ${kind} model ${model} (${backend})` });
253227
- if (backend === "project") {
253228
- const plan = audioGenerationSetupPlan(kind, "project", this.cwd, model);
253229
- return {
253230
- success: false,
253231
- output: [
253232
- `${preset?.label ?? model} is a project deployment profile, not an automatic generic runner.`,
253233
- "",
253234
- "Setup path:",
253235
- ...plan.commands.map((cmd) => ` ${cmd}`),
253236
- "",
253237
- ...plan.notes.map((note) => `- ${note}`)
253238
- ].join("\n"),
253239
- durationMs: performance.now() - start2
253240
- };
253241
- }
253242
- if (backend === "tangoflux") {
253243
- return await this.generateWithPythonBackend({ kind, backend, runnerBackend: "tangoflux", prompt, model, duration, steps, seed, playback, start: start2, python: args["python"] });
253244
- }
253245
- if (backend === "transformers") {
253246
- return await this.generateWithPythonBackend({ kind, backend, runnerBackend: "transformers", prompt, model, duration, steps, seed, playback, start: start2, python: args["python"] });
253247
- }
253248
- if (backend === "audiocraft") {
253249
- return await this.generateWithPythonBackend({ kind, backend, runnerBackend: "audiocraft", prompt, model, duration, steps, seed, playback, start: start2, python: args["python"] });
253250
- }
253251
- if (backend === "stable-audio") {
253252
- return await this.generateWithPythonBackend({ kind, backend, runnerBackend: "stable-audio", prompt, model, duration, steps, seed, playback, start: start2, python: args["python"] });
253253
- }
253254
- return await this.generateWithPythonBackend({ kind, backend: "diffusers", runnerBackend: "diffusers", prompt, model, duration, steps, seed, playback, start: start2, python: args["python"] });
253427
+ return await this.generateCandidateLadder({ kind, candidates, prompt, args, seed, playback, start: start2 });
253255
253428
  } catch (err) {
253256
253429
  return {
253257
253430
  success: false,
@@ -253260,6 +253433,96 @@ if __name__ == "__main__":
253260
253433
  };
253261
253434
  }
253262
253435
  }
253436
+ async prewarmCandidateLadder(args) {
253437
+ const failed = [];
253438
+ for (let index = 0; index < args.candidates.length; index++) {
253439
+ const candidate = args.candidates[index];
253440
+ const duration = numberArg2(args.args["duration"], candidate.preset?.defaultDurationSec ?? (args.kind === "music" ? 20 : 8));
253441
+ this.emitProgress({
253442
+ stage: "setup",
253443
+ message: `Preparing ${args.kind} model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
253444
+ });
253445
+ const result = candidate.backend === "project" ? this.projectProfileResult(args.kind, candidate, args.start) : await this.prewarmPythonBackend({
253446
+ kind: args.kind,
253447
+ backend: candidate.backend,
253448
+ runnerBackend: candidate.backend,
253449
+ model: candidate.model,
253450
+ duration,
253451
+ start: args.start,
253452
+ python: args.args["python"]
253453
+ });
253454
+ if (result.success)
253455
+ return annotateAudioFallbackSuccess(result, failed, candidate);
253456
+ failed.push({ candidate, reason: summarizeToolResult2(result) });
253457
+ if (index < args.candidates.length - 1) {
253458
+ this.emitProgress({
253459
+ stage: "setup",
253460
+ message: `${candidate.model} failed; trying ${args.candidates[index + 1].model}`
253461
+ });
253462
+ }
253463
+ }
253464
+ return {
253465
+ success: false,
253466
+ output: formatAudioFallbackFailure(args.kind, failed),
253467
+ error: formatAudioFallbackFailure(args.kind, failed),
253468
+ durationMs: performance.now() - args.start
253469
+ };
253470
+ }
253471
+ async generateCandidateLadder(args) {
253472
+ const failed = [];
253473
+ for (let index = 0; index < args.candidates.length; index++) {
253474
+ const candidate = args.candidates[index];
253475
+ const duration = numberArg2(args.args["duration"], candidate.preset?.defaultDurationSec ?? (args.kind === "music" ? 20 : 8));
253476
+ const steps = optionalNumberArg2(args.args["steps"]) ?? candidate.preset?.defaultSteps;
253477
+ this.emitProgress({
253478
+ stage: "setup",
253479
+ message: `Using ${args.kind} model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
253480
+ });
253481
+ const result = candidate.backend === "project" ? this.projectProfileResult(args.kind, candidate, args.start) : await this.generateWithPythonBackend({
253482
+ kind: args.kind,
253483
+ backend: candidate.backend,
253484
+ runnerBackend: candidate.backend,
253485
+ prompt: args.prompt,
253486
+ model: candidate.model,
253487
+ duration,
253488
+ steps,
253489
+ seed: args.seed,
253490
+ playback: args.playback,
253491
+ start: args.start,
253492
+ python: args.args["python"]
253493
+ });
253494
+ if (result.success)
253495
+ return annotateAudioFallbackSuccess(result, failed, candidate);
253496
+ failed.push({ candidate, reason: summarizeToolResult2(result) });
253497
+ if (index < args.candidates.length - 1) {
253498
+ this.emitProgress({
253499
+ stage: "setup",
253500
+ message: `${candidate.model} failed; falling back to ${args.candidates[index + 1].model}`
253501
+ });
253502
+ }
253503
+ }
253504
+ return {
253505
+ success: false,
253506
+ output: formatAudioFallbackFailure(args.kind, failed),
253507
+ error: formatAudioFallbackFailure(args.kind, failed),
253508
+ durationMs: performance.now() - args.start
253509
+ };
253510
+ }
253511
+ projectProfileResult(kind, candidate, start2) {
253512
+ const plan = audioGenerationSetupPlan(kind, "project", this.cwd, candidate.model);
253513
+ return {
253514
+ success: false,
253515
+ output: [
253516
+ `${candidate.preset?.label ?? candidate.model} is a project deployment profile, not an automatic generic runner.`,
253517
+ "",
253518
+ "Setup path:",
253519
+ ...plan.commands.map((cmd) => ` ${cmd}`),
253520
+ "",
253521
+ ...plan.notes.map((note) => `- ${note}`)
253522
+ ].join("\n"),
253523
+ durationMs: performance.now() - start2
253524
+ };
253525
+ }
253263
253526
  async generateWithPythonBackend(args) {
253264
253527
  const runner = await ensureAudioRunner(this.cwd, args.runnerBackend);
253265
253528
  await mkdir12(audioOutputDir(this.cwd), { recursive: true });
@@ -507359,6 +507622,18 @@ function supertonicInferScript() {
507359
507622
  function mlxVenvPy() {
507360
507623
  return process.platform === "win32" ? join58(voiceDir(), "mlx-venv", "Scripts", "python.exe") : join58(voiceDir(), "mlx-venv", "bin", "python3");
507361
507624
  }
507625
/** Directory of the managed LuxTTS virtual environment under the voice directory. */
function luxttsVenvDir() {
  const voiceRoot = voiceDir();
  return join58(voiceRoot, "luxtts-venv");
}
507628
/**
 * Path of the Python interpreter inside the LuxTTS virtual environment;
 * uses the Scripts/python.exe layout on win32 and bin/python3 elsewhere.
 */
function luxttsVenvPy() {
  if (process.platform === "win32") {
    return join58(luxttsVenvDir(), "Scripts", "python.exe");
  }
  return join58(luxttsVenvDir(), "bin", "python3");
}
507631
/** Directory under the voice directory where the LuxTTS repository checkout lives. */
function luxttsRepoDir() {
  const voiceRoot = voiceDir();
  return join58(voiceRoot, "LuxTTS");
}
507634
/** Path of the LuxTTS inference script ("luxtts-infer.py") under the voice directory. */
function luxttsInferScript() {
  const voiceRoot = voiceDir();
  return join58(voiceRoot, "luxtts-infer.py");
}
507362
507637
  function piperVenvDir() {
507363
507638
  return join58(voiceDir(), "piper-venv");
507364
507639
  }
@@ -507385,7 +507660,7 @@ function ensureSupertonicInstalled() {
507385
507660
  }
507386
507661
  function ensureMlxInstalled() {
507387
507662
  if (process.platform !== "darwin" || process.arch !== "arm64") {
507388
- throw new Error("MLX TTS requires macOS on Apple Silicon. Use luxtts, supertonic, onnx/piper, or espeak on this machine.");
507663
+ throw new Error("MLX TTS requires macOS on Apple Silicon. Use luxtts, supertonic, onnx/piper, or backend=auto on this machine.");
507389
507664
  }
507390
507665
  const venvPy = mlxVenvPy();
507391
507666
  if (!existsSync40(venvPy)) {
@@ -507402,6 +507677,81 @@ function ensureMlxInstalled() {
507402
507677
  }
507403
507678
  return venvPy;
507404
507679
  }
507680
/**
 * Probe whether the given interpreter can import LuxTTS from the repo checkout.
 *
 * Runs a one-line Python import with LUXTTS_REPO_PATH pointing at the LuxTTS
 * repo directory and a 30 second timeout.
 *
 * @param {string} venvPy - Python interpreter to run the probe with.
 * @returns {boolean} true when the probe command completes without throwing, false otherwise.
 */
function pythonCanImportLuxTts(venvPy) {
  const probe = "import sys, os; sys.path.insert(0, os.environ['LUXTTS_REPO_PATH']); from zipvoice.luxvoice import LuxTTS; print('ok')";
  let importable = false;
  try {
    // Options are built inside the try so that any failure here also reads as "not importable".
    const probeEnv = { ...process.env, LUXTTS_REPO_PATH: luxttsRepoDir() };
    execFileSync2(venvPy, ["-c", probe], { stdio: "pipe", timeout: 3e4, env: probeEnv });
    importable = true;
  } catch {
    importable = false;
  }
  return importable;
}
507695
/**
 * Run `python -m pip install --prefer-binary <packages...>` synchronously.
 *
 * @param {string} venvPy - Python interpreter to invoke pip with.
 * @param {string[]} packages - Arguments appended after `--prefer-binary`.
 * @param {number} [timeout2=9e5] - Timeout in milliseconds for the pip process.
 */
function pipInstall(venvPy, packages, timeout2 = 9e5) {
  const pipArgs = ["-m", "pip", "install", "--prefer-binary", ...packages];
  const runOptions = {
    stdio: "pipe",
    timeout: timeout2,
    env: process.env
  };
  execFileSync2(venvPy, pipArgs, runOptions);
}
507702
/**
 * Make sure the managed LuxTTS environment exists and is importable, creating
 * it when needed, and (re)write the inference script.
 *
 * Fast path: when the venv interpreter and zipvoice/luxvoice.py both exist and
 * the import probe succeeds, only the inference script is rewritten.
 * Otherwise a venv is created if missing, pip tooling and dependencies are
 * installed, the LuxTTS repository is cloned if missing, and the import probe
 * is run again.
 *
 * @returns {string} Path of the LuxTTS venv Python interpreter.
 * @throws {Error} When python3 or git is unavailable, or the import probe still
 *   fails after setup. Errors from the spawned commands propagate as thrown.
 */
function ensureLuxttsInstalled() {
  const venvPy = luxttsVenvPy();
  const repoDir = luxttsRepoDir();
  const luxvoiceModule = join58(repoDir, "zipvoice", "luxvoice.py");
  const writeInferScript = () => writeFileSync16(luxttsInferScript(), LUXTTS_DAEMON_PY, "utf-8");

  mkdirSync16(voiceDir(), { recursive: true });

  const alreadyUsable = existsSync40(venvPy) && existsSync40(luxvoiceModule) && pythonCanImportLuxTts(venvPy);
  if (alreadyUsable) {
    writeInferScript();
    return venvPy;
  }

  const py = findPython32();
  if (!py) {
    throw new Error("python3 is required to set up LuxTTS voice cloning.");
  }
  if (!existsSync40(venvPy)) {
    execFileSync2(py, ["-m", "venv", luxttsVenvDir()], { stdio: "pipe", timeout: 18e4 });
  }

  const toolingArgs = ["-m", "pip", "install", "--upgrade", "pip", "wheel", "setuptools<81"];
  execFileSync2(venvPy, toolingArgs, { stdio: "pipe", timeout: 3e5 });
  pipInstall(venvPy, ["torch", "torchaudio"], 12e5);

  if (!existsSync40(luxvoiceModule)) {
    if (!hasCommand3("git")) {
      throw new Error("git is required to set up LuxTTS voice cloning.");
    }
    const cloneArgs = ["clone", "--depth", "1", "https://github.com/ysharma3501/LuxTTS.git", repoDir];
    execFileSync2("git", cloneArgs, { stdio: "pipe", timeout: 3e5 });
  }

  const runtimePackages = [
    "lhotse",
    "huggingface_hub",
    "safetensors",
    "pydub",
    "onnxruntime",
    "librosa",
    "transformers<=4.57.6",
    "inflect",
    "numpy",
    "vocos",
    "jieba",
    "pypinyin",
    "cn2an"
  ];
  pipInstall(venvPy, runtimePackages, 12e5);

  try {
    pipInstall(venvPy, ["git+https://github.com/ysharma3501/LinaCodec.git"], 12e5);
  } catch {
    // Best effort: a failed LinaCodec install is ignored here; the import
    // probe below is what decides whether setup succeeded.
  }

  pipInstall(venvPy, ["-e", repoDir], 6e5);
  writeInferScript();

  if (pythonCanImportLuxTts(venvPy)) {
    return venvPy;
  }
  throw new Error(`LuxTTS setup completed but import still fails in ${luxttsVenvDir()}.`);
}
507405
507755
  function ensurePiperInstalled() {
507406
507756
  if (hasCommand3("piper"))
507407
507757
  return "piper";
@@ -507435,6 +507785,28 @@ function saveCloneRefFromSample(sample, cloneName) {
507435
507785
  copyFileSync2(source, dest);
507436
507786
  return dest;
507437
507787
  }
507788
/**
 * Return the trimmed voice-clone source clip from the tool arguments.
 *
 * Checks `sample`, `source_audio`, `voice_sample`, `reference_audio`,
 * `ref_audio` and `clone_sample` in that order and uses the first one that is
 * a non-blank string.
 *
 * @param {object} args - Tool arguments.
 * @returns {string} The trimmed value, or "" when no alias holds a non-blank string.
 */
function cloneSampleArg(args) {
  const aliases = ["sample", "source_audio", "voice_sample", "reference_audio", "ref_audio", "clone_sample"];
  const matchedKey = aliases.find((alias) => typeof args[alias] === "string" && args[alias].trim() !== "");
  if (matchedKey === undefined) {
    return "";
  }
  return args[matchedKey].trim();
}
507796
/**
 * Decide whether a TTS request is asking for voice cloning.
 *
 * A request counts as a clone request when it carries a source clip (any
 * alias understood by cloneSampleArg), a non-blank `clone_ref`, or a `voice`
 * value that looks like an audio file or a filesystem path.
 *
 * `voice` is also the documented way to pass a raw Piper/ONNX model path, so
 * values ending in `.onnx` or `.onnx.json` are never treated as clone clips.
 * Without that exclusion, such a request is rejected with "Voice cloning from
 * a source clip requires backend=auto or backend=luxtts" or is restricted to
 * the LuxTTS backend.
 *
 * @param {object} args - Tool arguments.
 * @returns {boolean} true when the request should be handled as voice cloning.
 */
function wantsVoiceClone(args) {
  if (cloneSampleArg(args))
    return true;
  const cloneRef = args["clone_ref"];
  if (typeof cloneRef === "string" && cloneRef.trim())
    return true;
  const voice = typeof args["voice"] === "string" ? args["voice"].trim() : "";
  if (!voice)
    return false;
  // Piper/ONNX voice models are model files, not reference audio.
  if (/\.onnx(\.json)?$/i.test(voice))
    return false;
  if (/\.(wav|mp3|flac|ogg|m4a)$/i.test(voice))
    return true;
  return ["/", "./", "../", "~/"].some((prefix) => voice.startsWith(prefix));
}
507804
/**
 * Resolve the clone reference to use for a synthesis request.
 *
 * When the request carries a source clip, the clip is registered through
 * saveCloneRefFromSample (using `clone_name` when it is a string) and the
 * saved reference is returned. Otherwise `clone_ref` is resolved, falling
 * back to `voice`.
 *
 * A blank `clone_ref` string is treated as absent so the fallback to `voice`
 * still applies; `??` alone would pass the empty string through and ignore a
 * `voice` path that wantsVoiceClone already accepted as the clone source.
 *
 * @param {object} args - Tool arguments.
 * @returns {*} Whatever saveCloneRefFromSample / resolveCloneRef return.
 */
function cloneRefForSynthesis(args) {
  const sample = cloneSampleArg(args);
  if (sample) {
    const cloneName = typeof args["clone_name"] === "string" ? args["clone_name"] : void 0;
    return saveCloneRefFromSample(sample, cloneName);
  }
  const cloneRef = args["clone_ref"];
  const hasCloneRef = typeof cloneRef === "string" ? cloneRef.trim() !== "" : cloneRef != null;
  return resolveCloneRef(hasCloneRef ? cloneRef : args["voice"]);
}
507438
507810
  function ensureLuxttsDaemon() {
507439
507811
  if (_luxttsDaemon && !_luxttsDaemon.killed && _luxttsReady)
507440
507812
  return Promise.resolve(true);
@@ -507448,14 +507820,23 @@ function ensureLuxttsDaemon() {
507448
507820
  }
507449
507821
  if (_luxttsStarting)
507450
507822
  return Promise.resolve(false);
507451
- const venvPy = join58(homedir14(), ".omnius", "voice", "luxtts-venv", "bin", "python3");
507452
- const inferScript = join58(homedir14(), ".omnius", "voice", "luxtts-infer.py");
507453
- const repoDir = join58(homedir14(), ".omnius", "voice", "LuxTTS");
507823
+ const venvPy = luxttsVenvPy();
507824
+ const inferScript = luxttsInferScript();
507825
+ const repoDir = luxttsRepoDir();
507454
507826
  if (!existsSync40(venvPy) || !existsSync40(inferScript))
507455
507827
  return Promise.resolve(false);
507456
507828
  _luxttsStarting = true;
507457
507829
  return new Promise((resolve48) => {
507458
- const timeout2 = setTimeout(() => {
507830
+ let settled = false;
507831
+ let timeout2;
507832
+ const finish = (ready) => {
507833
+ if (settled)
507834
+ return;
507835
+ settled = true;
507836
+ clearTimeout(timeout2);
507837
+ resolve48(ready);
507838
+ };
507839
+ timeout2 = setTimeout(() => {
507459
507840
  _luxttsStarting = false;
507460
507841
  if (_luxttsDaemon && !_luxttsReady) {
507461
507842
  try {
@@ -507464,7 +507845,7 @@ function ensureLuxttsDaemon() {
507464
507845
  }
507465
507846
  _luxttsDaemon = null;
507466
507847
  }
507467
- resolve48(false);
507848
+ finish(false);
507468
507849
  }, 12e4);
507469
507850
  const daemon = spawn16(venvPy, [inferScript], {
507470
507851
  stdio: ["pipe", "pipe", "pipe"],
@@ -507486,8 +507867,7 @@ function ensureLuxttsDaemon() {
507486
507867
  if (msg.type === "ready") {
507487
507868
  _luxttsReady = true;
507488
507869
  _luxttsStarting = false;
507489
- clearTimeout(timeout2);
507490
- resolve48(true);
507870
+ finish(true);
507491
507871
  } else if (msg.type === "result" && msg.id) {
507492
507872
  const pending = _luxttsPending.get(msg.id);
507493
507873
  if (pending) {
@@ -507509,13 +507889,13 @@ function ensureLuxttsDaemon() {
507509
507889
  _luxttsDaemon = null;
507510
507890
  _luxttsReady = false;
507511
507891
  _luxttsStarting = false;
507892
+ finish(false);
507512
507893
  });
507513
507894
  daemon.on("error", () => {
507514
507895
  _luxttsDaemon = null;
507515
507896
  _luxttsReady = false;
507516
507897
  _luxttsStarting = false;
507517
- clearTimeout(timeout2);
507518
- resolve48(false);
507898
+ finish(false);
507519
507899
  });
507520
507900
  });
507521
507901
  }
@@ -507545,7 +507925,7 @@ function luxttsSynthesize(text, cloneRef, outputPath2, speed = 1) {
507545
507925
  _luxttsDaemon.stdin.write(req2 + "\n");
507546
507926
  });
507547
507927
  }
507548
- var _luxttsDaemon, _luxttsReady, _luxttsRequestId, _luxttsPending, _luxttsBuffer, _luxttsStarting, SUPERTONIC_INFER_PY, AudioPlaybackTool, TtsGenerateTool, SoundPlaybackTool;
507928
+ var _luxttsDaemon, _luxttsReady, _luxttsRequestId, _luxttsPending, _luxttsBuffer, _luxttsStarting, SUPERTONIC_INFER_PY, LUXTTS_DAEMON_PY, AudioPlaybackTool, TtsGenerateTool, SoundPlaybackTool;
507549
507929
  var init_audio_playback = __esm({
507550
507930
  "packages/execution/dist/tools/audio-playback.js"() {
507551
507931
  "use strict";
@@ -507585,10 +507965,45 @@ try:
507585
507965
  except Exception as exc:
507586
507966
  print(json.dumps({"ok": False, "error": str(exc), "trace": traceback.format_exc(limit=3)}))
507587
507967
  sys.exit(1)
507968
+ `;
507969
+ LUXTTS_DAEMON_PY = String.raw`
507970
+ import json, os, sys, traceback, wave
507971
+ import numpy as np
507972
+ import torch
507973
+ repo = os.environ.get("LUXTTS_REPO_PATH") or ""
507974
+ if repo:
507975
+ sys.path.insert(0, repo)
507976
+ from zipvoice.luxvoice import LuxTTS
507977
+ device = "cuda" if torch.cuda.is_available() else "cpu"
507978
+ tts = LuxTTS(model_path="YatharthS/LuxTTS", device=device, threads=4)
507979
+ print(json.dumps({"type": "ready", "device": device}), flush=True)
507980
+ for line in sys.stdin:
507981
+ if not line.strip():
507982
+ continue
507983
+ req = json.loads(line)
507984
+ if req.get("action") == "quit":
507985
+ break
507986
+ rid = req.get("id")
507987
+ try:
507988
+ text = str(req.get("text") or "").strip()
507989
+ clone_ref = str(req.get("clone_ref") or "")
507990
+ output = str(req.get("output_path") or "")
507991
+ speed = float(req.get("speed") or 1.0)
507992
+ enc = tts.encode_prompt(clone_ref, duration=5, rms=0.001)
507993
+ wav = tts.generate_speech(text, enc, num_steps=4, guidance_scale=3.0, t_shift=0.5, speed=speed)
507994
+ data = (np.clip(wav.cpu().numpy().squeeze(), -1, 1) * 32767).astype(np.int16)
507995
+ with wave.open(output, "wb") as f:
507996
+ f.setnchannels(1)
507997
+ f.setsampwidth(2)
507998
+ f.setframerate(48000)
507999
+ f.writeframes(data.tobytes())
508000
+ print(json.dumps({"type": "result", "id": rid, "path": output}), flush=True)
508001
+ except Exception as exc:
508002
+ print(json.dumps({"type": "error", "id": rid, "error": str(exc), "trace": traceback.format_exc(limit=3)}), flush=True)
507588
508003
  `;
507589
508004
  AudioPlaybackTool = class {
507590
508005
  name = "audio_playback";
507591
- description = "Play audio through speakers, synthesize text-to-speech, and manage TTS clone voices. Actions: 'play' to play an audio file (WAV/MP3/OGG — including recordings from memory episodes), 'speak' to synthesize and play text, 'synthesize' to save TTS to a WAV file, 'clone' to register a voice-clone sample, 'list_voices' to inspect available clone refs/backends, 'volume' to get or set system volume, 'list' to enumerate audio output devices. TTS backends are explicit: auto, luxtts, supertonic, mlx, onnx/piper, or espeak. Neural TTS backends self-provision into ~/.omnius/voice on first use where supported. Use generate_tts when the task is specifically to create a TTS file; do not use shell speech commands or generate_audio for spoken TTS.";
508006
+ description = "Play audio through speakers, synthesize text-to-speech, and manage TTS clone voices. Actions: 'play' to play an audio file (WAV/MP3/OGG — including recordings from memory episodes), 'speak' to synthesize and play text, 'synthesize' to save TTS to a WAV file, 'clone' to register a voice-clone source clip, 'list_voices' to inspect available clone refs/backends, 'volume' to get or set system volume, 'list' to enumerate audio output devices. TTS backends include auto, LuxTTS voice cloning, Supertonic, MLX, ONNX/Piper, and a local fallback. Neural TTS backends self-provision into ~/.omnius/voice on first use where supported. For cloned speech from a source clip, call generate_tts or audio_playback action=synthesize with sample/source_audio/voice_sample and backend=auto or luxtts. Use generate_tts when the task is specifically to create a TTS file; do not use shell speech commands or generate_audio for spoken TTS.";
507592
508007
  parameters = {
507593
508008
  type: "object",
507594
508009
  properties: {
@@ -507615,8 +508030,8 @@ except Exception as exc:
507615
508030
  },
507616
508031
  backend: {
507617
508032
  type: "string",
507618
- enum: ["auto", "luxtts", "supertonic", "mlx", "onnx", "piper", "espeak"],
507619
- description: "TTS backend. auto tries LuxTTS clone, Supertonic, MLX on Apple Silicon, Piper/ONNX, then espeak."
508033
+ enum: ["auto", "luxtts", "supertonic", "mlx", "onnx", "piper"],
508034
+ description: "TTS backend. auto tries LuxTTS clone, Supertonic, MLX on Apple Silicon, Piper/ONNX, then a local fallback."
507620
508035
  },
507621
508036
  output: {
507622
508037
  type: "string",
@@ -507632,11 +508047,31 @@ except Exception as exc:
507632
508047
  },
507633
508048
  sample: {
507634
508049
  type: "string",
507635
- description: "Audio sample path to register as a clone voice for action=clone."
508050
+ description: "Audio source clip path to register or use as a LuxTTS clone voice."
508051
+ },
508052
+ source_audio: {
508053
+ type: "string",
508054
+ description: "Alias for sample. Use this for cloned speech from a source voice clip."
508055
+ },
508056
+ voice_sample: {
508057
+ type: "string",
508058
+ description: "Alias for sample/source_audio."
508059
+ },
508060
+ reference_audio: {
508061
+ type: "string",
508062
+ description: "Alias for sample/source_audio."
508063
+ },
508064
+ ref_audio: {
508065
+ type: "string",
508066
+ description: "Alias for sample/source_audio."
508067
+ },
508068
+ clone_sample: {
508069
+ type: "string",
508070
+ description: "Alias for sample/source_audio."
507636
508071
  },
507637
508072
  clone_name: {
507638
508073
  type: "string",
507639
- description: "Friendly filename stem for action=clone."
508074
+ description: "Friendly filename stem for action=clone or for registering a source clip during synthesis."
507640
508075
  },
507641
508076
  model: {
507642
508077
  type: "string",
@@ -507652,11 +508087,11 @@ except Exception as exc:
507652
508087
  },
507653
508088
  speed: {
507654
508089
  type: "number",
507655
- description: "Speech speed. espeak uses words per minute; neural backends use a multiplier."
508090
+ description: "Speech speed. Neural backends use a multiplier; local fallback uses its backend-specific rate."
507656
508091
  },
507657
508092
  voice: {
507658
508093
  type: "string",
507659
- description: "Voice id/name. Examples: Supertonic voice M4, MLX voice af_heart, espeak voice en-us, or Piper/ONNX model path."
508094
+ description: "Voice id/name. Examples: Supertonic voice M4, MLX voice af_heart, a source audio path for cloning, or Piper/ONNX model path."
507660
508095
  },
507661
508096
  lang: {
507662
508097
  type: "string",
@@ -507720,9 +508155,9 @@ except Exception as exc:
507720
508155
  return await this.synthesizeText(args, start2, true);
507721
508156
  }
507722
508157
  cloneVoice(args, start2) {
507723
- const sample = typeof args["sample"] === "string" ? args["sample"] : typeof args["file"] === "string" ? args["file"] : "";
508158
+ const sample = cloneSampleArg(args) || (typeof args["file"] === "string" ? args["file"] : "");
507724
508159
  if (!sample.trim()) {
507725
- return { success: false, output: "", error: "Missing 'sample' parameter. Provide a local audio sample to register as a clone voice.", durationMs: performance.now() - start2 };
508160
+ return { success: false, output: "", error: "Missing source audio. Provide sample=<file> or source_audio=<file> to register as a clone voice.", durationMs: performance.now() - start2 };
507726
508161
  }
507727
508162
  const saved = saveCloneRefFromSample(sample, typeof args["clone_name"] === "string" ? args["clone_name"] : void 0);
507728
508163
  return {
@@ -507739,10 +508174,11 @@ except Exception as exc:
507739
508174
  const lines = [
507740
508175
  "TTS backends:",
507741
508176
  ` luxtts: ${existsSync40(join58(voiceDir(), "luxtts-venv", "bin", "python3")) ? "installed" : "not installed"}; clone refs: ${refs.length}`,
508177
+ " clone from source clip: generate_tts text=<words> source_audio=<wav/mp3/flac/ogg/m4a> backend=auto",
507742
508178
  ` supertonic: ${existsSync40(supertonicVenvPy()) ? "installed" : "not installed"}; voices include M1, M2, M3, M4 when package assets are available`,
507743
508179
  ` mlx: ${existsSync40(mlxVenvPy()) ? "installed" : "not installed"}; Apple Silicon only; default model mlx-community/Kokoro-82M-bf16`,
507744
508180
  ` piper/onnx: ${hasCommand3("piper") || existsSync40(piperVenvBin()) ? "available" : "not installed"}; first use installs piper-tts into ${piperVenvDir()}; pass model=<path.onnx> for raw ONNX voices`,
507745
- ` espeak: ${hasCommand3("espeak-ng") ? "available" : "not found"}`,
508181
+ ` local fallback: ${hasCommand3("espeak-ng") ? "available" : "not found"}`,
507746
508182
  "",
507747
508183
  "Registered clone refs:",
507748
508184
  ...refs.length ? refs.map((ref) => ` ${ref}`) : [" none"]
@@ -507756,11 +508192,20 @@ except Exception as exc:
507756
508192
  }
507757
508193
  const requestedBackend = normalizeTtsBackend(args["backend"]);
507758
508194
  const strictBackend = boolArg(args["strict_backend"] ?? args["strictBackend"], false);
508195
+ const cloneRequested = wantsVoiceClone(args);
508196
+ if (cloneRequested && requestedBackend !== "auto" && requestedBackend !== "luxtts") {
508197
+ return {
508198
+ success: false,
508199
+ output: "",
508200
+ error: "Voice cloning from a source clip requires backend=auto or backend=luxtts.",
508201
+ durationMs: performance.now() - start2
508202
+ };
508203
+ }
507759
508204
  const playback = playbackArg(args, speakDefault);
507760
508205
  const outputPath2 = ttsOutputPath(args, requestedBackend);
507761
508206
  const device = typeof args["device"] === "string" ? args["device"] : "default";
507762
508207
  const tried = [];
507763
- const autoCandidates = ["luxtts", "supertonic", ...process.platform === "darwin" && process.arch === "arm64" ? ["mlx"] : [], "piper", "espeak"];
508208
+ const autoCandidates = cloneRequested ? ["luxtts"] : ["luxtts", "supertonic", ...process.platform === "darwin" && process.arch === "arm64" ? ["mlx"] : [], "piper", "espeak"];
507764
508209
  const candidates = requestedBackend === "auto" ? autoCandidates : strictBackend ? [requestedBackend] : [requestedBackend, ...autoCandidates.filter((backend) => backend !== requestedBackend)];
507765
508210
  let usedBackend = "";
507766
508211
  let voiceSummary = "";
@@ -507823,21 +508268,19 @@ ${tried.map((line) => `- ${line}`).join("\n")}`,
507823
508268
  };
507824
508269
  }
507825
508270
  async synthesizeLuxtts(text, outputPath2, args) {
507826
- const cloneRef = resolveCloneRef(args["clone_ref"] ?? args["voice"] ?? args["sample"]);
508271
+ const cloneRef = cloneRefForSynthesis(args);
507827
508272
  if (!cloneRef)
507828
- throw new Error(`No LuxTTS clone reference found. Register one with audio_playback action=clone sample=<file>.`);
508273
+ throw new Error(`No LuxTTS clone source found. Provide source_audio=<voice clip> or clone_ref=<registered clip>.`);
507829
508274
  const speed = numberArg3(args["speed"], 1);
508275
+ ensureLuxttsInstalled();
507830
508276
  const daemonReady = await ensureLuxttsDaemon();
507831
508277
  if (daemonReady) {
507832
508278
  await luxttsSynthesize(text, cloneRef, outputPath2, speed);
507833
508279
  if (existsSync40(outputPath2))
507834
508280
  return `${basename12(cloneRef)} (LuxTTS daemon)`;
507835
508281
  }
507836
- const venvPy = join58(voiceDir(), "luxtts-venv", "bin", "python3");
507837
- const repoDir = join58(voiceDir(), "LuxTTS");
507838
- if (!existsSync40(venvPy) || !existsSync40(repoDir)) {
507839
- throw new Error("LuxTTS is not installed in the managed voice environment yet.");
507840
- }
508282
+ const venvPy = luxttsVenvPy();
508283
+ const repoDir = luxttsRepoDir();
507841
508284
  const pyScript = [
507842
508285
  "import json, sys, wave",
507843
508286
  "import numpy as np, torch",
@@ -507913,7 +508356,7 @@ ${tried.map((line) => `- ${line}`).join("\n")}`,
507913
508356
  }
507914
508357
  synthesizeEspeak(text, outputPath2, args) {
507915
508358
  if (!hasCommand3("espeak-ng"))
507916
- throw new Error("espeak-ng command not found.");
508359
+ throw new Error("Local fallback TTS command not found.");
507917
508360
  const voice = typeof args["voice"] === "string" ? args["voice"] : "en";
507918
508361
  const speed = Math.round(numberArg3(args["speed"], 160));
507919
508362
  execFileSync2("espeak-ng", ["-v", voice, "-s", String(speed), "-w", outputPath2, text], {
@@ -507995,20 +508438,27 @@ ${devices.join("\n")}`,
507995
508438
  };
507996
508439
  TtsGenerateTool = class {
507997
508440
  name = "generate_tts";
507998
- description = "Generate text-to-speech audio as a WAV file, optionally playing it after synthesis. Supports explicit backends: auto, luxtts voice cloning, supertonic, mlx, onnx/piper, and espeak. Neural TTS backends self-provision into ~/.omnius/voice on first use where supported. Use clone_ref to select a registered LuxTTS voice, sample+clone_name to register a clone sample via audio_playback action=clone, and playback=false for silent file generation. Use this tool for speech/TTS requests; do not use shell commands or generate_audio as a TTS fallback.";
508441
+ description = "Generate text-to-speech audio as a WAV file, optionally playing it after synthesis. Supports explicit backends: auto, LuxTTS voice cloning, Supertonic, MLX, ONNX/Piper, and local fallback. Neural TTS backends self-provision into ~/.omnius/voice on first use where supported. For voice cloning, pass source_audio/sample/voice_sample with the reference clip and backend=auto or luxtts; clone_name can register it for reuse. Use clone_ref to select a registered LuxTTS voice and playback=false for silent file generation. Use this tool for speech/TTS requests; do not use shell commands or generate_audio as a TTS fallback.";
507999
508442
  parameters = {
508000
508443
  type: "object",
508001
508444
  properties: {
508002
508445
  text: { type: "string", description: "Text to synthesize" },
508003
508446
  input: { type: "string", description: "Alias for text." },
508004
508447
  prompt: { type: "string", description: "Alias for text." },
508005
- backend: { type: "string", enum: ["auto", "luxtts", "supertonic", "mlx", "onnx", "piper", "espeak"] },
508448
+ backend: { type: "string", enum: ["auto", "luxtts", "supertonic", "mlx", "onnx", "piper"] },
508006
508449
  output: { type: "string", description: "Output WAV path. Defaults to ~/.omnius/voice/generated/tts-*.wav." },
508007
508450
  path: { type: "string", description: "Alias for output." },
508008
508451
  playback: { type: "boolean", description: "Whether to play after generating. Defaults false for generate_tts." },
508009
508452
  strict_backend: { type: "boolean", description: "When true, fail instead of falling back if the requested backend is unavailable. Defaults false." },
508010
508453
  voice: { type: "string", description: "Voice id/name, or raw Piper/ONNX path when backend=onnx/piper." },
508011
508454
  clone_ref: { type: "string", description: "LuxTTS clone reference path, filename, or registered clone name." },
508455
+ sample: { type: "string", description: "Voice source clip path for cloned speech. Alias: source_audio." },
508456
+ source_audio: { type: "string", description: "Voice source clip path for cloned speech." },
508457
+ voice_sample: { type: "string", description: "Alias for source_audio." },
508458
+ reference_audio: { type: "string", description: "Alias for source_audio." },
508459
+ ref_audio: { type: "string", description: "Alias for source_audio." },
508460
+ clone_sample: { type: "string", description: "Alias for source_audio." },
508461
+ clone_name: { type: "string", description: "Optional name to register the source clip for later reuse." },
508012
508462
  model: { type: "string", description: "Backend model id or raw ONNX/Piper model path." },
508013
508463
  lang: { type: "string", description: "Language code for Supertonic/MLX where supported." },
508014
508464
  speed: { type: "number", description: "Speech speed multiplier or backend-specific rate." },
@@ -575357,19 +575807,19 @@ function modelOnnxPath(id) {
575357
575807
  function modelConfigPath(id) {
575358
575808
  return join109(modelDir(id), "config.json");
575359
575809
  }
575360
- function luxttsVenvDir() {
575810
+ function luxttsVenvDir2() {
575361
575811
  return join109(voiceDir2(), "luxtts-venv");
575362
575812
  }
575363
- function luxttsVenvPy() {
575364
- return platform5() === "win32" ? join109(luxttsVenvDir(), "Scripts", "python.exe") : join109(luxttsVenvDir(), "bin", "python3");
575813
+ function luxttsVenvPy2() {
575814
+ return platform5() === "win32" ? join109(luxttsVenvDir2(), "Scripts", "python.exe") : join109(luxttsVenvDir2(), "bin", "python3");
575365
575815
  }
575366
- function luxttsRepoDir() {
575816
+ function luxttsRepoDir2() {
575367
575817
  return join109(voiceDir2(), "LuxTTS");
575368
575818
  }
575369
575819
  function luxttsCloneRefsDir() {
575370
575820
  return join109(voiceDir2(), "clone-refs");
575371
575821
  }
575372
- function luxttsInferScript() {
575822
+ function luxttsInferScript2() {
575373
575823
  return join109(voiceDir2(), "luxtts-infer.py");
575374
575824
  }
575375
575825
  function supertonicVenvDir() {
@@ -577936,12 +578386,12 @@ Error: ${err2 instanceof Error ? err2.message : String(err2)}`
577936
578386
  "python3 not found. LuxTTS requires Python 3.10+. Try: apt install python3 / brew install python3"
577937
578387
  );
577938
578388
  }
577939
- const venvDir = luxttsVenvDir();
577940
- const venvPy = luxttsVenvPy();
578389
+ const venvDir = luxttsVenvDir2();
578390
+ const venvPy = luxttsVenvPy2();
577941
578391
  if (existsSync95(venvPy)) {
577942
578392
  try {
577943
578393
  const quotedPy = `"${venvPy}"`;
577944
- const repoPath = luxttsRepoDir().replace(/\\/g, "/");
578394
+ const repoPath = luxttsRepoDir2().replace(/\\/g, "/");
577945
578395
  await this.asyncShell(
577946
578396
  `${quotedPy} -c "import sys; sys.path.insert(0, '${repoPath}'); from zipvoice.luxvoice import LuxTTS; print('ok')"`,
577947
578397
  3e4
@@ -578055,7 +578505,7 @@ Error: ${err2 instanceof Error ? err2.message : String(err2)}`
578055
578505
  }
578056
578506
  }
578057
578507
  }
578058
- const repoDir = luxttsRepoDir();
578508
+ const repoDir = luxttsRepoDir2();
578059
578509
  if (!existsSync95(join109(repoDir, "zipvoice", "luxvoice.py"))) {
578060
578510
  renderInfo(" Cloning LuxTTS repository...");
578061
578511
  try {
@@ -578479,18 +578929,18 @@ def main():
578479
578929
  if __name__ == '__main__':
578480
578930
  main()
578481
578931
  `;
578482
- const scriptPath2 = luxttsInferScript();
578932
+ const scriptPath2 = luxttsInferScript2();
578483
578933
  mkdirSync52(voiceDir2(), { recursive: true });
578484
578934
  writeFileSync49(scriptPath2, script);
578485
578935
  }
578486
578936
  /** Ensure the LuxTTS daemon is running, spawn if needed */
578487
578937
  async ensureLuxttsDaemon() {
578488
578938
  if (this._luxttsDaemon && !this._luxttsDaemon.killed) return true;
578489
- const venvPy = luxttsVenvPy();
578939
+ const venvPy = luxttsVenvPy2();
578490
578940
  if (!existsSync95(venvPy)) return false;
578491
578941
  return new Promise((resolve48) => {
578492
- const env2 = { ...process.env, LUXTTS_REPO_PATH: luxttsRepoDir() };
578493
- const daemon = nodeSpawn(venvPy, [luxttsInferScript()], {
578942
+ const env2 = { ...process.env, LUXTTS_REPO_PATH: luxttsRepoDir2() };
578943
+ const daemon = nodeSpawn(venvPy, [luxttsInferScript2()], {
578494
578944
  stdio: ["pipe", "pipe", "pipe"],
578495
578945
  cwd: tmpdir20(),
578496
578946
  env: env2
@@ -596500,6 +596950,7 @@ function scopedTool(base3, root, mode) {
596500
596950
  async execute(args) {
596501
596951
  const next = { ...args };
596502
596952
  if (base3.name === "generate_image" || base3.name === "generate_audio" || base3.name === "generate_tts") {
596953
+ const cleanup = [];
596503
596954
  const localModel = typeof next["model_path"] === "string" ? String(next["model_path"]) : typeof next["model"] === "string" && looksLikeLocalPath(String(next["model"])) ? String(next["model"]) : "";
596504
596955
  if (localModel) {
596505
596956
  const guarded = guardPath(rootAbs, localModel);
@@ -596508,6 +596959,22 @@ function scopedTool(base3, root, mode) {
596508
596959
  else next["model"] = guarded.path.abs;
596509
596960
  }
596510
596961
  if (base3.name === "generate_tts") {
596962
+ for (const key of TTS_CLONE_SOURCE_KEYS) {
596963
+ const value2 = next[key];
596964
+ if (typeof value2 !== "string" || !value2.trim()) continue;
596965
+ const materialized = materializeTelegramCreativeArtifactForSend(rootAbs, value2.trim());
596966
+ if (!materialized.ok) return denied(materialized.error);
596967
+ next[key] = materialized.path;
596968
+ if (materialized.cleanup) cleanup.push(materialized.cleanup);
596969
+ }
596970
+ for (const key of ["clone_ref", "voice"]) {
596971
+ const value2 = next[key];
596972
+ if (typeof value2 !== "string" || !value2.trim() || !looksLikeAudioPath(value2.trim())) continue;
596973
+ const materialized = materializeTelegramCreativeArtifactForSend(rootAbs, value2.trim());
596974
+ if (!materialized.ok) return denied(materialized.error);
596975
+ next[key] = materialized.path;
596976
+ if (materialized.cleanup) cleanup.push(materialized.cleanup);
596977
+ }
596511
596978
  const rawOutput = typeof next["output"] === "string" && String(next["output"]).trim() ? String(next["output"]) : typeof next["output_path"] === "string" && String(next["output_path"]).trim() ? String(next["output_path"]) : `tts-${Date.now()}.wav`;
596512
596979
  const guardedOutput = guardPath(rootAbs, rawOutput);
596513
596980
  if (!guardedOutput.ok) return denied(guardedOutput.error);
@@ -596517,16 +596984,20 @@ function scopedTool(base3, root, mode) {
596517
596984
  next["output"] = guardedOutput.path.abs;
596518
596985
  next["playback"] = false;
596519
596986
  }
596520
- const result2 = await base3.execute(next);
596521
- if (result2.success) {
596522
- if (base3.name === "generate_tts" && typeof next["output"] === "string") {
596523
- rememberCreated(rootAbs, String(next["output"]));
596524
- }
596525
- for (const path11 of collectGeneratedArtifactPathsFromText(result2.output, rootAbs)) {
596526
- rememberCreated(rootAbs, path11);
596987
+ try {
596988
+ const result2 = await base3.execute(next);
596989
+ if (result2.success) {
596990
+ if (base3.name === "generate_tts" && typeof next["output"] === "string") {
596991
+ rememberCreated(rootAbs, String(next["output"]));
596992
+ }
596993
+ for (const path11 of collectGeneratedArtifactPathsFromText(result2.output, rootAbs)) {
596994
+ rememberCreated(rootAbs, path11);
596995
+ }
596527
596996
  }
596997
+ return result2;
596998
+ } finally {
596999
+ for (const fn of cleanup) fn();
596528
597000
  }
596529
- return result2;
596530
597001
  }
596531
597002
  const pathKey = PATH_KEYS.find((key) => typeof next[key] === "string" && String(next[key]).trim());
596532
597003
  if (pathKey) {
@@ -596591,6 +597062,9 @@ function isInside(root, path11) {
596591
597062
  function looksLikeLocalPath(value2) {
596592
597063
  return value2.startsWith("/") || value2.startsWith("./") || value2.startsWith("../");
596593
597064
  }
597065
+ function looksLikeAudioPath(value2) {
597066
+ return looksLikeLocalPath(value2) || value2.startsWith("~/") || /\.(wav|mp3|flac|ogg|m4a)$/i.test(value2);
597067
+ }
596594
597068
  function manifestPath(root) {
596595
597069
  return join119(root, MANIFEST_FILE);
596596
597070
  }
@@ -596753,7 +597227,7 @@ function denied(error) {
596753
597227
  mutatedFiles: []
596754
597228
  };
596755
597229
  }
596756
- var MANIFEST_FILE, OBJECTS_DIR, SEND_DIR, PATH_KEYS, MEDIA_PATH_RE, PUBLIC_EXECUTABLE_ARTIFACT_EXTENSIONS, CreativeAudioFileTool;
597230
+ var MANIFEST_FILE, OBJECTS_DIR, SEND_DIR, PATH_KEYS, TTS_CLONE_SOURCE_KEYS, MEDIA_PATH_RE, PUBLIC_EXECUTABLE_ARTIFACT_EXTENSIONS, CreativeAudioFileTool;
596757
597231
  var init_telegram_creative_tools = __esm({
596758
597232
  "packages/cli/src/tui/telegram-creative-tools.ts"() {
596759
597233
  "use strict";
@@ -596762,6 +597236,7 @@ var init_telegram_creative_tools = __esm({
596762
597236
  OBJECTS_DIR = ".objects";
596763
597237
  SEND_DIR = ".send";
596764
597238
  PATH_KEYS = ["path", "file", "file_path", "filename", "filepath", "filePath"];
597239
+ TTS_CLONE_SOURCE_KEYS = ["sample", "source_audio", "voice_sample", "reference_audio", "ref_audio", "clone_sample"];
596765
597240
  MEDIA_PATH_RE = /(?:^|[\s([])(\/[^\s<>"')\]]+\.[A-Za-z0-9]{1,12})(?:$|[\s),.\]])/g;
596766
597241
  PUBLIC_EXECUTABLE_ARTIFACT_EXTENSIONS = /* @__PURE__ */ new Set([
596767
597242
  ".sh",
@@ -596836,9 +597311,16 @@ var init_telegram_creative_tools = __esm({
596836
597311
  input: { type: "string", description: "Alias for text" },
596837
597312
  prompt: { type: "string", description: "Alias for text" },
596838
597313
  path: { type: "string", description: "Output .wav path inside the creative workspace" },
596839
- backend: { type: "string", enum: ["auto", "luxtts", "supertonic", "mlx", "onnx", "piper", "espeak"], description: "TTS backend. Defaults to auto." },
596840
- voice: { type: "string", description: "Voice id/name for the selected TTS backend" },
597314
+ backend: { type: "string", enum: ["auto", "luxtts", "supertonic", "mlx", "onnx", "piper"], description: "TTS backend. Defaults to auto." },
597315
+ voice: { type: "string", description: "Voice id/name for the selected TTS backend, or a scoped source audio path for cloning" },
596841
597316
  clone_ref: { type: "string", description: "Optional LuxTTS clone reference" },
597317
+ sample: { type: "string", description: "Voice source clip path inside the creative workspace" },
597318
+ source_audio: { type: "string", description: "Alias for sample" },
597319
+ voice_sample: { type: "string", description: "Alias for sample" },
597320
+ reference_audio: { type: "string", description: "Alias for sample" },
597321
+ ref_audio: { type: "string", description: "Alias for sample" },
597322
+ clone_sample: { type: "string", description: "Alias for sample" },
597323
+ clone_name: { type: "string", description: "Optional name to register the source clip for later reuse" },
596842
597324
  model: { type: "string", description: "Optional backend model id or raw Piper/ONNX path" },
596843
597325
  speed: { type: "number", description: "Speech speed multiplier or backend-specific rate" }
596844
597326
  },
@@ -596857,26 +597339,57 @@ var init_telegram_creative_tools = __esm({
596857
597339
  if (!guarded.path.abs.toLowerCase().endsWith(".wav")) {
596858
597340
  return denied("create_audio_file currently writes WAV files; use a .wav output path.");
596859
597341
  }
596860
- await mkdir17(dirname33(guarded.path.abs), { recursive: true });
596861
- const tts = new TtsGenerateTool();
596862
- const result = await tts.execute({
596863
- text,
596864
- output: guarded.path.abs,
596865
- playback: false,
596866
- backend: args["backend"],
596867
- voice: args["voice"],
596868
- clone_ref: args["clone_ref"],
596869
- model: args["model"],
596870
- speed: args["speed"]
596871
- });
596872
- if (!result.success || !existsSync104(guarded.path.abs)) {
596873
- return {
596874
- success: false,
596875
- output: "",
596876
- error: `Audio synthesis failed through generate_tts.
597342
+ const cloneArgs = {};
597343
+ const cleanup = [];
597344
+ for (const key of TTS_CLONE_SOURCE_KEYS) {
597345
+ const value2 = args[key];
597346
+ if (typeof value2 !== "string" || !value2.trim()) continue;
597347
+ const materialized = materializeTelegramCreativeArtifactForSend(this.root, value2.trim());
597348
+ if (!materialized.ok) return denied(materialized.error);
597349
+ cloneArgs[key] = materialized.path;
597350
+ if (materialized.cleanup) cleanup.push(materialized.cleanup);
597351
+ }
597352
+ for (const key of ["clone_ref", "voice"]) {
597353
+ const value2 = args[key];
597354
+ if (typeof value2 !== "string" || !value2.trim() || !looksLikeAudioPath(value2.trim())) continue;
597355
+ const materialized = materializeTelegramCreativeArtifactForSend(this.root, value2.trim());
597356
+ if (!materialized.ok) return denied(materialized.error);
597357
+ cloneArgs[key] = materialized.path;
597358
+ if (materialized.cleanup) cleanup.push(materialized.cleanup);
597359
+ }
597360
+ let result;
597361
+ try {
597362
+ await mkdir17(dirname33(guarded.path.abs), { recursive: true });
597363
+ const tts = new TtsGenerateTool();
597364
+ result = await tts.execute({
597365
+ text,
597366
+ output: guarded.path.abs,
597367
+ playback: false,
597368
+ backend: args["backend"],
597369
+ voice: cloneArgs["voice"] ?? args["voice"],
597370
+ clone_ref: cloneArgs["clone_ref"] ?? args["clone_ref"],
597371
+ ...cloneArgs,
597372
+ sample: cloneArgs["sample"],
597373
+ source_audio: cloneArgs["source_audio"],
597374
+ voice_sample: cloneArgs["voice_sample"],
597375
+ reference_audio: cloneArgs["reference_audio"],
597376
+ ref_audio: cloneArgs["ref_audio"],
597377
+ clone_sample: cloneArgs["clone_sample"],
597378
+ clone_name: args["clone_name"],
597379
+ model: args["model"],
597380
+ speed: args["speed"]
597381
+ });
597382
+ if (!result.success || !existsSync104(guarded.path.abs)) {
597383
+ return {
597384
+ success: false,
597385
+ output: "",
597386
+ error: `Audio synthesis failed through generate_tts.
596877
597387
  ${(result.error || result.output || "").slice(0, 1200)}`,
596878
- durationMs: performance.now() - start2
596879
- };
597388
+ durationMs: performance.now() - start2
597389
+ };
597390
+ }
597391
+ } finally {
597392
+ for (const fn of cleanup) fn();
596880
597393
  }
596881
597394
  rememberCreated(this.root, guarded.path.abs);
596882
597395
  const sizeKB = Math.round(statSync35(guarded.path.abs).size / 1024);