omnius 1.0.20 → 1.0.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -2
- package/dist/index.js +701 -188
- package/npm-shrinkwrap.json +2 -2
- package/package.json +2 -2
package/dist/index.js
CHANGED
|
@@ -250375,6 +250375,22 @@ function optionalNumberArg(value2) {
|
|
|
250375
250375
|
const n2 = Number(value2);
|
|
250376
250376
|
return Number.isFinite(n2) ? n2 : void 0;
|
|
250377
250377
|
}
|
|
250378
|
+
/**
 * Coerce a loosely typed tool argument into a boolean.
 *
 * Booleans pass through unchanged. Strings are trimmed and matched
 * case-insensitively against 1/true/yes/on and 0/false/no/off. The numbers
 * 1 and 0 are treated like their string spellings so a numeric flag is not
 * silently replaced by the default. Any other value yields `fallback`.
 *
 * @param {unknown} value2 - Raw argument value.
 * @param {boolean} fallback - Value returned when `value2` is not recognised.
 * @returns {boolean} The coerced flag, or `fallback`.
 */
function booleanArg(value2, fallback) {
  if (typeof value2 === "boolean")
    return value2;
  // Numeric 1/0 mirror the "1"/"0" strings accepted below.
  if (value2 === 1)
    return true;
  if (value2 === 0)
    return false;
  if (typeof value2 === "string") {
    // Trim once and reuse the result for both pattern checks.
    const text = value2.trim();
    if (/^(1|true|yes|on)$/i.test(text))
      return true;
    if (/^(0|false|no|off)$/i.test(text))
      return false;
  }
  return fallback;
}
|
|
250389
|
+
/**
 * Decide whether the fallback ladder may be used for a tool call.
 *
 * A strict flag (`strict_model`, `strictModel` or `strict`) that coerces to
 * true always disables fallback. Otherwise the first non-nullish of
 * `fallback`, `allow_fallback`, `allowFallback` is coerced, defaulting to true.
 *
 * @param {object} args - Tool-call arguments.
 * @returns {boolean} Whether fallback is enabled.
 */
function generationFallbackEnabled(args) {
  const strictFlag = args["strict_model"] ?? args["strictModel"] ?? args["strict"];
  const strictRequested = booleanArg(strictFlag, false);
  if (strictRequested) {
    return false;
  }
  const fallbackFlag = args["fallback"] ?? args["allow_fallback"] ?? args["allowFallback"];
  return booleanArg(fallbackFlag, true);
}
|
|
250378
250394
|
/**
 * Report whether a value is one of the backend names this check accepts:
 * "auto", "ollama", "diffusers" or "sdcpp".
 *
 * @param {unknown} value2 - Candidate backend name.
 * @returns {boolean} True only for an exact match.
 */
function isBackend(value2) {
  switch (value2) {
    case "auto":
    case "ollama":
    case "diffusers":
    case "sdcpp":
      return true;
    default:
      return false;
  }
}
|
|
@@ -250383,6 +250399,9 @@ function getImageGenerationPreset(model) {
|
|
|
250383
250399
|
return void 0;
|
|
250384
250400
|
return IMAGE_GENERATION_MODEL_PRESETS.find((preset) => preset.id === model);
|
|
250385
250401
|
}
|
|
250402
|
+
/**
 * Resolve the ids in IMAGE_GENERATION_QUALITY_LADDER to their presets,
 * keeping ladder order and dropping ids for which no preset is found.
 *
 * @returns {object[]} Presets in ladder order.
 */
function imageGenerationQualityLadder() {
  const resolved = [];
  for (const presetId of IMAGE_GENERATION_QUALITY_LADDER) {
    const preset = getImageGenerationPreset(presetId);
    if (preset) {
      resolved.push(preset);
    }
  }
  return resolved;
}
|
|
250386
250405
|
function inferImageGenerationBackend(model, requested) {
|
|
250387
250406
|
if (requested && isBackend(requested))
|
|
250388
250407
|
return requested;
|
|
@@ -250399,6 +250418,40 @@ function inferImageGenerationBackend(model, requested) {
|
|
|
250399
250418
|
return "sdcpp";
|
|
250400
250419
|
return "diffusers";
|
|
250401
250420
|
}
|
|
250421
|
+
/**
 * Build a ladder candidate for a model.
 *
 * The backend comes from inferImageGenerationBackend; an unresolved "auto"
 * is pinned to "diffusers". The preset is whatever getImageGenerationPreset
 * returns for the model.
 *
 * @param {string} model - Model id or path.
 * @param {string} [requestedBackend] - Backend requested by the caller, if any.
 * @returns {{model: string, backend: string, preset: (object|undefined)}}
 */
function imageCandidateFor(model, requestedBackend) {
  const inferred = inferImageGenerationBackend(model, requestedBackend);
  const backend = inferred === "auto" ? "diffusers" : inferred;
  const preset = getImageGenerationPreset(model);
  return { model, backend, preset };
}
|
|
250431
|
+
/**
 * Produce the ordered list of image candidates to attempt.
 *
 * The primary candidate is, in priority order: the requested model; the first
 * ladder preset for an explicitly requested (non-"auto") backend, or a default
 * model for that backend; or, when fallback is disabled, the default Diffusers
 * model. With fallback enabled the ladder is appended starting at the primary's
 * position (the whole ladder when the primary is not on it). Candidates are
 * de-duplicated by backend and model.
 *
 * @param {string} [requestedModel] - Explicit model, if any.
 * @param {string} [requestedBackend] - Explicit backend, if any.
 * @param {boolean} [allowFallback=true] - Whether to append ladder fallbacks.
 * @returns {object[]} Candidates in attempt order.
 */
function imageGenerationFallbackCandidates(requestedModel, requestedBackend, allowFallback = true) {
  const ladder = imageGenerationQualityLadder();
  const explicitBackend = Boolean(requestedBackend) && requestedBackend !== "auto";
  const candidates = [];
  const seenKeys = new Set();
  const remember = (candidate) => {
    const key = `${candidate.backend}:${candidate.model}`;
    if (seenKeys.has(key)) {
      return;
    }
    seenKeys.add(key);
    candidates.push(candidate);
  };

  if (requestedModel) {
    remember(imageCandidateFor(requestedModel, requestedBackend));
  } else if (explicitBackend) {
    const firstForBackend = ladder.find((preset) => preset.backend === requestedBackend);
    const backendDefault = requestedBackend === "ollama" ? DEFAULT_OLLAMA_IMAGE_MODEL : DEFAULT_DIFFUSERS_IMAGE_MODEL;
    remember(imageCandidateFor(firstForBackend?.id ?? backendDefault, requestedBackend));
  } else if (!allowFallback) {
    remember(imageCandidateFor(DEFAULT_DIFFUSERS_IMAGE_MODEL, requestedBackend));
  }

  if (!allowFallback) {
    if (candidates.length) {
      return candidates;
    }
    return [imageCandidateFor(DEFAULT_DIFFUSERS_IMAGE_MODEL, requestedBackend)];
  }

  // Locate where on the ladder the primary sits; 0 when nothing was requested.
  let primaryIndex = 0;
  if (requestedModel) {
    primaryIndex = ladder.findIndex((preset) => preset.id === requestedModel);
  } else if (explicitBackend) {
    primaryIndex = ladder.findIndex((preset) => preset.backend === requestedBackend);
  }
  const fallbackTail = primaryIndex >= 0 ? ladder.slice(primaryIndex) : ladder;
  fallbackTail.forEach((preset) => {
    remember(imageCandidateFor(preset.id));
  });
  return candidates;
}
|
|
250402
250455
|
/**
 * Build the `.omnius/image-gen` path beneath a repository root.
 *
 * @param {string} [repoRoot="."] - Root directory to join onto.
 * @returns {string} The joined path.
 */
function imageGenerationDir(repoRoot = ".") {
  const segments = [repoRoot, ".omnius", "image-gen"];
  return join36(...segments);
}
|
|
@@ -250653,6 +250706,33 @@ function formatSuccessOutput(args) {
|
|
|
250653
250706
|
` Prompt: "${prompt}"`
|
|
250654
250707
|
].filter(Boolean).join("\n");
|
|
250655
250708
|
}
|
|
250709
|
+
/**
 * Condense a tool result into a single-line failure reason.
 *
 * Uses the first truthy of `result.error`, `result.output`, or the text
 * "unknown error"; passes it through trimProcessText with a limit argument
 * of 700, then collapses whitespace runs to single spaces and trims.
 *
 * @param {{error?: unknown, output?: unknown}} result - Tool result to summarise.
 * @returns {string} One-line summary.
 */
function summarizeToolResult(result) {
  const rawText = String(result.error || result.output || "unknown error");
  const clipped = trimProcessText(rawText, 700);
  const singleLine = clipped.replace(/\s+/g, " ");
  return singleLine.trim();
}
|
|
250712
|
+
/**
 * Render one ladder attempt as "<n>. <model> [<backend>] - <reason>".
 *
 * @param {{model: string, backend: string}} candidate - Attempted candidate.
 * @param {string} reason - Why the attempt failed.
 * @param {number} index - Zero-based attempt position; shown one-based.
 * @returns {string} The formatted line.
 */
function formatImageAttempt(candidate, reason, index) {
  const ordinal = index + 1;
  const { model, backend } = candidate;
  return `${ordinal}. ${model} [${backend}] - ${reason}`;
}
|
|
250715
|
+
/**
 * Build the multi-line message reported when every image candidate failed:
 * two header lines followed by one indented line per failed attempt.
 *
 * @param {{candidate: object, reason: string}[]} failed - Failed attempts in order.
 * @returns {string} Newline-joined report.
 */
function formatImageFallbackFailure(failed) {
  const lines = [
    "No image generation model in the fallback ladder completed successfully.",
    "Attempted, highest quality to lowest:"
  ];
  failed.forEach((attempt, position) => {
    lines.push(` ${formatImageAttempt(attempt.candidate, attempt.reason, position)}`);
  });
  return lines.join("\n");
}
|
|
250722
|
+
/**
 * Prefix a successful result with a summary of the attempts that failed
 * before it.
 *
 * When nothing failed, the result is returned untouched (same object).
 * Otherwise a copy is returned whose `output` starts with the winner, the
 * failure count and one line per failed attempt, followed by the original
 * output.
 *
 * @param {{output?: string}} result - Result from the winning candidate.
 * @param {{candidate: object, reason: string}[]} failed - Earlier failed attempts.
 * @param {{model: string, backend: string}} winner - Candidate that succeeded.
 * @returns {object} The original result, or an annotated copy.
 */
function annotateImageFallbackSuccess(result, failed, winner) {
  if (failed.length === 0)
    return result;
  const prefix = [
    `Fallback ladder succeeded with ${winner.model} [${winner.backend}] after ${failed.length} failed attempt(s).`,
    "Failed attempts:",
    ...failed.map((attempt, index) => ` ${formatImageAttempt(attempt.candidate, attempt.reason, index)}`),
    // The trailing empty entry makes the prefix end with a newline.
    ""
  ].join("\n");
  return {
    ...result,
    // A result without output must not render as the literal text "undefined".
    output: prefix + (result.output ?? "")
  };
}
|
|
250656
250736
|
function parseRunnerJson(stdout) {
|
|
250657
250737
|
const lines = stdout.trim().split(/\r?\n/).reverse();
|
|
250658
250738
|
for (const line of lines) {
|
|
@@ -250665,7 +250745,7 @@ function parseRunnerJson(stdout) {
|
|
|
250665
250745
|
}
|
|
250666
250746
|
return null;
|
|
250667
250747
|
}
|
|
250668
|
-
var DEFAULT_DIFFUSERS_IMAGE_MODEL, DEFAULT_OLLAMA_IMAGE_MODEL, DIFFUSERS_PYTHON_PACKAGES, SDCPP_PYTHON_PACKAGES, IMAGE_GENERATION_MODEL_PRESETS, OLLAMA_IMAGE_MODELS, DIFFUSERS_RUNNER, SDCPP_RUNNER, ImageGenerateTool;
|
|
250748
|
+
var DEFAULT_DIFFUSERS_IMAGE_MODEL, DEFAULT_OLLAMA_IMAGE_MODEL, DIFFUSERS_PYTHON_PACKAGES, SDCPP_PYTHON_PACKAGES, IMAGE_GENERATION_MODEL_PRESETS, IMAGE_GENERATION_QUALITY_LADDER, OLLAMA_IMAGE_MODELS, DIFFUSERS_RUNNER, SDCPP_RUNNER, ImageGenerateTool;
|
|
250669
250749
|
var init_image_generate = __esm({
|
|
250670
250750
|
"packages/execution/dist/tools/image-generate.js"() {
|
|
250671
250751
|
"use strict";
|
|
@@ -250989,6 +251069,21 @@ var init_image_generate = __esm({
|
|
|
250989
251069
|
note: "CPU/GGUF/checkpoint route; requires a local model path."
|
|
250990
251070
|
}
|
|
250991
251071
|
];
|
|
251072
|
+
IMAGE_GENERATION_QUALITY_LADDER = [
|
|
251073
|
+
"black-forest-labs/FLUX.1-dev",
|
|
251074
|
+
"stabilityai/stable-diffusion-3.5-large",
|
|
251075
|
+
DEFAULT_OLLAMA_IMAGE_MODEL,
|
|
251076
|
+
"black-forest-labs/FLUX.1-schnell",
|
|
251077
|
+
"stabilityai/stable-diffusion-3.5-large-turbo",
|
|
251078
|
+
"Tongyi-MAI/Z-Image-Turbo",
|
|
251079
|
+
"black-forest-labs/FLUX.2-klein-4B",
|
|
251080
|
+
DEFAULT_DIFFUSERS_IMAGE_MODEL,
|
|
251081
|
+
"Efficient-Large-Model/Sana_Sprint_0.6B_1024px_diffusers",
|
|
251082
|
+
"SimianLuo/LCM_Dreamshaper_v7",
|
|
251083
|
+
"stabilityai/sd-turbo",
|
|
251084
|
+
"segmind/tiny-sd",
|
|
251085
|
+
"nota-ai/bk-sdm-tiny-2m"
|
|
251086
|
+
];
|
|
250992
251087
|
OLLAMA_IMAGE_MODELS = IMAGE_GENERATION_MODEL_PRESETS.filter((preset) => preset.backend === "ollama").map((preset) => preset.id);
|
|
250993
251088
|
DIFFUSERS_RUNNER = String.raw`#!/usr/bin/env python3
|
|
250994
251089
|
import argparse
|
|
@@ -251170,7 +251265,7 @@ if __name__ == "__main__":
|
|
|
251170
251265
|
`;
|
|
251171
251266
|
ImageGenerateTool = class {
|
|
251172
251267
|
name = "generate_image";
|
|
251173
|
-
description = "Generate an image from a text prompt using a local image-generation backend. Supports Ollama image models (x/z-image-turbo, x/flux2-klein), Python Diffusers models (SDXL Turbo default, FLUX.1 dev, SD3.5 Large, Tiny-SD, LCM, Sana Sprint), and stable-diffusion.cpp local checkpoints/GGUF. Saves a PNG under .omnius/images and returns the file path.";
|
|
251268
|
+
description = "Generate an image from a text prompt using a local image-generation backend. Supports Ollama image models (x/z-image-turbo, x/flux2-klein), Python Diffusers models (SDXL Turbo default, FLUX.1 dev, SD3.5 Large, Tiny-SD, LCM, Sana Sprint), and stable-diffusion.cpp local checkpoints/GGUF. When fallback is enabled, auto generation tries ranked high-quality candidates first and falls back to smaller models if setup, download, or generation fails. Saves a PNG under .omnius/images and returns the file path.";
|
|
251174
251269
|
parameters = {
|
|
251175
251270
|
type: "object",
|
|
251176
251271
|
properties: {
|
|
@@ -251215,6 +251310,14 @@ if __name__ == "__main__":
|
|
|
251215
251310
|
type: "string",
|
|
251216
251311
|
enum: ["generate", "list_models", "setup"],
|
|
251217
251312
|
description: "Optional utility action. Default is generate."
|
|
251313
|
+
},
|
|
251314
|
+
fallback: {
|
|
251315
|
+
type: "boolean",
|
|
251316
|
+
description: "Whether to try the ranked quality ladder if the selected model/backend fails. Defaults true."
|
|
251317
|
+
},
|
|
251318
|
+
strict_model: {
|
|
251319
|
+
type: "boolean",
|
|
251320
|
+
description: "When true, use only the requested model/backend and do not fall back. Defaults false."
|
|
251218
251321
|
}
|
|
251219
251322
|
},
|
|
251220
251323
|
required: ["prompt"]
|
|
@@ -251257,7 +251360,7 @@ if __name__ == "__main__":
|
|
|
251257
251360
|
if (action === "list_models") {
|
|
251258
251361
|
return {
|
|
251259
251362
|
success: true,
|
|
251260
|
-
output: IMAGE_GENERATION_MODEL_PRESETS.map((
|
|
251363
|
+
output: IMAGE_GENERATION_MODEL_PRESETS.map((preset) => `${preset.id} [${preset.backend}] - ${preset.note}`).join("\n"),
|
|
251261
251364
|
durationMs: performance.now() - start2
|
|
251262
251365
|
};
|
|
251263
251366
|
}
|
|
@@ -251281,19 +251384,8 @@ if __name__ == "__main__":
|
|
|
251281
251384
|
const rawModel2 = args["model_path"] ? String(args["model_path"]) : args["model"] ? String(args["model"]) : this.defaultModel;
|
|
251282
251385
|
const requestedModel2 = rawModel2 === "auto" ? void 0 : rawModel2;
|
|
251283
251386
|
const requestedBackend2 = args["backend"] ? String(args["backend"]) : this.defaultBackend;
|
|
251284
|
-
|
|
251285
|
-
|
|
251286
|
-
backend = inferImageGenerationBackend(requestedModel2, void 0);
|
|
251287
|
-
if (backend === "auto")
|
|
251288
|
-
backend = "diffusers";
|
|
251289
|
-
}
|
|
251290
|
-
const model = requestedModel2 ?? (backend === "diffusers" ? DEFAULT_DIFFUSERS_IMAGE_MODEL : DEFAULT_OLLAMA_IMAGE_MODEL);
|
|
251291
|
-
this.emitProgress({ stage: "setup", message: `Preparing image model ${model} (${backend})` });
|
|
251292
|
-
if (backend === "ollama")
|
|
251293
|
-
return await this.prewarmOllama({ model, start: start2 });
|
|
251294
|
-
if (backend === "sdcpp")
|
|
251295
|
-
return await this.prewarmSdCpp({ model, start: start2, python: args["python"] });
|
|
251296
|
-
return await this.prewarmDiffusers({ model, start: start2, python: args["python"] });
|
|
251387
|
+
const candidates2 = imageGenerationFallbackCandidates(requestedModel2, requestedBackend2, generationFallbackEnabled(args));
|
|
251388
|
+
return await this.prewarmCandidateLadder({ candidates: candidates2, args, start: start2 });
|
|
251297
251389
|
}
|
|
251298
251390
|
const prompt = String(args["prompt"] ?? "").trim();
|
|
251299
251391
|
if (!prompt) {
|
|
@@ -251302,31 +251394,10 @@ if __name__ == "__main__":
|
|
|
251302
251394
|
const rawModel = args["model_path"] ? String(args["model_path"]) : args["model"] ? String(args["model"]) : this.defaultModel;
|
|
251303
251395
|
const requestedModel = rawModel === "auto" ? void 0 : rawModel;
|
|
251304
251396
|
const requestedBackend = args["backend"] ? String(args["backend"]) : this.defaultBackend;
|
|
251305
|
-
const preset = getImageGenerationPreset(requestedModel);
|
|
251306
|
-
const width = numberArg(args["width"], preset?.width ?? 1024);
|
|
251307
|
-
const height = numberArg(args["height"], preset?.height ?? 1024);
|
|
251308
|
-
const steps = optionalNumberArg(args["steps"]) ?? preset?.steps;
|
|
251309
|
-
const guidance = optionalNumberArg(args["guidance"]) ?? preset?.guidance;
|
|
251310
251397
|
const seed = optionalNumberArg(args["seed"]);
|
|
251398
|
+
const candidates = imageGenerationFallbackCandidates(requestedModel, requestedBackend, generationFallbackEnabled(args));
|
|
251311
251399
|
try {
|
|
251312
|
-
|
|
251313
|
-
let model = requestedModel;
|
|
251314
|
-
if (backend === "auto") {
|
|
251315
|
-
backend = inferImageGenerationBackend(model, void 0);
|
|
251316
|
-
if (backend === "auto")
|
|
251317
|
-
backend = "diffusers";
|
|
251318
|
-
}
|
|
251319
|
-
if (!model) {
|
|
251320
|
-
model = backend === "diffusers" ? DEFAULT_DIFFUSERS_IMAGE_MODEL : DEFAULT_OLLAMA_IMAGE_MODEL;
|
|
251321
|
-
}
|
|
251322
|
-
this.emitProgress({ stage: "setup", message: `Using image model ${model} (${backend})` });
|
|
251323
|
-
if (backend === "ollama") {
|
|
251324
|
-
return await this.generateWithOllama({ prompt, model, width, height, steps, start: start2 });
|
|
251325
|
-
}
|
|
251326
|
-
if (backend === "sdcpp") {
|
|
251327
|
-
return await this.generateWithSdCpp({ prompt, model, width, height, steps, seed, start: start2, python: args["python"] });
|
|
251328
|
-
}
|
|
251329
|
-
return await this.generateWithDiffusers({ prompt, model, width, height, steps, guidance, seed, start: start2, python: args["python"] });
|
|
251400
|
+
return await this.generateCandidateLadder({ candidates, prompt, args, seed, start: start2 });
|
|
251330
251401
|
} catch (err) {
|
|
251331
251402
|
return {
|
|
251332
251403
|
success: false,
|
|
@@ -251335,6 +251406,64 @@ if __name__ == "__main__":
|
|
|
251335
251406
|
};
|
|
251336
251407
|
}
|
|
251337
251408
|
}
|
|
251409
|
+
async prewarmCandidateLadder(args) {
|
|
251410
|
+
const failed = [];
|
|
251411
|
+
for (let index = 0; index < args.candidates.length; index++) {
|
|
251412
|
+
const candidate = args.candidates[index];
|
|
251413
|
+
this.emitProgress({
|
|
251414
|
+
stage: "setup",
|
|
251415
|
+
message: `Preparing image model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
|
|
251416
|
+
});
|
|
251417
|
+
const result = candidate.backend === "ollama" ? await this.prewarmOllama({ model: candidate.model, start: args.start }) : candidate.backend === "sdcpp" ? await this.prewarmSdCpp({ model: candidate.model, start: args.start, python: args.args["python"] }) : await this.prewarmDiffusers({ model: candidate.model, start: args.start, python: args.args["python"] });
|
|
251418
|
+
if (result.success)
|
|
251419
|
+
return annotateImageFallbackSuccess(result, failed, candidate);
|
|
251420
|
+
failed.push({ candidate, reason: summarizeToolResult(result) });
|
|
251421
|
+
if (index < args.candidates.length - 1) {
|
|
251422
|
+
this.emitProgress({
|
|
251423
|
+
stage: "setup",
|
|
251424
|
+
message: `${candidate.model} failed; trying ${args.candidates[index + 1].model}`
|
|
251425
|
+
});
|
|
251426
|
+
}
|
|
251427
|
+
}
|
|
251428
|
+
const output = formatImageFallbackFailure(failed);
|
|
251429
|
+
return {
|
|
251430
|
+
success: false,
|
|
251431
|
+
output,
|
|
251432
|
+
error: output,
|
|
251433
|
+
durationMs: performance.now() - args.start
|
|
251434
|
+
};
|
|
251435
|
+
}
|
|
251436
|
+
async generateCandidateLadder(args) {
|
|
251437
|
+
const failed = [];
|
|
251438
|
+
for (let index = 0; index < args.candidates.length; index++) {
|
|
251439
|
+
const candidate = args.candidates[index];
|
|
251440
|
+
const width = numberArg(args.args["width"], candidate.preset?.width ?? 1024);
|
|
251441
|
+
const height = numberArg(args.args["height"], candidate.preset?.height ?? 1024);
|
|
251442
|
+
const steps = optionalNumberArg(args.args["steps"]) ?? candidate.preset?.steps;
|
|
251443
|
+
const guidance = optionalNumberArg(args.args["guidance"]) ?? candidate.preset?.guidance;
|
|
251444
|
+
this.emitProgress({
|
|
251445
|
+
stage: "setup",
|
|
251446
|
+
message: `Using image model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
|
|
251447
|
+
});
|
|
251448
|
+
const result = candidate.backend === "ollama" ? await this.generateWithOllama({ prompt: args.prompt, model: candidate.model, width, height, steps, start: args.start }) : candidate.backend === "sdcpp" ? await this.generateWithSdCpp({ prompt: args.prompt, model: candidate.model, width, height, steps, seed: args.seed, start: args.start, python: args.args["python"] }) : await this.generateWithDiffusers({ prompt: args.prompt, model: candidate.model, width, height, steps, guidance, seed: args.seed, start: args.start, python: args.args["python"] });
|
|
251449
|
+
if (result.success)
|
|
251450
|
+
return annotateImageFallbackSuccess(result, failed, candidate);
|
|
251451
|
+
failed.push({ candidate, reason: summarizeToolResult(result) });
|
|
251452
|
+
if (index < args.candidates.length - 1) {
|
|
251453
|
+
this.emitProgress({
|
|
251454
|
+
stage: "setup",
|
|
251455
|
+
message: `${candidate.model} failed; falling back to ${args.candidates[index + 1].model}`
|
|
251456
|
+
});
|
|
251457
|
+
}
|
|
251458
|
+
}
|
|
251459
|
+
const output = formatImageFallbackFailure(failed);
|
|
251460
|
+
return {
|
|
251461
|
+
success: false,
|
|
251462
|
+
output,
|
|
251463
|
+
error: output,
|
|
251464
|
+
durationMs: performance.now() - args.start
|
|
251465
|
+
};
|
|
251466
|
+
}
|
|
251338
251467
|
async prewarmOllama(args) {
|
|
251339
251468
|
const model = args.model || DEFAULT_OLLAMA_IMAGE_MODEL;
|
|
251340
251469
|
if (await this.ollamaHasModel(model)) {
|
|
@@ -251830,7 +251959,7 @@ function backendImportCheck(backend) {
|
|
|
251830
251959
|
if (backend === "audiocraft")
|
|
251831
251960
|
return "import torch, torchaudio, audiocraft\nfrom audiocraft.models import MusicGen, AudioGen\n";
|
|
251832
251961
|
if (backend === "stable-audio")
|
|
251833
|
-
return "import torch, torchaudio,
|
|
251962
|
+
return "import torch, torchaudio, diffusers, scipy\nfrom diffusers import StableAudioPipeline\n";
|
|
251834
251963
|
if (backend === "tangoflux")
|
|
251835
251964
|
return "import torch, torchaudio\nfrom tangoflux import TangoFluxInference\n";
|
|
251836
251965
|
return "import torch, diffusers, scipy\nfrom diffusers import AudioLDMPipeline\n";
|
|
@@ -252160,11 +252289,31 @@ function playbackRequested(args) {
|
|
|
252160
252289
|
return false;
|
|
252161
252290
|
return true;
|
|
252162
252291
|
}
|
|
252292
|
+
/**
 * Coerce a loosely typed tool argument into a boolean.
 *
 * Booleans pass through unchanged. Strings are trimmed and matched
 * case-insensitively against 1/true/yes/on and 0/false/no/off. The numbers
 * 1 and 0 are treated like their string spellings so a numeric flag is not
 * silently replaced by the default. Any other value yields `fallback`.
 *
 * @param {unknown} value2 - Raw argument value.
 * @param {boolean} fallback - Value returned when `value2` is not recognised.
 * @returns {boolean} The coerced flag, or `fallback`.
 */
function booleanArg2(value2, fallback) {
  if (typeof value2 === "boolean")
    return value2;
  // Numeric 1/0 mirror the "1"/"0" strings accepted below.
  if (value2 === 1)
    return true;
  if (value2 === 0)
    return false;
  if (typeof value2 === "string") {
    // Trim once and reuse the result for both pattern checks.
    const text = value2.trim();
    if (/^(1|true|yes|on)$/i.test(text))
      return true;
    if (/^(0|false|no|off)$/i.test(text))
      return false;
  }
  return fallback;
}
|
|
252303
|
+
/**
 * Decide whether the fallback ladder may be used for a tool call.
 *
 * A strict flag (`strict_model`, `strictModel` or `strict`) that coerces to
 * true always disables fallback. Otherwise the first non-nullish of
 * `fallback`, `allow_fallback`, `allowFallback` is coerced, defaulting to true.
 *
 * @param {object} args - Tool-call arguments.
 * @returns {boolean} Whether fallback is enabled.
 */
function generationFallbackEnabled2(args) {
  const strictFlag = args["strict_model"] ?? args["strictModel"] ?? args["strict"];
  const strictRequested = booleanArg2(strictFlag, false);
  if (strictRequested) {
    return false;
  }
  const fallbackFlag = args["fallback"] ?? args["allow_fallback"] ?? args["allowFallback"];
  return booleanArg2(fallbackFlag, true);
}
|
|
252163
252308
|
/**
 * Look up an audio preset by model id.
 *
 * When `kind` is given, a preset with that id AND kind is preferred; if none
 * exists, the first preset with that id is returned regardless of kind.
 *
 * @param {string} [model] - Preset id to find.
 * @param {string} [kind] - Preferred preset kind.
 * @returns {object|undefined} The preset, or undefined when the model is
 *   falsy or unknown.
 */
function getAudioGenerationPreset(model, kind) {
  if (!model) {
    return void 0;
  }
  const sameId = AUDIO_GENERATION_MODEL_PRESETS.filter((preset) => preset.id === model);
  const preferred = sameId.find((preset) => !kind || preset.kind === kind);
  return preferred ?? sameId[0];
}
|
|
252313
|
+
/**
 * Resolve the quality ladder for an audio kind to its presets.
 *
 * "music" uses MUSIC_GENERATION_QUALITY_LADDER; any other kind uses
 * SOUND_GENERATION_QUALITY_LADDER. Ladder order is kept and ids for which no
 * preset is found are dropped.
 *
 * @param {string} kind - Audio kind.
 * @returns {object[]} Presets in ladder order.
 */
function audioGenerationQualityLadder(kind) {
  const ids = kind === "music" ? MUSIC_GENERATION_QUALITY_LADDER : SOUND_GENERATION_QUALITY_LADDER;
  const resolved = [];
  for (const presetId of ids) {
    const preset = getAudioGenerationPreset(presetId, kind);
    if (preset) {
      resolved.push(preset);
    }
  }
  return resolved;
}
|
|
252168
252317
|
function inferAudioGenerationBackend(model, requested) {
|
|
252169
252318
|
if (requested && requested !== "auto") {
|
|
252170
252319
|
if (requested === "diffusers" || requested === "transformers" || requested === "audiocraft" || requested === "stable-audio" || requested === "tangoflux" || requested === "project")
|
|
@@ -252188,6 +252337,41 @@ function inferAudioGenerationBackend(model, requested) {
|
|
|
252188
252337
|
return "project";
|
|
252189
252338
|
return "diffusers";
|
|
252190
252339
|
}
|
|
252340
|
+
/**
 * Build a ladder candidate for an audio model.
 *
 * The backend comes from inferAudioGenerationBackend; an unresolved "auto"
 * becomes "transformers" for music and "diffusers" otherwise.
 *
 * @param {string} kind - Audio kind.
 * @param {string} model - Model id.
 * @param {string} [requestedBackend] - Backend requested by the caller, if any.
 * @returns {{kind: string, model: string, backend: string, preset: (object|undefined)}}
 */
function audioCandidateFor(kind, model, requestedBackend) {
  let backend = inferAudioGenerationBackend(model, requestedBackend);
  if (backend === "auto") {
    backend = kind === "music" ? "transformers" : "diffusers";
  }
  const preset = getAudioGenerationPreset(model, kind);
  return { kind, model, backend, preset };
}
|
|
252350
|
+
/**
 * Produce the ordered list of audio candidates to attempt.
 *
 * The primary candidate is, in priority order: the requested model; the first
 * ladder preset for an explicitly requested (non-"auto") backend, or the
 * kind's default model; or, when fallback is disabled, the kind's default
 * model. With fallback enabled the ladder is appended starting at the
 * primary's position (the whole ladder when the primary is not on it).
 * Candidates are de-duplicated by kind, backend and model.
 *
 * @param {string} kind - Audio kind.
 * @param {string} [requestedModel] - Explicit model, if any.
 * @param {string} [requestedBackend] - Explicit backend, if any.
 * @param {boolean} [allowFallback=true] - Whether to append ladder fallbacks.
 * @returns {object[]} Candidates in attempt order.
 */
function audioGenerationFallbackCandidates(kind, requestedModel, requestedBackend, allowFallback = true) {
  const ladder = audioGenerationQualityLadder(kind);
  const explicitBackend = Boolean(requestedBackend) && requestedBackend !== "auto";
  const kindDefault = kind === "music" ? DEFAULT_MUSIC_MODEL : DEFAULT_SOUND_MODEL;
  const candidates = [];
  const seenKeys = new Set();
  const remember = (candidate) => {
    const key = `${candidate.kind}:${candidate.backend}:${candidate.model}`;
    if (seenKeys.has(key)) {
      return;
    }
    seenKeys.add(key);
    candidates.push(candidate);
  };

  if (requestedModel) {
    remember(audioCandidateFor(kind, requestedModel, requestedBackend));
  } else if (explicitBackend) {
    const firstForBackend = ladder.find((preset) => preset.backend === requestedBackend);
    remember(audioCandidateFor(kind, firstForBackend?.id ?? kindDefault, requestedBackend));
  } else if (!allowFallback) {
    remember(audioCandidateFor(kind, kindDefault, requestedBackend));
  }

  if (!allowFallback) {
    if (candidates.length) {
      return candidates;
    }
    return [audioCandidateFor(kind, kindDefault, requestedBackend)];
  }

  // Locate where on the ladder the primary sits; 0 when nothing was requested.
  let primaryIndex = 0;
  if (requestedModel) {
    primaryIndex = ladder.findIndex((preset) => preset.id === requestedModel);
  } else if (explicitBackend) {
    primaryIndex = ladder.findIndex((preset) => preset.backend === requestedBackend);
  }
  const fallbackTail = primaryIndex >= 0 ? ladder.slice(primaryIndex) : ladder;
  fallbackTail.forEach((preset) => {
    remember(audioCandidateFor(kind, preset.id));
  });
  return candidates;
}
|
|
252191
252375
|
function audioGenerationSetupPlan(kind, backend, repoRoot = ".", model) {
|
|
252192
252376
|
const commandName = kind === "music" ? "music" : "sound";
|
|
252193
252377
|
const fallback = kind === "music" ? DEFAULT_MUSIC_MODEL : DEFAULT_SOUND_MODEL;
|
|
@@ -252261,6 +252445,7 @@ function audioGenerationSetupPlan(kind, backend, repoRoot = ".", model) {
|
|
|
252261
252445
|
],
|
|
252262
252446
|
notes: [
|
|
252263
252447
|
"Use this path for Stable Audio Open 1.0, the serious stereo audio/music baseline.",
|
|
252448
|
+
"Omnius uses Diffusers StableAudioPipeline here; stable-audio-tools is intentionally not installed because it often pulls build-from-source dependencies.",
|
|
252264
252449
|
"Expect larger model downloads and higher VRAM pressure than AudioLDM or MusicGen small."
|
|
252265
252450
|
]
|
|
252266
252451
|
};
|
|
@@ -252296,7 +252481,34 @@ function audioGenerationSetupPlan(kind, backend, repoRoot = ".", model) {
|
|
|
252296
252481
|
]
|
|
252297
252482
|
};
|
|
252298
252483
|
}
|
|
252299
|
-
|
|
252484
|
+
/**
 * Condense a tool result into a single-line failure reason.
 *
 * Uses the first truthy of `result.error`, `result.output`, or the text
 * "unknown error"; passes it through trimProcessText2 with a limit argument
 * of 700, then collapses whitespace runs to single spaces and trims.
 *
 * @param {{error?: unknown, output?: unknown}} result - Tool result to summarise.
 * @returns {string} One-line summary.
 */
function summarizeToolResult2(result) {
  const rawText = String(result.error || result.output || "unknown error");
  const clipped = trimProcessText2(rawText, 700);
  const singleLine = clipped.replace(/\s+/g, " ");
  return singleLine.trim();
}
|
|
252487
|
+
/**
 * Render one ladder attempt as "<n>. <model> [<backend>] - <reason>".
 *
 * @param {{model: string, backend: string}} candidate - Attempted candidate.
 * @param {string} reason - Why the attempt failed.
 * @param {number} index - Zero-based attempt position; shown one-based.
 * @returns {string} The formatted line.
 */
function formatAudioAttempt(candidate, reason, index) {
  const ordinal = index + 1;
  const { model, backend } = candidate;
  return `${ordinal}. ${model} [${backend}] - ${reason}`;
}
|
|
252490
|
+
/**
 * Build the multi-line message reported when every audio candidate failed:
 * two header lines (the first names the kind) followed by one indented line
 * per failed attempt.
 *
 * @param {string} kind - Audio kind interpolated into the first line.
 * @param {{candidate: object, reason: string}[]} failed - Failed attempts in order.
 * @returns {string} Newline-joined report.
 */
function formatAudioFallbackFailure(kind, failed) {
  const lines = [
    `No ${kind} generation model in the fallback ladder completed successfully.`,
    "Attempted, highest quality to lowest:"
  ];
  failed.forEach((attempt, position) => {
    lines.push(` ${formatAudioAttempt(attempt.candidate, attempt.reason, position)}`);
  });
  return lines.join("\n");
}
|
|
252497
|
+
/**
 * Prefix a successful result with a summary of the attempts that failed
 * before it.
 *
 * When nothing failed, the result is returned untouched (same object).
 * Otherwise a copy is returned whose `output` starts with the winner, the
 * failure count and one line per failed attempt, followed by the original
 * output.
 *
 * @param {{output?: string}} result - Result from the winning candidate.
 * @param {{candidate: object, reason: string}[]} failed - Earlier failed attempts.
 * @param {{model: string, backend: string}} winner - Candidate that succeeded.
 * @returns {object} The original result, or an annotated copy.
 */
function annotateAudioFallbackSuccess(result, failed, winner) {
  if (failed.length === 0)
    return result;
  const prefix = [
    `Fallback ladder succeeded with ${winner.model} [${winner.backend}] after ${failed.length} failed attempt(s).`,
    "Failed attempts:",
    ...failed.map((attempt, index) => ` ${formatAudioAttempt(attempt.candidate, attempt.reason, index)}`),
    // The trailing empty entry makes the prefix end with a newline.
    ""
  ].join("\n");
  return {
    ...result,
    // A result without output must not render as the literal text "undefined".
    output: prefix + (result.output ?? "")
  };
}
|
|
252511
|
+
var DEFAULT_SOUND_MODEL, DEFAULT_MUSIC_MODEL, DIFFUSERS_AUDIO_PACKAGES, TRANSFORMERS_AUDIO_PACKAGES, AUDIOCRAFT_PACKAGES, STABLE_AUDIO_PACKAGES, TANGOFLUX_PACKAGES, AUDIO_GENERATION_MODEL_PRESETS, SOUND_GENERATION_QUALITY_LADDER, MUSIC_GENERATION_QUALITY_LADDER, DIFFUSERS_AUDIO_RUNNER, AUDIOCRAFT_RUNNER, TRANSFORMERS_AUDIO_RUNNER, TANGOFLUX_RUNNER, AudioGenerateTool;
|
|
252300
252512
|
var init_audio_generate = __esm({
|
|
252301
252513
|
"packages/execution/dist/tools/audio-generate.js"() {
|
|
252302
252514
|
"use strict";
|
|
@@ -252338,7 +252550,6 @@ var init_audio_generate = __esm({
|
|
|
252338
252550
|
"accelerate",
|
|
252339
252551
|
"scipy",
|
|
252340
252552
|
"soundfile",
|
|
252341
|
-
"stable-audio-tools",
|
|
252342
252553
|
"einops"
|
|
252343
252554
|
];
|
|
252344
252555
|
TANGOFLUX_PACKAGES = [
|
|
@@ -252644,6 +252855,21 @@ var init_audio_generate = __esm({
|
|
|
252644
252855
|
note: "Legacy specialized music-generation path."
|
|
252645
252856
|
}
|
|
252646
252857
|
];
|
|
252858
|
+
SOUND_GENERATION_QUALITY_LADDER = [
|
|
252859
|
+
"stabilityai/stable-audio-open-1.0",
|
|
252860
|
+
"cvssp/audioldm2-large",
|
|
252861
|
+
"cvssp/audioldm2",
|
|
252862
|
+
"facebook/audiogen-medium",
|
|
252863
|
+
"declare-lab/TangoFlux",
|
|
252864
|
+
DEFAULT_SOUND_MODEL
|
|
252865
|
+
];
|
|
252866
|
+
MUSIC_GENERATION_QUALITY_LADDER = [
|
|
252867
|
+
"stabilityai/stable-audio-open-1.0",
|
|
252868
|
+
"facebook/musicgen-stereo-large",
|
|
252869
|
+
"facebook/musicgen-large",
|
|
252870
|
+
"facebook/musicgen-medium",
|
|
252871
|
+
DEFAULT_MUSIC_MODEL
|
|
252872
|
+
];
|
|
252647
252873
|
DIFFUSERS_AUDIO_RUNNER = String.raw`#!/usr/bin/env python3
|
|
252648
252874
|
import argparse, json, sys, time
|
|
252649
252875
|
from pathlib import Path
|
|
@@ -253030,7 +253256,7 @@ if __name__ == "__main__":
|
|
|
253030
253256
|
`;
|
|
253031
253257
|
AudioGenerateTool = class {
|
|
253032
253258
|
name = "generate_audio";
|
|
253033
|
-
description = "Generate a sound effect or music clip from a text prompt using local audio-generation backends. Supports Diffusers AudioLDM/AudioLDM2, Transformers MusicGen, AudioCraft AudioGen, Stable Audio Open deployment paths, and explicit research-project profiles. Saves WAV files under .omnius/audio and returns the file path.";
|
|
253259
|
+
description = "Generate a sound effect or music clip from a text prompt using local audio-generation backends. Supports Diffusers AudioLDM/AudioLDM2, Transformers MusicGen, AudioCraft AudioGen, Stable Audio Open deployment paths, and explicit research-project profiles. When fallback is enabled, auto generation tries ranked high-quality candidates first and gracefully falls back to smaller models if setup, download, or generation fails. Saves WAV files under .omnius/audio and returns the file path.";
|
|
253034
253260
|
parameters = {
|
|
253035
253261
|
type: "object",
|
|
253036
253262
|
properties: {
|
|
@@ -253044,6 +253270,14 @@ if __name__ == "__main__":
|
|
|
253044
253270
|
playback: {
|
|
253045
253271
|
type: "boolean",
|
|
253046
253272
|
description: "Whether the TUI should play generated audio after saving it. Defaults true; set false for silent generation."
|
|
253273
|
+
},
|
|
253274
|
+
fallback: {
|
|
253275
|
+
type: "boolean",
|
|
253276
|
+
description: "Whether to try the ranked quality ladder if the selected model/backend fails. Defaults true."
|
|
253277
|
+
},
|
|
253278
|
+
strict_model: {
|
|
253279
|
+
type: "boolean",
|
|
253280
|
+
description: "When true, use only the requested model/backend and do not fall back. Defaults false."
|
|
253047
253281
|
}
|
|
253048
253282
|
},
|
|
253049
253283
|
required: ["prompt"]
|
|
@@ -253147,14 +253381,14 @@ if __name__ == "__main__":
|
|
|
253147
253381
|
if (action === "list_models") {
|
|
253148
253382
|
return {
|
|
253149
253383
|
success: true,
|
|
253150
|
-
output: AUDIO_GENERATION_MODEL_PRESETS.filter((
|
|
253384
|
+
output: AUDIO_GENERATION_MODEL_PRESETS.filter((preset) => preset.kind === kind).map((preset) => `${preset.id} [${preset.backend}] - ${preset.note}`).join("\n"),
|
|
253151
253385
|
durationMs: performance.now() - start2
|
|
253152
253386
|
};
|
|
253153
253387
|
}
|
|
253154
253388
|
if (action === "setup") {
|
|
253155
253389
|
const requested = String(args["backend"] ?? (kind === "music" ? this.defaults.musicBackend : this.defaults.soundBackend) ?? (kind === "music" ? "transformers" : "diffusers"));
|
|
253156
|
-
const
|
|
253157
|
-
const resolvedBackend =
|
|
253390
|
+
const backend = inferAudioGenerationBackend(typeof args["model"] === "string" ? args["model"] : void 0, requested);
|
|
253391
|
+
const resolvedBackend = backend === "auto" ? kind === "music" ? "transformers" : "diffusers" : backend;
|
|
253158
253392
|
const plan = audioGenerationSetupPlan(kind, resolvedBackend, this.cwd, typeof args["model"] === "string" ? args["model"] : void 0);
|
|
253159
253393
|
return {
|
|
253160
253394
|
success: true,
|
|
@@ -253173,37 +253407,9 @@ if __name__ == "__main__":
|
|
|
253173
253407
|
const defaultBackend2 = kind === "music" ? this.defaults.musicBackend : this.defaults.soundBackend;
|
|
253174
253408
|
const rawModel2 = args["model"] ? String(args["model"]) : defaultModel2;
|
|
253175
253409
|
const requestedModel2 = rawModel2 === "auto" ? void 0 : rawModel2;
|
|
253176
|
-
|
|
253177
|
-
|
|
253178
|
-
|
|
253179
|
-
const model2 = requestedModel2 ?? (kind === "music" ? DEFAULT_MUSIC_MODEL : DEFAULT_SOUND_MODEL);
|
|
253180
|
-
const preset2 = getAudioGenerationPreset(model2, kind);
|
|
253181
|
-
const duration2 = numberArg2(args["duration"], preset2?.defaultDurationSec ?? (kind === "music" ? 20 : 8));
|
|
253182
|
-
if (backend2 === "project") {
|
|
253183
|
-
const plan = audioGenerationSetupPlan(kind, "project", this.cwd, model2);
|
|
253184
|
-
return {
|
|
253185
|
-
success: false,
|
|
253186
|
-
output: [
|
|
253187
|
-
`${preset2?.label ?? model2} is a project deployment profile, not an automatic generic runner.`,
|
|
253188
|
-
"",
|
|
253189
|
-
"Setup path:",
|
|
253190
|
-
...plan.commands.map((cmd) => ` ${cmd}`),
|
|
253191
|
-
"",
|
|
253192
|
-
...plan.notes.map((note) => `- ${note}`)
|
|
253193
|
-
].join("\n"),
|
|
253194
|
-
durationMs: performance.now() - start2
|
|
253195
|
-
};
|
|
253196
|
-
}
|
|
253197
|
-
this.emitProgress({ stage: "setup", message: `Preparing ${kind} model ${model2} (${backend2})` });
|
|
253198
|
-
return await this.prewarmPythonBackend({
|
|
253199
|
-
kind,
|
|
253200
|
-
backend: backend2,
|
|
253201
|
-
runnerBackend: backend2,
|
|
253202
|
-
model: model2,
|
|
253203
|
-
duration: duration2,
|
|
253204
|
-
start: start2,
|
|
253205
|
-
python: args["python"]
|
|
253206
|
-
});
|
|
253410
|
+
const requestedBackend2 = args["backend"] ? String(args["backend"]) : defaultBackend2;
|
|
253411
|
+
const candidates2 = audioGenerationFallbackCandidates(kind, requestedModel2, requestedBackend2, generationFallbackEnabled2(args));
|
|
253412
|
+
return await this.prewarmCandidateLadder({ kind, candidates: candidates2, args, start: start2 });
|
|
253207
253413
|
}
|
|
253208
253414
|
const prompt = String(args["prompt"] ?? "").trim();
|
|
253209
253415
|
if (!prompt) {
|
|
@@ -253213,45 +253419,12 @@ if __name__ == "__main__":
|
|
|
253213
253419
|
const defaultBackend = kind === "music" ? this.defaults.musicBackend : this.defaults.soundBackend;
|
|
253214
253420
|
const rawModel = args["model"] ? String(args["model"]) : defaultModel;
|
|
253215
253421
|
const requestedModel = rawModel === "auto" ? void 0 : rawModel;
|
|
253216
|
-
|
|
253217
|
-
|
|
253218
|
-
backend = kind === "music" ? "transformers" : "diffusers";
|
|
253219
|
-
const model = requestedModel ?? (kind === "music" ? DEFAULT_MUSIC_MODEL : DEFAULT_SOUND_MODEL);
|
|
253220
|
-
const preset = getAudioGenerationPreset(model, kind);
|
|
253221
|
-
const duration = numberArg2(args["duration"], preset?.defaultDurationSec ?? (kind === "music" ? 20 : 8));
|
|
253222
|
-
const steps = optionalNumberArg2(args["steps"]) ?? preset?.defaultSteps;
|
|
253422
|
+
const requestedBackend = args["backend"] ? String(args["backend"]) : defaultBackend;
|
|
253423
|
+
const candidates = audioGenerationFallbackCandidates(kind, requestedModel, requestedBackend, generationFallbackEnabled2(args));
|
|
253223
253424
|
const seed = optionalNumberArg2(args["seed"]);
|
|
253224
253425
|
const playback = playbackRequested(args);
|
|
253225
253426
|
try {
|
|
253226
|
-
this.
|
|
253227
|
-
if (backend === "project") {
|
|
253228
|
-
const plan = audioGenerationSetupPlan(kind, "project", this.cwd, model);
|
|
253229
|
-
return {
|
|
253230
|
-
success: false,
|
|
253231
|
-
output: [
|
|
253232
|
-
`${preset?.label ?? model} is a project deployment profile, not an automatic generic runner.`,
|
|
253233
|
-
"",
|
|
253234
|
-
"Setup path:",
|
|
253235
|
-
...plan.commands.map((cmd) => ` ${cmd}`),
|
|
253236
|
-
"",
|
|
253237
|
-
...plan.notes.map((note) => `- ${note}`)
|
|
253238
|
-
].join("\n"),
|
|
253239
|
-
durationMs: performance.now() - start2
|
|
253240
|
-
};
|
|
253241
|
-
}
|
|
253242
|
-
if (backend === "tangoflux") {
|
|
253243
|
-
return await this.generateWithPythonBackend({ kind, backend, runnerBackend: "tangoflux", prompt, model, duration, steps, seed, playback, start: start2, python: args["python"] });
|
|
253244
|
-
}
|
|
253245
|
-
if (backend === "transformers") {
|
|
253246
|
-
return await this.generateWithPythonBackend({ kind, backend, runnerBackend: "transformers", prompt, model, duration, steps, seed, playback, start: start2, python: args["python"] });
|
|
253247
|
-
}
|
|
253248
|
-
if (backend === "audiocraft") {
|
|
253249
|
-
return await this.generateWithPythonBackend({ kind, backend, runnerBackend: "audiocraft", prompt, model, duration, steps, seed, playback, start: start2, python: args["python"] });
|
|
253250
|
-
}
|
|
253251
|
-
if (backend === "stable-audio") {
|
|
253252
|
-
return await this.generateWithPythonBackend({ kind, backend, runnerBackend: "stable-audio", prompt, model, duration, steps, seed, playback, start: start2, python: args["python"] });
|
|
253253
|
-
}
|
|
253254
|
-
return await this.generateWithPythonBackend({ kind, backend: "diffusers", runnerBackend: "diffusers", prompt, model, duration, steps, seed, playback, start: start2, python: args["python"] });
|
|
253427
|
+
return await this.generateCandidateLadder({ kind, candidates, prompt, args, seed, playback, start: start2 });
|
|
253255
253428
|
} catch (err) {
|
|
253256
253429
|
return {
|
|
253257
253430
|
success: false,
|
|
@@ -253260,6 +253433,96 @@ if __name__ == "__main__":
|
|
|
253260
253433
|
};
|
|
253261
253434
|
}
|
|
253262
253435
|
}
|
|
253436
|
+
async prewarmCandidateLadder(args) {
|
|
253437
|
+
const failed = [];
|
|
253438
|
+
for (let index = 0; index < args.candidates.length; index++) {
|
|
253439
|
+
const candidate = args.candidates[index];
|
|
253440
|
+
const duration = numberArg2(args.args["duration"], candidate.preset?.defaultDurationSec ?? (args.kind === "music" ? 20 : 8));
|
|
253441
|
+
this.emitProgress({
|
|
253442
|
+
stage: "setup",
|
|
253443
|
+
message: `Preparing ${args.kind} model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
|
|
253444
|
+
});
|
|
253445
|
+
const result = candidate.backend === "project" ? this.projectProfileResult(args.kind, candidate, args.start) : await this.prewarmPythonBackend({
|
|
253446
|
+
kind: args.kind,
|
|
253447
|
+
backend: candidate.backend,
|
|
253448
|
+
runnerBackend: candidate.backend,
|
|
253449
|
+
model: candidate.model,
|
|
253450
|
+
duration,
|
|
253451
|
+
start: args.start,
|
|
253452
|
+
python: args.args["python"]
|
|
253453
|
+
});
|
|
253454
|
+
if (result.success)
|
|
253455
|
+
return annotateAudioFallbackSuccess(result, failed, candidate);
|
|
253456
|
+
failed.push({ candidate, reason: summarizeToolResult2(result) });
|
|
253457
|
+
if (index < args.candidates.length - 1) {
|
|
253458
|
+
this.emitProgress({
|
|
253459
|
+
stage: "setup",
|
|
253460
|
+
message: `${candidate.model} failed; trying ${args.candidates[index + 1].model}`
|
|
253461
|
+
});
|
|
253462
|
+
}
|
|
253463
|
+
}
|
|
253464
|
+
return {
|
|
253465
|
+
success: false,
|
|
253466
|
+
output: formatAudioFallbackFailure(args.kind, failed),
|
|
253467
|
+
error: formatAudioFallbackFailure(args.kind, failed),
|
|
253468
|
+
durationMs: performance.now() - args.start
|
|
253469
|
+
};
|
|
253470
|
+
}
|
|
253471
|
+
async generateCandidateLadder(args) {
|
|
253472
|
+
const failed = [];
|
|
253473
|
+
for (let index = 0; index < args.candidates.length; index++) {
|
|
253474
|
+
const candidate = args.candidates[index];
|
|
253475
|
+
const duration = numberArg2(args.args["duration"], candidate.preset?.defaultDurationSec ?? (args.kind === "music" ? 20 : 8));
|
|
253476
|
+
const steps = optionalNumberArg2(args.args["steps"]) ?? candidate.preset?.defaultSteps;
|
|
253477
|
+
this.emitProgress({
|
|
253478
|
+
stage: "setup",
|
|
253479
|
+
message: `Using ${args.kind} model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
|
|
253480
|
+
});
|
|
253481
|
+
const result = candidate.backend === "project" ? this.projectProfileResult(args.kind, candidate, args.start) : await this.generateWithPythonBackend({
|
|
253482
|
+
kind: args.kind,
|
|
253483
|
+
backend: candidate.backend,
|
|
253484
|
+
runnerBackend: candidate.backend,
|
|
253485
|
+
prompt: args.prompt,
|
|
253486
|
+
model: candidate.model,
|
|
253487
|
+
duration,
|
|
253488
|
+
steps,
|
|
253489
|
+
seed: args.seed,
|
|
253490
|
+
playback: args.playback,
|
|
253491
|
+
start: args.start,
|
|
253492
|
+
python: args.args["python"]
|
|
253493
|
+
});
|
|
253494
|
+
if (result.success)
|
|
253495
|
+
return annotateAudioFallbackSuccess(result, failed, candidate);
|
|
253496
|
+
failed.push({ candidate, reason: summarizeToolResult2(result) });
|
|
253497
|
+
if (index < args.candidates.length - 1) {
|
|
253498
|
+
this.emitProgress({
|
|
253499
|
+
stage: "setup",
|
|
253500
|
+
message: `${candidate.model} failed; falling back to ${args.candidates[index + 1].model}`
|
|
253501
|
+
});
|
|
253502
|
+
}
|
|
253503
|
+
}
|
|
253504
|
+
return {
|
|
253505
|
+
success: false,
|
|
253506
|
+
output: formatAudioFallbackFailure(args.kind, failed),
|
|
253507
|
+
error: formatAudioFallbackFailure(args.kind, failed),
|
|
253508
|
+
durationMs: performance.now() - args.start
|
|
253509
|
+
};
|
|
253510
|
+
}
|
|
253511
|
+
projectProfileResult(kind, candidate, start2) {
|
|
253512
|
+
const plan = audioGenerationSetupPlan(kind, "project", this.cwd, candidate.model);
|
|
253513
|
+
return {
|
|
253514
|
+
success: false,
|
|
253515
|
+
output: [
|
|
253516
|
+
`${candidate.preset?.label ?? candidate.model} is a project deployment profile, not an automatic generic runner.`,
|
|
253517
|
+
"",
|
|
253518
|
+
"Setup path:",
|
|
253519
|
+
...plan.commands.map((cmd) => ` ${cmd}`),
|
|
253520
|
+
"",
|
|
253521
|
+
...plan.notes.map((note) => `- ${note}`)
|
|
253522
|
+
].join("\n"),
|
|
253523
|
+
durationMs: performance.now() - start2
|
|
253524
|
+
};
|
|
253525
|
+
}
|
|
253263
253526
|
async generateWithPythonBackend(args) {
|
|
253264
253527
|
const runner = await ensureAudioRunner(this.cwd, args.runnerBackend);
|
|
253265
253528
|
await mkdir12(audioOutputDir(this.cwd), { recursive: true });
|
|
@@ -507359,6 +507622,18 @@ function supertonicInferScript() {
|
|
|
507359
507622
|
function mlxVenvPy() {
|
|
507360
507623
|
return process.platform === "win32" ? join58(voiceDir(), "mlx-venv", "Scripts", "python.exe") : join58(voiceDir(), "mlx-venv", "bin", "python3");
|
|
507361
507624
|
}
|
|
507625
|
+
function luxttsVenvDir() {
|
|
507626
|
+
return join58(voiceDir(), "luxtts-venv");
|
|
507627
|
+
}
|
|
507628
|
+
function luxttsVenvPy() {
|
|
507629
|
+
return process.platform === "win32" ? join58(luxttsVenvDir(), "Scripts", "python.exe") : join58(luxttsVenvDir(), "bin", "python3");
|
|
507630
|
+
}
|
|
507631
|
+
function luxttsRepoDir() {
|
|
507632
|
+
return join58(voiceDir(), "LuxTTS");
|
|
507633
|
+
}
|
|
507634
|
+
function luxttsInferScript() {
|
|
507635
|
+
return join58(voiceDir(), "luxtts-infer.py");
|
|
507636
|
+
}
|
|
507362
507637
|
function piperVenvDir() {
|
|
507363
507638
|
return join58(voiceDir(), "piper-venv");
|
|
507364
507639
|
}
|
|
@@ -507385,7 +507660,7 @@ function ensureSupertonicInstalled() {
|
|
|
507385
507660
|
}
|
|
507386
507661
|
function ensureMlxInstalled() {
|
|
507387
507662
|
if (process.platform !== "darwin" || process.arch !== "arm64") {
|
|
507388
|
-
throw new Error("MLX TTS requires macOS on Apple Silicon. Use luxtts, supertonic, onnx/piper, or
|
|
507663
|
+
throw new Error("MLX TTS requires macOS on Apple Silicon. Use luxtts, supertonic, onnx/piper, or backend=auto on this machine.");
|
|
507389
507664
|
}
|
|
507390
507665
|
const venvPy = mlxVenvPy();
|
|
507391
507666
|
if (!existsSync40(venvPy)) {
|
|
@@ -507402,6 +507677,81 @@ function ensureMlxInstalled() {
|
|
|
507402
507677
|
}
|
|
507403
507678
|
return venvPy;
|
|
507404
507679
|
}
|
|
507680
|
+
function pythonCanImportLuxTts(venvPy) {
|
|
507681
|
+
try {
|
|
507682
|
+
execFileSync2(venvPy, [
|
|
507683
|
+
"-c",
|
|
507684
|
+
"import sys, os; sys.path.insert(0, os.environ['LUXTTS_REPO_PATH']); from zipvoice.luxvoice import LuxTTS; print('ok')"
|
|
507685
|
+
], {
|
|
507686
|
+
stdio: "pipe",
|
|
507687
|
+
timeout: 3e4,
|
|
507688
|
+
env: { ...process.env, LUXTTS_REPO_PATH: luxttsRepoDir() }
|
|
507689
|
+
});
|
|
507690
|
+
return true;
|
|
507691
|
+
} catch {
|
|
507692
|
+
return false;
|
|
507693
|
+
}
|
|
507694
|
+
}
|
|
507695
|
+
function pipInstall(venvPy, packages, timeout2 = 9e5) {
|
|
507696
|
+
execFileSync2(venvPy, ["-m", "pip", "install", "--prefer-binary", ...packages], {
|
|
507697
|
+
stdio: "pipe",
|
|
507698
|
+
timeout: timeout2,
|
|
507699
|
+
env: process.env
|
|
507700
|
+
});
|
|
507701
|
+
}
|
|
507702
|
+
function ensureLuxttsInstalled() {
|
|
507703
|
+
const venvPy = luxttsVenvPy();
|
|
507704
|
+
const repoDir = luxttsRepoDir();
|
|
507705
|
+
mkdirSync16(voiceDir(), { recursive: true });
|
|
507706
|
+
if (existsSync40(venvPy) && existsSync40(join58(repoDir, "zipvoice", "luxvoice.py")) && pythonCanImportLuxTts(venvPy)) {
|
|
507707
|
+
writeFileSync16(luxttsInferScript(), LUXTTS_DAEMON_PY, "utf-8");
|
|
507708
|
+
return venvPy;
|
|
507709
|
+
}
|
|
507710
|
+
const py = findPython32();
|
|
507711
|
+
if (!py)
|
|
507712
|
+
throw new Error("python3 is required to set up LuxTTS voice cloning.");
|
|
507713
|
+
if (!existsSync40(venvPy)) {
|
|
507714
|
+
execFileSync2(py, ["-m", "venv", luxttsVenvDir()], { stdio: "pipe", timeout: 18e4 });
|
|
507715
|
+
}
|
|
507716
|
+
execFileSync2(venvPy, ["-m", "pip", "install", "--upgrade", "pip", "wheel", "setuptools<81"], {
|
|
507717
|
+
stdio: "pipe",
|
|
507718
|
+
timeout: 3e5
|
|
507719
|
+
});
|
|
507720
|
+
pipInstall(venvPy, ["torch", "torchaudio"], 12e5);
|
|
507721
|
+
if (!existsSync40(join58(repoDir, "zipvoice", "luxvoice.py"))) {
|
|
507722
|
+
if (!hasCommand3("git"))
|
|
507723
|
+
throw new Error("git is required to set up LuxTTS voice cloning.");
|
|
507724
|
+
execFileSync2("git", ["clone", "--depth", "1", "https://github.com/ysharma3501/LuxTTS.git", repoDir], {
|
|
507725
|
+
stdio: "pipe",
|
|
507726
|
+
timeout: 3e5
|
|
507727
|
+
});
|
|
507728
|
+
}
|
|
507729
|
+
pipInstall(venvPy, [
|
|
507730
|
+
"lhotse",
|
|
507731
|
+
"huggingface_hub",
|
|
507732
|
+
"safetensors",
|
|
507733
|
+
"pydub",
|
|
507734
|
+
"onnxruntime",
|
|
507735
|
+
"librosa",
|
|
507736
|
+
"transformers<=4.57.6",
|
|
507737
|
+
"inflect",
|
|
507738
|
+
"numpy",
|
|
507739
|
+
"vocos",
|
|
507740
|
+
"jieba",
|
|
507741
|
+
"pypinyin",
|
|
507742
|
+
"cn2an"
|
|
507743
|
+
], 12e5);
|
|
507744
|
+
try {
|
|
507745
|
+
pipInstall(venvPy, ["git+https://github.com/ysharma3501/LinaCodec.git"], 12e5);
|
|
507746
|
+
} catch {
|
|
507747
|
+
}
|
|
507748
|
+
pipInstall(venvPy, ["-e", repoDir], 6e5);
|
|
507749
|
+
writeFileSync16(luxttsInferScript(), LUXTTS_DAEMON_PY, "utf-8");
|
|
507750
|
+
if (!pythonCanImportLuxTts(venvPy)) {
|
|
507751
|
+
throw new Error(`LuxTTS setup completed but import still fails in ${luxttsVenvDir()}.`);
|
|
507752
|
+
}
|
|
507753
|
+
return venvPy;
|
|
507754
|
+
}
|
|
507405
507755
|
function ensurePiperInstalled() {
|
|
507406
507756
|
if (hasCommand3("piper"))
|
|
507407
507757
|
return "piper";
|
|
@@ -507435,6 +507785,28 @@ function saveCloneRefFromSample(sample, cloneName) {
|
|
|
507435
507785
|
copyFileSync2(source, dest);
|
|
507436
507786
|
return dest;
|
|
507437
507787
|
}
|
|
507788
|
+
function cloneSampleArg(args) {
|
|
507789
|
+
for (const key of ["sample", "source_audio", "voice_sample", "reference_audio", "ref_audio", "clone_sample"]) {
|
|
507790
|
+
const value2 = args[key];
|
|
507791
|
+
if (typeof value2 === "string" && value2.trim())
|
|
507792
|
+
return value2.trim();
|
|
507793
|
+
}
|
|
507794
|
+
return "";
|
|
507795
|
+
}
|
|
507796
|
+
function wantsVoiceClone(args) {
|
|
507797
|
+
if (cloneSampleArg(args))
|
|
507798
|
+
return true;
|
|
507799
|
+
if (typeof args["clone_ref"] === "string" && args["clone_ref"].trim())
|
|
507800
|
+
return true;
|
|
507801
|
+
const voice = typeof args["voice"] === "string" ? args["voice"].trim() : "";
|
|
507802
|
+
return /\.(wav|mp3|flac|ogg|m4a)$/i.test(voice) || voice.startsWith("/") || voice.startsWith("./") || voice.startsWith("../") || voice.startsWith("~/");
|
|
507803
|
+
}
|
|
507804
|
+
function cloneRefForSynthesis(args) {
|
|
507805
|
+
const sample = cloneSampleArg(args);
|
|
507806
|
+
if (sample)
|
|
507807
|
+
return saveCloneRefFromSample(sample, typeof args["clone_name"] === "string" ? args["clone_name"] : void 0);
|
|
507808
|
+
return resolveCloneRef(args["clone_ref"] ?? args["voice"]);
|
|
507809
|
+
}
|
|
507438
507810
|
function ensureLuxttsDaemon() {
|
|
507439
507811
|
if (_luxttsDaemon && !_luxttsDaemon.killed && _luxttsReady)
|
|
507440
507812
|
return Promise.resolve(true);
|
|
@@ -507448,14 +507820,23 @@ function ensureLuxttsDaemon() {
|
|
|
507448
507820
|
}
|
|
507449
507821
|
if (_luxttsStarting)
|
|
507450
507822
|
return Promise.resolve(false);
|
|
507451
|
-
const venvPy =
|
|
507452
|
-
const inferScript =
|
|
507453
|
-
const repoDir =
|
|
507823
|
+
const venvPy = luxttsVenvPy();
|
|
507824
|
+
const inferScript = luxttsInferScript();
|
|
507825
|
+
const repoDir = luxttsRepoDir();
|
|
507454
507826
|
if (!existsSync40(venvPy) || !existsSync40(inferScript))
|
|
507455
507827
|
return Promise.resolve(false);
|
|
507456
507828
|
_luxttsStarting = true;
|
|
507457
507829
|
return new Promise((resolve48) => {
|
|
507458
|
-
|
|
507830
|
+
let settled = false;
|
|
507831
|
+
let timeout2;
|
|
507832
|
+
const finish = (ready) => {
|
|
507833
|
+
if (settled)
|
|
507834
|
+
return;
|
|
507835
|
+
settled = true;
|
|
507836
|
+
clearTimeout(timeout2);
|
|
507837
|
+
resolve48(ready);
|
|
507838
|
+
};
|
|
507839
|
+
timeout2 = setTimeout(() => {
|
|
507459
507840
|
_luxttsStarting = false;
|
|
507460
507841
|
if (_luxttsDaemon && !_luxttsReady) {
|
|
507461
507842
|
try {
|
|
@@ -507464,7 +507845,7 @@ function ensureLuxttsDaemon() {
|
|
|
507464
507845
|
}
|
|
507465
507846
|
_luxttsDaemon = null;
|
|
507466
507847
|
}
|
|
507467
|
-
|
|
507848
|
+
finish(false);
|
|
507468
507849
|
}, 12e4);
|
|
507469
507850
|
const daemon = spawn16(venvPy, [inferScript], {
|
|
507470
507851
|
stdio: ["pipe", "pipe", "pipe"],
|
|
@@ -507486,8 +507867,7 @@ function ensureLuxttsDaemon() {
|
|
|
507486
507867
|
if (msg.type === "ready") {
|
|
507487
507868
|
_luxttsReady = true;
|
|
507488
507869
|
_luxttsStarting = false;
|
|
507489
|
-
|
|
507490
|
-
resolve48(true);
|
|
507870
|
+
finish(true);
|
|
507491
507871
|
} else if (msg.type === "result" && msg.id) {
|
|
507492
507872
|
const pending = _luxttsPending.get(msg.id);
|
|
507493
507873
|
if (pending) {
|
|
@@ -507509,13 +507889,13 @@ function ensureLuxttsDaemon() {
|
|
|
507509
507889
|
_luxttsDaemon = null;
|
|
507510
507890
|
_luxttsReady = false;
|
|
507511
507891
|
_luxttsStarting = false;
|
|
507892
|
+
finish(false);
|
|
507512
507893
|
});
|
|
507513
507894
|
daemon.on("error", () => {
|
|
507514
507895
|
_luxttsDaemon = null;
|
|
507515
507896
|
_luxttsReady = false;
|
|
507516
507897
|
_luxttsStarting = false;
|
|
507517
|
-
|
|
507518
|
-
resolve48(false);
|
|
507898
|
+
finish(false);
|
|
507519
507899
|
});
|
|
507520
507900
|
});
|
|
507521
507901
|
}
|
|
@@ -507545,7 +507925,7 @@ function luxttsSynthesize(text, cloneRef, outputPath2, speed = 1) {
|
|
|
507545
507925
|
_luxttsDaemon.stdin.write(req2 + "\n");
|
|
507546
507926
|
});
|
|
507547
507927
|
}
|
|
507548
|
-
var _luxttsDaemon, _luxttsReady, _luxttsRequestId, _luxttsPending, _luxttsBuffer, _luxttsStarting, SUPERTONIC_INFER_PY, AudioPlaybackTool, TtsGenerateTool, SoundPlaybackTool;
|
|
507928
|
+
var _luxttsDaemon, _luxttsReady, _luxttsRequestId, _luxttsPending, _luxttsBuffer, _luxttsStarting, SUPERTONIC_INFER_PY, LUXTTS_DAEMON_PY, AudioPlaybackTool, TtsGenerateTool, SoundPlaybackTool;
|
|
507549
507929
|
var init_audio_playback = __esm({
|
|
507550
507930
|
"packages/execution/dist/tools/audio-playback.js"() {
|
|
507551
507931
|
"use strict";
|
|
@@ -507585,10 +507965,45 @@ try:
|
|
|
507585
507965
|
except Exception as exc:
|
|
507586
507966
|
print(json.dumps({"ok": False, "error": str(exc), "trace": traceback.format_exc(limit=3)}))
|
|
507587
507967
|
sys.exit(1)
|
|
507968
|
+
`;
|
|
507969
|
+
LUXTTS_DAEMON_PY = String.raw`
|
|
507970
|
+
import json, os, sys, traceback, wave
|
|
507971
|
+
import numpy as np
|
|
507972
|
+
import torch
|
|
507973
|
+
repo = os.environ.get("LUXTTS_REPO_PATH") or ""
|
|
507974
|
+
if repo:
|
|
507975
|
+
sys.path.insert(0, repo)
|
|
507976
|
+
from zipvoice.luxvoice import LuxTTS
|
|
507977
|
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
507978
|
+
tts = LuxTTS(model_path="YatharthS/LuxTTS", device=device, threads=4)
|
|
507979
|
+
print(json.dumps({"type": "ready", "device": device}), flush=True)
|
|
507980
|
+
for line in sys.stdin:
|
|
507981
|
+
if not line.strip():
|
|
507982
|
+
continue
|
|
507983
|
+
req = json.loads(line)
|
|
507984
|
+
if req.get("action") == "quit":
|
|
507985
|
+
break
|
|
507986
|
+
rid = req.get("id")
|
|
507987
|
+
try:
|
|
507988
|
+
text = str(req.get("text") or "").strip()
|
|
507989
|
+
clone_ref = str(req.get("clone_ref") or "")
|
|
507990
|
+
output = str(req.get("output_path") or "")
|
|
507991
|
+
speed = float(req.get("speed") or 1.0)
|
|
507992
|
+
enc = tts.encode_prompt(clone_ref, duration=5, rms=0.001)
|
|
507993
|
+
wav = tts.generate_speech(text, enc, num_steps=4, guidance_scale=3.0, t_shift=0.5, speed=speed)
|
|
507994
|
+
data = (np.clip(wav.cpu().numpy().squeeze(), -1, 1) * 32767).astype(np.int16)
|
|
507995
|
+
with wave.open(output, "wb") as f:
|
|
507996
|
+
f.setnchannels(1)
|
|
507997
|
+
f.setsampwidth(2)
|
|
507998
|
+
f.setframerate(48000)
|
|
507999
|
+
f.writeframes(data.tobytes())
|
|
508000
|
+
print(json.dumps({"type": "result", "id": rid, "path": output}), flush=True)
|
|
508001
|
+
except Exception as exc:
|
|
508002
|
+
print(json.dumps({"type": "error", "id": rid, "error": str(exc), "trace": traceback.format_exc(limit=3)}), flush=True)
|
|
507588
508003
|
`;
|
|
507589
508004
|
AudioPlaybackTool = class {
|
|
507590
508005
|
name = "audio_playback";
|
|
507591
|
-
description = "Play audio through speakers, synthesize text-to-speech, and manage TTS clone voices. Actions: 'play' to play an audio file (WAV/MP3/OGG — including recordings from memory episodes), 'speak' to synthesize and play text, 'synthesize' to save TTS to a WAV file, 'clone' to register a voice-clone
|
|
508006
|
+
description = "Play audio through speakers, synthesize text-to-speech, and manage TTS clone voices. Actions: 'play' to play an audio file (WAV/MP3/OGG — including recordings from memory episodes), 'speak' to synthesize and play text, 'synthesize' to save TTS to a WAV file, 'clone' to register a voice-clone source clip, 'list_voices' to inspect available clone refs/backends, 'volume' to get or set system volume, 'list' to enumerate audio output devices. TTS backends include auto, LuxTTS voice cloning, Supertonic, MLX, ONNX/Piper, and a local fallback. Neural TTS backends self-provision into ~/.omnius/voice on first use where supported. For cloned speech from a source clip, call generate_tts or audio_playback action=synthesize with sample/source_audio/voice_sample and backend=auto or luxtts. Use generate_tts when the task is specifically to create a TTS file; do not use shell speech commands or generate_audio for spoken TTS.";
|
|
507592
508007
|
parameters = {
|
|
507593
508008
|
type: "object",
|
|
507594
508009
|
properties: {
|
|
@@ -507615,8 +508030,8 @@ except Exception as exc:
|
|
|
507615
508030
|
},
|
|
507616
508031
|
backend: {
|
|
507617
508032
|
type: "string",
|
|
507618
|
-
enum: ["auto", "luxtts", "supertonic", "mlx", "onnx", "piper"
|
|
507619
|
-
description: "TTS backend. auto tries LuxTTS clone, Supertonic, MLX on Apple Silicon, Piper/ONNX, then
|
|
508033
|
+
enum: ["auto", "luxtts", "supertonic", "mlx", "onnx", "piper"],
|
|
508034
|
+
description: "TTS backend. auto tries LuxTTS clone, Supertonic, MLX on Apple Silicon, Piper/ONNX, then a local fallback."
|
|
507620
508035
|
},
|
|
507621
508036
|
output: {
|
|
507622
508037
|
type: "string",
|
|
@@ -507632,11 +508047,31 @@ except Exception as exc:
|
|
|
507632
508047
|
},
|
|
507633
508048
|
sample: {
|
|
507634
508049
|
type: "string",
|
|
507635
|
-
description: "Audio
|
|
508050
|
+
description: "Audio source clip path to register or use as a LuxTTS clone voice."
|
|
508051
|
+
},
|
|
508052
|
+
source_audio: {
|
|
508053
|
+
type: "string",
|
|
508054
|
+
description: "Alias for sample. Use this for cloned speech from a source voice clip."
|
|
508055
|
+
},
|
|
508056
|
+
voice_sample: {
|
|
508057
|
+
type: "string",
|
|
508058
|
+
description: "Alias for sample/source_audio."
|
|
508059
|
+
},
|
|
508060
|
+
reference_audio: {
|
|
508061
|
+
type: "string",
|
|
508062
|
+
description: "Alias for sample/source_audio."
|
|
508063
|
+
},
|
|
508064
|
+
ref_audio: {
|
|
508065
|
+
type: "string",
|
|
508066
|
+
description: "Alias for sample/source_audio."
|
|
508067
|
+
},
|
|
508068
|
+
clone_sample: {
|
|
508069
|
+
type: "string",
|
|
508070
|
+
description: "Alias for sample/source_audio."
|
|
507636
508071
|
},
|
|
507637
508072
|
clone_name: {
|
|
507638
508073
|
type: "string",
|
|
507639
|
-
description: "Friendly filename stem for action=clone."
|
|
508074
|
+
description: "Friendly filename stem for action=clone or for registering a source clip during synthesis."
|
|
507640
508075
|
},
|
|
507641
508076
|
model: {
|
|
507642
508077
|
type: "string",
|
|
@@ -507652,11 +508087,11 @@ except Exception as exc:
|
|
|
507652
508087
|
},
|
|
507653
508088
|
speed: {
|
|
507654
508089
|
type: "number",
|
|
507655
|
-
description: "Speech speed.
|
|
508090
|
+
description: "Speech speed. Neural backends use a multiplier; local fallback uses its backend-specific rate."
|
|
507656
508091
|
},
|
|
507657
508092
|
voice: {
|
|
507658
508093
|
type: "string",
|
|
507659
|
-
description: "Voice id/name. Examples: Supertonic voice M4, MLX voice af_heart,
|
|
508094
|
+
description: "Voice id/name. Examples: Supertonic voice M4, MLX voice af_heart, a source audio path for cloning, or Piper/ONNX model path."
|
|
507660
508095
|
},
|
|
507661
508096
|
lang: {
|
|
507662
508097
|
type: "string",
|
|
@@ -507720,9 +508155,9 @@ except Exception as exc:
|
|
|
507720
508155
|
return await this.synthesizeText(args, start2, true);
|
|
507721
508156
|
}
|
|
507722
508157
|
cloneVoice(args, start2) {
|
|
507723
|
-
const sample =
|
|
508158
|
+
const sample = cloneSampleArg(args) || (typeof args["file"] === "string" ? args["file"] : "");
|
|
507724
508159
|
if (!sample.trim()) {
|
|
507725
|
-
return { success: false, output: "", error: "Missing
|
|
508160
|
+
return { success: false, output: "", error: "Missing source audio. Provide sample=<file> or source_audio=<file> to register as a clone voice.", durationMs: performance.now() - start2 };
|
|
507726
508161
|
}
|
|
507727
508162
|
const saved = saveCloneRefFromSample(sample, typeof args["clone_name"] === "string" ? args["clone_name"] : void 0);
|
|
507728
508163
|
return {
|
|
@@ -507739,10 +508174,11 @@ except Exception as exc:
|
|
|
507739
508174
|
const lines = [
|
|
507740
508175
|
"TTS backends:",
|
|
507741
508176
|
` luxtts: ${existsSync40(join58(voiceDir(), "luxtts-venv", "bin", "python3")) ? "installed" : "not installed"}; clone refs: ${refs.length}`,
|
|
508177
|
+
" clone from source clip: generate_tts text=<words> source_audio=<wav/mp3/flac/ogg/m4a> backend=auto",
|
|
507742
508178
|
` supertonic: ${existsSync40(supertonicVenvPy()) ? "installed" : "not installed"}; voices include M1, M2, M3, M4 when package assets are available`,
|
|
507743
508179
|
` mlx: ${existsSync40(mlxVenvPy()) ? "installed" : "not installed"}; Apple Silicon only; default model mlx-community/Kokoro-82M-bf16`,
|
|
507744
508180
|
` piper/onnx: ${hasCommand3("piper") || existsSync40(piperVenvBin()) ? "available" : "not installed"}; first use installs piper-tts into ${piperVenvDir()}; pass model=<path.onnx> for raw ONNX voices`,
|
|
507745
|
-
`
|
|
508181
|
+
` local fallback: ${hasCommand3("espeak-ng") ? "available" : "not found"}`,
|
|
507746
508182
|
"",
|
|
507747
508183
|
"Registered clone refs:",
|
|
507748
508184
|
...refs.length ? refs.map((ref) => ` ${ref}`) : [" none"]
|
|
@@ -507756,11 +508192,20 @@ except Exception as exc:
|
|
|
507756
508192
|
}
|
|
507757
508193
|
const requestedBackend = normalizeTtsBackend(args["backend"]);
|
|
507758
508194
|
const strictBackend = boolArg(args["strict_backend"] ?? args["strictBackend"], false);
|
|
508195
|
+
const cloneRequested = wantsVoiceClone(args);
|
|
508196
|
+
if (cloneRequested && requestedBackend !== "auto" && requestedBackend !== "luxtts") {
|
|
508197
|
+
return {
|
|
508198
|
+
success: false,
|
|
508199
|
+
output: "",
|
|
508200
|
+
error: "Voice cloning from a source clip requires backend=auto or backend=luxtts.",
|
|
508201
|
+
durationMs: performance.now() - start2
|
|
508202
|
+
};
|
|
508203
|
+
}
|
|
507759
508204
|
const playback = playbackArg(args, speakDefault);
|
|
507760
508205
|
const outputPath2 = ttsOutputPath(args, requestedBackend);
|
|
507761
508206
|
const device = typeof args["device"] === "string" ? args["device"] : "default";
|
|
507762
508207
|
const tried = [];
|
|
507763
|
-
const autoCandidates = ["luxtts", "supertonic", ...process.platform === "darwin" && process.arch === "arm64" ? ["mlx"] : [], "piper", "espeak"];
|
|
508208
|
+
const autoCandidates = cloneRequested ? ["luxtts"] : ["luxtts", "supertonic", ...process.platform === "darwin" && process.arch === "arm64" ? ["mlx"] : [], "piper", "espeak"];
|
|
507764
508209
|
const candidates = requestedBackend === "auto" ? autoCandidates : strictBackend ? [requestedBackend] : [requestedBackend, ...autoCandidates.filter((backend) => backend !== requestedBackend)];
|
|
507765
508210
|
let usedBackend = "";
|
|
507766
508211
|
let voiceSummary = "";
|
|
@@ -507823,21 +508268,19 @@ ${tried.map((line) => `- ${line}`).join("\n")}`,
|
|
|
507823
508268
|
};
|
|
507824
508269
|
}
|
|
507825
508270
|
async synthesizeLuxtts(text, outputPath2, args) {
|
|
507826
|
-
const cloneRef =
|
|
508271
|
+
const cloneRef = cloneRefForSynthesis(args);
|
|
507827
508272
|
if (!cloneRef)
|
|
507828
|
-
throw new Error(`No LuxTTS clone
|
|
508273
|
+
throw new Error(`No LuxTTS clone source found. Provide source_audio=<voice clip> or clone_ref=<registered clip>.`);
|
|
507829
508274
|
const speed = numberArg3(args["speed"], 1);
|
|
508275
|
+
ensureLuxttsInstalled();
|
|
507830
508276
|
const daemonReady = await ensureLuxttsDaemon();
|
|
507831
508277
|
if (daemonReady) {
|
|
507832
508278
|
await luxttsSynthesize(text, cloneRef, outputPath2, speed);
|
|
507833
508279
|
if (existsSync40(outputPath2))
|
|
507834
508280
|
return `${basename12(cloneRef)} (LuxTTS daemon)`;
|
|
507835
508281
|
}
|
|
507836
|
-
const venvPy =
|
|
507837
|
-
const repoDir =
|
|
507838
|
-
if (!existsSync40(venvPy) || !existsSync40(repoDir)) {
|
|
507839
|
-
throw new Error("LuxTTS is not installed in the managed voice environment yet.");
|
|
507840
|
-
}
|
|
508282
|
+
const venvPy = luxttsVenvPy();
|
|
508283
|
+
const repoDir = luxttsRepoDir();
|
|
507841
508284
|
const pyScript = [
|
|
507842
508285
|
"import json, sys, wave",
|
|
507843
508286
|
"import numpy as np, torch",
|
|
@@ -507913,7 +508356,7 @@ ${tried.map((line) => `- ${line}`).join("\n")}`,
|
|
|
507913
508356
|
}
|
|
507914
508357
|
synthesizeEspeak(text, outputPath2, args) {
|
|
507915
508358
|
if (!hasCommand3("espeak-ng"))
|
|
507916
|
-
throw new Error("
|
|
508359
|
+
throw new Error("Local fallback TTS command not found.");
|
|
507917
508360
|
const voice = typeof args["voice"] === "string" ? args["voice"] : "en";
|
|
507918
508361
|
const speed = Math.round(numberArg3(args["speed"], 160));
|
|
507919
508362
|
execFileSync2("espeak-ng", ["-v", voice, "-s", String(speed), "-w", outputPath2, text], {
|
|
@@ -507995,20 +508438,27 @@ ${devices.join("\n")}`,
|
|
|
507995
508438
|
};
|
|
507996
508439
|
TtsGenerateTool = class {
|
|
507997
508440
|
name = "generate_tts";
|
|
507998
|
-
description = "Generate text-to-speech audio as a WAV file, optionally playing it after synthesis. Supports explicit backends: auto,
|
|
508441
|
+
description = "Generate text-to-speech audio as a WAV file, optionally playing it after synthesis. Supports explicit backends: auto, LuxTTS voice cloning, Supertonic, MLX, ONNX/Piper, and local fallback. Neural TTS backends self-provision into ~/.omnius/voice on first use where supported. For voice cloning, pass source_audio/sample/voice_sample with the reference clip and backend=auto or luxtts; clone_name can register it for reuse. Use clone_ref to select a registered LuxTTS voice and playback=false for silent file generation. Use this tool for speech/TTS requests; do not use shell commands or generate_audio as a TTS fallback.";
|
|
507999
508442
|
parameters = {
|
|
508000
508443
|
type: "object",
|
|
508001
508444
|
properties: {
|
|
508002
508445
|
text: { type: "string", description: "Text to synthesize" },
|
|
508003
508446
|
input: { type: "string", description: "Alias for text." },
|
|
508004
508447
|
prompt: { type: "string", description: "Alias for text." },
|
|
508005
|
-
backend: { type: "string", enum: ["auto", "luxtts", "supertonic", "mlx", "onnx", "piper"
|
|
508448
|
+
backend: { type: "string", enum: ["auto", "luxtts", "supertonic", "mlx", "onnx", "piper"] },
|
|
508006
508449
|
output: { type: "string", description: "Output WAV path. Defaults to ~/.omnius/voice/generated/tts-*.wav." },
|
|
508007
508450
|
path: { type: "string", description: "Alias for output." },
|
|
508008
508451
|
playback: { type: "boolean", description: "Whether to play after generating. Defaults false for generate_tts." },
|
|
508009
508452
|
strict_backend: { type: "boolean", description: "When true, fail instead of falling back if the requested backend is unavailable. Defaults false." },
|
|
508010
508453
|
voice: { type: "string", description: "Voice id/name, or raw Piper/ONNX path when backend=onnx/piper." },
|
|
508011
508454
|
clone_ref: { type: "string", description: "LuxTTS clone reference path, filename, or registered clone name." },
|
|
508455
|
+
sample: { type: "string", description: "Voice source clip path for cloned speech. Alias: source_audio." },
|
|
508456
|
+
source_audio: { type: "string", description: "Voice source clip path for cloned speech." },
|
|
508457
|
+
voice_sample: { type: "string", description: "Alias for source_audio." },
|
|
508458
|
+
reference_audio: { type: "string", description: "Alias for source_audio." },
|
|
508459
|
+
ref_audio: { type: "string", description: "Alias for source_audio." },
|
|
508460
|
+
clone_sample: { type: "string", description: "Alias for source_audio." },
|
|
508461
|
+
clone_name: { type: "string", description: "Optional name to register the source clip for later reuse." },
|
|
508012
508462
|
model: { type: "string", description: "Backend model id or raw ONNX/Piper model path." },
|
|
508013
508463
|
lang: { type: "string", description: "Language code for Supertonic/MLX where supported." },
|
|
508014
508464
|
speed: { type: "number", description: "Speech speed multiplier or backend-specific rate." },
|
|
@@ -575357,19 +575807,19 @@ function modelOnnxPath(id) {
|
|
|
575357
575807
|
function modelConfigPath(id) {
|
|
575358
575808
|
return join109(modelDir(id), "config.json");
|
|
575359
575809
|
}
|
|
575360
|
-
function
|
|
575810
|
+
function luxttsVenvDir2() {
|
|
575361
575811
|
return join109(voiceDir2(), "luxtts-venv");
|
|
575362
575812
|
}
|
|
575363
|
-
function
|
|
575364
|
-
return platform5() === "win32" ? join109(
|
|
575813
|
+
function luxttsVenvPy2() {
|
|
575814
|
+
return platform5() === "win32" ? join109(luxttsVenvDir2(), "Scripts", "python.exe") : join109(luxttsVenvDir2(), "bin", "python3");
|
|
575365
575815
|
}
|
|
575366
|
-
function
|
|
575816
|
+
function luxttsRepoDir2() {
|
|
575367
575817
|
return join109(voiceDir2(), "LuxTTS");
|
|
575368
575818
|
}
|
|
575369
575819
|
function luxttsCloneRefsDir() {
|
|
575370
575820
|
return join109(voiceDir2(), "clone-refs");
|
|
575371
575821
|
}
|
|
575372
|
-
function
|
|
575822
|
+
function luxttsInferScript2() {
|
|
575373
575823
|
return join109(voiceDir2(), "luxtts-infer.py");
|
|
575374
575824
|
}
|
|
575375
575825
|
function supertonicVenvDir() {
|
|
@@ -577936,12 +578386,12 @@ Error: ${err2 instanceof Error ? err2.message : String(err2)}`
|
|
|
577936
578386
|
"python3 not found. LuxTTS requires Python 3.10+. Try: apt install python3 / brew install python3"
|
|
577937
578387
|
);
|
|
577938
578388
|
}
|
|
577939
|
-
const venvDir =
|
|
577940
|
-
const venvPy =
|
|
578389
|
+
const venvDir = luxttsVenvDir2();
|
|
578390
|
+
const venvPy = luxttsVenvPy2();
|
|
577941
578391
|
if (existsSync95(venvPy)) {
|
|
577942
578392
|
try {
|
|
577943
578393
|
const quotedPy = `"${venvPy}"`;
|
|
577944
|
-
const repoPath =
|
|
578394
|
+
const repoPath = luxttsRepoDir2().replace(/\\/g, "/");
|
|
577945
578395
|
await this.asyncShell(
|
|
577946
578396
|
`${quotedPy} -c "import sys; sys.path.insert(0, '${repoPath}'); from zipvoice.luxvoice import LuxTTS; print('ok')"`,
|
|
577947
578397
|
3e4
|
|
@@ -578055,7 +578505,7 @@ Error: ${err2 instanceof Error ? err2.message : String(err2)}`
|
|
|
578055
578505
|
}
|
|
578056
578506
|
}
|
|
578057
578507
|
}
|
|
578058
|
-
const repoDir =
|
|
578508
|
+
const repoDir = luxttsRepoDir2();
|
|
578059
578509
|
if (!existsSync95(join109(repoDir, "zipvoice", "luxvoice.py"))) {
|
|
578060
578510
|
renderInfo(" Cloning LuxTTS repository...");
|
|
578061
578511
|
try {
|
|
@@ -578479,18 +578929,18 @@ def main():
|
|
|
578479
578929
|
if __name__ == '__main__':
|
|
578480
578930
|
main()
|
|
578481
578931
|
`;
|
|
578482
|
-
const scriptPath2 =
|
|
578932
|
+
const scriptPath2 = luxttsInferScript2();
|
|
578483
578933
|
mkdirSync52(voiceDir2(), { recursive: true });
|
|
578484
578934
|
writeFileSync49(scriptPath2, script);
|
|
578485
578935
|
}
|
|
578486
578936
|
/** Ensure the LuxTTS daemon is running, spawn if needed */
|
|
578487
578937
|
async ensureLuxttsDaemon() {
|
|
578488
578938
|
if (this._luxttsDaemon && !this._luxttsDaemon.killed) return true;
|
|
578489
|
-
const venvPy =
|
|
578939
|
+
const venvPy = luxttsVenvPy2();
|
|
578490
578940
|
if (!existsSync95(venvPy)) return false;
|
|
578491
578941
|
return new Promise((resolve48) => {
|
|
578492
|
-
const env2 = { ...process.env, LUXTTS_REPO_PATH:
|
|
578493
|
-
const daemon = nodeSpawn(venvPy, [
|
|
578942
|
+
const env2 = { ...process.env, LUXTTS_REPO_PATH: luxttsRepoDir2() };
|
|
578943
|
+
const daemon = nodeSpawn(venvPy, [luxttsInferScript2()], {
|
|
578494
578944
|
stdio: ["pipe", "pipe", "pipe"],
|
|
578495
578945
|
cwd: tmpdir20(),
|
|
578496
578946
|
env: env2
|
|
@@ -596500,6 +596950,7 @@ function scopedTool(base3, root, mode) {
|
|
|
596500
596950
|
async execute(args) {
|
|
596501
596951
|
const next = { ...args };
|
|
596502
596952
|
if (base3.name === "generate_image" || base3.name === "generate_audio" || base3.name === "generate_tts") {
|
|
596953
|
+
const cleanup = [];
|
|
596503
596954
|
const localModel = typeof next["model_path"] === "string" ? String(next["model_path"]) : typeof next["model"] === "string" && looksLikeLocalPath(String(next["model"])) ? String(next["model"]) : "";
|
|
596504
596955
|
if (localModel) {
|
|
596505
596956
|
const guarded = guardPath(rootAbs, localModel);
|
|
@@ -596508,6 +596959,22 @@ function scopedTool(base3, root, mode) {
|
|
|
596508
596959
|
else next["model"] = guarded.path.abs;
|
|
596509
596960
|
}
|
|
596510
596961
|
if (base3.name === "generate_tts") {
|
|
596962
|
+
for (const key of TTS_CLONE_SOURCE_KEYS) {
|
|
596963
|
+
const value2 = next[key];
|
|
596964
|
+
if (typeof value2 !== "string" || !value2.trim()) continue;
|
|
596965
|
+
const materialized = materializeTelegramCreativeArtifactForSend(rootAbs, value2.trim());
|
|
596966
|
+
if (!materialized.ok) return denied(materialized.error);
|
|
596967
|
+
next[key] = materialized.path;
|
|
596968
|
+
if (materialized.cleanup) cleanup.push(materialized.cleanup);
|
|
596969
|
+
}
|
|
596970
|
+
for (const key of ["clone_ref", "voice"]) {
|
|
596971
|
+
const value2 = next[key];
|
|
596972
|
+
if (typeof value2 !== "string" || !value2.trim() || !looksLikeAudioPath(value2.trim())) continue;
|
|
596973
|
+
const materialized = materializeTelegramCreativeArtifactForSend(rootAbs, value2.trim());
|
|
596974
|
+
if (!materialized.ok) return denied(materialized.error);
|
|
596975
|
+
next[key] = materialized.path;
|
|
596976
|
+
if (materialized.cleanup) cleanup.push(materialized.cleanup);
|
|
596977
|
+
}
|
|
596511
596978
|
const rawOutput = typeof next["output"] === "string" && String(next["output"]).trim() ? String(next["output"]) : typeof next["output_path"] === "string" && String(next["output_path"]).trim() ? String(next["output_path"]) : `tts-${Date.now()}.wav`;
|
|
596512
596979
|
const guardedOutput = guardPath(rootAbs, rawOutput);
|
|
596513
596980
|
if (!guardedOutput.ok) return denied(guardedOutput.error);
|
|
@@ -596517,16 +596984,20 @@ function scopedTool(base3, root, mode) {
|
|
|
596517
596984
|
next["output"] = guardedOutput.path.abs;
|
|
596518
596985
|
next["playback"] = false;
|
|
596519
596986
|
}
|
|
596520
|
-
|
|
596521
|
-
|
|
596522
|
-
if (
|
|
596523
|
-
|
|
596524
|
-
|
|
596525
|
-
|
|
596526
|
-
|
|
596987
|
+
try {
|
|
596988
|
+
const result2 = await base3.execute(next);
|
|
596989
|
+
if (result2.success) {
|
|
596990
|
+
if (base3.name === "generate_tts" && typeof next["output"] === "string") {
|
|
596991
|
+
rememberCreated(rootAbs, String(next["output"]));
|
|
596992
|
+
}
|
|
596993
|
+
for (const path11 of collectGeneratedArtifactPathsFromText(result2.output, rootAbs)) {
|
|
596994
|
+
rememberCreated(rootAbs, path11);
|
|
596995
|
+
}
|
|
596527
596996
|
}
|
|
596997
|
+
return result2;
|
|
596998
|
+
} finally {
|
|
596999
|
+
for (const fn of cleanup) fn();
|
|
596528
597000
|
}
|
|
596529
|
-
return result2;
|
|
596530
597001
|
}
|
|
596531
597002
|
const pathKey = PATH_KEYS.find((key) => typeof next[key] === "string" && String(next[key]).trim());
|
|
596532
597003
|
if (pathKey) {
|
|
@@ -596591,6 +597062,9 @@ function isInside(root, path11) {
|
|
|
596591
597062
|
function looksLikeLocalPath(value2) {
|
|
596592
597063
|
return value2.startsWith("/") || value2.startsWith("./") || value2.startsWith("../");
|
|
596593
597064
|
}
|
|
597065
|
+
function looksLikeAudioPath(value2) {
|
|
597066
|
+
return looksLikeLocalPath(value2) || value2.startsWith("~/") || /\.(wav|mp3|flac|ogg|m4a)$/i.test(value2);
|
|
597067
|
+
}
|
|
596594
597068
|
function manifestPath(root) {
|
|
596595
597069
|
return join119(root, MANIFEST_FILE);
|
|
596596
597070
|
}
|
|
@@ -596753,7 +597227,7 @@ function denied(error) {
|
|
|
596753
597227
|
mutatedFiles: []
|
|
596754
597228
|
};
|
|
596755
597229
|
}
|
|
596756
|
-
var MANIFEST_FILE, OBJECTS_DIR, SEND_DIR, PATH_KEYS, MEDIA_PATH_RE, PUBLIC_EXECUTABLE_ARTIFACT_EXTENSIONS, CreativeAudioFileTool;
|
|
597230
|
+
var MANIFEST_FILE, OBJECTS_DIR, SEND_DIR, PATH_KEYS, TTS_CLONE_SOURCE_KEYS, MEDIA_PATH_RE, PUBLIC_EXECUTABLE_ARTIFACT_EXTENSIONS, CreativeAudioFileTool;
|
|
596757
597231
|
var init_telegram_creative_tools = __esm({
|
|
596758
597232
|
"packages/cli/src/tui/telegram-creative-tools.ts"() {
|
|
596759
597233
|
"use strict";
|
|
@@ -596762,6 +597236,7 @@ var init_telegram_creative_tools = __esm({
|
|
|
596762
597236
|
OBJECTS_DIR = ".objects";
|
|
596763
597237
|
SEND_DIR = ".send";
|
|
596764
597238
|
PATH_KEYS = ["path", "file", "file_path", "filename", "filepath", "filePath"];
|
|
597239
|
+
TTS_CLONE_SOURCE_KEYS = ["sample", "source_audio", "voice_sample", "reference_audio", "ref_audio", "clone_sample"];
|
|
596765
597240
|
MEDIA_PATH_RE = /(?:^|[\s([])(\/[^\s<>"')\]]+\.[A-Za-z0-9]{1,12})(?:$|[\s),.\]])/g;
|
|
596766
597241
|
PUBLIC_EXECUTABLE_ARTIFACT_EXTENSIONS = /* @__PURE__ */ new Set([
|
|
596767
597242
|
".sh",
|
|
@@ -596836,9 +597311,16 @@ var init_telegram_creative_tools = __esm({
|
|
|
596836
597311
|
input: { type: "string", description: "Alias for text" },
|
|
596837
597312
|
prompt: { type: "string", description: "Alias for text" },
|
|
596838
597313
|
path: { type: "string", description: "Output .wav path inside the creative workspace" },
|
|
596839
|
-
backend: { type: "string", enum: ["auto", "luxtts", "supertonic", "mlx", "onnx", "piper"
|
|
596840
|
-
voice: { type: "string", description: "Voice id/name for the selected TTS backend" },
|
|
597314
|
+
backend: { type: "string", enum: ["auto", "luxtts", "supertonic", "mlx", "onnx", "piper"], description: "TTS backend. Defaults to auto." },
|
|
597315
|
+
voice: { type: "string", description: "Voice id/name for the selected TTS backend, or a scoped source audio path for cloning" },
|
|
596841
597316
|
clone_ref: { type: "string", description: "Optional LuxTTS clone reference" },
|
|
597317
|
+
sample: { type: "string", description: "Voice source clip path inside the creative workspace" },
|
|
597318
|
+
source_audio: { type: "string", description: "Alias for sample" },
|
|
597319
|
+
voice_sample: { type: "string", description: "Alias for sample" },
|
|
597320
|
+
reference_audio: { type: "string", description: "Alias for sample" },
|
|
597321
|
+
ref_audio: { type: "string", description: "Alias for sample" },
|
|
597322
|
+
clone_sample: { type: "string", description: "Alias for sample" },
|
|
597323
|
+
clone_name: { type: "string", description: "Optional name to register the source clip for later reuse" },
|
|
596842
597324
|
model: { type: "string", description: "Optional backend model id or raw Piper/ONNX path" },
|
|
596843
597325
|
speed: { type: "number", description: "Speech speed multiplier or backend-specific rate" }
|
|
596844
597326
|
},
|
|
@@ -596857,26 +597339,57 @@ var init_telegram_creative_tools = __esm({
|
|
|
596857
597339
|
if (!guarded.path.abs.toLowerCase().endsWith(".wav")) {
|
|
596858
597340
|
return denied("create_audio_file currently writes WAV files; use a .wav output path.");
|
|
596859
597341
|
}
|
|
596860
|
-
|
|
596861
|
-
const
|
|
596862
|
-
const
|
|
596863
|
-
|
|
596864
|
-
|
|
596865
|
-
|
|
596866
|
-
|
|
596867
|
-
|
|
596868
|
-
|
|
596869
|
-
|
|
596870
|
-
|
|
596871
|
-
|
|
596872
|
-
|
|
596873
|
-
|
|
596874
|
-
|
|
596875
|
-
|
|
596876
|
-
|
|
597342
|
+
const cloneArgs = {};
|
|
597343
|
+
const cleanup = [];
|
|
597344
|
+
for (const key of TTS_CLONE_SOURCE_KEYS) {
|
|
597345
|
+
const value2 = args[key];
|
|
597346
|
+
if (typeof value2 !== "string" || !value2.trim()) continue;
|
|
597347
|
+
const materialized = materializeTelegramCreativeArtifactForSend(this.root, value2.trim());
|
|
597348
|
+
if (!materialized.ok) return denied(materialized.error);
|
|
597349
|
+
cloneArgs[key] = materialized.path;
|
|
597350
|
+
if (materialized.cleanup) cleanup.push(materialized.cleanup);
|
|
597351
|
+
}
|
|
597352
|
+
for (const key of ["clone_ref", "voice"]) {
|
|
597353
|
+
const value2 = args[key];
|
|
597354
|
+
if (typeof value2 !== "string" || !value2.trim() || !looksLikeAudioPath(value2.trim())) continue;
|
|
597355
|
+
const materialized = materializeTelegramCreativeArtifactForSend(this.root, value2.trim());
|
|
597356
|
+
if (!materialized.ok) return denied(materialized.error);
|
|
597357
|
+
cloneArgs[key] = materialized.path;
|
|
597358
|
+
if (materialized.cleanup) cleanup.push(materialized.cleanup);
|
|
597359
|
+
}
|
|
597360
|
+
let result;
|
|
597361
|
+
try {
|
|
597362
|
+
await mkdir17(dirname33(guarded.path.abs), { recursive: true });
|
|
597363
|
+
const tts = new TtsGenerateTool();
|
|
597364
|
+
result = await tts.execute({
|
|
597365
|
+
text,
|
|
597366
|
+
output: guarded.path.abs,
|
|
597367
|
+
playback: false,
|
|
597368
|
+
backend: args["backend"],
|
|
597369
|
+
voice: cloneArgs["voice"] ?? args["voice"],
|
|
597370
|
+
clone_ref: cloneArgs["clone_ref"] ?? args["clone_ref"],
|
|
597371
|
+
...cloneArgs,
|
|
597372
|
+
sample: cloneArgs["sample"],
|
|
597373
|
+
source_audio: cloneArgs["source_audio"],
|
|
597374
|
+
voice_sample: cloneArgs["voice_sample"],
|
|
597375
|
+
reference_audio: cloneArgs["reference_audio"],
|
|
597376
|
+
ref_audio: cloneArgs["ref_audio"],
|
|
597377
|
+
clone_sample: cloneArgs["clone_sample"],
|
|
597378
|
+
clone_name: args["clone_name"],
|
|
597379
|
+
model: args["model"],
|
|
597380
|
+
speed: args["speed"]
|
|
597381
|
+
});
|
|
597382
|
+
if (!result.success || !existsSync104(guarded.path.abs)) {
|
|
597383
|
+
return {
|
|
597384
|
+
success: false,
|
|
597385
|
+
output: "",
|
|
597386
|
+
error: `Audio synthesis failed through generate_tts.
|
|
596877
597387
|
${(result.error || result.output || "").slice(0, 1200)}`,
|
|
596878
|
-
|
|
596879
|
-
|
|
597388
|
+
durationMs: performance.now() - start2
|
|
597389
|
+
};
|
|
597390
|
+
}
|
|
597391
|
+
} finally {
|
|
597392
|
+
for (const fn of cleanup) fn();
|
|
596880
597393
|
}
|
|
596881
597394
|
rememberCreated(this.root, guarded.path.abs);
|
|
596882
597395
|
const sizeKB = Math.round(statSync35(guarded.path.abs).size / 1024);
|