npm - omnius - Versions diffs - 1.0.20 → 1.0.22 - Mend

omnius 1.0.20 → 1.0.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/dist/index.js CHANGED Viewed

@@ -1474,7 +1474,7 @@ var init_security_classifier = __esm({
       // ── Network reads (safe)
       { match: /^(web_search|web_fetch)$/, info: NETWORK_READ },
       // ── Network outbound (mutating or remote inference)
-      { match: /^(image_generate|generate_image|vision|video_understand)$/, info: NETWORK_OUTBOUND },
+      { match: /^(image_generate|generate_image|generate_audio|generate_tts|create_audio_file|vision|video_understand|telegram_send_file)$/, info: NETWORK_OUTBOUND },
       { match: /^(transcribe_file|transcribe_url|youtube_download)$/, info: NETWORK_OUTBOUND },
       { match: /^(fortemi_bridge)$/, info: NETWORK_OUTBOUND },
       // ── Memory tools
@@ -1491,7 +1491,7 @@ var init_security_classifier = __esm({
       { match: /^(file_read|file_explore|list_directory|grep_search|glob_find|find_files)$/, info: LOCAL_READ },
       { match: /^(image_read|ocr|ocr_pdf|ocr_image_advanced|pdf_to_text|structured_read|read_structured_file)$/, info: LOCAL_READ },
       { match: /^(symbol_search|impact_analysis|code_neighbors|repo_map|codebase_map|semantic_map|import_graph)$/, info: LOCAL_READ },
-      { match: /^(diagnostic|git_info|environment_snapshot|process_health|todo_read|explore_tools)$/, info: LOCAL_READ },
+      { match: /^(diagnostic|git_info|environment_snapshot|process_health|todo_read|explore_tools|telegram_media_recent)$/, info: LOCAL_READ },
       { match: /^(log_explore|log_packet|change_log|phase_recall|code_graph)$/, info: LOCAL_READ },
       { match: /^skill_(list|execute|read)$/, info: LOCAL_READ },
       // ── Task completion (neutral signal)
@@ -5733,13 +5733,20 @@ var init_explore_tools = __esm({
       diagnostic: "Run project diagnostics (build, test, lint)",
       image_read: "Read and describe image contents",
       screenshot: "Capture a screenshot of the desktop",
+      ocr: "Extract text from images via OCR",
       ocr_image: "Extract text from images via OCR",
+      ocr_image_advanced: "Advanced OCR for images with layout-aware extraction",
       ocr_pdf: "Extract text from PDF pages via OCR",
       pdf_to_text: "Convert PDF to plain text",
       vision: "Describe what's on screen using Moondream",
+      video_understand: "Analyze a video file with transcription and keyframe understanding",
+      audio_analyze: "Classify sounds, detect speech, inspect spectrum, or analyze audio files",
       desktop_click: "Click at coordinates on the desktop",
       desktop_describe: "Describe a region of the desktop",
       transcribe_file: "Transcribe audio/video files to text",
+      telegram_media_recent: "List recent Telegram media available in the current chat scope",
+      generate_audio: "Generate sound effects or music with local model backends",
+      generate_tts: "Generate speech from text with configured voice/TTS backends",
       create_tool: "Create a new custom tool from a workflow",
       manage_tools: "List, inspect, or remove custom tools",
       skill_list: "List available AIWG skills",
@@ -84452,7 +84459,7 @@ var require_mime_types = __commonJS({
   "../node_modules/mime-types/index.js"(exports) {
     "use strict";
     var db = require_mime_db();
-    var extname16 = __require("path").extname;
+    var extname17 = __require("path").extname;
     var EXTRACT_TYPE_REGEXP = /^\s*([^;\s]*)(?:;|\s|$)/;
     var TEXT_TYPE_REGEXP = /^text\//i;
     exports.charset = charset;
@@ -84506,7 +84513,7 @@ var require_mime_types = __commonJS({
       if (!path11 || typeof path11 !== "string") {
         return false;
       }
-      var extension4 = extname16("x." + path11).toLowerCase().substr(1);
+      var extension4 = extname17("x." + path11).toLowerCase().substr(1);
       if (!extension4) {
         return false;
       }
@@ -250375,6 +250382,22 @@ function optionalNumberArg(value2) {
   const n2 = Number(value2);
   return Number.isFinite(n2) ? n2 : void 0;
 }
+/**
+ * Coerce a loosely-typed tool argument into a boolean.
+ *
+ * Recognised forms: real booleans, the numbers 1 and 0, and the strings
+ * 1/true/yes/on and 0/false/no/off (case-insensitive, surrounding whitespace ignored).
+ *
+ * @param {unknown} value2 - Raw argument value.
+ * @param {boolean} fallback - Value returned when value2 is not a recognised form.
+ * @returns {boolean} The coerced flag, or fallback.
+ */
+function booleanArg(value2, fallback) {
+  if (typeof value2 === "boolean")
+    return value2;
+  // Numeric 1/0 mirror the accepted "1"/"0" strings; without this a flag sent
+  // as a number would silently resolve to the fallback.
+  if (value2 === 1)
+    return true;
+  if (value2 === 0)
+    return false;
+  if (typeof value2 === "string") {
+    const text = value2.trim();
+    if (/^(1|true|yes|on)$/i.test(text))
+      return true;
+    if (/^(0|false|no|off)$/i.test(text))
+      return false;
+  }
+  return fallback;
+}
+/**
+ * Decide whether the fallback ladder may be used for this call.
+ *
+ * A true strict flag (strict_model / strictModel / strict) disables fallback
+ * outright; otherwise the fallback flag (fallback / allow_fallback /
+ * allowFallback) decides, defaulting to enabled.
+ *
+ * @param {object} args - Raw tool arguments.
+ * @returns {boolean} True when fallback candidates may be tried.
+ */
+function generationFallbackEnabled(args) {
+  const strictFlag = args["strict_model"] ?? args["strictModel"] ?? args["strict"];
+  return booleanArg(strictFlag, false)
+    ? false
+    : booleanArg(args["fallback"] ?? args["allow_fallback"] ?? args["allowFallback"], true);
+}
 /** True when value2 is one of the backend selectors "auto", "ollama", "diffusers" or "sdcpp". */
 function isBackend(value2) {
   return ["auto", "ollama", "diffusers", "sdcpp"].includes(value2);
 }
@@ -250383,6 +250406,14 @@ function getImageGenerationPreset(model) {
     return void 0;
   return IMAGE_GENERATION_MODEL_PRESETS.find((preset) => preset.id === model);
 }
+/**
+ * Resolve the ids in IMAGE_GENERATION_QUALITY_LADDER to their presets,
+ * keeping ladder order and dropping ids that have no matching preset.
+ *
+ * @returns {object[]} Presets in ladder order.
+ */
+function imageGenerationQualityLadder() {
+  const resolved = [];
+  for (const id of IMAGE_GENERATION_QUALITY_LADDER) {
+    const preset = getImageGenerationPreset(id);
+    if (preset)
+      resolved.push(preset);
+  }
+  return resolved;
+}
+/**
+ * List the presets that declare themselves (via fallbackFor) as a fallback
+ * for the given model id.
+ *
+ * @param {string|undefined} model - Model id to look up.
+ * @returns {object[]} Matching presets in preset-table order; empty when model is falsy.
+ */
+function imageGenerationFallbackAlternates(model) {
+  return model
+    ? IMAGE_GENERATION_MODEL_PRESETS.filter(({ fallbackFor }) => fallbackFor?.includes(model))
+    : [];
+}
 function inferImageGenerationBackend(model, requested) {
   if (requested && isBackend(requested))
     return requested;
@@ -250399,6 +250430,45 @@ function inferImageGenerationBackend(model, requested) {
     return "sdcpp";
   return "diffusers";
 }
+/**
+ * Build one ladder candidate (model, concrete backend, optional preset).
+ *
+ * @param {string} model - Model id or local model path.
+ * @param {string} [requestedBackend] - Backend asked for by the caller; may be "auto".
+ * @returns {{model: string, backend: string, preset: (object|undefined)}}
+ */
+function imageCandidateFor(model, requestedBackend) {
+  let backend = inferImageGenerationBackend(model, requestedBackend);
+  if (backend === "auto") {
+    // An explicit "auto" is echoed back unchanged by inferImageGenerationBackend,
+    // so infer again from the model alone before defaulting. Otherwise an
+    // Ollama or sdcpp model requested with backend "auto" would be sent to Diffusers.
+    backend = inferImageGenerationBackend(model, void 0);
+    if (backend === "auto")
+      backend = "diffusers";
+  }
+  return {
+    model,
+    backend,
+    preset: getImageGenerationPreset(model)
+  };
+}
+/**
+ * Build the ordered, de-duplicated list of candidates to try for one request.
+ *
+ * Order: the requested model (or the first ladder entry for an explicit
+ * backend), then its declared fallback alternates, then the quality ladder
+ * from the primary entry downwards with each entry's alternates.
+ *
+ * With allowFallback false exactly one candidate is returned: the requested
+ * model/backend, or the default Diffusers model when nothing was requested.
+ *
+ * @param {string} [requestedModel] - Model id or path; undefined means "auto".
+ * @param {string} [requestedBackend] - Backend selector; "auto" or undefined means infer.
+ * @param {boolean} [allowFallback=true] - Whether alternates and ladder entries may be added.
+ * @returns {Array<{model: string, backend: string, preset: (object|undefined)}>}
+ */
+function imageGenerationFallbackCandidates(requestedModel, requestedBackend, allowFallback = true) {
+  const ladder = imageGenerationQualityLadder();
+  const candidates = [];
+  // A Set of "backend:model" keys replaces rescanning the array on every add.
+  const seen = new Set();
+  const add2 = (candidate) => {
+    const key = `${candidate.backend}:${candidate.model}`;
+    if (seen.has(key))
+      return;
+    seen.add(key);
+    candidates.push(candidate);
+  };
+  const explicitBackend = requestedBackend && requestedBackend !== "auto" ? requestedBackend : void 0;
+  if (requestedModel) {
+    add2(imageCandidateFor(requestedModel, requestedBackend));
+  } else if (explicitBackend) {
+    const firstForBackend = ladder.find((preset) => preset.backend === explicitBackend);
+    const defaultModel = explicitBackend === "ollama" ? DEFAULT_OLLAMA_IMAGE_MODEL : DEFAULT_DIFFUSERS_IMAGE_MODEL;
+    add2(imageCandidateFor(firstForBackend?.id ?? defaultModel, explicitBackend));
+  }
+  if (!allowFallback) {
+    // Strict mode: never add alternates or ladder entries. Alternates of the
+    // requested model used to be added before this check, so a strict request
+    // could still run a different model.
+    if (candidates.length === 0)
+      add2(imageCandidateFor(DEFAULT_DIFFUSERS_IMAGE_MODEL, requestedBackend));
+    return candidates;
+  }
+  if (requestedModel) {
+    for (const alternate of imageGenerationFallbackAlternates(requestedModel))
+      add2(imageCandidateFor(alternate.id));
+  }
+  let primaryIndex = 0;
+  if (requestedModel)
+    primaryIndex = ladder.findIndex((preset) => preset.id === requestedModel);
+  else if (explicitBackend)
+    primaryIndex = ladder.findIndex((preset) => preset.backend === explicitBackend);
+  // A primary that is not on the ladder falls back through the whole ladder.
+  const fallbackTail = primaryIndex >= 0 ? ladder.slice(primaryIndex) : ladder;
+  for (const preset of fallbackTail) {
+    add2(imageCandidateFor(preset.id));
+    for (const alternate of imageGenerationFallbackAlternates(preset.id))
+      add2(imageCandidateFor(alternate.id));
+  }
+  return candidates;
+}
 /** Path of the image-generation working directory (.omnius/image-gen) under repoRoot. */
 function imageGenerationDir(repoRoot = ".") {
   const segments = [".omnius", "image-gen"];
   return join36(repoRoot, ...segments);
 }
@@ -250653,6 +250723,33 @@ function formatSuccessOutput(args) {
     `  Prompt: "${prompt}"`
   ].filter(Boolean).join("\n");
 }
+/**
+ * Reduce a tool result to a single-line failure reason: prefers error, then
+ * output, then "unknown error"; passes the text through trimProcessText with
+ * a 700 limit and collapses all whitespace runs to single spaces.
+ *
+ * @param {{error?: string, output?: string}} result - Tool result to summarise.
+ * @returns {string} One-line summary.
+ */
+function summarizeToolResult(result) {
+  const raw = String(result.error || result.output || "unknown error");
+  const clipped = trimProcessText(raw, 700);
+  return clipped.replace(/\s+/g, " ").trim();
+}
+/**
+ * Format one failed attempt as "<n>. <model> [<backend>] - <reason>".
+ *
+ * @param {{model: string, backend: string}} candidate - Candidate that was tried.
+ * @param {string} reason - Why it failed.
+ * @param {number} index - Zero-based attempt position; shown one-based.
+ * @returns {string}
+ */
+function formatImageAttempt(candidate, reason, index) {
+  const { model, backend } = candidate;
+  const ordinal = index + 1;
+  return `${ordinal}. ${model} [${backend}] - ${reason}`;
+}
+/**
+ * Build the multi-line message reported when every candidate failed.
+ *
+ * @param {Array<{candidate: object, reason: string}>} failed - Attempts in the order tried.
+ * @returns {string} Two header lines followed by one indented line per attempt.
+ */
+function formatImageFallbackFailure(failed) {
+  const lines = [
+    "No image generation model in the fallback ladder completed successfully.",
+    "Attempted, highest quality to lowest:"
+  ];
+  failed.forEach(({ candidate, reason }, position) => {
+    lines.push(`  ${formatImageAttempt(candidate, reason, position)}`);
+  });
+  return lines.join("\n");
+}
+/**
+ * Prefix a successful result with a summary of the attempts that failed
+ * before it. The result is returned untouched when nothing failed.
+ *
+ * @param {{output: string}} result - Successful tool result.
+ * @param {Array<{candidate: object, reason: string}>} failed - Earlier failed attempts.
+ * @param {{model: string, backend: string}} winner - Candidate that succeeded.
+ * @returns {object} The original result, or a copy with the annotated output.
+ */
+function annotateImageFallbackSuccess(result, failed, winner) {
+  if (failed.length === 0)
+    return result;
+  const headline = `Fallback ladder succeeded with ${winner.model} [${winner.backend}] after ${failed.length} failed attempt(s).`;
+  const attemptLines = failed.map(({ candidate, reason }, position) => `  ${formatImageAttempt(candidate, reason, position)}`);
+  // The trailing newline separates the annotation from the original output.
+  const prefix = `${[headline, "Failed attempts:", ...attemptLines].join("\n")}\n`;
+  return { ...result, output: prefix + result.output };
+}
 function parseRunnerJson(stdout) {
   const lines = stdout.trim().split(/\r?\n/).reverse();
   for (const line of lines) {
@@ -250665,7 +250762,7 @@ function parseRunnerJson(stdout) {
   }
   return null;
 }
-var DEFAULT_DIFFUSERS_IMAGE_MODEL, DEFAULT_OLLAMA_IMAGE_MODEL, DIFFUSERS_PYTHON_PACKAGES, SDCPP_PYTHON_PACKAGES, IMAGE_GENERATION_MODEL_PRESETS, OLLAMA_IMAGE_MODELS, DIFFUSERS_RUNNER, SDCPP_RUNNER, ImageGenerateTool;
+var DEFAULT_DIFFUSERS_IMAGE_MODEL, DEFAULT_OLLAMA_IMAGE_MODEL, DIFFUSERS_PYTHON_PACKAGES, SDCPP_PYTHON_PACKAGES, IMAGE_GENERATION_MODEL_PRESETS, IMAGE_GENERATION_QUALITY_LADDER, OLLAMA_IMAGE_MODELS, DIFFUSERS_RUNNER, SDCPP_RUNNER, ImageGenerateTool;
 var init_image_generate = __esm({
   "packages/execution/dist/tools/image-generate.js"() {
     "use strict";
@@ -250737,6 +250834,78 @@ var init_image_generate = __esm({
         height: 1024,
         note: "Primary serious-generation baseline for maximum photorealism."
       },
+      {
+        id: "black-forest-labs/FLUX.1-dev-FP8",
+        label: "FLUX.1 dev FP8",
+        backend: "diffusers",
+        install: 'python3 .omnius/image-gen/diffusers_text2image.py --model black-forest-labs/FLUX.1-dev-FP8 --steps 28 --guidance 3.5 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
+        category: "Official FLUX fallback",
+        sizeClass: "12B FLUX.1 dev FP8",
+        quality: "Official lower-precision FLUX.1 dev route; best first fallback when full FLUX.1 dev is unavailable or too heavy.",
+        minVramGB: 16,
+        recommendedVramGB: 24,
+        deployment: "Prefer this before third-party mirrors when loader support is available.",
+        steps: 28,
+        guidance: 3.5,
+        width: 1024,
+        height: 1024,
+        fallbackFor: ["black-forest-labs/FLUX.1-dev"],
+        note: "Official BFL FP8 fallback for FLUX.1 dev."
+      },
+      {
+        id: "black-forest-labs/FLUX.1-Krea-dev",
+        label: "FLUX.1 Krea dev",
+        backend: "diffusers",
+        install: 'python3 .omnius/image-gen/diffusers_text2image.py --model black-forest-labs/FLUX.1-Krea-dev --steps 28 --guidance 3.5 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
+        category: "Official FLUX fallback",
+        sizeClass: "12B FLUX.1 dev-family",
+        quality: "Official FLUX.1 dev-family aesthetic variant; useful when the base dev repo is unavailable and the requested task tolerates an opinionated realism bias.",
+        minVramGB: 24,
+        recommendedVramGB: 48,
+        deployment: "Heavy Diffusers/ComfyUI route with FLUX.1 dev-family license considerations.",
+        steps: 28,
+        guidance: 3.5,
+        width: 1024,
+        height: 1024,
+        fallbackFor: ["black-forest-labs/FLUX.1-dev"],
+        note: "Official aesthetic FLUX.1 fallback."
+      },
+      {
+        id: "lllyasviel/flux1-dev-bnb-nf4",
+        label: "FLUX.1 dev BNB NF4",
+        backend: "diffusers",
+        install: 'python3 .omnius/image-gen/diffusers_text2image.py --model lllyasviel/flux1-dev-bnb-nf4 --steps 28 --guidance 3.5 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
+        category: "Traceable FLUX fallback",
+        sizeClass: "12B FLUX.1 dev NF4",
+        quality: "Lower-memory community quantization; useful after official BFL sources, with some possible quality loss and loader brittleness.",
+        minVramGB: 12,
+        recommendedVramGB: 16,
+        deployment: "Best with BNB-aware Diffusers/Forge-style runtimes. Falls through cleanly if the current runner cannot load it.",
+        steps: 28,
+        guidance: 3.5,
+        width: 1024,
+        height: 1024,
+        fallbackFor: ["black-forest-labs/FLUX.1-dev", "black-forest-labs/FLUX.1-dev-FP8"],
+        note: "Traceable low-VRAM NF4 fallback for FLUX.1 dev."
+      },
+      {
+        id: "ChuckMcSneed/FLUX.1-dev",
+        label: "FLUX.1 dev mirror",
+        backend: "diffusers",
+        install: 'python3 .omnius/image-gen/diffusers_text2image.py --model ChuckMcSneed/FLUX.1-dev --steps 28 --guidance 3.5 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
+        category: "Traceable FLUX fallback",
+        sizeClass: "12B FLUX.1 dev mirror",
+        quality: "Lower-priority mirror fallback for FLUX.1 dev. Use only after official and reputable quantized options fail.",
+        minVramGB: 24,
+        recommendedVramGB: 48,
+        deployment: "Treat as lower-trust than official BFL and well-known quantized conversions; verify provenance and license before relying on it.",
+        steps: 28,
+        guidance: 3.5,
+        width: 1024,
+        height: 1024,
+        fallbackFor: ["black-forest-labs/FLUX.1-dev", "black-forest-labs/FLUX.1-dev-FP8"],
+        note: "Traceable mirror fallback for FLUX.1 dev."
+      },
       {
         id: "stabilityai/stable-diffusion-3.5-large",
         label: "Stable Diffusion 3.5 Large",
@@ -250837,6 +251006,40 @@ var init_image_generate = __esm({
         height: 1024,
         note: "More deployable compact FLUX-family model."
       },
+      {
+        id: "black-forest-labs/FLUX.2-klein-4b-fp8",
+        label: "FLUX.2 Klein 4B FP8",
+        backend: "diffusers",
+        install: 'python3 .omnius/image-gen/diffusers_text2image.py --model black-forest-labs/FLUX.2-klein-4b-fp8 --steps 8 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
+        category: "Official FLUX fallback",
+        sizeClass: "4B compact FLUX-family FP8",
+        quality: "Official lower-precision FLUX.2 Klein route with better deployment fit than full-precision 4B.",
+        minVramGB: 8,
+        recommendedVramGB: 12,
+        deployment: "Preferred lower-memory official FLUX.2 fallback when compatible with the current loader.",
+        steps: 8,
+        width: 1024,
+        height: 1024,
+        fallbackFor: ["black-forest-labs/FLUX.2-klein-4B", "x/flux2-klein"],
+        note: "Official FP8 fallback for FLUX.2 Klein."
+      },
+      {
+        id: "black-forest-labs/FLUX.2-klein-4b-nvfp4",
+        label: "FLUX.2 Klein 4B NVFP4",
+        backend: "diffusers",
+        install: 'python3 .omnius/image-gen/diffusers_text2image.py --model black-forest-labs/FLUX.2-klein-4b-nvfp4 --steps 8 --width 1024 --height 1024 --prompt "..." --output .omnius/images/out.png',
+        category: "Official FLUX fallback",
+        sizeClass: "4B compact FLUX-family NVFP4",
+        quality: "Official NVIDIA-oriented low-precision FLUX.2 Klein fallback.",
+        minVramGB: 8,
+        recommendedVramGB: 12,
+        deployment: "Use when the runtime/GPU supports the NVFP4 path; otherwise the fallback ladder continues.",
+        steps: 8,
+        width: 1024,
+        height: 1024,
+        fallbackFor: ["black-forest-labs/FLUX.2-klein-4B", "x/flux2-klein", "black-forest-labs/FLUX.2-klein-4b-fp8"],
+        note: "Official NVFP4 fallback for FLUX.2 Klein."
+      },
       {
         id: "deepseek-ai/Janus-Pro-7B",
         label: "Janus-Pro-7B",
@@ -250989,6 +251192,21 @@ var init_image_generate = __esm({
         note: "CPU/GGUF/checkpoint route; requires a local model path."
       }
     ];
+    IMAGE_GENERATION_QUALITY_LADDER = [
+      "black-forest-labs/FLUX.1-dev",
+      "stabilityai/stable-diffusion-3.5-large",
+      DEFAULT_OLLAMA_IMAGE_MODEL,
+      "black-forest-labs/FLUX.1-schnell",
+      "stabilityai/stable-diffusion-3.5-large-turbo",
+      "Tongyi-MAI/Z-Image-Turbo",
+      "black-forest-labs/FLUX.2-klein-4B",
+      DEFAULT_DIFFUSERS_IMAGE_MODEL,
+      "Efficient-Large-Model/Sana_Sprint_0.6B_1024px_diffusers",
+      "SimianLuo/LCM_Dreamshaper_v7",
+      "stabilityai/sd-turbo",
+      "segmind/tiny-sd",
+      "nota-ai/bk-sdm-tiny-2m"
+    ];
     OLLAMA_IMAGE_MODELS = IMAGE_GENERATION_MODEL_PRESETS.filter((preset) => preset.backend === "ollama").map((preset) => preset.id);
     DIFFUSERS_RUNNER = String.raw`#!/usr/bin/env python3
 import argparse
@@ -251170,7 +251388,7 @@ if __name__ == "__main__":
 `;
     ImageGenerateTool = class {
       name = "generate_image";
-      description = "Generate an image from a text prompt using a local image-generation backend. Supports Ollama image models (x/z-image-turbo, x/flux2-klein), Python Diffusers models (SDXL Turbo default, FLUX.1 dev, SD3.5 Large, Tiny-SD, LCM, Sana Sprint), and stable-diffusion.cpp local checkpoints/GGUF. Saves a PNG under .omnius/images and returns the file path.";
+      description = "Generate an image from a text prompt using a local image-generation backend. Supports Ollama image models (x/z-image-turbo, x/flux2-klein), Python Diffusers models (SDXL Turbo default, FLUX.1 dev, SD3.5 Large, Tiny-SD, LCM, Sana Sprint), and stable-diffusion.cpp local checkpoints/GGUF. When fallback is enabled, auto generation tries ranked high-quality candidates first, including official/traceable FLUX fallbacks for Black Forest Labs models, and then falls back to smaller models if setup, download, or generation fails. Saves a PNG under .omnius/images and returns the file path.";
       parameters = {
         type: "object",
         properties: {
@@ -251215,6 +251433,14 @@ if __name__ == "__main__":
             type: "string",
             enum: ["generate", "list_models", "setup"],
             description: "Optional utility action. Default is generate."
+          },
+          fallback: {
+            type: "boolean",
+            description: "Whether to try the ranked quality ladder if the selected model/backend fails. Defaults true."
+          },
+          strict_model: {
+            type: "boolean",
+            description: "When true, use only the requested model/backend and do not fall back. Defaults false."
           }
         },
         required: ["prompt"]
@@ -251257,7 +251483,7 @@ if __name__ == "__main__":
         if (action === "list_models") {
           return {
             success: true,
-            output: IMAGE_GENERATION_MODEL_PRESETS.map((preset2) => `${preset2.id} [${preset2.backend}] - ${preset2.note}`).join("\n"),
+            output: IMAGE_GENERATION_MODEL_PRESETS.map((preset) => `${preset.id} [${preset.backend}] - ${preset.note}`).join("\n"),
             durationMs: performance.now() - start2
           };
         }
@@ -251281,19 +251507,8 @@ if __name__ == "__main__":
           const rawModel2 = args["model_path"] ? String(args["model_path"]) : args["model"] ? String(args["model"]) : this.defaultModel;
           const requestedModel2 = rawModel2 === "auto" ? void 0 : rawModel2;
           const requestedBackend2 = args["backend"] ? String(args["backend"]) : this.defaultBackend;
-          let backend = inferImageGenerationBackend(requestedModel2, requestedBackend2);
-          if (backend === "auto") {
-            backend = inferImageGenerationBackend(requestedModel2, void 0);
-            if (backend === "auto")
-              backend = "diffusers";
-          }
-          const model = requestedModel2 ?? (backend === "diffusers" ? DEFAULT_DIFFUSERS_IMAGE_MODEL : DEFAULT_OLLAMA_IMAGE_MODEL);
-          this.emitProgress({ stage: "setup", message: `Preparing image model ${model} (${backend})` });
-          if (backend === "ollama")
-            return await this.prewarmOllama({ model, start: start2 });
-          if (backend === "sdcpp")
-            return await this.prewarmSdCpp({ model, start: start2, python: args["python"] });
-          return await this.prewarmDiffusers({ model, start: start2, python: args["python"] });
+          const candidates2 = imageGenerationFallbackCandidates(requestedModel2, requestedBackend2, generationFallbackEnabled(args));
+          return await this.prewarmCandidateLadder({ candidates: candidates2, args, start: start2 });
         }
         const prompt = String(args["prompt"] ?? "").trim();
         if (!prompt) {
@@ -251302,31 +251517,10 @@ if __name__ == "__main__":
         const rawModel = args["model_path"] ? String(args["model_path"]) : args["model"] ? String(args["model"]) : this.defaultModel;
         const requestedModel = rawModel === "auto" ? void 0 : rawModel;
         const requestedBackend = args["backend"] ? String(args["backend"]) : this.defaultBackend;
-        const preset = getImageGenerationPreset(requestedModel);
-        const width = numberArg(args["width"], preset?.width ?? 1024);
-        const height = numberArg(args["height"], preset?.height ?? 1024);
-        const steps = optionalNumberArg(args["steps"]) ?? preset?.steps;
-        const guidance = optionalNumberArg(args["guidance"]) ?? preset?.guidance;
         const seed = optionalNumberArg(args["seed"]);
+        const candidates = imageGenerationFallbackCandidates(requestedModel, requestedBackend, generationFallbackEnabled(args));
         try {
-          let backend = inferImageGenerationBackend(requestedModel, requestedBackend);
-          let model = requestedModel;
-          if (backend === "auto") {
-            backend = inferImageGenerationBackend(model, void 0);
-            if (backend === "auto")
-              backend = "diffusers";
-          }
-          if (!model) {
-            model = backend === "diffusers" ? DEFAULT_DIFFUSERS_IMAGE_MODEL : DEFAULT_OLLAMA_IMAGE_MODEL;
-          }
-          this.emitProgress({ stage: "setup", message: `Using image model ${model} (${backend})` });
-          if (backend === "ollama") {
-            return await this.generateWithOllama({ prompt, model, width, height, steps, start: start2 });
-          }
-          if (backend === "sdcpp") {
-            return await this.generateWithSdCpp({ prompt, model, width, height, steps, seed, start: start2, python: args["python"] });
-          }
-          return await this.generateWithDiffusers({ prompt, model, width, height, steps, guidance, seed, start: start2, python: args["python"] });
+          return await this.generateCandidateLadder({ candidates, prompt, args, seed, start: start2 });
         } catch (err) {
           return {
             success: false,
@@ -251335,6 +251529,64 @@ if __name__ == "__main__":
           };
         }
       }
+      /**
+       * Prewarm candidates in order and return the first successful result,
+       * annotated with any earlier failures. When every candidate fails, the
+       * returned result has success false and lists every attempt.
+       *
+       * @param {{candidates: object[], args: object, start: number}} args -
+       *   Candidates to try, the raw tool arguments (only "python" is read here),
+       *   and the performance.now() start timestamp.
+       * @returns {Promise<object>} Tool result.
+       */
+      async prewarmCandidateLadder(args) {
+        const failed = [];
+        const total = args.candidates.length;
+        for (let index = 0; index < total; index++) {
+          const candidate = args.candidates[index];
+          this.emitProgress({
+            stage: "setup",
+            message: `Preparing image model ${candidate.model} (${candidate.backend}) [${index + 1}/${total}]`
+          });
+          let result;
+          try {
+            result = candidate.backend === "ollama" ? await this.prewarmOllama({ model: candidate.model, start: args.start }) : candidate.backend === "sdcpp" ? await this.prewarmSdCpp({ model: candidate.model, start: args.start, python: args.args["python"] }) : await this.prewarmDiffusers({ model: candidate.model, start: args.start, python: args.args["python"] });
+          } catch (err) {
+            // A throw from one backend helper counts as a failed attempt, so
+            // the ladder continues instead of aborting on the first exception.
+            // NOTE(review): assumes helper exceptions are ordinary setup failures — confirm none signal cancellation.
+            result = { success: false, error: err instanceof Error ? err.message : String(err) };
+          }
+          if (result.success)
+            return annotateImageFallbackSuccess(result, failed, candidate);
+          failed.push({ candidate, reason: summarizeToolResult(result) });
+          if (index < total - 1) {
+            this.emitProgress({
+              stage: "setup",
+              message: `${candidate.model} failed; trying ${args.candidates[index + 1].model}`
+            });
+          }
+        }
+        const output = formatImageFallbackFailure(failed);
+        return {
+          success: false,
+          output,
+          error: output,
+          durationMs: performance.now() - args.start
+        };
+      }
+      /**
+       * Generate with candidates in order and return the first successful
+       * result, annotated with any earlier failures. Width, height, steps and
+       * guidance are resolved per candidate: explicit arguments win, then the
+       * candidate's preset, then 1024 for width and height.
+       *
+       * @param {{candidates: object[], prompt: string, args: object, seed: (number|undefined), start: number}} args -
+       *   Candidates to try, the prompt, the raw tool arguments, an optional
+       *   seed, and the performance.now() start timestamp.
+       * @returns {Promise<object>} Tool result; success is false when every candidate failed.
+       */
+      async generateCandidateLadder(args) {
+        const failed = [];
+        const total = args.candidates.length;
+        for (let index = 0; index < total; index++) {
+          const candidate = args.candidates[index];
+          const width = numberArg(args.args["width"], candidate.preset?.width ?? 1024);
+          const height = numberArg(args.args["height"], candidate.preset?.height ?? 1024);
+          const steps = optionalNumberArg(args.args["steps"]) ?? candidate.preset?.steps;
+          const guidance = optionalNumberArg(args.args["guidance"]) ?? candidate.preset?.guidance;
+          this.emitProgress({
+            stage: "setup",
+            message: `Using image model ${candidate.model} (${candidate.backend}) [${index + 1}/${total}]`
+          });
+          let result;
+          try {
+            result = candidate.backend === "ollama" ? await this.generateWithOllama({ prompt: args.prompt, model: candidate.model, width, height, steps, start: args.start }) : candidate.backend === "sdcpp" ? await this.generateWithSdCpp({ prompt: args.prompt, model: candidate.model, width, height, steps, seed: args.seed, start: args.start, python: args.args["python"] }) : await this.generateWithDiffusers({ prompt: args.prompt, model: candidate.model, width, height, steps, guidance, seed: args.seed, start: args.start, python: args.args["python"] });
+          } catch (err) {
+            // A throw from one backend counts as a failed attempt, so the
+            // ladder falls back instead of escaping to the caller's catch.
+            // NOTE(review): assumes helper exceptions are ordinary generation failures — confirm none signal cancellation.
+            result = { success: false, error: err instanceof Error ? err.message : String(err) };
+          }
+          if (result.success)
+            return annotateImageFallbackSuccess(result, failed, candidate);
+          failed.push({ candidate, reason: summarizeToolResult(result) });
+          if (index < total - 1) {
+            this.emitProgress({
+              stage: "setup",
+              message: `${candidate.model} failed; falling back to ${args.candidates[index + 1].model}`
+            });
+          }
+        }
+        const output = formatImageFallbackFailure(failed);
+        return {
+          success: false,
+          output,
+          error: output,
+          durationMs: performance.now() - args.start
+        };
+      }
       async prewarmOllama(args) {
         const model = args.model || DEFAULT_OLLAMA_IMAGE_MODEL;
         if (await this.ollamaHasModel(model)) {
@@ -251800,7 +252052,7 @@ ${errText.slice(0, 800)}`,
 });
 // packages/execution/dist/tools/audio-generate.js
-import { spawn as spawn10 } from "node:child_process";
+import { execFileSync as execFileSync2, spawn as spawn10 } from "node:child_process";
 import { existsSync as existsSync24, readdirSync as readdirSync10, statSync as statSync9 } from "node:fs";
 import { chmod as chmod4, mkdir as mkdir12, writeFile as writeFile17 } from "node:fs/promises";
 import { join as join37 } from "node:path";
@@ -251824,13 +252076,63 @@ function backendPackages(backend) {
     return TANGOFLUX_PACKAGES;
   return DIFFUSERS_AUDIO_PACKAGES;
 }
+/**
+ * Ask nvidia-smi for the compute capability and name of the first GPU it lists.
+ *
+ * Despite the name, this only reports the capability; whether it counts as
+ * legacy is decided by the caller. Any failure (nvidia-smi missing, timeout,
+ * unparseable output) yields null.
+ *
+ * @returns {{major: number, minor: number, name: (string|undefined)}|null}
+ */
+function detectLegacyCudaComputeCapability() {
+  try {
+    const query = ["--query-gpu=compute_cap,name", "--format=csv,noheader,nounits"];
+    const report = execFileSync2("nvidia-smi", query, {
+      encoding: "utf8",
+      timeout: 5e3,
+      stdio: ["ignore", "pipe", "ignore"]
+    }).trim();
+    // Only the first non-empty line, i.e. the first GPU reported, is used.
+    const firstLine = report.split(/\r?\n/).map((line) => line.trim()).find(Boolean);
+    const parsed = firstLine?.match(/^(\d+)\.(\d+)\s*,?\s*(.*)$/);
+    if (!parsed)
+      return null;
+    const [, majorText, minorText, label] = parsed;
+    const major = Number(majorText);
+    const minor = Number(minorText);
+    if (!Number.isFinite(major) || !Number.isFinite(minor))
+      return null;
+    return { major, minor, name: label?.trim() || void 0 };
+  } catch {
+    // Best effort: a missing or failing nvidia-smi means "no capability known".
+    return null;
+  }
+}
+/**
+ * True when the compute capability major.minor is below 7.5.
+ *
+ * @param {number} major - Compute capability major version.
+ * @param {number} minor - Compute capability minor version.
+ * @returns {boolean}
+ */
+function isLegacyCudaCapability(major, minor) {
+  if (major !== 7)
+    return major < 7;
+  return minor < 5;
+}
+/**
+ * Choose the pip arguments used to install torch and torchaudio.
+ *
+ * Precedence: the OMNIUS_AUDIO_TORCH_INDEX_URL override, then a forced legacy
+ * plan, then a legacy plan when a Linux x64 host reports a GPU below SM 7.5,
+ * then pip's default wheel selection.
+ *
+ * @param {boolean} [forceLegacyCuda=false] - Use the pinned cu118 build without probing the GPU.
+ * @returns {{args: string[], description: string}} pip package/index arguments and a readable reason.
+ */
+function torchInstallPlan(forceLegacyCuda = false) {
+  const indexOverride = process.env["OMNIUS_AUDIO_TORCH_INDEX_URL"];
+  if (indexOverride) {
+    return {
+      args: ["torch", "torchaudio", "--index-url", indexOverride],
+      description: `env override ${indexOverride}`
+    };
+  }
+  // Both legacy branches install the same pinned cu118 build; only the reason differs.
+  const legacyPlan = (description) => ({
+    args: ["torch==2.3.1", "torchaudio==2.3.1", "--index-url", "https://download.pytorch.org/whl/cu118"],
+    description
+  });
+  if (forceLegacyCuda)
+    return legacyPlan("runtime-detected legacy CUDA GPU; using PyTorch 2.3.1 cu118 to avoid cuDNN 9 incompatibility");
+  const isLinuxX64 = process.platform === "linux" && process.arch === "x64";
+  if (isLinuxX64) {
+    const gpu = detectLegacyCudaComputeCapability();
+    if (gpu && isLegacyCudaCapability(gpu.major, gpu.minor)) {
+      const label = gpu.name ? ` ${gpu.name}` : "";
+      return legacyPlan(`CUDA legacy GPU SM ${gpu.major}.${gpu.minor}${label}; using PyTorch 2.3.1 cu118 to avoid cuDNN 9 incompatibility`);
+    }
+  }
+  return { args: ["torch", "torchaudio"], description: "default PyTorch wheel selection" };
+}
+/**
+ * Return packages without the exact entries "torch" and "torchaudio".
+ * Entries with version specifiers are not matched and are kept.
+ *
+ * @param {string[]} packages - pip package specifiers.
+ * @returns {string[]} A new filtered array.
+ */
+function withoutTorchPackages(packages) {
+  const torchNames = new Set(["torch", "torchaudio"]);
+  return packages.filter((pkg) => !torchNames.has(pkg));
+}
 function backendImportCheck(backend) {
   if (backend === "transformers")
     return "import torch, torchaudio, transformers, scipy\nfrom transformers import AutoProcessor, MusicgenForConditionalGeneration\n";
   if (backend === "audiocraft")
     return "import torch, torchaudio, audiocraft\nfrom audiocraft.models import MusicGen, AudioGen\n";
   if (backend === "stable-audio")
-    return "import torch, torchaudio, stable_audio_tools\n";
+    return "import torch, torchaudio, diffusers, scipy\nfrom diffusers import StableAudioPipeline\n";
   if (backend === "tangoflux")
     return "import torch, torchaudio\nfrom tangoflux import TangoFluxInference\n";
   return "import torch, diffusers, scipy\nfrom diffusers import AudioLDMPipeline\n";
@@ -252022,6 +252324,69 @@ async function pythonCanImport2(command, code8, repoRoot, env2) {
 async function pythonImportResult(command, code8, repoRoot, env2) {
   return await runProcess3(command, ["-c", code8], { cwd: repoRoot, timeoutMs: 6e4, env: env2 });
 }
+/**
+ * Run a short Python probe that imports torch and prints a JSON summary
+ * (torch version, CUDA availability and, with CUDA, capability, cuDNN version
+ * and device name).
+ *
+ * The probe exits with status 42 when CUDA is available, cuDNN is 9.x or newer
+ * (version >= 90000) and device 0 has compute capability below 7.5.
+ *
+ * @param {string} command - Python executable to run.
+ * @param {string} repoRoot - Working directory for the probe.
+ * @param {object} env2 - Environment passed to the process.
+ * @returns {Promise<object>} The runProcess3 result for the probe.
+ */
+async function torchRuntimeCompatibilityResult(command, repoRoot, env2) {
+  const probeLines = [
+    "import json, sys",
+    "import torch",
+    "payload={'torch': getattr(torch, '__version__', '?'), 'cuda_available': bool(torch.cuda.is_available())}",
+    "if torch.cuda.is_available():",
+    "    cap=torch.cuda.get_device_capability(0)",
+    "    cudnn=torch.backends.cudnn.version() or 0",
+    "    payload.update({'capability': list(cap), 'cudnn': int(cudnn), 'device': torch.cuda.get_device_name(0)})",
+    "    if int(cudnn) >= 90000 and tuple(cap) < (7, 5):",
+    "        print(json.dumps(payload))",
+    "        raise SystemExit(42)",
+    "print(json.dumps(payload))"
+  ];
+  const probeScript = probeLines.join("\n");
+  const options = { cwd: repoRoot, timeoutMs: 6e4, env: env2 };
+  return await runProcess3(command, ["-c", probeScript], options);
+}
+/**
+ * Force-reinstall torch and torchaudio with pip, using the plan from torchInstallPlan.
+ *
+ * @param {string} command - Python executable whose pip is used.
+ * @param {string} repoRoot - Working directory for pip.
+ * @param {object} env2 - Environment passed to the process.
+ * @param {boolean} [forceLegacyCuda=false] - Forwarded to torchInstallPlan.
+ * @param {Function} [onProgress] - Optional progress callback.
+ * @returns {Promise<void>}
+ * @throws {Error} When pip exits with a non-zero status.
+ */
+async function repairTorchRuntime(command, repoRoot, env2, forceLegacyCuda = false, onProgress) {
+  const { args: torchArgs, description } = torchInstallPlan(forceLegacyCuda);
+  onProgress?.({ stage: "setup", message: `Installing PyTorch runtime: ${description}` });
+  const pipArgs = ["-m", "pip", "install", "--progress-bar", "on", "--prefer-binary", "--force-reinstall", ...torchArgs];
+  const outcome = await runProcess3(command, pipArgs, {
+    cwd: repoRoot,
+    timeoutMs: 18e5,
+    env: env2,
+    progressLabel: `Installing PyTorch runtime (${description})`,
+    onProgress
+  });
+  if (outcome.code === 0)
+    return;
+  const details = trimProcessText2(outcome.stderr || outcome.stdout);
+  throw new Error(`Failed to install compatible PyTorch runtime (${description}).\n${details}`);
+}
+/**
+ * Make sure the Python environment has a torch build that passes the
+ * compatibility probe, reinstalling torch when it does not.
+ *
+ * Probe status 0 means compatible; 42 means the probe's legacy-GPU check
+ * failed and selects the forced cu118 plan; any other status triggers a
+ * default reinstall first.
+ *
+ * @param {string} command - Python executable.
+ * @param {string} repoRoot - Working directory for the probe and pip.
+ * @param {object} env2 - Environment passed to the processes.
+ * @param {Function} [onProgress] - Optional progress callback.
+ * @returns {Promise<void>}
+ * @throws {Error} When the runtime is still unusable after the repair attempts.
+ */
+async function ensureCompatibleTorchRuntime(command, repoRoot, env2, onProgress) {
+  const existing = await torchRuntimeCompatibilityResult(command, repoRoot, env2);
+  if (existing.code === 0)
+    return;
+  const legacyRepairTried = existing.code === 42;
+  await repairTorchRuntime(command, repoRoot, env2, legacyRepairTried, onProgress);
+  let probe = await torchRuntimeCompatibilityResult(command, repoRoot, env2);
+  if (probe.code === 0)
+    return;
+  if (probe.code === 42) {
+    // Run the forced cu118 reinstall only if it has not been tried yet;
+    // repeating the identical force-reinstall only costs another long
+    // download before the same failure.
+    if (!legacyRepairTried) {
+      await repairTorchRuntime(command, repoRoot, env2, true, onProgress);
+      probe = await torchRuntimeCompatibilityResult(command, repoRoot, env2);
+      if (probe.code === 0)
+        return;
+    }
+    throw new Error(`Audio-generation PyTorch runtime remains incompatible after cu118 repair.\n${trimProcessText2(probe.stderr || probe.stdout)}`);
+  }
+  throw new Error(`Audio-generation PyTorch runtime could not be prepared.\n${trimProcessText2(probe.stderr || probe.stdout)}`);
+}
 function formatAudioSetupFailure(backend, text) {
   const body = trimProcessText2(text);
   const lowered = text.toLowerCase();
@@ -252032,6 +252397,9 @@ function formatAudioSetupFailure(backend, text) {
   if (lowered.includes("cuda") && lowered.includes("not available")) {
     notes2.push("CUDA was not available to the selected Python environment; install a Torch build matching this machine's CUDA runtime or use CPU-compatible settings.");
   }
+  if (lowered.includes("cudnn version") && lowered.includes("sm < 7.5")) {
+    notes2.push("The installed PyTorch wheel uses cuDNN 9 on a legacy CUDA GPU. Omnius now repairs audio-generation venvs by reinstalling PyTorch 2.3.1 from the cu118 index for SM < 7.5 hardware.");
+  }
   return [body, ...notes2.map((note) => `
 ${note}`)].filter(Boolean).join("");
 }
@@ -252060,9 +252428,13 @@ ${trimProcessText2(created.stderr || created.stdout)}`);
     }
   }
   if (await pythonCanImport2(command, backendImportCheck(backend), repoRoot, pythonEnv)) {
-    return { command, env: pythonEnv };
+    await ensureCompatibleTorchRuntime(command, repoRoot, pythonEnv, onProgress);
+    if (await pythonCanImport2(command, backendImportCheck(backend), repoRoot, pythonEnv)) {
+      return { command, env: pythonEnv };
+    }
   }
   const packages = backendPackages(backend);
+  await ensureCompatibleTorchRuntime(command, repoRoot, pythonEnv, onProgress);
   onProgress?.({ stage: "setup", message: `Installing ${backend} audio-generation Python packages` });
   const pipArgs = [
     "-m",
@@ -252074,7 +252446,7 @@ ${trimProcessText2(created.stderr || created.stdout)}`);
     ...backend === "audiocraft" ? ["--only-binary", "av"] : [],
     "-U",
     "pip",
-    ...packages
+    ...withoutTorchPackages(packages)
   ];
   const pip = await runProcess3(command, pipArgs, {
     cwd: repoRoot,
@@ -252091,6 +252463,12 @@ ${formatAudioSetupFailure(backend, pip.stderr || pip.stdout)}`);
   if (importCheck.code !== 0) {
     throw new Error(`Audio-generation Python environment at ${venvDir} was created, but required ${backend} imports still fail.
 ${formatAudioSetupFailure(backend, importCheck.stderr || importCheck.stdout)}`);
+  }
+  await ensureCompatibleTorchRuntime(command, repoRoot, pythonEnv, onProgress);
+  if (!await pythonCanImport2(command, backendImportCheck(backend), repoRoot, pythonEnv)) {
+    const retry = await pythonImportResult(command, backendImportCheck(backend), repoRoot, pythonEnv);
+    throw new Error(`Audio-generation Python environment at ${venvDir} lost required ${backend} imports after PyTorch repair.
+${formatAudioSetupFailure(backend, retry.stderr || retry.stdout)}`);
   }
   return { command, env: pythonEnv };
 }
@@ -252160,11 +252538,31 @@ function playbackRequested(args) {
     return false;
   return true;
 }
+function booleanArg2(value2, fallback) {
+  if (typeof value2 === "boolean")
+    return value2;
+  if (typeof value2 === "string") {
+    if (/^(1|true|yes|on)$/i.test(value2.trim()))
+      return true;
+    if (/^(0|false|no|off)$/i.test(value2.trim()))
+      return false;
+  }
+  return fallback;
+}
+function generationFallbackEnabled2(args) {
+  if (booleanArg2(args["strict_model"] ?? args["strictModel"] ?? args["strict"], false))
+    return false;
+  return booleanArg2(args["fallback"] ?? args["allow_fallback"] ?? args["allowFallback"], true);
+}
 function getAudioGenerationPreset(model, kind) {
   if (!model)
     return void 0;
   return AUDIO_GENERATION_MODEL_PRESETS.find((preset) => preset.id === model && (!kind || preset.kind === kind)) ?? AUDIO_GENERATION_MODEL_PRESETS.find((preset) => preset.id === model);
 }
+function audioGenerationQualityLadder(kind) {
+  const ids = kind === "music" ? MUSIC_GENERATION_QUALITY_LADDER : SOUND_GENERATION_QUALITY_LADDER;
+  return ids.map((id) => getAudioGenerationPreset(id, kind)).filter((preset) => Boolean(preset));
+}
 function inferAudioGenerationBackend(model, requested) {
   if (requested && requested !== "auto") {
     if (requested === "diffusers" || requested === "transformers" || requested === "audiocraft" || requested === "stable-audio" || requested === "tangoflux" || requested === "project")
@@ -252188,6 +252586,41 @@ function inferAudioGenerationBackend(model, requested) {
     return "project";
   return "diffusers";
 }
+function audioCandidateFor(kind, model, requestedBackend) {
+  const backend = inferAudioGenerationBackend(model, requestedBackend);
+  const resolvedBackend = backend === "auto" ? kind === "music" ? "transformers" : "diffusers" : backend;
+  return {
+    kind,
+    model,
+    backend: resolvedBackend,
+    preset: getAudioGenerationPreset(model, kind)
+  };
+}
+function audioGenerationFallbackCandidates(kind, requestedModel, requestedBackend, allowFallback = true) {
+  const ladder = audioGenerationQualityLadder(kind);
+  const candidates = [];
+  const add2 = (candidate) => {
+    const key = `${candidate.kind}:${candidate.backend}:${candidate.model}`;
+    if (!candidates.some((existing) => `${existing.kind}:${existing.backend}:${existing.model}` === key)) {
+      candidates.push(candidate);
+    }
+  };
+  if (requestedModel) {
+    add2(audioCandidateFor(kind, requestedModel, requestedBackend));
+  } else if (requestedBackend && requestedBackend !== "auto") {
+    const firstForBackend = ladder.find((preset) => preset.backend === requestedBackend);
+    add2(audioCandidateFor(kind, firstForBackend?.id ?? (kind === "music" ? DEFAULT_MUSIC_MODEL : DEFAULT_SOUND_MODEL), requestedBackend));
+  } else if (!allowFallback) {
+    add2(audioCandidateFor(kind, kind === "music" ? DEFAULT_MUSIC_MODEL : DEFAULT_SOUND_MODEL, requestedBackend));
+  }
+  if (!allowFallback)
+    return candidates.length ? candidates : [audioCandidateFor(kind, kind === "music" ? DEFAULT_MUSIC_MODEL : DEFAULT_SOUND_MODEL, requestedBackend)];
+  const primaryIndex = requestedModel ? ladder.findIndex((preset) => preset.id === requestedModel) : requestedBackend && requestedBackend !== "auto" ? ladder.findIndex((preset) => preset.backend === requestedBackend) : 0;
+  const fallbackTail = primaryIndex >= 0 ? ladder.slice(primaryIndex) : ladder;
+  for (const preset of fallbackTail)
+    add2(audioCandidateFor(kind, preset.id));
+  return candidates;
+}
 function audioGenerationSetupPlan(kind, backend, repoRoot = ".", model) {
   const commandName = kind === "music" ? "music" : "sound";
   const fallback = kind === "music" ? DEFAULT_MUSIC_MODEL : DEFAULT_SOUND_MODEL;
@@ -252261,6 +252694,7 @@ function audioGenerationSetupPlan(kind, backend, repoRoot = ".", model) {
       ],
       notes: [
         "Use this path for Stable Audio Open 1.0, the serious stereo audio/music baseline.",
+        "Omnius uses Diffusers StableAudioPipeline here; stable-audio-tools is intentionally not installed because it often pulls build-from-source dependencies.",
         "Expect larger model downloads and higher VRAM pressure than AudioLDM or MusicGen small."
       ]
     };
@@ -252296,7 +252730,34 @@ function audioGenerationSetupPlan(kind, backend, repoRoot = ".", model) {
     ]
   };
 }
-var DEFAULT_SOUND_MODEL, DEFAULT_MUSIC_MODEL, DIFFUSERS_AUDIO_PACKAGES, TRANSFORMERS_AUDIO_PACKAGES, AUDIOCRAFT_PACKAGES, STABLE_AUDIO_PACKAGES, TANGOFLUX_PACKAGES, AUDIO_GENERATION_MODEL_PRESETS, DIFFUSERS_AUDIO_RUNNER, AUDIOCRAFT_RUNNER, TRANSFORMERS_AUDIO_RUNNER, TANGOFLUX_RUNNER, AudioGenerateTool;
+function summarizeToolResult2(result) {
+  return trimProcessText2(String(result.error || result.output || "unknown error"), 700).replace(/\s+/g, " ").trim();
+}
+function formatAudioAttempt(candidate, reason, index) {
+  return `${index + 1}. ${candidate.model} [${candidate.backend}] - ${reason}`;
+}
+function formatAudioFallbackFailure(kind, failed) {
+  return [
+    `No ${kind} generation model in the fallback ladder completed successfully.`,
+    "Attempted, highest quality to lowest:",
+    ...failed.map((attempt, index) => `  ${formatAudioAttempt(attempt.candidate, attempt.reason, index)}`)
+  ].join("\n");
+}
+function annotateAudioFallbackSuccess(result, failed, winner) {
+  if (failed.length === 0)
+    return result;
+  const prefix = [
+    `Fallback ladder succeeded with ${winner.model} [${winner.backend}] after ${failed.length} failed attempt(s).`,
+    "Failed attempts:",
+    ...failed.map((attempt, index) => `  ${formatAudioAttempt(attempt.candidate, attempt.reason, index)}`),
+    ""
+  ].join("\n");
+  return {
+    ...result,
+    output: prefix + result.output
+  };
+}
+var DEFAULT_SOUND_MODEL, DEFAULT_MUSIC_MODEL, DIFFUSERS_AUDIO_PACKAGES, TRANSFORMERS_AUDIO_PACKAGES, AUDIOCRAFT_PACKAGES, STABLE_AUDIO_PACKAGES, TANGOFLUX_PACKAGES, AUDIO_GENERATION_MODEL_PRESETS, SOUND_GENERATION_QUALITY_LADDER, MUSIC_GENERATION_QUALITY_LADDER, DIFFUSERS_AUDIO_RUNNER, AUDIOCRAFT_RUNNER, TRANSFORMERS_AUDIO_RUNNER, TANGOFLUX_RUNNER, AudioGenerateTool;
 var init_audio_generate = __esm({
   "packages/execution/dist/tools/audio-generate.js"() {
     "use strict";
@@ -252338,7 +252799,6 @@ var init_audio_generate = __esm({
       "accelerate",
       "scipy",
       "soundfile",
-      "stable-audio-tools",
       "einops"
     ];
     TANGOFLUX_PACKAGES = [
@@ -252644,6 +253104,21 @@ var init_audio_generate = __esm({
         note: "Legacy specialized music-generation path."
       }
     ];
+    SOUND_GENERATION_QUALITY_LADDER = [
+      "stabilityai/stable-audio-open-1.0",
+      "cvssp/audioldm2-large",
+      "cvssp/audioldm2",
+      "facebook/audiogen-medium",
+      "declare-lab/TangoFlux",
+      DEFAULT_SOUND_MODEL
+    ];
+    MUSIC_GENERATION_QUALITY_LADDER = [
+      "stabilityai/stable-audio-open-1.0",
+      "facebook/musicgen-stereo-large",
+      "facebook/musicgen-large",
+      "facebook/musicgen-medium",
+      DEFAULT_MUSIC_MODEL
+    ];
     DIFFUSERS_AUDIO_RUNNER = String.raw`#!/usr/bin/env python3
 import argparse, json, sys, time
 from pathlib import Path
@@ -252685,6 +253160,10 @@ def _snapshot_model(repo_id):
 def _device():
     import torch
     if torch.cuda.is_available():
+        cap = torch.cuda.get_device_capability(0)
+        cudnn = torch.backends.cudnn.version() or 0
+        if int(cudnn) >= 90000 and tuple(cap) < (7, 5):
+            raise RuntimeError(f"PyTorch cuDNN {cudnn} is incompatible with CUDA device {torch.cuda.get_device_name(0)} SM {cap[0]}.{cap[1]}; recreate the audio venv or let Omnius repair it with a cu118-compatible Torch wheel")
         return "cuda"
     if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
         return "mps"
@@ -252877,6 +253356,10 @@ def _snapshot_model(repo_id):
 def _device():
     import torch
     if torch.cuda.is_available():
+        cap = torch.cuda.get_device_capability(0)
+        cudnn = torch.backends.cudnn.version() or 0
+        if int(cudnn) >= 90000 and tuple(cap) < (7, 5):
+            raise RuntimeError(f"PyTorch cuDNN {cudnn} is incompatible with CUDA device {torch.cuda.get_device_name(0)} SM {cap[0]}.{cap[1]}; recreate the audio venv or let Omnius repair it with a cu118-compatible Torch wheel")
         return "cuda"
     if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
         return "mps"
@@ -253030,7 +253513,7 @@ if __name__ == "__main__":
 `;
     AudioGenerateTool = class {
       name = "generate_audio";
-      description = "Generate a sound effect or music clip from a text prompt using local audio-generation backends. Supports Diffusers AudioLDM/AudioLDM2, Transformers MusicGen, AudioCraft AudioGen, Stable Audio Open deployment paths, and explicit research-project profiles. Saves WAV files under .omnius/audio and returns the file path.";
+      description = "Generate a sound effect or music clip from a text prompt using local audio-generation backends. Supports Diffusers AudioLDM/AudioLDM2, Transformers MusicGen, AudioCraft AudioGen, Stable Audio Open deployment paths, and explicit research-project profiles. When fallback is enabled, auto generation tries ranked high-quality candidates first and gracefully falls back to smaller models if setup, download, or generation fails. Saves WAV files under .omnius/audio and returns the file path.";
       parameters = {
         type: "object",
         properties: {
@@ -253044,6 +253527,14 @@ if __name__ == "__main__":
           playback: {
             type: "boolean",
             description: "Whether the TUI should play generated audio after saving it. Defaults true; set false for silent generation."
+          },
+          fallback: {
+            type: "boolean",
+            description: "Whether to try the ranked quality ladder if the selected model/backend fails. Defaults true."
+          },
+          strict_model: {
+            type: "boolean",
+            description: "When true, use only the requested model/backend and do not fall back. Defaults false."
           }
         },
         required: ["prompt"]
@@ -253147,14 +253638,14 @@ if __name__ == "__main__":
         if (action === "list_models") {
           return {
             success: true,
-            output: AUDIO_GENERATION_MODEL_PRESETS.filter((preset2) => preset2.kind === kind).map((preset2) => `${preset2.id} [${preset2.backend}] - ${preset2.note}`).join("\n"),
+            output: AUDIO_GENERATION_MODEL_PRESETS.filter((preset) => preset.kind === kind).map((preset) => `${preset.id} [${preset.backend}] - ${preset.note}`).join("\n"),
             durationMs: performance.now() - start2
           };
         }
         if (action === "setup") {
           const requested = String(args["backend"] ?? (kind === "music" ? this.defaults.musicBackend : this.defaults.soundBackend) ?? (kind === "music" ? "transformers" : "diffusers"));
-          const backend2 = inferAudioGenerationBackend(typeof args["model"] === "string" ? args["model"] : void 0, requested);
-          const resolvedBackend = backend2 === "auto" ? kind === "music" ? "transformers" : "diffusers" : backend2;
+          const backend = inferAudioGenerationBackend(typeof args["model"] === "string" ? args["model"] : void 0, requested);
+          const resolvedBackend = backend === "auto" ? kind === "music" ? "transformers" : "diffusers" : backend;
           const plan = audioGenerationSetupPlan(kind, resolvedBackend, this.cwd, typeof args["model"] === "string" ? args["model"] : void 0);
           return {
             success: true,
@@ -253173,37 +253664,9 @@ if __name__ == "__main__":
           const defaultBackend2 = kind === "music" ? this.defaults.musicBackend : this.defaults.soundBackend;
           const rawModel2 = args["model"] ? String(args["model"]) : defaultModel2;
           const requestedModel2 = rawModel2 === "auto" ? void 0 : rawModel2;
-          let backend2 = inferAudioGenerationBackend(requestedModel2, args["backend"] ? String(args["backend"]) : defaultBackend2);
-          if (backend2 === "auto")
-            backend2 = kind === "music" ? "transformers" : "diffusers";
-          const model2 = requestedModel2 ?? (kind === "music" ? DEFAULT_MUSIC_MODEL : DEFAULT_SOUND_MODEL);
-          const preset2 = getAudioGenerationPreset(model2, kind);
-          const duration2 = numberArg2(args["duration"], preset2?.defaultDurationSec ?? (kind === "music" ? 20 : 8));
-          if (backend2 === "project") {
-            const plan = audioGenerationSetupPlan(kind, "project", this.cwd, model2);
-            return {
-              success: false,
-              output: [
-                `${preset2?.label ?? model2} is a project deployment profile, not an automatic generic runner.`,
-                "",
-                "Setup path:",
-                ...plan.commands.map((cmd) => `  ${cmd}`),
-                "",
-                ...plan.notes.map((note) => `- ${note}`)
-              ].join("\n"),
-              durationMs: performance.now() - start2
-            };
-          }
-          this.emitProgress({ stage: "setup", message: `Preparing ${kind} model ${model2} (${backend2})` });
-          return await this.prewarmPythonBackend({
-            kind,
-            backend: backend2,
-            runnerBackend: backend2,
-            model: model2,
-            duration: duration2,
-            start: start2,
-            python: args["python"]
-          });
+          const requestedBackend2 = args["backend"] ? String(args["backend"]) : defaultBackend2;
+          const candidates2 = audioGenerationFallbackCandidates(kind, requestedModel2, requestedBackend2, generationFallbackEnabled2(args));
+          return await this.prewarmCandidateLadder({ kind, candidates: candidates2, args, start: start2 });
         }
         const prompt = String(args["prompt"] ?? "").trim();
         if (!prompt) {
@@ -253213,45 +253676,12 @@ if __name__ == "__main__":
         const defaultBackend = kind === "music" ? this.defaults.musicBackend : this.defaults.soundBackend;
         const rawModel = args["model"] ? String(args["model"]) : defaultModel;
         const requestedModel = rawModel === "auto" ? void 0 : rawModel;
-        let backend = inferAudioGenerationBackend(requestedModel, args["backend"] ? String(args["backend"]) : defaultBackend);
-        if (backend === "auto")
-          backend = kind === "music" ? "transformers" : "diffusers";
-        const model = requestedModel ?? (kind === "music" ? DEFAULT_MUSIC_MODEL : DEFAULT_SOUND_MODEL);
-        const preset = getAudioGenerationPreset(model, kind);
-        const duration = numberArg2(args["duration"], preset?.defaultDurationSec ?? (kind === "music" ? 20 : 8));
-        const steps = optionalNumberArg2(args["steps"]) ?? preset?.defaultSteps;
+        const requestedBackend = args["backend"] ? String(args["backend"]) : defaultBackend;
+        const candidates = audioGenerationFallbackCandidates(kind, requestedModel, requestedBackend, generationFallbackEnabled2(args));
         const seed = optionalNumberArg2(args["seed"]);
         const playback = playbackRequested(args);
         try {
-          this.emitProgress({ stage: "setup", message: `Using ${kind} model ${model} (${backend})` });
-          if (backend === "project") {
-            const plan = audioGenerationSetupPlan(kind, "project", this.cwd, model);
-            return {
-              success: false,
-              output: [
-                `${preset?.label ?? model} is a project deployment profile, not an automatic generic runner.`,
-                "",
-                "Setup path:",
-                ...plan.commands.map((cmd) => `  ${cmd}`),
-                "",
-                ...plan.notes.map((note) => `- ${note}`)
-              ].join("\n"),
-              durationMs: performance.now() - start2
-            };
-          }
-          if (backend === "tangoflux") {
-            return await this.generateWithPythonBackend({ kind, backend, runnerBackend: "tangoflux", prompt, model, duration, steps, seed, playback, start: start2, python: args["python"] });
-          }
-          if (backend === "transformers") {
-            return await this.generateWithPythonBackend({ kind, backend, runnerBackend: "transformers", prompt, model, duration, steps, seed, playback, start: start2, python: args["python"] });
-          }
-          if (backend === "audiocraft") {
-            return await this.generateWithPythonBackend({ kind, backend, runnerBackend: "audiocraft", prompt, model, duration, steps, seed, playback, start: start2, python: args["python"] });
-          }
-          if (backend === "stable-audio") {
-            return await this.generateWithPythonBackend({ kind, backend, runnerBackend: "stable-audio", prompt, model, duration, steps, seed, playback, start: start2, python: args["python"] });
-          }
-          return await this.generateWithPythonBackend({ kind, backend: "diffusers", runnerBackend: "diffusers", prompt, model, duration, steps, seed, playback, start: start2, python: args["python"] });
+          return await this.generateCandidateLadder({ kind, candidates, prompt, args, seed, playback, start: start2 });
         } catch (err) {
           return {
             success: false,
@@ -253260,6 +253690,96 @@ if __name__ == "__main__":
           };
         }
       }
+      async prewarmCandidateLadder(args) {
+        const failed = [];
+        for (let index = 0; index < args.candidates.length; index++) {
+          const candidate = args.candidates[index];
+          const duration = numberArg2(args.args["duration"], candidate.preset?.defaultDurationSec ?? (args.kind === "music" ? 20 : 8));
+          this.emitProgress({
+            stage: "setup",
+            message: `Preparing ${args.kind} model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
+          });
+          const result = candidate.backend === "project" ? this.projectProfileResult(args.kind, candidate, args.start) : await this.prewarmPythonBackend({
+            kind: args.kind,
+            backend: candidate.backend,
+            runnerBackend: candidate.backend,
+            model: candidate.model,
+            duration,
+            start: args.start,
+            python: args.args["python"]
+          });
+          if (result.success)
+            return annotateAudioFallbackSuccess(result, failed, candidate);
+          failed.push({ candidate, reason: summarizeToolResult2(result) });
+          if (index < args.candidates.length - 1) {
+            this.emitProgress({
+              stage: "setup",
+              message: `${candidate.model} failed; trying ${args.candidates[index + 1].model}`
+            });
+          }
+        }
+        return {
+          success: false,
+          output: formatAudioFallbackFailure(args.kind, failed),
+          error: formatAudioFallbackFailure(args.kind, failed),
+          durationMs: performance.now() - args.start
+        };
+      }
+      async generateCandidateLadder(args) {
+        const failed = [];
+        for (let index = 0; index < args.candidates.length; index++) {
+          const candidate = args.candidates[index];
+          const duration = numberArg2(args.args["duration"], candidate.preset?.defaultDurationSec ?? (args.kind === "music" ? 20 : 8));
+          const steps = optionalNumberArg2(args.args["steps"]) ?? candidate.preset?.defaultSteps;
+          this.emitProgress({
+            stage: "setup",
+            message: `Using ${args.kind} model ${candidate.model} (${candidate.backend}) [${index + 1}/${args.candidates.length}]`
+          });
+          const result = candidate.backend === "project" ? this.projectProfileResult(args.kind, candidate, args.start) : await this.generateWithPythonBackend({
+            kind: args.kind,
+            backend: candidate.backend,
+            runnerBackend: candidate.backend,
+            prompt: args.prompt,
+            model: candidate.model,
+            duration,
+            steps,
+            seed: args.seed,
+            playback: args.playback,
+            start: args.start,
+            python: args.args["python"]
+          });
+          if (result.success)
+            return annotateAudioFallbackSuccess(result, failed, candidate);
+          failed.push({ candidate, reason: summarizeToolResult2(result) });
+          if (index < args.candidates.length - 1) {
+            this.emitProgress({
+              stage: "setup",
+              message: `${candidate.model} failed; falling back to ${args.candidates[index + 1].model}`
+            });
+          }
+        }
+        return {
+          success: false,
+          output: formatAudioFallbackFailure(args.kind, failed),
+          error: formatAudioFallbackFailure(args.kind, failed),
+          durationMs: performance.now() - args.start
+        };
+      }
+      projectProfileResult(kind, candidate, start2) {
+        const plan = audioGenerationSetupPlan(kind, "project", this.cwd, candidate.model);
+        return {
+          success: false,
+          output: [
+            `${candidate.preset?.label ?? candidate.model} is a project deployment profile, not an automatic generic runner.`,
+            "",
+            "Setup path:",
+            ...plan.commands.map((cmd) => `  ${cmd}`),
+            "",
+            ...plan.notes.map((note) => `- ${note}`)
+          ].join("\n"),
+          durationMs: performance.now() - start2
+        };
+      }
       async generateWithPythonBackend(args) {
         const runner = await ensureAudioRunner(this.cwd, args.runnerBackend);
         await mkdir12(audioOutputDir(this.cwd), { recursive: true });
@@ -477005,7 +477525,7 @@ var require_path_browserify = __commonJS({
           return path11.slice(start2, end);
         }
       },
-      extname: function extname16(path11) {
+      extname: function extname17(path11) {
         assertPath(path11);
         var startDot = -1;
         var startPart = 0;
@@ -507166,22 +507686,22 @@ Saved to: ${tempFile}`,
 });
 // packages/execution/dist/tools/audio-playback.js
-import { execFileSync as execFileSync2, execSync as execSync29, spawn as spawn16 } from "node:child_process";
+import { execFileSync as execFileSync3, execSync as execSync29, spawn as spawn16 } from "node:child_process";
 import { copyFileSync as copyFileSync2, existsSync as existsSync40, statSync as statSync18, writeFileSync as writeFileSync16, mkdirSync as mkdirSync16, readdirSync as readdirSync14 } from "node:fs";
 import { basename as basename12, extname as extname10, isAbsolute, join as join58 } from "node:path";
 import { homedir as homedir14, tmpdir as tmpdir11 } from "node:os";
 function hasCommand3(command) {
   try {
     if (process.platform === "win32") {
-      execFileSync2("where", [command], { stdio: "ignore", timeout: 2e3 });
+      execFileSync3("where", [command], { stdio: "ignore", timeout: 2e3 });
     } else {
-      execFileSync2("command", ["-v", command], { stdio: "ignore", timeout: 2e3 });
+      execFileSync3("command", ["-v", command], { stdio: "ignore", timeout: 2e3 });
     }
     return true;
   } catch {
     if (process.platform !== "win32") {
       try {
-        execFileSync2("which", [command], { stdio: "ignore", timeout: 2e3 });
+        execFileSync3("which", [command], { stdio: "ignore", timeout: 2e3 });
         return true;
       } catch {
         return false;
@@ -507236,7 +507756,7 @@ function playSoundFile(file, opts = {}) {
     };
   }
   try {
-    execFileSync2(command.command, command.args, { timeout: opts.timeoutMs ?? 3e5, stdio: "pipe" });
+    execFileSync3(command.command, command.args, { timeout: opts.timeoutMs ?? 3e5, stdio: "pipe" });
     return { ok: true, player: command.label };
   } catch (err) {
     return { ok: false, error: `Playback via ${command.label} failed: ${err instanceof Error ? err.message.slice(0, 300) : String(err).slice(0, 300)}` };
@@ -507359,6 +507879,18 @@ function supertonicInferScript() {
 function mlxVenvPy() {
   return process.platform === "win32" ? join58(voiceDir(), "mlx-venv", "Scripts", "python.exe") : join58(voiceDir(), "mlx-venv", "bin", "python3");
 }
+function luxttsVenvDir() {
+  return join58(voiceDir(), "luxtts-venv");
+}
+function luxttsVenvPy() {
+  return process.platform === "win32" ? join58(luxttsVenvDir(), "Scripts", "python.exe") : join58(luxttsVenvDir(), "bin", "python3");
+}
+function luxttsRepoDir() {
+  return join58(voiceDir(), "LuxTTS");
+}
+function luxttsInferScript() {
+  return join58(voiceDir(), "luxtts-infer.py");
+}
 function piperVenvDir() {
   return join58(voiceDir(), "piper-venv");
 }
@@ -507371,13 +507903,13 @@ function ensureSupertonicInstalled() {
     const py = findPython32();
     if (!py)
       throw new Error("python3 is required to set up Supertonic TTS.");
-    execFileSync2(py, ["-m", "venv", join58(voiceDir(), "supertonic3-venv")], { stdio: "pipe", timeout: 18e4 });
+    execFileSync3(py, ["-m", "venv", join58(voiceDir(), "supertonic3-venv")], { stdio: "pipe", timeout: 18e4 });
   }
   try {
-    execFileSync2(venvPy, ["-c", "import supertonic"], { stdio: "pipe", timeout: 1e4 });
+    execFileSync3(venvPy, ["-c", "import supertonic"], { stdio: "pipe", timeout: 1e4 });
   } catch {
-    execFileSync2(venvPy, ["-m", "pip", "install", "--quiet", "--upgrade", "pip"], { stdio: "pipe", timeout: 12e4 });
-    execFileSync2(venvPy, ["-m", "pip", "install", "--quiet", "supertonic"], { stdio: "pipe", timeout: 6e5 });
+    execFileSync3(venvPy, ["-m", "pip", "install", "--quiet", "--upgrade", "pip"], { stdio: "pipe", timeout: 12e4 });
+    execFileSync3(venvPy, ["-m", "pip", "install", "--quiet", "supertonic"], { stdio: "pipe", timeout: 6e5 });
   }
   mkdirSync16(voiceDir(), { recursive: true });
   writeFileSync16(supertonicInferScript(), SUPERTONIC_INFER_PY, "utf-8");
@@ -507385,20 +507917,95 @@ function ensureSupertonicInstalled() {
 }
 function ensureMlxInstalled() {
   if (process.platform !== "darwin" || process.arch !== "arm64") {
-    throw new Error("MLX TTS requires macOS on Apple Silicon. Use luxtts, supertonic, onnx/piper, or espeak on this machine.");
+    throw new Error("MLX TTS requires macOS on Apple Silicon. Use luxtts, supertonic, onnx/piper, or backend=auto on this machine.");
   }
   const venvPy = mlxVenvPy();
   if (!existsSync40(venvPy)) {
     const py = findPython32();
     if (!py)
       throw new Error("python3 is required to set up MLX Audio.");
-    execFileSync2(py, ["-m", "venv", join58(voiceDir(), "mlx-venv")], { stdio: "pipe", timeout: 18e4 });
+    execFileSync3(py, ["-m", "venv", join58(voiceDir(), "mlx-venv")], { stdio: "pipe", timeout: 18e4 });
+  }
+  try {
+    execFileSync3(venvPy, ["-c", "import mlx_audio"], { stdio: "pipe", timeout: 1e4 });
+  } catch {
+    execFileSync3(venvPy, ["-m", "pip", "install", "--quiet", "--upgrade", "pip"], { stdio: "pipe", timeout: 12e4 });
+    execFileSync3(venvPy, ["-m", "pip", "install", "--quiet", "mlx-audio"], { stdio: "pipe", timeout: 6e5 });
+  }
+  return venvPy;
+}
+function pythonCanImportLuxTts(venvPy) {
+  try {
+    execFileSync3(venvPy, [
+      "-c",
+      "import sys, os; sys.path.insert(0, os.environ['LUXTTS_REPO_PATH']); from zipvoice.luxvoice import LuxTTS; print('ok')"
+    ], {
+      stdio: "pipe",
+      timeout: 3e4,
+      env: { ...process.env, LUXTTS_REPO_PATH: luxttsRepoDir() }
+    });
+    return true;
+  } catch {
+    return false;
+  }
+}
+function pipInstall(venvPy, packages, timeout2 = 9e5) {
+  execFileSync3(venvPy, ["-m", "pip", "install", "--prefer-binary", ...packages], {
+    stdio: "pipe",
+    timeout: timeout2,
+    env: process.env
+  });
+}
+function ensureLuxttsInstalled() {
+  const venvPy = luxttsVenvPy();
+  const repoDir = luxttsRepoDir();
+  mkdirSync16(voiceDir(), { recursive: true });
+  if (existsSync40(venvPy) && existsSync40(join58(repoDir, "zipvoice", "luxvoice.py")) && pythonCanImportLuxTts(venvPy)) {
+    writeFileSync16(luxttsInferScript(), LUXTTS_DAEMON_PY, "utf-8");
+    return venvPy;
+  }
+  const py = findPython32();
+  if (!py)
+    throw new Error("python3 is required to set up LuxTTS voice cloning.");
+  if (!existsSync40(venvPy)) {
+    execFileSync3(py, ["-m", "venv", luxttsVenvDir()], { stdio: "pipe", timeout: 18e4 });
+  }
+  execFileSync3(venvPy, ["-m", "pip", "install", "--upgrade", "pip", "wheel", "setuptools<81"], {
+    stdio: "pipe",
+    timeout: 3e5
+  });
+  pipInstall(venvPy, ["torch", "torchaudio"], 12e5);
+  if (!existsSync40(join58(repoDir, "zipvoice", "luxvoice.py"))) {
+    if (!hasCommand3("git"))
+      throw new Error("git is required to set up LuxTTS voice cloning.");
+    execFileSync3("git", ["clone", "--depth", "1", "https://github.com/ysharma3501/LuxTTS.git", repoDir], {
+      stdio: "pipe",
+      timeout: 3e5
+    });
   }
+  pipInstall(venvPy, [
+    "lhotse",
+    "huggingface_hub",
+    "safetensors",
+    "pydub",
+    "onnxruntime",
+    "librosa",
+    "transformers<=4.57.6",
+    "inflect",
+    "numpy",
+    "vocos",
+    "jieba",
+    "pypinyin",
+    "cn2an"
+  ], 12e5);
   try {
-    execFileSync2(venvPy, ["-c", "import mlx_audio"], { stdio: "pipe", timeout: 1e4 });
+    pipInstall(venvPy, ["git+https://github.com/ysharma3501/LinaCodec.git"], 12e5);
   } catch {
-    execFileSync2(venvPy, ["-m", "pip", "install", "--quiet", "--upgrade", "pip"], { stdio: "pipe", timeout: 12e4 });
-    execFileSync2(venvPy, ["-m", "pip", "install", "--quiet", "mlx-audio"], { stdio: "pipe", timeout: 6e5 });
+  }
+  pipInstall(venvPy, ["-e", repoDir], 6e5);
+  writeFileSync16(luxttsInferScript(), LUXTTS_DAEMON_PY, "utf-8");
+  if (!pythonCanImportLuxTts(venvPy)) {
+    throw new Error(`LuxTTS setup completed but import still fails in ${luxttsVenvDir()}.`);
   }
   return venvPy;
 }
@@ -507411,10 +508018,10 @@ function ensurePiperInstalled() {
     if (!py)
       throw new Error("python3 is required to set up Piper TTS.");
     mkdirSync16(voiceDir(), { recursive: true });
-    execFileSync2(py, ["-m", "venv", piperVenvDir()], { stdio: "pipe", timeout: 18e4 });
+    execFileSync3(py, ["-m", "venv", piperVenvDir()], { stdio: "pipe", timeout: 18e4 });
     const venvPy = process.platform === "win32" ? join58(piperVenvDir(), "Scripts", "python.exe") : join58(piperVenvDir(), "bin", "python3");
-    execFileSync2(venvPy, ["-m", "pip", "install", "--quiet", "--upgrade", "pip"], { stdio: "pipe", timeout: 12e4 });
-    execFileSync2(venvPy, ["-m", "pip", "install", "--quiet", "piper-tts"], { stdio: "pipe", timeout: 6e5 });
+    execFileSync3(venvPy, ["-m", "pip", "install", "--quiet", "--upgrade", "pip"], { stdio: "pipe", timeout: 12e4 });
+    execFileSync3(venvPy, ["-m", "pip", "install", "--quiet", "piper-tts"], { stdio: "pipe", timeout: 6e5 });
   }
   if (!existsSync40(bin)) {
     throw new Error("Piper TTS installed but the piper executable was not found in the managed venv.");
@@ -507435,6 +508042,28 @@ function saveCloneRefFromSample(sample, cloneName) {
   copyFileSync2(source, dest);
   return dest;
 }
+function cloneSampleArg(args) {
+  for (const key of ["sample", "source_audio", "voice_sample", "reference_audio", "ref_audio", "clone_sample"]) {
+    const value2 = args[key];
+    if (typeof value2 === "string" && value2.trim())
+      return value2.trim();
+  }
+  return "";
+}
+function wantsVoiceClone(args) {
+  if (cloneSampleArg(args))
+    return true;
+  if (typeof args["clone_ref"] === "string" && args["clone_ref"].trim())
+    return true;
+  const voice = typeof args["voice"] === "string" ? args["voice"].trim() : "";
+  return /\.(wav|mp3|flac|ogg|m4a)$/i.test(voice) || voice.startsWith("/") || voice.startsWith("./") || voice.startsWith("../") || voice.startsWith("~/");
+}
+function cloneRefForSynthesis(args) {
+  const sample = cloneSampleArg(args);
+  if (sample)
+    return saveCloneRefFromSample(sample, typeof args["clone_name"] === "string" ? args["clone_name"] : void 0);
+  return resolveCloneRef(args["clone_ref"] ?? args["voice"]);
+}
 function ensureLuxttsDaemon() {
   if (_luxttsDaemon && !_luxttsDaemon.killed && _luxttsReady)
     return Promise.resolve(true);
@@ -507448,14 +508077,23 @@ function ensureLuxttsDaemon() {
   }
   if (_luxttsStarting)
     return Promise.resolve(false);
-  const venvPy = join58(homedir14(), ".omnius", "voice", "luxtts-venv", "bin", "python3");
-  const inferScript = join58(homedir14(), ".omnius", "voice", "luxtts-infer.py");
-  const repoDir = join58(homedir14(), ".omnius", "voice", "LuxTTS");
+  const venvPy = luxttsVenvPy();
+  const inferScript = luxttsInferScript();
+  const repoDir = luxttsRepoDir();
   if (!existsSync40(venvPy) || !existsSync40(inferScript))
     return Promise.resolve(false);
   _luxttsStarting = true;
   return new Promise((resolve48) => {
-    const timeout2 = setTimeout(() => {
+    let settled = false;
+    let timeout2;
+    const finish = (ready) => {
+      if (settled)
+        return;
+      settled = true;
+      clearTimeout(timeout2);
+      resolve48(ready);
+    };
+    timeout2 = setTimeout(() => {
       _luxttsStarting = false;
       if (_luxttsDaemon && !_luxttsReady) {
         try {
@@ -507464,7 +508102,7 @@ function ensureLuxttsDaemon() {
         }
         _luxttsDaemon = null;
       }
-      resolve48(false);
+      finish(false);
     }, 12e4);
     const daemon = spawn16(venvPy, [inferScript], {
       stdio: ["pipe", "pipe", "pipe"],
@@ -507486,8 +508124,7 @@ function ensureLuxttsDaemon() {
           if (msg.type === "ready") {
             _luxttsReady = true;
             _luxttsStarting = false;
-            clearTimeout(timeout2);
-            resolve48(true);
+            finish(true);
           } else if (msg.type === "result" && msg.id) {
             const pending = _luxttsPending.get(msg.id);
             if (pending) {
@@ -507509,13 +508146,13 @@ function ensureLuxttsDaemon() {
       _luxttsDaemon = null;
       _luxttsReady = false;
       _luxttsStarting = false;
+      finish(false);
     });
     daemon.on("error", () => {
       _luxttsDaemon = null;
       _luxttsReady = false;
       _luxttsStarting = false;
-      clearTimeout(timeout2);
-      resolve48(false);
+      finish(false);
     });
   });
 }
@@ -507545,7 +508182,7 @@ function luxttsSynthesize(text, cloneRef, outputPath2, speed = 1) {
     _luxttsDaemon.stdin.write(req2 + "\n");
   });
 }
-var _luxttsDaemon, _luxttsReady, _luxttsRequestId, _luxttsPending, _luxttsBuffer, _luxttsStarting, SUPERTONIC_INFER_PY, AudioPlaybackTool, TtsGenerateTool, SoundPlaybackTool;
+var _luxttsDaemon, _luxttsReady, _luxttsRequestId, _luxttsPending, _luxttsBuffer, _luxttsStarting, SUPERTONIC_INFER_PY, LUXTTS_DAEMON_PY, AudioPlaybackTool, TtsGenerateTool, SoundPlaybackTool;
 var init_audio_playback = __esm({
   "packages/execution/dist/tools/audio-playback.js"() {
     "use strict";
@@ -507585,10 +508222,45 @@ try:
 except Exception as exc:
     print(json.dumps({"ok": False, "error": str(exc), "trace": traceback.format_exc(limit=3)}))
     sys.exit(1)
+`;
+    LUXTTS_DAEMON_PY = String.raw`
+import json, os, sys, traceback, wave
+import numpy as np
+import torch
+repo = os.environ.get("LUXTTS_REPO_PATH") or ""
+if repo:
+    sys.path.insert(0, repo)
+from zipvoice.luxvoice import LuxTTS
+device = "cuda" if torch.cuda.is_available() else "cpu"
+tts = LuxTTS(model_path="YatharthS/LuxTTS", device=device, threads=4)
+print(json.dumps({"type": "ready", "device": device}), flush=True)
+for line in sys.stdin:
+    if not line.strip():
+        continue
+    req = json.loads(line)
+    if req.get("action") == "quit":
+        break
+    rid = req.get("id")
+    try:
+        text = str(req.get("text") or "").strip()
+        clone_ref = str(req.get("clone_ref") or "")
+        output = str(req.get("output_path") or "")
+        speed = float(req.get("speed") or 1.0)
+        enc = tts.encode_prompt(clone_ref, duration=5, rms=0.001)
+        wav = tts.generate_speech(text, enc, num_steps=4, guidance_scale=3.0, t_shift=0.5, speed=speed)
+        data = (np.clip(wav.cpu().numpy().squeeze(), -1, 1) * 32767).astype(np.int16)
+        with wave.open(output, "wb") as f:
+            f.setnchannels(1)
+            f.setsampwidth(2)
+            f.setframerate(48000)
+            f.writeframes(data.tobytes())
+        print(json.dumps({"type": "result", "id": rid, "path": output}), flush=True)
+    except Exception as exc:
+        print(json.dumps({"type": "error", "id": rid, "error": str(exc), "trace": traceback.format_exc(limit=3)}), flush=True)
 `;
     AudioPlaybackTool = class {
       name = "audio_playback";
-      description = "Play audio through speakers, synthesize text-to-speech, and manage TTS clone voices. Actions: 'play' to play an audio file (WAV/MP3/OGG — including recordings from memory episodes), 'speak' to synthesize and play text, 'synthesize' to save TTS to a WAV file, 'clone' to register a voice-clone sample, 'list_voices' to inspect available clone refs/backends, 'volume' to get or set system volume, 'list' to enumerate audio output devices. TTS backends are explicit: auto, luxtts, supertonic, mlx, onnx/piper, or espeak. Neural TTS backends self-provision into ~/.omnius/voice on first use where supported. Use generate_tts when the task is specifically to create a TTS file; do not use shell speech commands or generate_audio for spoken TTS.";
+      description = "Play audio through speakers, synthesize text-to-speech, and manage TTS clone voices. Actions: 'play' to play an audio file (WAV/MP3/OGG — including recordings from memory episodes), 'speak' to synthesize and play text, 'synthesize' to save TTS to a WAV file, 'clone' to register a voice-clone source clip, 'list_voices' to inspect available clone refs/backends, 'volume' to get or set system volume, 'list' to enumerate audio output devices. TTS backends include auto, LuxTTS voice cloning, Supertonic, MLX, ONNX/Piper, and a local fallback. Neural TTS backends self-provision into ~/.omnius/voice on first use where supported. For cloned speech from a source clip, call generate_tts or audio_playback action=synthesize with sample/source_audio/voice_sample and backend=auto or luxtts. Use generate_tts when the task is specifically to create a TTS file; do not use shell speech commands or generate_audio for spoken TTS.";
       parameters = {
         type: "object",
         properties: {
@@ -507615,8 +508287,8 @@ except Exception as exc:
           },
           backend: {
             type: "string",
-            enum: ["auto", "luxtts", "supertonic", "mlx", "onnx", "piper", "espeak"],
-            description: "TTS backend. auto tries LuxTTS clone, Supertonic, MLX on Apple Silicon, Piper/ONNX, then espeak."
+            enum: ["auto", "luxtts", "supertonic", "mlx", "onnx", "piper"],
+            description: "TTS backend. auto tries LuxTTS clone, Supertonic, MLX on Apple Silicon, Piper/ONNX, then a local fallback."
           },
           output: {
             type: "string",
@@ -507632,11 +508304,31 @@ except Exception as exc:
           },
           sample: {
             type: "string",
-            description: "Audio sample path to register as a clone voice for action=clone."
+            description: "Audio source clip path to register or use as a LuxTTS clone voice."
+          },
+          source_audio: {
+            type: "string",
+            description: "Alias for sample. Use this for cloned speech from a source voice clip."
+          },
+          voice_sample: {
+            type: "string",
+            description: "Alias for sample/source_audio."
+          },
+          reference_audio: {
+            type: "string",
+            description: "Alias for sample/source_audio."
+          },
+          ref_audio: {
+            type: "string",
+            description: "Alias for sample/source_audio."
+          },
+          clone_sample: {
+            type: "string",
+            description: "Alias for sample/source_audio."
           },
           clone_name: {
             type: "string",
-            description: "Friendly filename stem for action=clone."
+            description: "Friendly filename stem for action=clone or for registering a source clip during synthesis."
           },
           model: {
             type: "string",
@@ -507652,11 +508344,11 @@ except Exception as exc:
           },
           speed: {
             type: "number",
-            description: "Speech speed. espeak uses words per minute; neural backends use a multiplier."
+            description: "Speech speed. Neural backends use a multiplier; local fallback uses its backend-specific rate."
           },
           voice: {
             type: "string",
-            description: "Voice id/name. Examples: Supertonic voice M4, MLX voice af_heart, espeak voice en-us, or Piper/ONNX model path."
+            description: "Voice id/name. Examples: Supertonic voice M4, MLX voice af_heart, a source audio path for cloning, or Piper/ONNX model path."
           },
           lang: {
             type: "string",
@@ -507720,9 +508412,9 @@ except Exception as exc:
         return await this.synthesizeText(args, start2, true);
       }
       cloneVoice(args, start2) {
-        const sample = typeof args["sample"] === "string" ? args["sample"] : typeof args["file"] === "string" ? args["file"] : "";
+        const sample = cloneSampleArg(args) || (typeof args["file"] === "string" ? args["file"] : "");
         if (!sample.trim()) {
-          return { success: false, output: "", error: "Missing 'sample' parameter. Provide a local audio sample to register as a clone voice.", durationMs: performance.now() - start2 };
+          return { success: false, output: "", error: "Missing source audio. Provide sample=<file> or source_audio=<file> to register as a clone voice.", durationMs: performance.now() - start2 };
         }
         const saved = saveCloneRefFromSample(sample, typeof args["clone_name"] === "string" ? args["clone_name"] : void 0);
         return {
@@ -507739,10 +508431,11 @@ except Exception as exc:
         const lines = [
           "TTS backends:",
           `  luxtts: ${existsSync40(join58(voiceDir(), "luxtts-venv", "bin", "python3")) ? "installed" : "not installed"}; clone refs: ${refs.length}`,
+          "    clone from source clip: generate_tts text=<words> source_audio=<wav/mp3/flac/ogg/m4a> backend=auto",
           `  supertonic: ${existsSync40(supertonicVenvPy()) ? "installed" : "not installed"}; voices include M1, M2, M3, M4 when package assets are available`,
           `  mlx: ${existsSync40(mlxVenvPy()) ? "installed" : "not installed"}; Apple Silicon only; default model mlx-community/Kokoro-82M-bf16`,
           `  piper/onnx: ${hasCommand3("piper") || existsSync40(piperVenvBin()) ? "available" : "not installed"}; first use installs piper-tts into ${piperVenvDir()}; pass model=<path.onnx> for raw ONNX voices`,
-          `  espeak: ${hasCommand3("espeak-ng") ? "available" : "not found"}`,
+          `  local fallback: ${hasCommand3("espeak-ng") ? "available" : "not found"}`,
           "",
           "Registered clone refs:",
           ...refs.length ? refs.map((ref) => `  ${ref}`) : ["  none"]
@@ -507756,11 +508449,20 @@ except Exception as exc:
         }
         const requestedBackend = normalizeTtsBackend(args["backend"]);
         const strictBackend = boolArg(args["strict_backend"] ?? args["strictBackend"], false);
+        const cloneRequested = wantsVoiceClone(args);
+        if (cloneRequested && requestedBackend !== "auto" && requestedBackend !== "luxtts") {
+          return {
+            success: false,
+            output: "",
+            error: "Voice cloning from a source clip requires backend=auto or backend=luxtts.",
+            durationMs: performance.now() - start2
+          };
+        }
         const playback = playbackArg(args, speakDefault);
         const outputPath2 = ttsOutputPath(args, requestedBackend);
         const device = typeof args["device"] === "string" ? args["device"] : "default";
         const tried = [];
-        const autoCandidates = ["luxtts", "supertonic", ...process.platform === "darwin" && process.arch === "arm64" ? ["mlx"] : [], "piper", "espeak"];
+        const autoCandidates = cloneRequested ? ["luxtts"] : ["luxtts", "supertonic", ...process.platform === "darwin" && process.arch === "arm64" ? ["mlx"] : [], "piper", "espeak"];
         const candidates = requestedBackend === "auto" ? autoCandidates : strictBackend ? [requestedBackend] : [requestedBackend, ...autoCandidates.filter((backend) => backend !== requestedBackend)];
         let usedBackend = "";
         let voiceSummary = "";
@@ -507823,21 +508525,19 @@ ${tried.map((line) => `- ${line}`).join("\n")}`,
         };
       }
       async synthesizeLuxtts(text, outputPath2, args) {
-        const cloneRef = resolveCloneRef(args["clone_ref"] ?? args["voice"] ?? args["sample"]);
+        const cloneRef = cloneRefForSynthesis(args);
         if (!cloneRef)
-          throw new Error(`No LuxTTS clone reference found. Register one with audio_playback action=clone sample=<file>.`);
+          throw new Error(`No LuxTTS clone source found. Provide source_audio=<voice clip> or clone_ref=<registered clip>.`);
         const speed = numberArg3(args["speed"], 1);
+        ensureLuxttsInstalled();
         const daemonReady = await ensureLuxttsDaemon();
         if (daemonReady) {
           await luxttsSynthesize(text, cloneRef, outputPath2, speed);
           if (existsSync40(outputPath2))
             return `${basename12(cloneRef)} (LuxTTS daemon)`;
         }
-        const venvPy = join58(voiceDir(), "luxtts-venv", "bin", "python3");
-        const repoDir = join58(voiceDir(), "LuxTTS");
-        if (!existsSync40(venvPy) || !existsSync40(repoDir)) {
-          throw new Error("LuxTTS is not installed in the managed voice environment yet.");
-        }
+        const venvPy = luxttsVenvPy();
+        const repoDir = luxttsRepoDir();
         const pyScript = [
           "import json, sys, wave",
           "import numpy as np, torch",
@@ -507851,7 +508551,7 @@ ${tried.map((line) => `- ${line}`).join("\n")}`,
           "d=(np.clip(wav.cpu().numpy().squeeze(), -1, 1)*32767).astype(np.int16)",
           "f=wave.open(args['output'], 'wb'); f.setnchannels(1); f.setsampwidth(2); f.setframerate(48000); f.writeframes(d.tobytes()); f.close()"
         ].join("; ");
-        execFileSync2(venvPy, ["-c", pyScript, JSON.stringify({ text, output: outputPath2, clone_ref: cloneRef, repo: repoDir, speed })], {
+        execFileSync3(venvPy, ["-c", pyScript, JSON.stringify({ text, output: outputPath2, clone_ref: cloneRef, repo: repoDir, speed })], {
           stdio: "pipe",
           timeout: 12e4,
           env: { ...process.env, LUXTTS_REPO_PATH: repoDir }
@@ -507864,7 +508564,7 @@ ${tried.map((line) => `- ${line}`).join("\n")}`,
         const lang = typeof args["lang"] === "string" ? args["lang"] : "en";
         const speed = numberArg3(args["speed"], 1.05);
         const totalStep = Math.round(numberArg3(args["total_step"], 8));
-        const stdout = execFileSync2(venvPy, [supertonicInferScript()], {
+        const stdout = execFileSync3(venvPy, [supertonicInferScript()], {
           input: JSON.stringify({ text, output_path: outputPath2, voice_name: voice, lang, speed, total_step: totalStep }),
           encoding: "utf8",
           stdio: ["pipe", "pipe", "pipe"],
@@ -507887,7 +508587,7 @@ ${tried.map((line) => `- ${line}`).join("\n")}`,
           "args=json.loads(sys.argv[1])",
           "tts_gen.main(['--model', args['model'], '--text', args['text'], '--voice', args['voice'], '--lang_code', args['lang'], '--audio_path', args['output']])"
         ].join("; ");
-        execFileSync2(py, ["-c", pyScript, JSON.stringify({ text, model, voice, lang, output: outputPath2 })], {
+        execFileSync3(py, ["-c", pyScript, JSON.stringify({ text, model, voice, lang, output: outputPath2 })], {
           stdio: "pipe",
           timeout: 18e4,
           cwd: tmpdir11()
@@ -507908,15 +508608,15 @@ ${tried.map((line) => `- ${line}`).join("\n")}`,
         } else {
           throw new Error(`${requireModel ? "Raw ONNX" : "Piper"} TTS requires model=<path.onnx> or voice=<path.onnx>.`);
         }
-        execFileSync2(piper, argv, { input: text, stdio: ["pipe", "pipe", "pipe"], timeout: 12e4 });
+        execFileSync3(piper, argv, { input: text, stdio: ["pipe", "pipe", "pipe"], timeout: 12e4 });
         return summary;
       }
       synthesizeEspeak(text, outputPath2, args) {
         if (!hasCommand3("espeak-ng"))
-          throw new Error("espeak-ng command not found.");
+          throw new Error("Local fallback TTS command not found.");
         const voice = typeof args["voice"] === "string" ? args["voice"] : "en";
         const speed = Math.round(numberArg3(args["speed"], 160));
-        execFileSync2("espeak-ng", ["-v", voice, "-s", String(speed), "-w", outputPath2, text], {
+        execFileSync3("espeak-ng", ["-v", voice, "-s", String(speed), "-w", outputPath2, text], {
           stdio: "pipe",
           timeout: 6e4
         });
@@ -507995,20 +508695,27 @@ ${devices.join("\n")}`,
     };
     TtsGenerateTool = class {
       name = "generate_tts";
-      description = "Generate text-to-speech audio as a WAV file, optionally playing it after synthesis. Supports explicit backends: auto, luxtts voice cloning, supertonic, mlx, onnx/piper, and espeak. Neural TTS backends self-provision into ~/.omnius/voice on first use where supported. Use clone_ref to select a registered LuxTTS voice, sample+clone_name to register a clone sample via audio_playback action=clone, and playback=false for silent file generation. Use this tool for speech/TTS requests; do not use shell commands or generate_audio as a TTS fallback.";
+      description = "Generate text-to-speech audio as a WAV file, optionally playing it after synthesis. Supports explicit backends: auto, LuxTTS voice cloning, Supertonic, MLX, ONNX/Piper, and local fallback. Neural TTS backends self-provision into ~/.omnius/voice on first use where supported. For voice cloning, pass source_audio/sample/voice_sample with the reference clip and backend=auto or luxtts; clone_name can register it for reuse. Use clone_ref to select a registered LuxTTS voice and playback=false for silent file generation. Use this tool for speech/TTS requests; do not use shell commands or generate_audio as a TTS fallback.";
       parameters = {
         type: "object",
         properties: {
           text: { type: "string", description: "Text to synthesize" },
           input: { type: "string", description: "Alias for text." },
           prompt: { type: "string", description: "Alias for text." },
-          backend: { type: "string", enum: ["auto", "luxtts", "supertonic", "mlx", "onnx", "piper", "espeak"] },
+          backend: { type: "string", enum: ["auto", "luxtts", "supertonic", "mlx", "onnx", "piper"] },
           output: { type: "string", description: "Output WAV path. Defaults to ~/.omnius/voice/generated/tts-*.wav." },
           path: { type: "string", description: "Alias for output." },
           playback: { type: "boolean", description: "Whether to play after generating. Defaults false for generate_tts." },
           strict_backend: { type: "boolean", description: "When true, fail instead of falling back if the requested backend is unavailable. Defaults false." },
           voice: { type: "string", description: "Voice id/name, or raw Piper/ONNX path when backend=onnx/piper." },
           clone_ref: { type: "string", description: "LuxTTS clone reference path, filename, or registered clone name." },
+          sample: { type: "string", description: "Voice source clip path for cloned speech. Alias: source_audio." },
+          source_audio: { type: "string", description: "Voice source clip path for cloned speech." },
+          voice_sample: { type: "string", description: "Alias for source_audio." },
+          reference_audio: { type: "string", description: "Alias for source_audio." },
+          ref_audio: { type: "string", description: "Alias for source_audio." },
+          clone_sample: { type: "string", description: "Alias for source_audio." },
+          clone_name: { type: "string", description: "Optional name to register the source clip for later reuse." },
           model: { type: "string", description: "Backend model id or raw ONNX/Piper model path." },
           lang: { type: "string", description: "Language code for Supertonic/MLX where supported." },
           speed: { type: "number", description: "Speech speed multiplier or backend-specific rate." },
@@ -575055,7 +575762,7 @@ __export(image_ascii_preview_exports, {
   extractSavedImagePath: () => extractSavedImagePath,
   formatImageAsciiContext: () => formatImageAsciiContext
 });
-import { execFileSync as execFileSync3 } from "node:child_process";
+import { execFileSync as execFileSync4 } from "node:child_process";
 import { createRequire as createRequire5 } from "node:module";
 import { existsSync as existsSync94, readFileSync as readFileSync75, statSync as statSync32 } from "node:fs";
 import { resolve as resolve37 } from "node:path";
@@ -575192,7 +575899,7 @@ function convertWithFfmpeg(imagePath, width, height, timeoutMs) {
       `scale=${width}:${height}`,
       "format=gray"
     ].join(",");
-    const raw = execFileSync3(
+    const raw = execFileSync4(
       "ffmpeg",
       [
         "-hide_banner",
@@ -575357,19 +576064,19 @@ function modelOnnxPath(id) {
 function modelConfigPath(id) {
   return join109(modelDir(id), "config.json");
 }
-function luxttsVenvDir() {
+function luxttsVenvDir2() {
   return join109(voiceDir2(), "luxtts-venv");
 }
-function luxttsVenvPy() {
-  return platform5() === "win32" ? join109(luxttsVenvDir(), "Scripts", "python.exe") : join109(luxttsVenvDir(), "bin", "python3");
+function luxttsVenvPy2() {
+  return platform5() === "win32" ? join109(luxttsVenvDir2(), "Scripts", "python.exe") : join109(luxttsVenvDir2(), "bin", "python3");
 }
-function luxttsRepoDir() {
+function luxttsRepoDir2() {
   return join109(voiceDir2(), "LuxTTS");
 }
 function luxttsCloneRefsDir() {
   return join109(voiceDir2(), "clone-refs");
 }
-function luxttsInferScript() {
+function luxttsInferScript2() {
   return join109(voiceDir2(), "luxtts-infer.py");
 }
 function supertonicVenvDir() {
@@ -577936,12 +578643,12 @@ Error: ${err2 instanceof Error ? err2.message : String(err2)}`
             "python3 not found. LuxTTS requires Python 3.10+. Try: apt install python3 / brew install python3"
           );
         }
-        const venvDir = luxttsVenvDir();
-        const venvPy = luxttsVenvPy();
+        const venvDir = luxttsVenvDir2();
+        const venvPy = luxttsVenvPy2();
         if (existsSync95(venvPy)) {
           try {
             const quotedPy = `"${venvPy}"`;
-            const repoPath = luxttsRepoDir().replace(/\\/g, "/");
+            const repoPath = luxttsRepoDir2().replace(/\\/g, "/");
             await this.asyncShell(
               `${quotedPy} -c "import sys; sys.path.insert(0, '${repoPath}'); from zipvoice.luxvoice import LuxTTS; print('ok')"`,
               3e4
@@ -578055,7 +578762,7 @@ Error: ${err2 instanceof Error ? err2.message : String(err2)}`
             }
           }
         }
-        const repoDir = luxttsRepoDir();
+        const repoDir = luxttsRepoDir2();
         if (!existsSync95(join109(repoDir, "zipvoice", "luxvoice.py"))) {
           renderInfo("  Cloning LuxTTS repository...");
           try {
@@ -578479,18 +579186,18 @@ def main():
 if __name__ == '__main__':
     main()
 `;
-        const scriptPath2 = luxttsInferScript();
+        const scriptPath2 = luxttsInferScript2();
         mkdirSync52(voiceDir2(), { recursive: true });
         writeFileSync49(scriptPath2, script);
       }
       /** Ensure the LuxTTS daemon is running, spawn if needed */
       async ensureLuxttsDaemon() {
         if (this._luxttsDaemon && !this._luxttsDaemon.killed) return true;
-        const venvPy = luxttsVenvPy();
+        const venvPy = luxttsVenvPy2();
         if (!existsSync95(venvPy)) return false;
         return new Promise((resolve48) => {
-          const env2 = { ...process.env, LUXTTS_REPO_PATH: luxttsRepoDir() };
-          const daemon = nodeSpawn(venvPy, [luxttsInferScript()], {
+          const env2 = { ...process.env, LUXTTS_REPO_PATH: luxttsRepoDir2() };
+          const daemon = nodeSpawn(venvPy, [luxttsInferScript2()], {
             stdio: ["pipe", "pipe", "pipe"],
             cwd: tmpdir20(),
             env: env2
@@ -596377,6 +597084,17 @@ var init_tool_policy = __esm({
       "todo_write",
       "web_search",
       "web_fetch",
+      "image_read",
+      "ocr",
+      "ocr_image_advanced",
+      "ocr_pdf",
+      "pdf_to_text",
+      "vision",
+      "transcribe_file",
+      "video_understand",
+      "audio_analyze",
+      "explore_tools",
+      "telegram_media_recent",
       "generate_image",
       "generate_audio",
       "generate_tts",
@@ -596393,6 +597111,17 @@ var init_tool_policy = __esm({
       "web_search",
       "web_fetch",
       "web_crawl",
+      "image_read",
+      "ocr",
+      "ocr_image_advanced",
+      "ocr_pdf",
+      "pdf_to_text",
+      "vision",
+      "transcribe_file",
+      "video_understand",
+      "audio_analyze",
+      "explore_tools",
+      "telegram_media_recent",
       "generate_image",
       "generate_audio",
       "generate_tts",
@@ -596500,6 +597229,7 @@ function scopedTool(base3, root, mode) {
     async execute(args) {
       const next = { ...args };
       if (base3.name === "generate_image" || base3.name === "generate_audio" || base3.name === "generate_tts") {
+        const cleanup = [];
         const localModel = typeof next["model_path"] === "string" ? String(next["model_path"]) : typeof next["model"] === "string" && looksLikeLocalPath(String(next["model"])) ? String(next["model"]) : "";
         if (localModel) {
           const guarded = guardPath(rootAbs, localModel);
@@ -596508,6 +597238,22 @@ function scopedTool(base3, root, mode) {
           else next["model"] = guarded.path.abs;
         }
         if (base3.name === "generate_tts") {
+          for (const key of TTS_CLONE_SOURCE_KEYS) {
+            const value2 = next[key];
+            if (typeof value2 !== "string" || !value2.trim()) continue;
+            const materialized = materializeTelegramCreativeArtifactForSend(rootAbs, value2.trim());
+            if (!materialized.ok) return denied(materialized.error);
+            next[key] = materialized.path;
+            if (materialized.cleanup) cleanup.push(materialized.cleanup);
+          }
+          for (const key of ["clone_ref", "voice"]) {
+            const value2 = next[key];
+            if (typeof value2 !== "string" || !value2.trim() || !looksLikeAudioPath(value2.trim())) continue;
+            const materialized = materializeTelegramCreativeArtifactForSend(rootAbs, value2.trim());
+            if (!materialized.ok) return denied(materialized.error);
+            next[key] = materialized.path;
+            if (materialized.cleanup) cleanup.push(materialized.cleanup);
+          }
           const rawOutput = typeof next["output"] === "string" && String(next["output"]).trim() ? String(next["output"]) : typeof next["output_path"] === "string" && String(next["output_path"]).trim() ? String(next["output_path"]) : `tts-${Date.now()}.wav`;
           const guardedOutput = guardPath(rootAbs, rawOutput);
           if (!guardedOutput.ok) return denied(guardedOutput.error);
@@ -596517,16 +597263,20 @@ function scopedTool(base3, root, mode) {
           next["output"] = guardedOutput.path.abs;
           next["playback"] = false;
         }
-        const result2 = await base3.execute(next);
-        if (result2.success) {
-          if (base3.name === "generate_tts" && typeof next["output"] === "string") {
-            rememberCreated(rootAbs, String(next["output"]));
-          }
-          for (const path11 of collectGeneratedArtifactPathsFromText(result2.output, rootAbs)) {
-            rememberCreated(rootAbs, path11);
+        try {
+          const result2 = await base3.execute(next);
+          if (result2.success) {
+            if (base3.name === "generate_tts" && typeof next["output"] === "string") {
+              rememberCreated(rootAbs, String(next["output"]));
+            }
+            for (const path11 of collectGeneratedArtifactPathsFromText(result2.output, rootAbs)) {
+              rememberCreated(rootAbs, path11);
+            }
           }
+          return result2;
+        } finally {
+          for (const fn of cleanup) fn();
         }
-        return result2;
       }
       const pathKey = PATH_KEYS.find((key) => typeof next[key] === "string" && String(next[key]).trim());
       if (pathKey) {
@@ -596591,6 +597341,9 @@ function isInside(root, path11) {
 function looksLikeLocalPath(value2) {
   return value2.startsWith("/") || value2.startsWith("./") || value2.startsWith("../");
 }
+function looksLikeAudioPath(value2) {
+  return looksLikeLocalPath(value2) || value2.startsWith("~/") || /\.(wav|mp3|flac|ogg|m4a)$/i.test(value2);
+}
 function manifestPath(root) {
   return join119(root, MANIFEST_FILE);
 }
@@ -596753,7 +597506,7 @@ function denied(error) {
     mutatedFiles: []
   };
 }
-var MANIFEST_FILE, OBJECTS_DIR, SEND_DIR, PATH_KEYS, MEDIA_PATH_RE, PUBLIC_EXECUTABLE_ARTIFACT_EXTENSIONS, CreativeAudioFileTool;
+var MANIFEST_FILE, OBJECTS_DIR, SEND_DIR, PATH_KEYS, TTS_CLONE_SOURCE_KEYS, MEDIA_PATH_RE, PUBLIC_EXECUTABLE_ARTIFACT_EXTENSIONS, CreativeAudioFileTool;
 var init_telegram_creative_tools = __esm({
   "packages/cli/src/tui/telegram-creative-tools.ts"() {
     "use strict";
@@ -596762,6 +597515,7 @@ var init_telegram_creative_tools = __esm({
     OBJECTS_DIR = ".objects";
     SEND_DIR = ".send";
     PATH_KEYS = ["path", "file", "file_path", "filename", "filepath", "filePath"];
+    TTS_CLONE_SOURCE_KEYS = ["sample", "source_audio", "voice_sample", "reference_audio", "ref_audio", "clone_sample"];
     MEDIA_PATH_RE = /(?:^|[\s([])(\/[^\s<>"')\]]+\.[A-Za-z0-9]{1,12})(?:$|[\s),.\]])/g;
     PUBLIC_EXECUTABLE_ARTIFACT_EXTENSIONS = /* @__PURE__ */ new Set([
       ".sh",
@@ -596836,9 +597590,16 @@ var init_telegram_creative_tools = __esm({
           input: { type: "string", description: "Alias for text" },
           prompt: { type: "string", description: "Alias for text" },
           path: { type: "string", description: "Output .wav path inside the creative workspace" },
-          backend: { type: "string", enum: ["auto", "luxtts", "supertonic", "mlx", "onnx", "piper", "espeak"], description: "TTS backend. Defaults to auto." },
-          voice: { type: "string", description: "Voice id/name for the selected TTS backend" },
+          backend: { type: "string", enum: ["auto", "luxtts", "supertonic", "mlx", "onnx", "piper"], description: "TTS backend. Defaults to auto." },
+          voice: { type: "string", description: "Voice id/name for the selected TTS backend, or a scoped source audio path for cloning" },
           clone_ref: { type: "string", description: "Optional LuxTTS clone reference" },
+          sample: { type: "string", description: "Voice source clip path inside the creative workspace" },
+          source_audio: { type: "string", description: "Alias for sample" },
+          voice_sample: { type: "string", description: "Alias for sample" },
+          reference_audio: { type: "string", description: "Alias for sample" },
+          ref_audio: { type: "string", description: "Alias for sample" },
+          clone_sample: { type: "string", description: "Alias for sample" },
+          clone_name: { type: "string", description: "Optional name to register the source clip for later reuse" },
           model: { type: "string", description: "Optional backend model id or raw Piper/ONNX path" },
           speed: { type: "number", description: "Speech speed multiplier or backend-specific rate" }
         },
@@ -596857,26 +597618,57 @@ var init_telegram_creative_tools = __esm({
         if (!guarded.path.abs.toLowerCase().endsWith(".wav")) {
           return denied("create_audio_file currently writes WAV files; use a .wav output path.");
         }
-        await mkdir17(dirname33(guarded.path.abs), { recursive: true });
-        const tts = new TtsGenerateTool();
-        const result = await tts.execute({
-          text,
-          output: guarded.path.abs,
-          playback: false,
-          backend: args["backend"],
-          voice: args["voice"],
-          clone_ref: args["clone_ref"],
-          model: args["model"],
-          speed: args["speed"]
-        });
-        if (!result.success || !existsSync104(guarded.path.abs)) {
-          return {
-            success: false,
-            output: "",
-            error: `Audio synthesis failed through generate_tts.
+        const cloneArgs = {};
+        const cleanup = [];
+        for (const key of TTS_CLONE_SOURCE_KEYS) {
+          const value2 = args[key];
+          if (typeof value2 !== "string" || !value2.trim()) continue;
+          const materialized = materializeTelegramCreativeArtifactForSend(this.root, value2.trim());
+          if (!materialized.ok) return denied(materialized.error);
+          cloneArgs[key] = materialized.path;
+          if (materialized.cleanup) cleanup.push(materialized.cleanup);
+        }
+        for (const key of ["clone_ref", "voice"]) {
+          const value2 = args[key];
+          if (typeof value2 !== "string" || !value2.trim() || !looksLikeAudioPath(value2.trim())) continue;
+          const materialized = materializeTelegramCreativeArtifactForSend(this.root, value2.trim());
+          if (!materialized.ok) return denied(materialized.error);
+          cloneArgs[key] = materialized.path;
+          if (materialized.cleanup) cleanup.push(materialized.cleanup);
+        }
+        let result;
+        try {
+          await mkdir17(dirname33(guarded.path.abs), { recursive: true });
+          const tts = new TtsGenerateTool();
+          result = await tts.execute({
+            text,
+            output: guarded.path.abs,
+            playback: false,
+            backend: args["backend"],
+            voice: cloneArgs["voice"] ?? args["voice"],
+            clone_ref: cloneArgs["clone_ref"] ?? args["clone_ref"],
+            ...cloneArgs,
+            sample: cloneArgs["sample"],
+            source_audio: cloneArgs["source_audio"],
+            voice_sample: cloneArgs["voice_sample"],
+            reference_audio: cloneArgs["reference_audio"],
+            ref_audio: cloneArgs["ref_audio"],
+            clone_sample: cloneArgs["clone_sample"],
+            clone_name: args["clone_name"],
+            model: args["model"],
+            speed: args["speed"]
+          });
+          if (!result.success || !existsSync104(guarded.path.abs)) {
+            return {
+              success: false,
+              output: "",
+              error: `Audio synthesis failed through generate_tts.
 ${(result.error || result.output || "").slice(0, 1200)}`,
-            durationMs: performance.now() - start2
-          };
+              durationMs: performance.now() - start2
+            };
+          }
+        } finally {
+          for (const fn of cleanup) fn();
         }
         rememberCreated(this.root, guarded.path.abs);
         const sizeKB = Math.round(statSync35(guarded.path.abs).size / 1024);
@@ -596904,12 +597696,12 @@ __export(vision_ingress_exports, {
   queryVisionModel: () => queryVisionModel,
   runVisionIngress: () => runVisionIngress
 });
-import { execFileSync as execFileSync4 } from "node:child_process";
+import { execFileSync as execFileSync5 } from "node:child_process";
 import { existsSync as existsSync105, readFileSync as readFileSync86, unlinkSync as unlinkSync20 } from "node:fs";
 import { join as join120 } from "node:path";
 function isTesseractAvailable() {
   try {
-    execFileSync4("tesseract", ["--version"], { stdio: "ignore", timeout: 3e3 });
+    execFileSync5("tesseract", ["--version"], { stdio: "ignore", timeout: 3e3 });
     return true;
   } catch {
     return false;
@@ -596950,7 +597742,7 @@ function advancedOcr(imagePath) {
   for (const psm of psmModes) {
     const outFile = `${tmpBase}_psm${psm}`;
     try {
-      execFileSync4("tesseract", [
+      execFileSync5("tesseract", [
         imagePath,
         outFile,
         "--psm",
@@ -597049,7 +597841,7 @@ var init_vision_ingress = __esm({
 // packages/cli/src/tui/telegram-bridge.ts
 import { mkdirSync as mkdirSync60, existsSync as existsSync106, unlinkSync as unlinkSync21, readdirSync as readdirSync36, statSync as statSync36, readFileSync as readFileSync87, writeFileSync as writeFileSync57 } from "node:fs";
-import { join as join121, resolve as resolve39, basename as basename23, relative as relative13, isAbsolute as isAbsolute7 } from "node:path";
+import { join as join121, resolve as resolve39, basename as basename23, relative as relative13, isAbsolute as isAbsolute7, extname as extname15 } from "node:path";
 import { writeFile as writeFileAsync } from "node:fs/promises";
 import { createHash as createHash19, randomInt } from "node:crypto";
 function parseTelegramInteractionDecision(text, forcedRoute, options2 = {}) {
@@ -597247,6 +598039,19 @@ function summarizeTelegramMessageAttachments(msg) {
       parts.push(`caption: ${truncateTelegramContextLine(msg.media.caption, 180)}`);
     }
   }
+  if (msg.replyToMedia) {
+    const details = [
+      msg.replyToMedia.type,
+      msg.replyToMedia.mimeType,
+      msg.replyToMedia.fileName,
+      msg.replyToMedia.duration ? `${msg.replyToMedia.duration}s` : "",
+      msg.replyToMedia.fileSize ? `${msg.replyToMedia.fileSize} bytes` : ""
+    ].filter(Boolean).join(", ");
+    parts.push(`replied-to media: ${details}`);
+    if (msg.replyToMedia.caption) {
+      parts.push(`replied-to caption: ${truncateTelegramContextLine(msg.replyToMedia.caption, 180)}`);
+    }
+  }
   if (msg.poll) {
     parts.push(`poll: ${truncateTelegramContextLine(msg.poll.question, 180)}`);
   }
@@ -597620,6 +598425,25 @@ function telegramImageMime(media) {
   if (ext === ".tif" || ext === ".tiff") return "image/tiff";
   return "image/jpeg";
 }
+function telegramCachedMediaIsImage(entry) {
+  if (entry.mediaType === "photo") return true;
+  if (entry.mimeType?.toLowerCase().startsWith("image/")) return true;
+  return TELEGRAM_IMAGE_EXTENSIONS.has(extname15(entry.localPath).toLowerCase());
+}
+function telegramCachedMediaIsPdf(entry) {
+  if (entry.mimeType?.toLowerCase() === "application/pdf") return true;
+  return extname15(entry.localPath).toLowerCase() === ".pdf";
+}
+function telegramCachedMediaIsAudio(entry) {
+  if (entry.mediaType === "audio" || entry.mediaType === "voice") return true;
+  if (entry.mimeType?.toLowerCase().startsWith("audio/")) return true;
+  return [".wav", ".mp3", ".flac", ".aac", ".m4a", ".ogg", ".opus"].includes(extname15(entry.localPath).toLowerCase());
+}
+function telegramCachedMediaIsVideo(entry) {
+  if (entry.mediaType === "video" || entry.mediaType === "video_note" || entry.mediaType === "live_photo") return true;
+  if (entry.mimeType?.toLowerCase().startsWith("video/")) return true;
+  return [".mp4", ".mkv", ".avi", ".mov", ".webm"].includes(extname15(entry.localPath).toLowerCase());
+}
 function isPathInside(root, path11) {
   const rel = relative13(resolve39(root), resolve39(path11));
   return rel === "" || Boolean(rel) && !rel.startsWith("..") && !isAbsolute7(rel);
@@ -597653,6 +598477,10 @@ function normalizeTelegramUpdate(update2) {
   const username = message2.from?.username ?? message2.sender_chat?.username ?? "";
   const chatType = message2.chat?.type ?? "private";
   const media = normalizeTelegramMedia(message2);
+  const replyTo = message2.reply_to_message && typeof message2.reply_to_message === "object" ? message2.reply_to_message : void 0;
+  const replyToMedia = replyTo ? normalizeTelegramMedia(replyTo) : void 0;
+  const replyToPoll = replyTo ? normalizeTelegramPoll(replyTo.poll) : void 0;
+  const replyToText = replyTo ? replyTo.text || replyTo.caption || (replyToPoll ? formatTelegramPollSummary(replyToPoll) : "") : "";
   const poll = normalizeTelegramPoll(message2.poll);
   const livePhoto = normalizeTelegramLivePhoto(message2.live_photo);
   const text = message2.text || message2.caption || (poll ? formatTelegramPollSummary(poll) : "");
@@ -597667,6 +598495,8 @@ function normalizeTelegramUpdate(update2) {
     chatType,
     chatTitle: message2.chat?.title,
     media,
+    replyToMedia,
+    replyToText: replyToText || void 0,
     poll,
     livePhoto,
     guestQueryId: typeof message2.guest_query_id === "string" ? message2.guest_query_id : void 0,
@@ -597675,9 +598505,9 @@ function normalizeTelegramUpdate(update2) {
     isGuestMessage: sourceUpdateType === "guest_message",
     isDirectMessages: Boolean(message2.chat?.is_direct_messages),
     parentChatId: message2.chat?.parent_chat?.id ?? message2.direct_messages_topic?.parent_topic?.id,
-    replyToMessageId: message2.reply_to_message?.message_id,
-    replyToUsername: message2.reply_to_message?.from?.username ?? message2.reply_to_message?.sender_chat?.username,
-    replyToBot: Boolean(message2.reply_to_message?.from?.is_bot),
+    replyToMessageId: replyTo?.message_id,
+    replyToUsername: replyTo?.from?.username ?? replyTo?.sender_chat?.username,
+    replyToBot: Boolean(replyTo?.from?.is_bot),
     mentionedUsernames: extractTelegramMentionedUsernames(message2, text),
     sourceUpdateType
   };
@@ -597824,7 +598654,7 @@ function renderTelegramSubAgentError(username, error) {
   process.stdout.write(`    ${c3.dim("⎿")} ${c3.red("✘")} @${username}: ${c3.dim(preview)}
 `);
 }
-var TELEGRAM_SAFETY_PROMPT, ADMIN_DM_PROMPT, ADMIN_GROUP_PROMPT, TELEGRAM_PUBLIC_SOUL_PROFILE, TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT, TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT, GROUP_REPLY_DISCRETION_PROMPT, TELEGRAM_CHAT_MODE_PROMPT, ADMIN_CHAT_PROFILE_PROMPT, TELEGRAM_ACTION_RESPONSE_CONTRACT, TELEGRAM_CHAT_HISTORY_LIMIT, TELEGRAM_CONTEXT_RECENT_DEFAULT, TELEGRAM_CONTEXT_LINE_LIMIT, TELEGRAM_CONTEXT_SAMPLE_LIMIT, TELEGRAM_MEMORY_CARD_LIMIT, TELEGRAM_MEMORY_NOTE_LIMIT, TELEGRAM_MEMORY_STOPWORDS, TELEGRAM_PUBLIC_HELP_COMMANDS, MEDIA_CACHE_TTL_MS, TelegramBridge;
+var TELEGRAM_SAFETY_PROMPT, ADMIN_DM_PROMPT, ADMIN_GROUP_PROMPT, TELEGRAM_PUBLIC_SOUL_PROFILE, TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT, TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT, GROUP_REPLY_DISCRETION_PROMPT, TELEGRAM_CHAT_MODE_PROMPT, ADMIN_CHAT_PROFILE_PROMPT, TELEGRAM_ACTION_RESPONSE_CONTRACT, TELEGRAM_CHAT_HISTORY_LIMIT, TELEGRAM_CONTEXT_RECENT_DEFAULT, TELEGRAM_CONTEXT_LINE_LIMIT, TELEGRAM_CONTEXT_SAMPLE_LIMIT, TELEGRAM_MEMORY_CARD_LIMIT, TELEGRAM_MEMORY_NOTE_LIMIT, TELEGRAM_MEMORY_STOPWORDS, TELEGRAM_PUBLIC_HELP_COMMANDS, TELEGRAM_IMAGE_EXTENSIONS, MEDIA_CACHE_TTL_MS, TelegramBridge;
 var init_telegram_bridge = __esm({
   "packages/cli/src/tui/telegram-bridge.ts"() {
     "use strict";
@@ -598020,6 +598850,7 @@ Telegram response contract:
       "your"
     ]);
     TELEGRAM_PUBLIC_HELP_COMMANDS = /* @__PURE__ */ new Set(["help", "start", "auth", "call"]);
+    TELEGRAM_IMAGE_EXTENSIONS = /* @__PURE__ */ new Set([".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".tiff", ".tif", ".svg"]);
     MEDIA_CACHE_TTL_MS = 30 * 60 * 1e3;
     TelegramBridge = class {
       constructor(botToken, onMessage, agentConfig, repoRoot, toolPolicyConfig) {
@@ -598431,6 +599262,80 @@ Telegram response contract:
           }
         }
       }
+      updateLastTelegramUserMessageText(msg, text) {
+        const sessionKey = this.sessionKeyForMessage(msg);
+        const history = this.chatHistory.get(sessionKey);
+        if (!history || !text.trim()) return;
+        for (let i2 = history.length - 1; i2 >= 0; i2--) {
+          const entry = history[i2];
+          if (entry.role !== "user") continue;
+          if (entry.messageId === msg.messageId || !entry.messageId && entry.text === msg.text) {
+            entry.text = text.trim();
+            entry.mediaSummary = summarizeTelegramMessageAttachments(msg) || entry.mediaSummary;
+            this.updateTelegramMemoryCards(sessionKey, entry);
+            this.saveTelegramConversationState(sessionKey);
+            return;
+          }
+        }
+      }
+      recentTelegramMediaEntries(chatId, limit = 12) {
+        const now = Date.now();
+        return [...this.mediaCache.values()].filter((entry) => {
+          if (chatId !== void 0 && String(entry.chatId) !== String(chatId)) return false;
+          return now - entry.cachedAt <= MEDIA_CACHE_TTL_MS;
+        }).sort((a2, b) => b.cachedAt - a2.cachedAt).slice(0, limit);
+      }
+      telegramMediaEntryMatchesKind(entry, kind) {
+        if (kind === "image") return telegramCachedMediaIsImage(entry);
+        if (kind === "pdf") return telegramCachedMediaIsPdf(entry);
+        if (kind === "audio") return telegramCachedMediaIsAudio(entry);
+        if (kind === "video") return telegramCachedMediaIsVideo(entry);
+        if (kind === "transcribable") {
+          return telegramCachedMediaIsAudio(entry) || telegramCachedMediaIsVideo(entry);
+        }
+        return true;
+      }
+      resolveTelegramScopedMediaPath(rawValue, chatId, currentMsg, kind) {
+        const raw = String(rawValue ?? "").trim();
+        const repoRoot = this.repoRoot || ".";
+        const creativeRoot = telegramCreativeWorkspaceRoot(repoRoot, chatId);
+        const mediaEntries = this.recentTelegramMediaEntries(chatId, 60).filter((entry) => this.telegramMediaEntryMatchesKind(entry, kind));
+        const aliases = /* @__PURE__ */ new Set(["", "latest", "last", "current", "this", "that", "it", "reply", "replied", "replied-to", "replied_to"]);
+        if (aliases.has(raw.toLowerCase())) {
+          const replied = currentMsg?.replyToMessageId ? mediaEntries.find((entry2) => entry2.messageId === currentMsg.replyToMessageId) : void 0;
+          const entry = replied ?? mediaEntries[0];
+          if (!entry) {
+            return { ok: false, error: `No recent ${kind} media is available in this Telegram chat scope.` };
+          }
+          return { ok: true, path: entry.localPath };
+        }
+        const matchingEntry = mediaEntries.find((entry) => {
+          if (resolve39(entry.localPath) === resolve39(raw)) return true;
+          if (basename23(entry.localPath) === raw) return true;
+          if (entry.fileUniqueId === raw || entry.fileId === raw) return true;
+          if (entry.messageId && String(entry.messageId) === raw) return true;
+          return false;
+        });
+        if (matchingEntry) return { ok: true, path: matchingEntry.localPath };
+        const creativeCandidate = isAbsolute7(raw) ? resolve39(raw) : resolve39(creativeRoot, raw);
+        if (isPathInside(creativeRoot, creativeCandidate) && existsSync106(creativeCandidate)) {
+          return { ok: true, path: creativeCandidate };
+        }
+        return {
+          ok: false,
+          error: `Path is outside this Telegram chat's media/workspace scope or does not exist: ${raw || "(empty)"}`
+        };
+      }
+      resolveTelegramScopedOutputPath(rawValue, chatId, fallbackName) {
+        const repoRoot = this.repoRoot || ".";
+        const creativeRoot = telegramCreativeWorkspaceRoot(repoRoot, chatId);
+        const raw = String(rawValue || fallbackName).trim() || fallbackName;
+        const outputPath2 = isAbsolute7(raw) ? resolve39(raw) : resolve39(creativeRoot, raw);
+        if (!isPathInside(creativeRoot, outputPath2)) {
+          return { ok: false, error: `Output path must stay inside this Telegram chat's creative workspace: ${raw}` };
+        }
+        return { ok: true, path: outputPath2 };
+      }
       updateTelegramParticipantProfile(sessionKey, msg, text) {
         const participantKey = String(msg.fromUserId || msg.username || msg.firstName || "unknown");
         const participants = this.chatParticipants.get(sessionKey) ?? /* @__PURE__ */ new Map();
@@ -598605,6 +599510,22 @@ ${notes2}`;
           sections.push(`### Zettelkasten Memory Recall
 ${cardLines.join("\n")}`);
         }
+        const recentMedia = this.recentTelegramMediaEntries(msg.chatId, 10);
+        if (recentMedia.length > 0) {
+          const mediaLines = recentMedia.map((entry) => {
+            const kind = telegramCachedMediaIsImage(entry) ? "image" : entry.mediaType;
+            const replyMark = msg.replyToMessageId && entry.messageId === msg.replyToMessageId ? " replied-to" : "";
+            const caption = entry.caption ? ` caption:${truncateTelegramContextLine(entry.caption, 120)}` : "";
+            const extracted = entry.extractedContent ? `
+    ${truncateTelegramContextLine(entry.extractedContent.replace(/\s+/g, " "), 220)}` : "";
+            return `- message_id ${entry.messageId}${replyMark}: ${kind}; path ${entry.localPath}; file ${basename23(entry.localPath)}${caption}${extracted}`;
+          });
+          sections.push([
+            "### Recent Chat Media",
+            "Use these paths only as tool inputs when the user asks about media in this chat. Do not quote local paths in the visible Telegram reply.",
+            mediaLines.join("\n")
+          ].join("\n"));
+        }
         if (olderCount > 0) {
           const older = history.slice(0, olderCount);
           const bySpeaker = /* @__PURE__ */ new Map();
@@ -599301,8 +600222,8 @@ Join: ${newUrl}`);
             }
           }
           let steeringText = msg.text;
-          if (msg.media) {
-            const mediaContext = await this.processMedia(msg);
+          if (msg.media || msg.replyToMedia) {
+            const mediaContext = await this.processMediaContextForMessage(msg);
             if (mediaContext) {
               steeringText += `
@@ -599376,8 +600297,8 @@ ${mediaContext}`;
         this.tuiWrite(() => renderTelegramSubAgentStart(msg.username, msg.text, isAdminDM));
         try {
           let mediaContext = "";
-          if (msg.media) {
-            mediaContext = await this.processMedia(msg);
+          if (msg.media || msg.replyToMedia) {
+            mediaContext = await this.processMediaContextForMessage(msg);
           }
           const result = await this.runSubAgent(msg, subAgent, mediaContext);
           if (subAgent.typingInterval) {
@@ -599479,8 +600400,8 @@ ${mediaContext}`;
         this.tuiWrite(() => renderTelegramSubAgentEvent(msg.username, `admin chat with full context/tools (${this.interactionMode})`));
         try {
           let mediaContext = "";
-          if (msg.media) {
-            mediaContext = await this.processMedia(msg);
+          if (msg.media || msg.replyToMedia) {
+            mediaContext = await this.processMediaContextForMessage(msg);
           }
           const result = await this.runSubAgent(msg, subAgent, mediaContext, "chat");
           if (subAgent.typingInterval) {
@@ -599563,7 +600484,7 @@ ${mediaContext}`;
         }
         this.tuiWrite(() => renderTelegramSubAgentEvent(msg.username, `live inference: chat reply (${this.interactionMode})`));
         try {
-          const mediaContext = msg.media || msg.livePhoto ? "Attachment received. Quick-chat mode does not inspect media; use action mode for media analysis." : "";
+          const mediaContext = msg.media || msg.replyToMedia || msg.livePhoto ? await this.processMediaContextForMessage(msg) : "";
           const finalText = await this.runTelegramChatCompletion(
             msg,
             toolContext,
@@ -600056,6 +600977,128 @@ ${lines.join("\n\n")}` };
               }
             };
           }
+          if (tool.name === "image_read") {
+            return {
+              ...tool,
+              description: "Read only images from this Telegram chat's media cache or creative workspace. Use path='reply' for the replied-to image or path='latest' for the most recent chat image.",
+              execute: async (args) => {
+                const resolved = this.resolveTelegramScopedMediaPath(args["path"], chatId, currentMsg, "image");
+                if (!resolved.ok) return { success: false, output: "", error: resolved.error };
+                return tool.execute({ ...args, path: resolved.path });
+              }
+            };
+          }
+          if (tool.name === "ocr") {
+            return {
+              ...tool,
+              description: "Extract text only from images in this Telegram chat's media cache or creative workspace. Use path='reply' or path='latest' for chat media references.",
+              execute: async (args) => {
+                const resolved = this.resolveTelegramScopedMediaPath(args["path"], chatId, currentMsg, "image");
+                if (!resolved.ok) return { success: false, output: "", error: resolved.error };
+                return tool.execute({ ...args, path: resolved.path });
+              }
+            };
+          }
+          if (tool.name === "vision") {
+            return {
+              ...tool,
+              description: "Analyze only images from this Telegram chat's media cache or creative workspace. Use image='reply' for the replied-to image or image='latest' for the most recent chat image.",
+              execute: async (args) => {
+                const resolved = this.resolveTelegramScopedMediaPath(args["image"], chatId, currentMsg, "image");
+                if (!resolved.ok) return { success: false, output: "", error: resolved.error };
+                return tool.execute({ ...args, image: resolved.path });
+              }
+            };
+          }
+          if (tool.name === "ocr_image_advanced") {
+            return {
+              ...tool,
+              description: "Advanced OCR only for images in this Telegram chat's media cache or creative workspace. Batch directory mode is disabled in public Telegram scope.",
+              execute: async (args) => {
+                if (args["batch"] === true) return { success: false, output: "", error: "Batch directory OCR is not available in public Telegram scope." };
+                const resolved = this.resolveTelegramScopedMediaPath(args["image"], chatId, currentMsg, "image");
+                if (!resolved.ok) return { success: false, output: "", error: resolved.error };
+                const next = { ...args, image: resolved.path };
+                if (typeof next["output_dir"] === "string" && next["output_dir"].trim()) {
+                  const output = this.resolveTelegramScopedOutputPath(next["output_dir"], chatId, "ocr-output");
+                  if (!output.ok) return { success: false, output: "", error: output.error };
+                  next["output_dir"] = output.path;
+                }
+                return tool.execute(next);
+              }
+            };
+          }
+          if (tool.name === "transcribe_file") {
+            return {
+              ...tool,
+              description: "Transcribe only audio/video files from this Telegram chat's media cache or creative workspace. Use path='reply' or path='latest' for chat media references.",
+              execute: async (args) => {
+                const resolved = this.resolveTelegramScopedMediaPath(args["path"], chatId, currentMsg, "transcribable");
+                if (!resolved.ok) return { success: false, output: "", error: resolved.error };
+                return tool.execute({ ...args, path: resolved.path });
+              }
+            };
+          }
+          if (tool.name === "pdf_to_text") {
+            return {
+              ...tool,
+              description: "Extract text only from PDFs in this Telegram chat's media cache or creative workspace. Use path='reply' or path='latest' for chat document references.",
+              execute: async (args) => {
+                const resolved = this.resolveTelegramScopedMediaPath(args["path"], chatId, currentMsg, "pdf");
+                if (!resolved.ok) return { success: false, output: "", error: resolved.error };
+                return tool.execute({ ...args, path: resolved.path });
+              }
+            };
+          }
+          if (tool.name === "ocr_pdf") {
+            return {
+              ...tool,
+              description: "OCR only PDFs from this Telegram chat's media cache or creative workspace. Output, when requested, is forced into this chat's creative workspace.",
+              execute: async (args) => {
+                const input = this.resolveTelegramScopedMediaPath(args["input"], chatId, currentMsg, "pdf");
+                if (!input.ok) return { success: false, output: "", error: input.error };
+                const next = { ...args, input: input.path };
+                if (typeof next["output"] === "string" && next["output"].trim()) {
+                  const output = this.resolveTelegramScopedOutputPath(next["output"], chatId, `ocr-${Date.now()}.pdf`);
+                  if (!output.ok) return { success: false, output: "", error: output.error };
+                  next["output"] = output.path;
+                }
+                return tool.execute(next);
+              }
+            };
+          }
+          if (tool.name === "video_understand") {
+            return {
+              ...tool,
+              description: "Analyze only video files from this Telegram chat's media cache or creative workspace. URL download is disabled in public Telegram scope; use path='reply' or path='latest'.",
+              execute: async (args) => {
+                if (args["url"]) return { success: false, output: "", error: "URL video analysis is not available in public Telegram scope. Use a video posted in this chat." };
+                const resolved = this.resolveTelegramScopedMediaPath(args["path"], chatId, currentMsg, "video");
+                if (!resolved.ok) return { success: false, output: "", error: resolved.error };
+                return tool.execute({ ...args, path: resolved.path });
+              }
+            };
+          }
+          if (tool.name === "audio_analyze") {
+            return {
+              ...tool,
+              description: "Analyze only audio files from this Telegram chat's media cache or creative workspace. Microphone/listen mode is disabled in public Telegram scope.",
+              execute: async (args) => {
+                if (String(args["action"] || "").toLowerCase() === "listen") {
+                  return { success: false, output: "", error: "Continuous microphone listening is not available in Telegram public scope." };
+                }
+                const resolved = this.resolveTelegramScopedMediaPath(args["file"] ?? args["path"], chatId, currentMsg, "audio");
+                if (!resolved.ok) return { success: false, output: "", error: resolved.error };
+                return tool.execute({ ...args, file: resolved.path, path: resolved.path });
+              }
+            };
+          }
+          if (tool.name === "explore_tools") {
+            return {
+              ...tool,
+              description: "List and explain the tools available in this Telegram public/group scope. Do not invent unavailable tool names."
+            };
+          }
           return tool;
         });
       }
@@ -600219,11 +601262,16 @@ Scoped workspace: ${scopedRoot}`,
           new ImageReadTool(repoRoot),
           new OCRTool(repoRoot),
           new VisionTool(repoRoot),
+          new OcrImageAdvancedTool(repoRoot),
           new OcrPdfTool(repoRoot),
           new PdfToTextTool(repoRoot),
           // Transcription tools
           new TranscribeFileTool(repoRoot),
-          new TranscribeUrlTool(repoRoot)
+          new TranscribeUrlTool(repoRoot),
+          new VideoUnderstandTool(repoRoot),
+          new AudioAnalyzeTool(),
+          new ExploreToolsTool(),
+          this.buildTelegramMediaRecentTool(chatId, msg)
         ];
         const adminTools = [
           new ShellTool(repoRoot),
@@ -600326,6 +601374,55 @@ Scoped workspace: ${scopedRoot}`,
         ]);
         return tools.filter((tool) => !blocked.has(tool.name));
       }
+      buildTelegramMediaRecentTool(chatId, currentMsg) {
+        const bridge = this;
+        return {
+          name: "telegram_media_recent",
+          description: "List recent media files available in this Telegram chat scope, including safe aliases for image_read, ocr, vision, transcribe_file, pdf_to_text, video_understand, and audio_analyze.",
+          parameters: {
+            type: "object",
+            properties: {
+              kind: {
+                type: "string",
+                enum: ["media", "image", "audio", "video", "pdf", "transcribable"],
+                description: "Filter by media kind. Defaults to all recent chat media."
+              },
+              limit: { type: "number", description: "Maximum entries to return, 1-20. Default: 10." }
+            }
+          },
+          async execute(args) {
+            const start2 = performance.now();
+            const kind = String(args["kind"] || "media").toLowerCase();
+            const limit = typeof args["limit"] === "number" && Number.isFinite(args["limit"]) ? Math.max(1, Math.min(20, Math.floor(args["limit"]))) : 10;
+            const entries = bridge.recentTelegramMediaEntries(chatId, 60).filter((entry) => bridge.telegramMediaEntryMatchesKind(entry, kind)).slice(0, limit);
+            if (entries.length === 0) {
+              return { success: true, output: `No recent ${kind} media is available in this Telegram chat scope.`, durationMs: performance.now() - start2 };
+            }
+            const lines = entries.map((entry, index) => {
+              const parts = [
+                `${index + 1}. message_id ${entry.messageId || "unknown"}`,
+                currentMsg?.replyToMessageId === entry.messageId ? "replied-to" : "",
+                telegramCachedMediaIsImage(entry) ? "image" : telegramCachedMediaIsPdf(entry) ? "pdf" : telegramCachedMediaIsAudio(entry) ? "audio" : telegramCachedMediaIsVideo(entry) ? "video" : entry.mediaType,
+                `file=${basename23(entry.localPath)}`,
+                `path=${entry.localPath}`,
+                entry.caption ? `caption=${truncateTelegramContextLine(entry.caption, 140)}` : ""
+              ].filter(Boolean);
+              const extracted = entry.extractedContent ? `
+   context: ${truncateTelegramContextLine(entry.extractedContent.replace(/\s+/g, " "), 240)}` : "";
+              return `${parts.join("; ")}${extracted}`;
+            });
+            return {
+              success: true,
+              output: [
+                "Recent scoped Telegram media:",
+                "Use path='reply' for replied-to media, path='latest' for the most recent matching item, or one of the listed paths.",
+                lines.join("\n")
+              ].join("\n"),
+              durationMs: performance.now() - start2
+            };
+          }
+        };
+      }
       imageGenerationDefaultsForRepo(repoRoot) {
         const settings = resolveSettings(repoRoot);
         return {
@@ -600543,30 +601640,36 @@ ${knownList}` : "Private-user telegram_send_file target must be this DM or a kno
        * Downloads the file, runs it through the appropriate pipeline,
        * caches it, and returns a text description for the agent.
        */
-      async processMedia(msg) {
-        if (!msg.media) return "";
-        const { type, fileId, fileUniqueId, mimeType, caption } = msg.media;
-        const isImageMedia = telegramMediaIsImage(msg.media);
+      async processMedia(msg, source = "message") {
+        const media = source === "reply" ? msg.replyToMedia : msg.media;
+        if (!media) return "";
+        const { type, fileId, fileUniqueId, mimeType, caption } = media;
+        const isImageMedia = telegramMediaIsImage(media);
+        const sourceMessageId = source === "reply" ? msg.replyToMessageId : msg.messageId;
+        const sourceLabel = source === "reply" ? "replied-to " : "";
         let ext = ".bin";
-        if (isImageMedia) ext = telegramImageExtension(msg.media);
+        if (isImageMedia) ext = telegramImageExtension(media);
         else if (type === "audio" || type === "voice") ext = ".ogg";
         else if (type === "video" || type === "video_note" || type === "live_photo") ext = ".mp4";
-        else if (msg.media.fileName) {
-          const dotIdx = msg.media.fileName.lastIndexOf(".");
-          if (dotIdx >= 0) ext = msg.media.fileName.slice(dotIdx);
+        else if (media.fileName) {
+          const dotIdx = media.fileName.lastIndexOf(".");
+          if (dotIdx >= 0) ext = media.fileName.slice(dotIdx);
         }
         const localPath = await this.downloadTelegramFile(fileId, ext);
         if (!localPath) return `[Media: ${type} — failed to download]`;
         const cacheEntry = {
           localPath,
           fileId,
+          fileUniqueId,
           chatId: msg.chatId,
+          messageId: sourceMessageId ?? 0,
           username: msg.username,
           mediaType: type,
           mimeType,
+          caption,
           cachedAt: Date.now()
         };
-        this.mediaCache.set(fileUniqueId, cacheEntry);
+        this.mediaCache.set(`${String(msg.chatId)}:${String(sourceMessageId ?? 0)}:${fileUniqueId}`, cacheEntry);
         const metadataKey = String(msg.chatId);
         if (!this.mediaMetadata.has(metadataKey)) {
           this.mediaMetadata.set(metadataKey, []);
@@ -600587,7 +601690,7 @@ ${knownList}` : "Private-user telegram_send_file target must be this DM or a kno
               {
                 path: localPath,
                 buffer: readFileSync87(localPath),
-                mime: telegramImageMime(msg.media)
+                mime: telegramImageMime(media)
               },
               this.agentConfig?.model ?? ""
             );
@@ -600596,10 +601699,10 @@ ${knownList}` : "Private-user telegram_send_file target must be this DM or a kno
           } catch {
           }
           if (visionContext) {
-            description = `[Image received: ${localPath}${caption ? ` — caption: "${caption}"` : ""}
+            description = `[${sourceLabel}image received: ${localPath}${caption ? ` — caption: "${caption}"` : ""}
 ${visionContext}]`;
           } else {
-            description = `[Image received and saved to ${localPath}${caption ? ` — caption: "${caption}"` : ""}. You can use image_read or vision tools to analyze it if available.]`;
+            description = `[${sourceLabel}image received and saved to ${localPath}${caption ? ` — caption: "${caption}"` : ""}. You can use image_read, ocr, or vision tools to analyze it.]`;
           }
           try {
             await fetch("http://127.0.0.1:11435/v1/memory/ingest", {
@@ -600623,9 +601726,9 @@ ${visionContext}]`;
           } catch {
           }
           if (transcription) {
-            description = `[Voice message transcribed: "${transcription}"${caption ? ` — caption: "${caption}"` : ""}]`;
+            description = `[${sourceLabel}voice message transcribed: "${transcription}"${caption ? ` — caption: "${caption}"` : ""}]`;
           } else {
-            description = `[Audio/voice message received and saved to ${localPath}${caption ? ` — caption: "${caption}"` : ""}. You can use transcribe_file to transcribe it if available.]`;
+            description = `[${sourceLabel}audio/voice message received and saved to ${localPath}${caption ? ` — caption: "${caption}"` : ""}. You can use transcribe_file to transcribe it.]`;
           }
           try {
             await fetch("http://127.0.0.1:11435/v1/memory/ingest", {
@@ -600638,13 +601741,30 @@ ${visionContext}]`;
           }
         } else if (type === "video" || type === "video_note" || type === "live_photo") {
           const label = type === "live_photo" ? "Live photo" : "Video";
-          description = `[${label} received and saved to ${localPath}${caption ? ` — caption: "${caption}"` : ""}.]`;
+          description = `[${sourceLabel}${label.toLowerCase()} received and saved to ${localPath}${caption ? ` — caption: "${caption}"` : ""}. You can use video_understand or transcribe_file to analyze it.]`;
         } else if (type === "document") {
-          description = `[Document received: ${msg.media.fileName || "unnamed"}${mimeType ? ` (${mimeType})` : ""}, saved to ${localPath}${caption ? ` — caption: "${caption}"` : ""}.]`;
+          description = `[${sourceLabel}document received: ${media.fileName || "unnamed"}${mimeType ? ` (${mimeType})` : ""}, saved to ${localPath}${caption ? ` — caption: "${caption}"` : ""}.]`;
         }
         cacheEntry.extractedContent = description;
         return description;
       }
+      async processMediaContextForMessage(msg) {
+        const parts = [];
+        if (msg.media) {
+          const current = await this.processMedia(msg, "message");
+          if (current) parts.push(current);
+        }
+        if (msg.replyToMedia) {
+          const replied = await this.processMedia(msg, "reply");
+          if (replied) parts.push(replied);
+        }
+        const text = parts.join("\n\n");
+        if (text) this.updateLastTelegramUserMessageText(msg, `${msg.text}
+[Media context]
+${text}`.trim());
+        return text;
+      }
       /** Clean up expired media cache entries (older than 30 minutes) */
       cleanupMediaCache() {
         const now = Date.now();
@@ -625230,7 +626350,7 @@ var clipboard_media_exports = {};
 __export(clipboard_media_exports, {
   pasteClipboardImageToFile: () => pasteClipboardImageToFile
 });
-import { execFileSync as execFileSync5, execSync as execSync58 } from "node:child_process";
+import { execFileSync as execFileSync6, execSync as execSync58 } from "node:child_process";
 import { mkdirSync as mkdirSync72, readFileSync as readFileSync99, rmSync as rmSync5, writeFileSync as writeFileSync67 } from "node:fs";
 import { join as join136 } from "node:path";
 function pasteClipboardImageToFile(repoRoot) {
@@ -625247,7 +626367,7 @@ function readClipboardImage() {
     try {
       execSync58("command -v pngpaste", { stdio: "ignore", timeout: 1e3 });
       const tmp = `/tmp/omnius-clipboard-${Date.now()}.png`;
-      execFileSync5("pngpaste", [tmp], { timeout: 3e3 });
+      execFileSync6("pngpaste", [tmp], { timeout: 3e3 });
       const buffer2 = readFileSync99(tmp);
       try {
         rmSync5(tmp);
@@ -625267,7 +626387,7 @@ function readClipboardImage() {
     ];
     for (const attempt of attempts) {
       try {
-        const buffer2 = execFileSync5(attempt.cmd, attempt.args, { timeout: 3e3, maxBuffer: 25 * 1024 * 1024 });
+        const buffer2 = execFileSync6(attempt.cmd, attempt.args, { timeout: 3e3, maxBuffer: 25 * 1024 * 1024 });
         if (buffer2.length > 0) return { buffer: buffer2, mime: attempt.mime, ext: attempt.ext };
       } catch {
         continue;
@@ -625284,7 +626404,7 @@ function readClipboardImage() {
         "$img.Save($ms,[Drawing.Imaging.ImageFormat]::Png);",
         "[Console]::OpenStandardOutput().Write($ms.ToArray(),0,$ms.Length)"
       ].join("");
-      const buffer2 = execFileSync5("powershell.exe", ["-NoProfile", "-Command", ps], {
+      const buffer2 = execFileSync6("powershell.exe", ["-NoProfile", "-Command", ps], {
         timeout: 5e3,
         maxBuffer: 25 * 1024 * 1024
       });
@@ -625303,7 +626423,7 @@ var init_clipboard_media = __esm({
 // packages/cli/src/tui/interactive.ts
 import { cwd } from "node:process";
-import { resolve as resolve44, join as join137, dirname as dirname38, extname as extname15, relative as relative14 } from "node:path";
+import { resolve as resolve44, join as join137, dirname as dirname38, extname as extname16, relative as relative14 } from "node:path";
 import { createRequire as createRequire8 } from "node:module";
 import { fileURLToPath as fileURLToPath18 } from "node:url";
 import {
@@ -632605,7 +633725,7 @@ Execute this skill now. Follow the behavioral guidance above.`;
           const imgPath = resolve44(repoRoot, cleanPath);
           const imgBuffer = readFileSync100(imgPath);
           const base642 = imgBuffer.toString("base64");
-          const ext = extname15(cleanPath).toLowerCase();
+          const ext = extname16(cleanPath).toLowerCase();
           const mime = ext === ".png" ? "image/png" : ext === ".gif" ? "image/gif" : ext === ".webp" ? "image/webp" : "image/jpeg";
           const asciiContext = await renderAsciiPreviewForImage(
             imgPath,