lollms-client 1.10.0__tar.gz → 1.10.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lollms_client-1.10.0/src/lollms_client.egg-info → lollms_client-1.10.1}/PKG-INFO +1 -1
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/__init__.py +1 -1
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tti_bindings/diffusers/__init__.py +41 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tti_bindings/diffusers/server/main.py +11 -10
- lollms_client-1.10.1/src/lollms_client/tts_bindings/FishSpeech/__init__.py +281 -0
- lollms_client-1.10.1/src/lollms_client/tts_bindings/FishSpeech/server/main.py +260 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tts_bindings/xtts/__init__.py +44 -11
- lollms_client-1.10.1/src/lollms_client/ttv_bindings/diffusers/__init__.py +255 -0
- lollms_client-1.10.1/src/lollms_client/ttv_bindings/diffusers/server/main.py +194 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1/src/lollms_client.egg-info}/PKG-INFO +1 -1
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client.egg-info/SOURCES.txt +4 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/LICENSE +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/README.md +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/pyproject.toml +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/setup.cfg +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/assets/models_ctx_sizes.json +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/azure_openai/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/claude/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/gemini/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/grok/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/groq/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/hugging_face_inference_api/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/litellm/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/llama_cpp_server/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/lollms/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/lollms_webui/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/mistral/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/novita_ai/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/ollama/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/open_router/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/openai/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/openllm/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/openwebui/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/perplexity/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/tensor_rt/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/transformers/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/vllm/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_agentic.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_base_binding.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_bindings_utils.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_config.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_core.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_discussion.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_js_analyzer.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_llm_binding.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_mcp_binding.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_mcp_security.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_personality.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_python_analyzer.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_stt_binding.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_text_processing.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_tti_binding.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_ttm_binding.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_tts_binding.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_ttv_binding.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_types.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_utilities.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/mcp_bindings/local_mcp/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/mcp_bindings/local_mcp/default_tools/file_writer/file_writer.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/mcp_bindings/local_mcp/default_tools/generate_image_from_prompt/generate_image_from_prompt.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/mcp_bindings/local_mcp/default_tools/internet_search/internet_search.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/mcp_bindings/local_mcp/default_tools/python_interpreter/python_interpreter.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/mcp_bindings/remote_mcp/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/mcp_bindings/standard_mcp/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/stt_bindings/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/stt_bindings/lollms/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/stt_bindings/whisper/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/stt_bindings/whispercpp/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tti_bindings/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tti_bindings/diffusers/config.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tti_bindings/gemini/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tti_bindings/gguf_diffusion/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tti_bindings/gguf_diffusion/server/dequant.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tti_bindings/gguf_diffusion/server/main.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tti_bindings/gguf_diffusion/server/ops.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tti_bindings/leonardo_ai/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tti_bindings/lollms/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tti_bindings/novita_ai/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tti_bindings/open_router/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tti_bindings/openai/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tti_bindings/stability_ai/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/ttm_bindings/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/ttm_bindings/audiocraft/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/ttm_bindings/beatoven_ai/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/ttm_bindings/lollms/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/ttm_bindings/replicate/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/ttm_bindings/stability_ai/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/ttm_bindings/topmediai/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tts_bindings/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tts_bindings/bark/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tts_bindings/bark/server/install_bark.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tts_bindings/bark/server/main.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tts_bindings/lollms/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tts_bindings/piper_tts/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tts_bindings/piper_tts/server/install_piper.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tts_bindings/piper_tts/server/main.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tts_bindings/piper_tts/server/setup_voices.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tts_bindings/vibevoice/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tts_bindings/vibevoice/server/main.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tts_bindings/xtts/server/main.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tts_bindings/xtts/server/setup_voices.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/ttv_bindings/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/ttv_bindings/lollms/__init__.py +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client.egg-info/dependency_links.txt +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client.egg-info/requires.txt +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client.egg-info/top_level.txt +0 -0
- {lollms_client-1.10.0 → lollms_client-1.10.1}/test/test_lollms_discussion.py +0 -0
{lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/__init__.py
RENAMED
```diff
@@ -9,7 +9,7 @@ from lollms_client.lollms_llm_binding import LollmsLLMBindingManager
 # Import new bindings utils
 from lollms_client.lollms_bindings_utils import list_bindings, get_binding_desc
 
-__version__ = "1.10.0"
+__version__ = "1.10.1" # Updated version
 
 # Optionally, you could define __all__ if you want to be explicit about exports
 __all__ = [
```
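A quick way to confirm the bump after upgrading (assuming the package is importable in the current environment):

```python
import lollms_client

# Should print "1.10.1" once the new release is installed.
print(lollms_client.__version__)
```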
{lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tti_bindings/diffusers/__init__.py
RENAMED
```diff
@@ -7,6 +7,7 @@ import time
 import json
 from io import BytesIO
 from pathlib import Path
+from ascii_colors import trace_exception
 from typing import Optional, List, Dict, Any, Union, Callable
 
 # Ensure pipmaster is available.
@@ -126,6 +127,14 @@ class DiffusersTTIBinding(LollmsTTIBinding):
         pm_v.ensure_packages([
             "transformers", "safetensors", "accelerate"
         ])
+        ASCIIColors.info(f"Installing hugging face dependencies")
+        pm_v.ensure_packages([
+            "hf_xet"
+        ])
+        ASCIIColors.info(f"Installing bits and bytes for quantized models")
+        pm_v.ensure_packages([
+            "bitsandbytes"
+        ])
         ASCIIColors.info(f"[Optional] Installing xformers")
         try:
             pm_v.ensure_packages([
```
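The added `ensure_packages` calls pull in `hf_xet` (Hugging Face's Xet transfer backend) and `bitsandbytes` (quantized model support) only when they are missing. A minimal sketch of the same install-on-demand pattern, assuming pipmaster's module-level helper mirrors the instance method used here:

```python
import pipmaster as pm

# Install-on-demand: each package is checked and only installed if absent.
for reason, packages in [
    ("Hugging Face transfer backend", ["hf_xet"]),
    ("quantized model support", ["bitsandbytes"]),
]:
    print(f"Ensuring {reason}: {packages}")
    pm.ensure_packages(packages)
```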
```diff
@@ -498,6 +507,38 @@ class DiffusersTTIBinding(LollmsTTIBinding):
             progress_callback({"status": "error", "message": error_msg})
             return {"status": False, "message": error_msg}
 
+
+    def reinstall_dependencies(self):
+        """
+        Re-install the Python packages required by the Diffusers server.
+
+        This method looks for a ``requirements.txt`` file located in the
+        same directory as this ``__init__.py``. It then runs:
+
+        ``python -m pip install -r requirements.txt``
+
+        using the **same interpreter** that runs the current process,
+        ensuring that the correct virtual environment is targeted.
+
+        Returns
+        -------
+        dict
+            ``{'status': bool, 'message': str}`` – ``status`` is ``True`` on
+            success, ``False`` otherwise. ``message`` contains a short
+            description or the error that occurred.
+        """
+        try:
+            self.install_server_dependencies()
+            return {
+                "status": True,
+                "message": "Dependencies reinstalled successfully.",
+            }
+
+        except Exception as e:
+            trace_exception(e)
+            return {"status": False, "message": str(e)}
+
+
     def __del__(self):
         # The client destructor does not stop the server,
         # as it is a shared resource for all worker processes.
```
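A minimal usage sketch for the new `reinstall_dependencies` method (the constructor call is hypothetical; real kwargs depend on your lollms configuration):

```python
from lollms_client.tti_bindings.diffusers import DiffusersTTIBinding

binding = DiffusersTTIBinding()  # hypothetical construction, kwargs omitted

result = binding.reinstall_dependencies()
if result["status"]:
    print(result["message"])              # "Dependencies reinstalled successfully."
else:
    print("Reinstall failed:", result["message"])
```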
{lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tti_bindings/diffusers/server/main.py
RENAMED
```diff
@@ -287,8 +287,8 @@ class ModelManager:
             load_params["cache_dir"] = str(self.config["hf_cache_path"])
         load_params["torch_dtype"] = torch_dtype
 
-        is_qwen_model = "Qwen" in model_name_from_config
-        is_flux_model = "FLUX"
+        is_qwen_model = "Qwen".lower() in model_name_from_config.lower()
+        is_flux_model = "FLUX".lower() in model_name_from_config.lower()
 
         if is_qwen_model or is_flux_model:
             ASCIIColors.info(f"Special model '{model_name_from_config}' detected. Using dedicated pipeline loader.")
```
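Worth spelling out what this hunk fixes: the old `is_flux_model = "FLUX"` bound the string itself rather than testing membership, and any non-empty string is truthy in Python, so every model was routed into the special-case loader. A quick illustration:

```python
model_name = "stabilityai/sd-turbo"

# Old (buggy): assigns the literal string; bool("FLUX") is always True.
is_flux_model = "FLUX"
assert bool(is_flux_model)                    # wrongly True for sd-turbo

# New: case-insensitive substring test.
is_flux_model = "flux" in model_name.lower()
assert is_flux_model is False
```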
```diff
@@ -310,11 +310,12 @@ class ModelManager:
 
             if is_flux_model:
                 self.pipeline = AutoPipelineForText2Image.from_pretrained(model_name_from_config, **load_params)
-            elif "Qwen-Image-Edit-2509" in model_name_from_config:
-                self.pipeline = QwenImageEditPlusPipeline.from_pretrained(model_name_from_config, **load_params)
             elif "Qwen-Image-Edit" in model_name_from_config:
-
-
+                try:
+                    self.pipeline = QwenImageEditPlusPipeline.from_pretrained(model_name_from_config, **load_params)
+                except:
+                    self.pipeline = QwenImageEditPipeline.from_pretrained(model_name_from_config, **load_params)
+            else:#if "Qwen/Qwen-Image" in model_name_from_config:
                 self.pipeline = DiffusionPipeline.from_pretrained(model_name_from_config, **load_params)
 
         else:
```
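The try/except introduced here is a graceful-fallback load: attempt the newer `QwenImageEditPlusPipeline` first and drop back to `QwenImageEditPipeline` if it fails (for example, when the installed diffusers version or the checkpoint does not support the Plus variant). A generic sketch of the pattern, with a hypothetical helper name:

```python
def load_with_fallback(model_id, pipeline_classes, **kwargs):
    # Try each pipeline class in order; return the first that loads.
    last_error = None
    for cls in pipeline_classes:
        try:
            return cls.from_pretrained(model_id, **kwargs)
        except Exception as e:  # narrower than the bare except in the diff
            last_error = e
    raise RuntimeError(f"No pipeline could load {model_id}") from last_error

# pipe = load_with_fallback(name, [QwenImageEditPlusPipeline, QwenImageEditPipeline])
```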
```diff
@@ -362,9 +363,9 @@ class ModelManager:
         # --- FIX START ---
         # Force VAE to float32 to prevent black/chunky artifacts on some GPUs when using float16
         if self.pipeline and hasattr(self.pipeline, 'vae') and hasattr(self.pipeline.vae, 'dtype'):
-
-
-
+            if self.pipeline.vae.dtype == torch.float16:
+                ASCIIColors.info("Upcasting VAE to float32 to prevent artifacts.")
+                self.pipeline.vae = self.pipeline.vae.to(dtype=torch.float32)
         # --- FIX END ---
 
         self._set_scheduler()
```
```diff
@@ -808,7 +809,7 @@ async def edit_image(request: EditRequestJSON):
 
     if "Qwen-Image-Edit-2509" in model_name:
         task = "image2image"
-        pipeline_args.update({"true_cfg_scale": 4.0, "guidance_scale": 1.0, "num_inference_steps": 40, "negative_prompt": "
+        pipeline_args.update({"true_cfg_scale": pipeline_args.get("true_cfg_scale",4.0), "guidance_scale": pipeline_args.get("guidance_scale",1.0), "num_inference_steps": pipeline_args.get("num_inference_steps",40), "negative_prompt": pipeline_args.get("negative_prompt","")})
     edit_mode = pipeline_args.get("edit_mode", "fusion")
     if edit_mode == "fusion": pipeline_args["image"] = pil_images
     else:
```
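The rewritten line turns hard-coded Qwen-Image-Edit defaults into caller-overridable ones: `dict.get(key, default)` keeps any value the client already supplied and only falls back otherwise. The same effect in a standalone sketch:

```python
def with_defaults(pipeline_args: dict) -> dict:
    # Fill in a default only when the caller did not supply the key.
    defaults = {
        "true_cfg_scale": 4.0,
        "guidance_scale": 1.0,
        "num_inference_steps": 40,
        "negative_prompt": "",
    }
    return {**defaults, **pipeline_args}

print(with_defaults({"num_inference_steps": 25})["num_inference_steps"])  # 25
print(with_defaults({})["true_cfg_scale"])                                # 4.0
```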
lollms_client-1.10.1/src/lollms_client/tts_bindings/FishSpeech/__init__.py
ADDED
```diff
@@ -0,0 +1,281 @@
+import os
+import sys
+import requests
+import subprocess
+import time
+import base64
+from pathlib import Path
+from typing import Optional, List
+
+try:
+    from filelock import FileLock, Timeout
+except ImportError:
+    print("FATAL: The 'filelock' library is required. Please install it: pip install filelock")
+    sys.exit(1)
+
+from lollms_client.lollms_tts_binding import LollmsTTSBinding
+from ascii_colors import ASCIIColors
+
+BindingName = "FishSpeechClientBinding"
+
+class FishSpeechClientBinding(LollmsTTSBinding):
+    """
+    Client binding for Fish Speech (OpenAudio S1) TTS server.
+    Provides state-of-the-art multilingual voice synthesis with zero-shot cloning.
+    """
+    def __init__(self, **kwargs):
+        if 'model' in kwargs and 'model_name' not in kwargs:
+            kwargs['model_name'] = kwargs.pop('model')
+
+        self.config = kwargs
+        self.host = kwargs.get("host", "localhost")
+        self.port = kwargs.get("port", 8080)
+        self.auto_start_server = kwargs.get("auto_start_server", False)
+        self.compile = kwargs.get("compile", True)
+        self.device = kwargs.get("device", "auto")
+        self.model_name = kwargs.get("model_name", "fishaudio/openaudio-s1-mini")
+
+        self.server_process = None
+        self.base_url = f"http://{self.host}:{self.port}"
+        self.binding_root = Path(__file__).parent
+        self.server_dir = self.binding_root / "server"
+        self.venv_dir = Path("./venv/tts_fish_speech_venv")
+
+        # Python version requirement
+        self.target_python_version = "3.12"
+
+        # Model paths
+        self.checkpoints_dir = self.server_dir / "checkpoints"
+        self.references_dir = self.server_dir / "references"
+
+        if self.auto_start_server:
+            self.ensure_server_is_running()
+
+    def is_server_running(self) -> bool:
+        """Check if the Fish Speech server is running and responsive."""
+        try:
+            response = requests.get(f"{self.base_url}/health", timeout=2)
+            if response.status_code == 200:
+                return True
+        except requests.exceptions.RequestException:
+            return False
+        return False
+
+    def ensure_server_is_running(self):
+        """
+        Ensure the Fish Speech server is running using file lock for process safety.
+        """
+        self.server_dir.mkdir(exist_ok=True)
+        lock_path = self.server_dir / "fish_speech_server.lock"
+        lock = FileLock(lock_path)
+
+        ASCIIColors.info("Attempting to start or connect to Fish Speech server...")
+
+        if self.is_server_running():
+            ASCIIColors.green("Fish Speech server is already running.")
+            return
+
+        try:
+            with lock.acquire(timeout=10):
+                if not self.is_server_running():
+                    ASCIIColors.yellow("Lock acquired. Starting Fish Speech server...")
+                    self.start_server()
+                    self._wait_for_server(timeout=60)
+                else:
+                    ASCIIColors.green("Server started by another process.")
+        except Timeout:
+            ASCIIColors.yellow("Waiting for another process to start the server...")
+            self._wait_for_server(timeout=90)
+
+        if not self.is_server_running():
+            raise RuntimeError("Failed to start or connect to Fish Speech server.")
+
+    def install_server_dependencies(self):
+        """
+        Install Fish Speech dependencies into a dedicated Python 3.12 virtual environment.
+        """
+        ASCIIColors.info(f"Setting up Python {self.target_python_version} environment in: {self.venv_dir}")
+
+        try:
+            import pipmaster as pm
+        except ImportError:
+            print("FATAL: pipmaster is required. Install with: pip install pipmaster")
+            raise Exception("pipmaster not found")
+
+        try:
+            ASCIIColors.info(f"Bootstrapping portable Python {self.target_python_version}...")
+            pm_instance = pm.get_pip_manager_for_version(
+                self.target_python_version,
+                str(self.venv_dir)
+            )
+
+            ASCIIColors.green(f"Portable Python {self.target_python_version} ready.")
+            ASCIIColors.info(f"Using interpreter: {pm_instance.target_python_executable}")
+
+        except RuntimeError as e:
+            ASCIIColors.error(f"Failed to bootstrap Python {self.target_python_version}: {e}")
+            raise Exception(f"Fish Speech requires Python {self.target_python_version}")
+
+        # Install requirements
+        requirements_file = self.server_dir / "requirements.txt"
+        ASCIIColors.info("Installing Fish Speech dependencies...")
+
+        success = pm_instance.ensure_requirements(str(requirements_file), verbose=True)
+        if not success:
+            ASCIIColors.error("Failed to install dependencies.")
+            raise RuntimeError("Fish Speech dependency installation failed.")
+
+        ASCIIColors.green("Dependencies installed successfully.")
+        self._python_executable = pm_instance.target_python_executable
+
+        # Download model weights
+        self._download_model_weights(pm_instance)
+
+    def _download_model_weights(self, pm_instance):
+        """Download Fish Speech model weights if not present."""
+        model_path = self.checkpoints_dir / self.model_name.split('/')[-1]
+
+        if model_path.exists():
+            ASCIIColors.info(f"Model weights found at {model_path}")
+            return
+
+        ASCIIColors.yellow(f"Downloading model weights for {self.model_name}...")
+        self.checkpoints_dir.mkdir(parents=True, exist_ok=True)
+
+        try:
+            # Use huggingface-cli to download
+            result = subprocess.run([
+                str(self._python_executable),
+                "-m", "huggingface_hub.commands.huggingface_cli",
+                "download",
+                self.model_name,
+                "--local-dir", str(model_path)
+            ], check=True, capture_output=True, text=True)
+
+            ASCIIColors.green(f"Model downloaded to {model_path}")
+        except subprocess.CalledProcessError as e:
+            ASCIIColors.error(f"Failed to download model: {e.stderr}")
+            raise RuntimeError("Model download failed.")
+
+    def start_server(self):
+        """Launch the Fish Speech API server as a background process."""
+        server_script = self.server_dir / "main.py"
+        if not server_script.exists():
+            raise FileNotFoundError(f"Server script not found at {server_script}")
+
+        if not self.venv_dir.exists():
+            self.install_server_dependencies()
+        else:
+            try:
+                import pipmaster as pm
+                pm_instance = pm.get_pip_manager_for_version(
+                    self.target_python_version,
+                    str(self.venv_dir)
+                )
+                self._python_executable = pm_instance.target_python_executable
+            except Exception as e:
+                ASCIIColors.warning(f"Could not verify Python version: {e}")
+                # Fallback
+                if sys.platform == "win32":
+                    self._python_executable = str(self.venv_dir / "Scripts" / "python.exe")
+                else:
+                    self._python_executable = str(self.venv_dir / "bin" / "python")
+
+        # Prepare model path
+        model_short_name = self.model_name.split('/')[-1]
+        model_path = self.checkpoints_dir / model_short_name
+
+        command = [
+            str(self._python_executable),
+            str(server_script),
+            "--host", self.host,
+            "--port", str(self.port),
+            "--model-path", str(model_path),
+            "--device", self.device
+        ]
+
+        if self.compile:
+            command.append("--compile")
+
+        creationflags = subprocess.DETACHED_PROCESS if sys.platform == "win32" else 0
+        self.server_process = subprocess.Popen(command, creationflags=creationflags)
+        ASCIIColors.info("Fish Speech server launched.")
+
+    def _wait_for_server(self, timeout=60):
+        """Wait for the server to become responsive."""
+        ASCIIColors.info("Waiting for Fish Speech server...")
+        start_time = time.time()
+        while time.time() - start_time < timeout:
+            if self.is_server_running():
+                ASCIIColors.green("Fish Speech server is ready.")
+                return
+            time.sleep(3)
+        raise RuntimeError("Fish Speech server failed to start within timeout.")
+
+    def generate_audio(self, text: str, voice: Optional[str] = None,
+                       reference_text: Optional[str] = None, **kwargs) -> bytes:
+        """
+        Generate audio from text using Fish Speech.
+
+        Args:
+            text: Text to synthesize (supports emotion markers like (happy), (sad))
+            voice: Path to reference audio file for voice cloning (WAV/MP3, 10-30s)
+            reference_text: Transcript of reference audio (improves accuracy)
+            **kwargs: Additional parameters (format, top_p, temperature, etc.)
+        """
+        self.ensure_server_is_running()
+
+        payload = {
+            "text": text,
+            "reference_text": reference_text,
+            "format": kwargs.get("format", "wav"),
+            "top_p": kwargs.get("top_p", 0.9),
+            "temperature": kwargs.get("temperature", 0.9),
+            "repetition_penalty": kwargs.get("repetition_penalty", 1.2),
+            "normalize": kwargs.get("normalize", True),
+            "chunk_length": kwargs.get("chunk_length", 200)
+        }
+
+        # Handle reference audio
+        if voice:
+            voice_path = Path(voice)
+            if not voice_path.exists():
+                # Try references directory
+                voice_path = self.references_dir / voice
+                if not voice_path.exists():
+                    raise FileNotFoundError(f"Reference audio not found: {voice}")
+
+            # Encode audio as base64
+            with open(voice_path, 'rb') as f:
+                audio_base64 = base64.b64encode(f.read()).decode('utf-8')
+            payload["reference_audio"] = audio_base64
+
+        try:
+            response = requests.post(
+                f"{self.base_url}/v1/tts",
+                json=payload,
+                timeout=300
+            )
+            response.raise_for_status()
+            return response.content
+        except requests.exceptions.RequestException as e:
+            ASCIIColors.error(f"Failed to communicate with Fish Speech server: {e}")
+            raise RuntimeError("Fish Speech server communication failed.") from e
+
+    def list_voices(self, **kwargs) -> List[str]:
+        """Get available reference voices."""
+        self.ensure_server_is_running()
+        try:
+            response = requests.get(f"{self.base_url}/list_voices")
+            response.raise_for_status()
+            return response.json().get("voices", [])
+        except requests.exceptions.RequestException as e:
+            ASCIIColors.error(f"Failed to get voices: {e}")
+            return []
+
+    def list_models(self, **kwargs) -> List[str]:
+        """List available Fish Speech models."""
+        return [
+            "fishaudio/openaudio-s1-mini",
+            "fishaudio/fish-speech-1.5"
+        ]
```
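A minimal usage sketch for the new binding; the import path follows the file's location in the package and the argument values are illustrative:

```python
from lollms_client.tts_bindings.FishSpeech import FishSpeechClientBinding

tts = FishSpeechClientBinding(
    host="localhost",
    port=8080,
    auto_start_server=True,   # bootstraps the venv and launches the server if needed
    model_name="fishaudio/openaudio-s1-mini",
)

# Zero-shot cloning: pass a 10-30s reference clip plus its transcript.
audio_bytes = tts.generate_audio(
    "(happy) Hello from Fish Speech!",
    voice="my_reference.wav",
    reference_text="Transcript of the reference clip.",
    format="wav",
)
with open("out.wav", "wb") as f:
    f.write(audio_bytes)
```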
lollms_client-1.10.1/src/lollms_client/tts_bindings/FishSpeech/server/main.py
ADDED
```diff
@@ -0,0 +1,260 @@
+try:
+    import uvicorn
+    from fastapi import FastAPI, HTTPException
+    from fastapi.responses import Response
+    from pydantic import BaseModel
+    import argparse
+    import sys
+    import os
+    from pathlib import Path
+    import asyncio
+    import traceback
+    import base64
+    import io
+    import wave
+    import numpy as np
+    from typing import Optional, List
+    import warnings
+
+    warnings.filterwarnings("ignore", category=UserWarning)
+    warnings.filterwarnings("ignore", category=FutureWarning)
+
+    from ascii_colors import ASCIIColors
+
+    # Fish Speech imports
+    try:
+        ASCIIColors.info("Server: Loading Fish Speech dependencies...")
+        import torch
+        from fish_speech.models.text2semantic.inference import InferenceBuilder as Text2SemanticInference
+        from fish_speech.models.dac.inference import AudioCodecInference
+        ASCIIColors.green("Server: Fish Speech dependencies loaded")
+
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        ASCIIColors.info(f"Server: Using device: {device}")
+        fish_speech_available = True
+
+    except Exception as e:
+        ASCIIColors.error(f"Server: Failed to load Fish Speech: {e}")
+        ASCIIColors.error(f"Server: Traceback:\n{traceback.format_exc()}")
+        fish_speech_available = False
+
+    # API Models
+    class TTSRequest(BaseModel):
+        text: str
+        reference_audio: Optional[str] = None  # base64 encoded
+        reference_text: Optional[str] = None
+        format: str = "wav"
+        top_p: float = 0.9
+        temperature: float = 0.9
+        repetition_penalty: float = 1.2
+        normalize: bool = True
+        chunk_length: int = 200
+
+    class FishSpeechServer:
+        def __init__(self, model_path: str, device: str = "auto", compile: bool = False):
+            self.model_path = Path(model_path)
+            self.device = device if device != "auto" else ("cuda" if torch.cuda.is_available() else "cpu")
+            self.compile = compile and self.device == "cuda"
+            self.model_loaded = False
+            self.model_loading = False
+
+            self.text2semantic_model = None
+            self.codec_model = None
+
+            self.references_dir = Path(__file__).parent / "references"
+            self.references_dir.mkdir(exist_ok=True)
+
+            ASCIIColors.info(f"Server: Fish Speech server initialized (model will load on first request)")
+            ASCIIColors.info(f"Server: Model path: {self.model_path}")
+            ASCIIColors.info(f"Server: Device: {self.device}, Compile: {self.compile}")
+
+        async def _ensure_model_loaded(self):
+            """Lazy load Fish Speech models."""
+            if self.model_loaded:
+                return
+
+            if self.model_loading:
+                while self.model_loading and not self.model_loaded:
+                    await asyncio.sleep(0.1)
+                return
+
+            if not fish_speech_available:
+                raise RuntimeError("Fish Speech not available. Check dependencies.")
+
+            try:
+                self.model_loading = True
+                ASCIIColors.yellow("Server: Loading Fish Speech models (first run may take time)...")
+
+                # Load text2semantic model
+                self.text2semantic_model = Text2SemanticInference(
+                    checkpoint_path=str(self.model_path),
+                    device=self.device,
+                    compile=self.compile
+                )
+
+                # Load codec model
+                codec_path = self.model_path / "codec.pth"
+                if not codec_path.exists():
+                    # Try alternative names
+                    codec_path = self.model_path / "firefly-gan-vq-fsq-8x1024-21hz-generator.pth"
+
+                self.codec_model = AudioCodecInference(
+                    checkpoint_path=str(codec_path),
+                    device=self.device
+                )
+
+                self.model_loaded = True
+                ASCIIColors.green("Server: Fish Speech models loaded successfully")
+
+            except Exception as e:
+                ASCIIColors.error(f"Server: Error loading models: {e}")
+                ASCIIColors.error(f"Server: Traceback:\n{traceback.format_exc()}")
+                self.model_loaded = False
+                raise
+            finally:
+                self.model_loading = False
+
+        async def generate_audio(self, request: TTSRequest) -> bytes:
+            """Generate audio from text using Fish Speech."""
+            await self._ensure_model_loaded()
+
+            if not self.model_loaded:
+                raise RuntimeError("Fish Speech models not loaded")
+
+            try:
+                ASCIIColors.info(f"Server: Generating audio for: '{request.text[:50]}...'")
+
+                # Prepare reference audio if provided
+                reference_tokens = None
+                if request.reference_audio:
+                    audio_bytes = base64.b64decode(request.reference_audio)
+                    # Encode reference audio
+                    reference_tokens = self._encode_reference_audio(
+                        audio_bytes,
+                        request.reference_text
+                    )
+
+                # Generate semantic tokens from text
+                codes = self.text2semantic_model.generate(
+                    text=request.text,
+                    prompt_tokens=reference_tokens,
+                    prompt_text=request.reference_text,
+                    top_p=request.top_p,
+                    temperature=request.temperature,
+                    repetition_penalty=request.repetition_penalty,
+                    max_new_tokens=2048
+                )
+
+                # Generate audio from semantic tokens
+                audio_data = self.codec_model.decode(codes)
+
+                # Convert to bytes
+                if request.format == "wav":
+                    audio_bytes = self._to_wav_bytes(audio_data)
+                elif request.format == "mp3":
+                    audio_bytes = self._to_mp3_bytes(audio_data)
+                else:  # pcm
+                    audio_bytes = audio_data.tobytes()
+
+                ASCIIColors.green(f"Server: Generated {len(audio_bytes)} bytes")
+                return audio_bytes
+
+            except Exception as e:
+                ASCIIColors.error(f"Server: Error generating audio: {e}")
+                ASCIIColors.error(f"Server: Traceback:\n{traceback.format_exc()}")
+                raise
+
+        def _encode_reference_audio(self, audio_bytes: bytes, transcript: Optional[str]) -> np.ndarray:
+            """Encode reference audio to semantic tokens."""
+            # Save temporarily
+            temp_path = self.references_dir / "temp_reference.wav"
+            with open(temp_path, 'wb') as f:
+                f.write(audio_bytes)
+
+            try:
+                tokens = self.codec_model.encode(str(temp_path))
+                return tokens
+            finally:
+                temp_path.unlink(missing_ok=True)
+
+        def _to_wav_bytes(self, audio_data: np.ndarray, sample_rate: int = 44100) -> bytes:
+            """Convert audio array to WAV bytes."""
+            buffer = io.BytesIO()
+            with wave.open(buffer, 'wb') as wf:
+                wf.setnchannels(1)
+                wf.setsampwidth(2)
+                wf.setframerate(sample_rate)
+                wf.writeframes((audio_data * 32767).astype(np.int16).tobytes())
+            return buffer.getvalue()
+
+        def _to_mp3_bytes(self, audio_data: np.ndarray) -> bytes:
+            """Convert audio array to MP3 bytes."""
+            # Requires pydub - fallback to WAV
+            return self._to_wav_bytes(audio_data)
+
+        def list_voices(self) -> List[str]:
+            """List available reference voices."""
+            return [f.stem for f in self.references_dir.glob("*.[wW][aA][vV]")]
+
+    # FastAPI app
+    app = FastAPI(title="Fish Speech Server")
+    fish_server = None
+
+    @app.post("/v1/tts")
+    async def tts_endpoint(request: TTSRequest):
+        try:
+            audio_bytes = await fish_server.generate_audio(request)
+
+            media_type = {
+                "wav": "audio/wav",
+                "mp3": "audio/mpeg",
+                "pcm": "audio/pcm"
+            }.get(request.format, "audio/wav")
+
+            return Response(content=audio_bytes, media_type=media_type)
+        except Exception as e:
+            ASCIIColors.error(f"Server: TTS endpoint error: {e}")
+            raise HTTPException(status_code=500, detail=str(e))
+
+    @app.get("/list_voices")
+    async def list_voices_endpoint():
+        try:
+            voices = fish_server.list_voices()
+            return {"voices": voices}
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=str(e))
+
+    @app.get("/health")
+    async def health_check():
+        return {
+            "status": "running",
+            "fish_speech_available": fish_speech_available,
+            "model_loaded": fish_server.model_loaded if fish_server else False
+        }
+
+    if __name__ == '__main__':
+        parser = argparse.ArgumentParser(description="Fish Speech TTS Server")
+        parser.add_argument("--host", type=str, default="localhost")
+        parser.add_argument("--port", type=int, default=8080)
+        parser.add_argument("--model-path", type=str, required=True)
+        parser.add_argument("--device", type=str, default="auto")
+        parser.add_argument("--compile", action="store_true")
+
+        args = parser.parse_args()
+
+        fish_server = FishSpeechServer(
+            model_path=args.model_path,
+            device=args.device,
+            compile=args.compile
+        )
+
+        ASCIIColors.cyan("--- Fish Speech TTS Server ---")
+        ASCIIColors.green(f"Starting server on http://{args.host}:{args.port}")
+
+        uvicorn.run(app, host=args.host, port=args.port)
+
+except Exception as e:
+    from ascii_colors import ASCIIColors
+    ASCIIColors.red(f"Server: CRITICAL ERROR: {e}")
+    import traceback
+    ASCIIColors.red(f"Server: Traceback:\n{traceback.format_exc()}")
```