lollms-client 1.10.0.tar.gz → 1.10.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108)
  1. {lollms_client-1.10.0/src/lollms_client.egg-info → lollms_client-1.10.1}/PKG-INFO +1 -1
  2. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/__init__.py +1 -1
  3. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tti_bindings/diffusers/__init__.py +41 -0
  4. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tti_bindings/diffusers/server/main.py +11 -10
  5. lollms_client-1.10.1/src/lollms_client/tts_bindings/FishSpeech/__init__.py +281 -0
  6. lollms_client-1.10.1/src/lollms_client/tts_bindings/FishSpeech/server/main.py +260 -0
  7. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tts_bindings/xtts/__init__.py +44 -11
  8. lollms_client-1.10.1/src/lollms_client/ttv_bindings/diffusers/__init__.py +255 -0
  9. lollms_client-1.10.1/src/lollms_client/ttv_bindings/diffusers/server/main.py +194 -0
  10. {lollms_client-1.10.0 → lollms_client-1.10.1/src/lollms_client.egg-info}/PKG-INFO +1 -1
  11. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client.egg-info/SOURCES.txt +4 -0
  12. {lollms_client-1.10.0 → lollms_client-1.10.1}/LICENSE +0 -0
  13. {lollms_client-1.10.0 → lollms_client-1.10.1}/README.md +0 -0
  14. {lollms_client-1.10.0 → lollms_client-1.10.1}/pyproject.toml +0 -0
  15. {lollms_client-1.10.0 → lollms_client-1.10.1}/setup.cfg +0 -0
  16. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/assets/models_ctx_sizes.json +0 -0
  17. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/__init__.py +0 -0
  18. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/azure_openai/__init__.py +0 -0
  19. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/claude/__init__.py +0 -0
  20. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/gemini/__init__.py +0 -0
  21. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/grok/__init__.py +0 -0
  22. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/groq/__init__.py +0 -0
  23. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/hugging_face_inference_api/__init__.py +0 -0
  24. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/litellm/__init__.py +0 -0
  25. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/llama_cpp_server/__init__.py +0 -0
  26. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/lollms/__init__.py +0 -0
  27. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/lollms_webui/__init__.py +0 -0
  28. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/mistral/__init__.py +0 -0
  29. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/novita_ai/__init__.py +0 -0
  30. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/ollama/__init__.py +0 -0
  31. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/open_router/__init__.py +0 -0
  32. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/openai/__init__.py +0 -0
  33. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/openllm/__init__.py +0 -0
  34. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/openwebui/__init__.py +0 -0
  35. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/perplexity/__init__.py +0 -0
  36. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/tensor_rt/__init__.py +0 -0
  37. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/transformers/__init__.py +0 -0
  38. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/llm_bindings/vllm/__init__.py +0 -0
  39. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_agentic.py +0 -0
  40. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_base_binding.py +0 -0
  41. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_bindings_utils.py +0 -0
  42. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_config.py +0 -0
  43. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_core.py +0 -0
  44. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_discussion.py +0 -0
  45. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_js_analyzer.py +0 -0
  46. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_llm_binding.py +0 -0
  47. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_mcp_binding.py +0 -0
  48. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_mcp_security.py +0 -0
  49. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_personality.py +0 -0
  50. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_python_analyzer.py +0 -0
  51. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_stt_binding.py +0 -0
  52. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_text_processing.py +0 -0
  53. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_tti_binding.py +0 -0
  54. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_ttm_binding.py +0 -0
  55. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_tts_binding.py +0 -0
  56. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_ttv_binding.py +0 -0
  57. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_types.py +0 -0
  58. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/lollms_utilities.py +0 -0
  59. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/mcp_bindings/local_mcp/__init__.py +0 -0
  60. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/mcp_bindings/local_mcp/default_tools/file_writer/file_writer.py +0 -0
  61. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/mcp_bindings/local_mcp/default_tools/generate_image_from_prompt/generate_image_from_prompt.py +0 -0
  62. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/mcp_bindings/local_mcp/default_tools/internet_search/internet_search.py +0 -0
  63. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/mcp_bindings/local_mcp/default_tools/python_interpreter/python_interpreter.py +0 -0
  64. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/mcp_bindings/remote_mcp/__init__.py +0 -0
  65. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/mcp_bindings/standard_mcp/__init__.py +0 -0
  66. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/stt_bindings/__init__.py +0 -0
  67. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/stt_bindings/lollms/__init__.py +0 -0
  68. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/stt_bindings/whisper/__init__.py +0 -0
  69. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/stt_bindings/whispercpp/__init__.py +0 -0
  70. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tti_bindings/__init__.py +0 -0
  71. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tti_bindings/diffusers/config.py +0 -0
  72. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tti_bindings/gemini/__init__.py +0 -0
  73. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tti_bindings/gguf_diffusion/__init__.py +0 -0
  74. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tti_bindings/gguf_diffusion/server/dequant.py +0 -0
  75. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tti_bindings/gguf_diffusion/server/main.py +0 -0
  76. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tti_bindings/gguf_diffusion/server/ops.py +0 -0
  77. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tti_bindings/leonardo_ai/__init__.py +0 -0
  78. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tti_bindings/lollms/__init__.py +0 -0
  79. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tti_bindings/novita_ai/__init__.py +0 -0
  80. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tti_bindings/open_router/__init__.py +0 -0
  81. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tti_bindings/openai/__init__.py +0 -0
  82. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tti_bindings/stability_ai/__init__.py +0 -0
  83. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/ttm_bindings/__init__.py +0 -0
  84. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/ttm_bindings/audiocraft/__init__.py +0 -0
  85. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/ttm_bindings/beatoven_ai/__init__.py +0 -0
  86. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/ttm_bindings/lollms/__init__.py +0 -0
  87. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/ttm_bindings/replicate/__init__.py +0 -0
  88. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/ttm_bindings/stability_ai/__init__.py +0 -0
  89. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/ttm_bindings/topmediai/__init__.py +0 -0
  90. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tts_bindings/__init__.py +0 -0
  91. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tts_bindings/bark/__init__.py +0 -0
  92. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tts_bindings/bark/server/install_bark.py +0 -0
  93. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tts_bindings/bark/server/main.py +0 -0
  94. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tts_bindings/lollms/__init__.py +0 -0
  95. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tts_bindings/piper_tts/__init__.py +0 -0
  96. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tts_bindings/piper_tts/server/install_piper.py +0 -0
  97. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tts_bindings/piper_tts/server/main.py +0 -0
  98. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tts_bindings/piper_tts/server/setup_voices.py +0 -0
  99. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tts_bindings/vibevoice/__init__.py +0 -0
  100. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tts_bindings/vibevoice/server/main.py +0 -0
  101. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tts_bindings/xtts/server/main.py +0 -0
  102. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/tts_bindings/xtts/server/setup_voices.py +0 -0
  103. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/ttv_bindings/__init__.py +0 -0
  104. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client/ttv_bindings/lollms/__init__.py +0 -0
  105. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client.egg-info/dependency_links.txt +0 -0
  106. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client.egg-info/requires.txt +0 -0
  107. {lollms_client-1.10.0 → lollms_client-1.10.1}/src/lollms_client.egg-info/top_level.txt +0 -0
  108. {lollms_client-1.10.0 → lollms_client-1.10.1}/test/test_lollms_discussion.py +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lollms_client
-Version: 1.10.0
+Version: 1.10.1
 Summary: A client library for LoLLMs generate endpoint
 Author-email: ParisNeo <parisneoai@gmail.com>
 License: Apache License
@@ -9,7 +9,7 @@ from lollms_client.lollms_llm_binding import LollmsLLMBindingManager
 # Import new bindings utils
 from lollms_client.lollms_bindings_utils import list_bindings, get_binding_desc
 
-__version__ = "1.10.0" # Updated version
+__version__ = "1.10.1" # Updated version
 
 # Optionally, you could define __all__ if you want to be explicit about exports
 __all__ = [
@@ -7,6 +7,7 @@ import time
 import json
 from io import BytesIO
 from pathlib import Path
+from ascii_colors import trace_exception
 from typing import Optional, List, Dict, Any, Union, Callable
 
 # Ensure pipmaster is available.
@@ -126,6 +127,14 @@ class DiffusersTTIBinding(LollmsTTIBinding):
         pm_v.ensure_packages([
             "transformers", "safetensors", "accelerate"
         ])
+        ASCIIColors.info(f"Installing hugging face dependencies")
+        pm_v.ensure_packages([
+            "hf_xet"
+        ])
+        ASCIIColors.info(f"Installing bits and bytes for quantized models")
+        pm_v.ensure_packages([
+            "bitsandbytes"
+        ])
         ASCIIColors.info(f"[Optional] Installing xformers")
         try:
             pm_v.ensure_packages([
@@ -498,6 +507,38 @@ class DiffusersTTIBinding(LollmsTTIBinding):
                 progress_callback({"status": "error", "message": error_msg})
             return {"status": False, "message": error_msg}
 
+
+    def reinstall_dependencies(self):
+        """
+        Re-install the Python packages required by the Diffusers server.
+
+        This method looks for a ``requirements.txt`` file located in the
+        same directory as this ``__init__.py``. It then runs:
+
+        ``python -m pip install -r requirements.txt``
+
+        using the **same interpreter** that runs the current process,
+        ensuring that the correct virtual environment is targeted.
+
+        Returns
+        -------
+        dict
+            ``{'status': bool, 'message': str}`` – ``status`` is ``True`` on
+            success, ``False`` otherwise. ``message`` contains a short
+            description or the error that occurred.
+        """
+        try:
+            self.install_server_dependencies()
+            return {
+                "status": True,
+                "message": "Dependencies reinstalled successfully.",
+            }
+
+        except Exception as e:
+            trace_exception(e)
+            return {"status": False, "message": str(e)}
+
+
     def __del__(self):
         # The client destructor does not stop the server,
         # as it is a shared resource for all worker processes.
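The docstring above describes the usual way to target the active environment: invoke pip through the interpreter of the running process. A minimal sketch of that pattern (a hypothetical standalone helper; the shipped method instead delegates to install_server_dependencies()):

    import subprocess
    import sys
    from pathlib import Path

    def reinstall_from_requirements() -> dict:
        # Run pip via the current interpreter so the active venv is targeted
        req = Path(__file__).parent / "requirements.txt"
        try:
            subprocess.run(
                [sys.executable, "-m", "pip", "install", "-r", str(req)],
                check=True,
            )
            return {"status": True, "message": "Dependencies reinstalled successfully."}
        except subprocess.CalledProcessError as e:
            return {"status": False, "message": str(e)}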
@@ -287,8 +287,8 @@ class ModelManager:
             load_params["cache_dir"] = str(self.config["hf_cache_path"])
             load_params["torch_dtype"] = torch_dtype
 
-            is_qwen_model = "Qwen" in model_name_from_config
-            is_flux_model = "FLUX" in model_name_from_config
+            is_qwen_model = "Qwen".lower() in model_name_from_config.lower()
+            is_flux_model = "FLUX".lower() in model_name_from_config.lower()
 
             if is_qwen_model or is_flux_model:
                 ASCIIColors.info(f"Special model '{model_name_from_config}' detected. Using dedicated pipeline loader.")
@@ -310,11 +310,12 @@ class ModelManager:
 
                 if is_flux_model:
                     self.pipeline = AutoPipelineForText2Image.from_pretrained(model_name_from_config, **load_params)
-                elif "Qwen-Image-Edit-2509" in model_name_from_config:
-                    self.pipeline = QwenImageEditPlusPipeline.from_pretrained(model_name_from_config, **load_params)
                 elif "Qwen-Image-Edit" in model_name_from_config:
-                    self.pipeline = QwenImageEditPipeline.from_pretrained(model_name_from_config, **load_params)
-                elif "Qwen/Qwen-Image" in model_name_from_config:
+                    try:
+                        self.pipeline = QwenImageEditPlusPipeline.from_pretrained(model_name_from_config, **load_params)
+                    except:
+                        self.pipeline = QwenImageEditPipeline.from_pretrained(model_name_from_config, **load_params)
+                else:#if "Qwen/Qwen-Image" in model_name_from_config:
                     self.pipeline = DiffusionPipeline.from_pretrained(model_name_from_config, **load_params)
 
             else:
@@ -362,9 +363,9 @@ class ModelManager:
             # --- FIX START ---
             # Force VAE to float32 to prevent black/chunky artifacts on some GPUs when using float16
             if self.pipeline and hasattr(self.pipeline, 'vae') and hasattr(self.pipeline.vae, 'dtype'):
-                if self.pipeline.vae.dtype == torch.float16:
-                    ASCIIColors.info("Upcasting VAE to float32 to prevent artifacts.")
-                    self.pipeline.vae = self.pipeline.vae.to(dtype=torch.float32)
+                if self.pipeline.vae.dtype == torch.float16:
+                    ASCIIColors.info("Upcasting VAE to float32 to prevent artifacts.")
+                    self.pipeline.vae = self.pipeline.vae.to(dtype=torch.float32)
             # --- FIX END ---
 
             self._set_scheduler()
@@ -808,7 +809,7 @@ async def edit_image(request: EditRequestJSON):
 
     if "Qwen-Image-Edit-2509" in model_name:
         task = "image2image"
-        pipeline_args.update({"true_cfg_scale": 4.0, "guidance_scale": 1.0, "num_inference_steps": 40, "negative_prompt": " "})
+        pipeline_args.update({"true_cfg_scale": pipeline_args.get("true_cfg_scale",4.0), "guidance_scale": pipeline_args.get("guidance_scale",1.0), "num_inference_steps": pipeline_args.get("num_inference_steps",40), "negative_prompt": pipeline_args.get("negative_prompt","")})
         edit_mode = pipeline_args.get("edit_mode", "fusion")
         if edit_mode == "fusion": pipeline_args["image"] = pil_images
         else:
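The change above replaces hard-coded Qwen-Image-Edit-2509 settings with dict.get() defaults, so values supplied by the caller survive the update() call. A small illustration of the pattern (values are arbitrary):

    pipeline_args = {"num_inference_steps": 24}  # supplied by the caller
    pipeline_args.update({
        "true_cfg_scale": pipeline_args.get("true_cfg_scale", 4.0),           # default applied
        "num_inference_steps": pipeline_args.get("num_inference_steps", 40),  # caller's 24 kept
    })
    assert pipeline_args["num_inference_steps"] == 24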
@@ -0,0 +1,281 @@
+import os
+import sys
+import requests
+import subprocess
+import time
+import base64
+from pathlib import Path
+from typing import Optional, List
+
+try:
+    from filelock import FileLock, Timeout
+except ImportError:
+    print("FATAL: The 'filelock' library is required. Please install it: pip install filelock")
+    sys.exit(1)
+
+from lollms_client.lollms_tts_binding import LollmsTTSBinding
+from ascii_colors import ASCIIColors
+
+BindingName = "FishSpeechClientBinding"
+
+class FishSpeechClientBinding(LollmsTTSBinding):
+    """
+    Client binding for the Fish Speech (OpenAudio S1) TTS server.
+    Provides state-of-the-art multilingual voice synthesis with zero-shot cloning.
+    """
+    def __init__(self, **kwargs):
+        if 'model' in kwargs and 'model_name' not in kwargs:
+            kwargs['model_name'] = kwargs.pop('model')
+
+        self.config = kwargs
+        self.host = kwargs.get("host", "localhost")
+        self.port = kwargs.get("port", 8080)
+        self.auto_start_server = kwargs.get("auto_start_server", False)
+        self.compile = kwargs.get("compile", True)
+        self.device = kwargs.get("device", "auto")
+        self.model_name = kwargs.get("model_name", "fishaudio/openaudio-s1-mini")
+
+        self.server_process = None
+        self.base_url = f"http://{self.host}:{self.port}"
+        self.binding_root = Path(__file__).parent
+        self.server_dir = self.binding_root / "server"
+        self.venv_dir = Path("./venv/tts_fish_speech_venv")
+
+        # Python version requirement
+        self.target_python_version = "3.12"
+
+        # Model paths
+        self.checkpoints_dir = self.server_dir / "checkpoints"
+        self.references_dir = self.server_dir / "references"
+
+        if self.auto_start_server:
+            self.ensure_server_is_running()
+
+    def is_server_running(self) -> bool:
+        """Check if the Fish Speech server is running and responsive."""
+        try:
+            response = requests.get(f"{self.base_url}/health", timeout=2)
+            if response.status_code == 200:
+                return True
+        except requests.exceptions.RequestException:
+            return False
+        return False
+
+    def ensure_server_is_running(self):
+        """
+        Ensure the Fish Speech server is running, using a file lock for process safety.
+        """
+        self.server_dir.mkdir(exist_ok=True)
+        lock_path = self.server_dir / "fish_speech_server.lock"
+        lock = FileLock(lock_path)
+
+        ASCIIColors.info("Attempting to start or connect to Fish Speech server...")
+
+        if self.is_server_running():
+            ASCIIColors.green("Fish Speech server is already running.")
+            return
+
+        try:
+            with lock.acquire(timeout=10):
+                if not self.is_server_running():
+                    ASCIIColors.yellow("Lock acquired. Starting Fish Speech server...")
+                    self.start_server()
+                    self._wait_for_server(timeout=60)
+                else:
+                    ASCIIColors.green("Server started by another process.")
+        except Timeout:
+            ASCIIColors.yellow("Waiting for another process to start the server...")
+            self._wait_for_server(timeout=90)
+
+        if not self.is_server_running():
+            raise RuntimeError("Failed to start or connect to Fish Speech server.")
+
+    def install_server_dependencies(self):
+        """
+        Install Fish Speech dependencies into a dedicated Python 3.12 virtual environment.
+        """
+        ASCIIColors.info(f"Setting up Python {self.target_python_version} environment in: {self.venv_dir}")
+
+        try:
+            import pipmaster as pm
+        except ImportError:
+            print("FATAL: pipmaster is required. Install with: pip install pipmaster")
+            raise Exception("pipmaster not found")
+
+        try:
+            ASCIIColors.info(f"Bootstrapping portable Python {self.target_python_version}...")
+            pm_instance = pm.get_pip_manager_for_version(
+                self.target_python_version,
+                str(self.venv_dir)
+            )
+
+            ASCIIColors.green(f"Portable Python {self.target_python_version} ready.")
+            ASCIIColors.info(f"Using interpreter: {pm_instance.target_python_executable}")
+
+        except RuntimeError as e:
+            ASCIIColors.error(f"Failed to bootstrap Python {self.target_python_version}: {e}")
+            raise Exception(f"Fish Speech requires Python {self.target_python_version}")
+
+        # Install requirements
+        requirements_file = self.server_dir / "requirements.txt"
+        ASCIIColors.info("Installing Fish Speech dependencies...")
+
+        success = pm_instance.ensure_requirements(str(requirements_file), verbose=True)
+        if not success:
+            ASCIIColors.error("Failed to install dependencies.")
+            raise RuntimeError("Fish Speech dependency installation failed.")
+
+        ASCIIColors.green("Dependencies installed successfully.")
+        self._python_executable = pm_instance.target_python_executable
+
+        # Download model weights
+        self._download_model_weights(pm_instance)
+
+    def _download_model_weights(self, pm_instance):
+        """Download Fish Speech model weights if not present."""
+        model_path = self.checkpoints_dir / self.model_name.split('/')[-1]
+
+        if model_path.exists():
+            ASCIIColors.info(f"Model weights found at {model_path}")
+            return
+
+        ASCIIColors.yellow(f"Downloading model weights for {self.model_name}...")
+        self.checkpoints_dir.mkdir(parents=True, exist_ok=True)
+
+        try:
+            # Use huggingface-cli to download
+            result = subprocess.run([
+                str(self._python_executable),
+                "-m", "huggingface_hub.commands.huggingface_cli",
+                "download",
+                self.model_name,
+                "--local-dir", str(model_path)
+            ], check=True, capture_output=True, text=True)
+
+            ASCIIColors.green(f"Model downloaded to {model_path}")
+        except subprocess.CalledProcessError as e:
+            ASCIIColors.error(f"Failed to download model: {e.stderr}")
+            raise RuntimeError("Model download failed.")
+
+    def start_server(self):
+        """Launch the Fish Speech API server as a background process."""
+        server_script = self.server_dir / "main.py"
+        if not server_script.exists():
+            raise FileNotFoundError(f"Server script not found at {server_script}")
+
+        if not self.venv_dir.exists():
+            self.install_server_dependencies()
+        else:
+            try:
+                import pipmaster as pm
+                pm_instance = pm.get_pip_manager_for_version(
+                    self.target_python_version,
+                    str(self.venv_dir)
+                )
+                self._python_executable = pm_instance.target_python_executable
+            except Exception as e:
+                ASCIIColors.warning(f"Could not verify Python version: {e}")
+                # Fallback
+                if sys.platform == "win32":
+                    self._python_executable = str(self.venv_dir / "Scripts" / "python.exe")
+                else:
+                    self._python_executable = str(self.venv_dir / "bin" / "python")
+
+        # Prepare model path
+        model_short_name = self.model_name.split('/')[-1]
+        model_path = self.checkpoints_dir / model_short_name
+
+        command = [
+            str(self._python_executable),
+            str(server_script),
+            "--host", self.host,
+            "--port", str(self.port),
+            "--model-path", str(model_path),
+            "--device", self.device
+        ]
+
+        if self.compile:
+            command.append("--compile")
+
+        creationflags = subprocess.DETACHED_PROCESS if sys.platform == "win32" else 0
+        self.server_process = subprocess.Popen(command, creationflags=creationflags)
+        ASCIIColors.info("Fish Speech server launched.")
+
+    def _wait_for_server(self, timeout=60):
+        """Wait for the server to become responsive."""
+        ASCIIColors.info("Waiting for Fish Speech server...")
+        start_time = time.time()
+        while time.time() - start_time < timeout:
+            if self.is_server_running():
+                ASCIIColors.green("Fish Speech server is ready.")
+                return
+            time.sleep(3)
+        raise RuntimeError("Fish Speech server failed to start within timeout.")
+
+    def generate_audio(self, text: str, voice: Optional[str] = None,
+                       reference_text: Optional[str] = None, **kwargs) -> bytes:
+        """
+        Generate audio from text using Fish Speech.
+
+        Args:
+            text: Text to synthesize (supports emotion markers like (happy), (sad))
+            voice: Path to reference audio file for voice cloning (WAV/MP3, 10-30s)
+            reference_text: Transcript of reference audio (improves accuracy)
+            **kwargs: Additional parameters (format, top_p, temperature, etc.)
+        """
+        self.ensure_server_is_running()
+
+        payload = {
+            "text": text,
+            "reference_text": reference_text,
+            "format": kwargs.get("format", "wav"),
+            "top_p": kwargs.get("top_p", 0.9),
+            "temperature": kwargs.get("temperature", 0.9),
+            "repetition_penalty": kwargs.get("repetition_penalty", 1.2),
+            "normalize": kwargs.get("normalize", True),
+            "chunk_length": kwargs.get("chunk_length", 200)
+        }
+
+        # Handle reference audio
+        if voice:
+            voice_path = Path(voice)
+            if not voice_path.exists():
+                # Try references directory
+                voice_path = self.references_dir / voice
+                if not voice_path.exists():
+                    raise FileNotFoundError(f"Reference audio not found: {voice}")
+
+            # Encode audio as base64
+            with open(voice_path, 'rb') as f:
+                audio_base64 = base64.b64encode(f.read()).decode('utf-8')
+            payload["reference_audio"] = audio_base64
+
+        try:
+            response = requests.post(
+                f"{self.base_url}/v1/tts",
+                json=payload,
+                timeout=300
+            )
+            response.raise_for_status()
+            return response.content
+        except requests.exceptions.RequestException as e:
+            ASCIIColors.error(f"Failed to communicate with Fish Speech server: {e}")
+            raise RuntimeError("Fish Speech server communication failed.") from e
+
+    def list_voices(self, **kwargs) -> List[str]:
+        """Get available reference voices."""
+        self.ensure_server_is_running()
+        try:
+            response = requests.get(f"{self.base_url}/list_voices")
+            response.raise_for_status()
+            return response.json().get("voices", [])
+        except requests.exceptions.RequestException as e:
+            ASCIIColors.error(f"Failed to get voices: {e}")
+            return []
+
+    def list_models(self, **kwargs) -> List[str]:
+        """List available Fish Speech models."""
+        return [
+            "fishaudio/openaudio-s1-mini",
+            "fishaudio/fish-speech-1.5"
+        ]
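A hedged usage sketch for the new binding, assuming the server dependencies can be installed on first run and a reference clip exists under server/references/ (file names here are illustrative):

    from lollms_client.tts_bindings.FishSpeech import FishSpeechClientBinding

    tts = FishSpeechClientBinding(auto_start_server=True, host="localhost", port=8080)
    audio = tts.generate_audio(
        "(happy) Hello from Fish Speech!",
        voice="my_reference.wav",  # resolved against server/references/ if not a full path
        reference_text="Transcript of the reference clip.",
        format="wav",
    )
    with open("out.wav", "wb") as f:
        f.write(audio)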
@@ -0,0 +1,260 @@
+try:
+    import uvicorn
+    from fastapi import FastAPI, HTTPException
+    from fastapi.responses import Response
+    from pydantic import BaseModel
+    import argparse
+    import sys
+    import os
+    from pathlib import Path
+    import asyncio
+    import traceback
+    import base64
+    import io
+    import wave
+    import numpy as np
+    from typing import Optional, List
+    import warnings
+
+    warnings.filterwarnings("ignore", category=UserWarning)
+    warnings.filterwarnings("ignore", category=FutureWarning)
+
+    from ascii_colors import ASCIIColors
+
+    # Fish Speech imports
+    try:
+        ASCIIColors.info("Server: Loading Fish Speech dependencies...")
+        import torch
+        from fish_speech.models.text2semantic.inference import InferenceBuilder as Text2SemanticInference
+        from fish_speech.models.dac.inference import AudioCodecInference
+        ASCIIColors.green("Server: Fish Speech dependencies loaded")
+
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        ASCIIColors.info(f"Server: Using device: {device}")
+        fish_speech_available = True
+
+    except Exception as e:
+        ASCIIColors.error(f"Server: Failed to load Fish Speech: {e}")
+        ASCIIColors.error(f"Server: Traceback:\n{traceback.format_exc()}")
+        fish_speech_available = False
+
+    # API Models
+    class TTSRequest(BaseModel):
+        text: str
+        reference_audio: Optional[str] = None  # base64 encoded
+        reference_text: Optional[str] = None
+        format: str = "wav"
+        top_p: float = 0.9
+        temperature: float = 0.9
+        repetition_penalty: float = 1.2
+        normalize: bool = True
+        chunk_length: int = 200
+
+    class FishSpeechServer:
+        def __init__(self, model_path: str, device: str = "auto", compile: bool = False):
+            self.model_path = Path(model_path)
+            self.device = device if device != "auto" else ("cuda" if torch.cuda.is_available() else "cpu")
+            self.compile = compile and self.device == "cuda"
+            self.model_loaded = False
+            self.model_loading = False
+
+            self.text2semantic_model = None
+            self.codec_model = None
+
+            self.references_dir = Path(__file__).parent / "references"
+            self.references_dir.mkdir(exist_ok=True)
+
+            ASCIIColors.info(f"Server: Fish Speech server initialized (model will load on first request)")
+            ASCIIColors.info(f"Server: Model path: {self.model_path}")
+            ASCIIColors.info(f"Server: Device: {self.device}, Compile: {self.compile}")
+
+        async def _ensure_model_loaded(self):
+            """Lazy load Fish Speech models."""
+            if self.model_loaded:
+                return
+
+            if self.model_loading:
+                while self.model_loading and not self.model_loaded:
+                    await asyncio.sleep(0.1)
+                return
+
+            if not fish_speech_available:
+                raise RuntimeError("Fish Speech not available. Check dependencies.")
+
+            try:
+                self.model_loading = True
+                ASCIIColors.yellow("Server: Loading Fish Speech models (first run may take time)...")
+
+                # Load text2semantic model
+                self.text2semantic_model = Text2SemanticInference(
+                    checkpoint_path=str(self.model_path),
+                    device=self.device,
+                    compile=self.compile
+                )
+
+                # Load codec model
+                codec_path = self.model_path / "codec.pth"
+                if not codec_path.exists():
+                    # Try alternative names
+                    codec_path = self.model_path / "firefly-gan-vq-fsq-8x1024-21hz-generator.pth"
+
+                self.codec_model = AudioCodecInference(
+                    checkpoint_path=str(codec_path),
+                    device=self.device
+                )
+
+                self.model_loaded = True
+                ASCIIColors.green("Server: Fish Speech models loaded successfully")
+
+            except Exception as e:
+                ASCIIColors.error(f"Server: Error loading models: {e}")
+                ASCIIColors.error(f"Server: Traceback:\n{traceback.format_exc()}")
+                self.model_loaded = False
+                raise
+            finally:
+                self.model_loading = False
+
+        async def generate_audio(self, request: TTSRequest) -> bytes:
+            """Generate audio from text using Fish Speech."""
+            await self._ensure_model_loaded()
+
+            if not self.model_loaded:
+                raise RuntimeError("Fish Speech models not loaded")
+
+            try:
+                ASCIIColors.info(f"Server: Generating audio for: '{request.text[:50]}...'")
+
+                # Prepare reference audio if provided
+                reference_tokens = None
+                if request.reference_audio:
+                    audio_bytes = base64.b64decode(request.reference_audio)
+                    # Encode reference audio
+                    reference_tokens = self._encode_reference_audio(
+                        audio_bytes,
+                        request.reference_text
+                    )
+
+                # Generate semantic tokens from text
+                codes = self.text2semantic_model.generate(
+                    text=request.text,
+                    prompt_tokens=reference_tokens,
+                    prompt_text=request.reference_text,
+                    top_p=request.top_p,
+                    temperature=request.temperature,
+                    repetition_penalty=request.repetition_penalty,
+                    max_new_tokens=2048
+                )
+
+                # Generate audio from semantic tokens
+                audio_data = self.codec_model.decode(codes)
+
+                # Convert to bytes
+                if request.format == "wav":
+                    audio_bytes = self._to_wav_bytes(audio_data)
+                elif request.format == "mp3":
+                    audio_bytes = self._to_mp3_bytes(audio_data)
+                else:  # pcm
+                    audio_bytes = audio_data.tobytes()
+
+                ASCIIColors.green(f"Server: Generated {len(audio_bytes)} bytes")
+                return audio_bytes
+
+            except Exception as e:
+                ASCIIColors.error(f"Server: Error generating audio: {e}")
+                ASCIIColors.error(f"Server: Traceback:\n{traceback.format_exc()}")
+                raise
+
+        def _encode_reference_audio(self, audio_bytes: bytes, transcript: Optional[str]) -> np.ndarray:
+            """Encode reference audio to semantic tokens."""
+            # Save temporarily
+            temp_path = self.references_dir / "temp_reference.wav"
+            with open(temp_path, 'wb') as f:
+                f.write(audio_bytes)
+
+            try:
+                tokens = self.codec_model.encode(str(temp_path))
+                return tokens
+            finally:
+                temp_path.unlink(missing_ok=True)
+
+        def _to_wav_bytes(self, audio_data: np.ndarray, sample_rate: int = 44100) -> bytes:
+            """Convert audio array to WAV bytes."""
+            buffer = io.BytesIO()
+            with wave.open(buffer, 'wb') as wf:
+                wf.setnchannels(1)
+                wf.setsampwidth(2)
+                wf.setframerate(sample_rate)
+                wf.writeframes((audio_data * 32767).astype(np.int16).tobytes())
+            return buffer.getvalue()
+
+        def _to_mp3_bytes(self, audio_data: np.ndarray) -> bytes:
+            """Convert audio array to MP3 bytes."""
+            # Requires pydub - fallback to WAV
+            return self._to_wav_bytes(audio_data)
+
+        def list_voices(self) -> List[str]:
+            """List available reference voices."""
+            return [f.stem for f in self.references_dir.glob("*.[wW][aA][vV]")]
+
+    # FastAPI app
+    app = FastAPI(title="Fish Speech Server")
+    fish_server = None
+
+    @app.post("/v1/tts")
+    async def tts_endpoint(request: TTSRequest):
+        try:
+            audio_bytes = await fish_server.generate_audio(request)
+
+            media_type = {
+                "wav": "audio/wav",
+                "mp3": "audio/mpeg",
+                "pcm": "audio/pcm"
+            }.get(request.format, "audio/wav")
+
+            return Response(content=audio_bytes, media_type=media_type)
+        except Exception as e:
+            ASCIIColors.error(f"Server: TTS endpoint error: {e}")
+            raise HTTPException(status_code=500, detail=str(e))
+
+    @app.get("/list_voices")
+    async def list_voices_endpoint():
+        try:
+            voices = fish_server.list_voices()
+            return {"voices": voices}
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=str(e))
+
+    @app.get("/health")
+    async def health_check():
+        return {
+            "status": "running",
+            "fish_speech_available": fish_speech_available,
+            "model_loaded": fish_server.model_loaded if fish_server else False
+        }
+
+    if __name__ == '__main__':
+        parser = argparse.ArgumentParser(description="Fish Speech TTS Server")
+        parser.add_argument("--host", type=str, default="localhost")
+        parser.add_argument("--port", type=int, default=8080)
+        parser.add_argument("--model-path", type=str, required=True)
+        parser.add_argument("--device", type=str, default="auto")
+        parser.add_argument("--compile", action="store_true")
+
+        args = parser.parse_args()
+
+        fish_server = FishSpeechServer(
+            model_path=args.model_path,
+            device=args.device,
+            compile=args.compile
+        )
+
+        ASCIIColors.cyan("--- Fish Speech TTS Server ---")
+        ASCIIColors.green(f"Starting server on http://{args.host}:{args.port}")
+
+        uvicorn.run(app, host=args.host, port=args.port)
+
+except Exception as e:
+    from ascii_colors import ASCIIColors
+    ASCIIColors.red(f"Server: CRITICAL ERROR: {e}")
+    import traceback
+    ASCIIColors.red(f"Server: Traceback:\n{traceback.format_exc()}")