lollms-client 1.6.5__py3-none-any.whl → 1.6.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lollms-client might be problematic. Click here for more details.
- lollms_client/__init__.py +1 -1
- lollms_client/tti_bindings/diffusers/__init__.py +45 -22
- lollms_client/tti_bindings/diffusers/server/main.py +105 -21
- lollms_client/tts_bindings/xtts/__init__.py +106 -81
- lollms_client/tts_bindings/xtts/server/main.py +128 -183
- {lollms_client-1.6.5.dist-info → lollms_client-1.6.6.dist-info}/METADATA +1 -1
- {lollms_client-1.6.5.dist-info → lollms_client-1.6.6.dist-info}/RECORD +10 -10
- {lollms_client-1.6.5.dist-info → lollms_client-1.6.6.dist-info}/WHEEL +0 -0
- {lollms_client-1.6.5.dist-info → lollms_client-1.6.6.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-1.6.5.dist-info → lollms_client-1.6.6.dist-info}/top_level.txt +0 -0
lollms_client/__init__.py
CHANGED
|
@@ -8,7 +8,7 @@ from lollms_client.lollms_utilities import PromptReshaper # Keep general utiliti
|
|
|
8
8
|
from lollms_client.lollms_mcp_binding import LollmsMCPBinding, LollmsMCPBindingManager
|
|
9
9
|
from lollms_client.lollms_llm_binding import LollmsLLMBindingManager
|
|
10
10
|
|
|
11
|
-
__version__ = "1.6.
|
|
11
|
+
__version__ = "1.6.6" # Updated version
|
|
12
12
|
|
|
13
13
|
# Optionally, you could define __all__ if you want to be explicit about exports
|
|
14
14
|
__all__ = [
|
|
@@ -53,6 +53,7 @@ class DiffusersBinding(LollmsTTIBinding):
|
|
|
53
53
|
self.server_dir = self.binding_root / "server"
|
|
54
54
|
self.venv_dir = Path("./venv/tti_diffusers_venv")
|
|
55
55
|
self.models_path = Path(kwargs.get("models_path", "./data/models/diffusers_models")).resolve()
|
|
56
|
+
self.extra_models_path = kwargs.get("extra_models_path")
|
|
56
57
|
self.models_path.mkdir(exist_ok=True, parents=True)
|
|
57
58
|
if self.auto_start_server:
|
|
58
59
|
self.ensure_server_is_running()
|
|
@@ -68,36 +69,47 @@ class DiffusersBinding(LollmsTTIBinding):
|
|
|
68
69
|
return False
|
|
69
70
|
|
|
70
71
|
|
|
71
|
-
def ensure_server_is_running(self
|
|
72
|
+
def ensure_server_is_running(self):
|
|
72
73
|
"""
|
|
73
74
|
Ensures the Diffusers server is running. If not, it attempts to start it
|
|
74
|
-
in a process-safe manner using a file lock.
|
|
75
|
-
|
|
76
|
-
Args:
|
|
77
|
-
continue_if_locked (bool): If True, return immediately if another process
|
|
78
|
-
already holds the lock.
|
|
75
|
+
in a process-safe manner using a file lock. This method is designed to
|
|
76
|
+
prevent race conditions in multi-worker environments.
|
|
79
77
|
"""
|
|
80
78
|
self.server_dir.mkdir(exist_ok=True)
|
|
81
|
-
|
|
79
|
+
# Use a lock file in the binding's server directory for consistency across instances
|
|
80
|
+
lock_path = self.server_dir / "diffusers_server.lock"
|
|
82
81
|
lock = FileLock(lock_path)
|
|
83
82
|
|
|
84
83
|
ASCIIColors.info("Attempting to start or connect to the Diffusers server...")
|
|
84
|
+
|
|
85
|
+
# First, perform a quick check without the lock to avoid unnecessary waiting.
|
|
86
|
+
if self.is_server_running():
|
|
87
|
+
ASCIIColors.green("Diffusers Server is already running and responsive.")
|
|
88
|
+
return
|
|
89
|
+
|
|
85
90
|
try:
|
|
86
|
-
# Try to acquire lock
|
|
87
|
-
|
|
91
|
+
# Try to acquire the lock with a timeout. If another process is starting
|
|
92
|
+
# the server, this will wait until it's finished.
|
|
93
|
+
with lock.acquire(timeout=60):
|
|
94
|
+
# After acquiring the lock, we MUST re-check if the server is running.
|
|
95
|
+
# Another process might have started it and released the lock while we were waiting.
|
|
88
96
|
if not self.is_server_running():
|
|
89
97
|
ASCIIColors.yellow("Lock acquired. Starting dedicated Diffusers server...")
|
|
90
98
|
self.start_server()
|
|
99
|
+
# The process that starts the server is responsible for waiting for it to be ready
|
|
100
|
+
# BEFORE releasing the lock. This is the key to preventing race conditions.
|
|
101
|
+
self._wait_for_server()
|
|
91
102
|
else:
|
|
92
|
-
ASCIIColors.green("Server was started by another process. Connected successfully.")
|
|
103
|
+
ASCIIColors.green("Server was started by another process while we waited. Connected successfully.")
|
|
93
104
|
except Timeout:
|
|
94
|
-
if
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
ASCIIColors.yellow("Could not acquire lock within timeout. Waiting for server to become available...")
|
|
105
|
+
# This happens if the process holding the lock takes more than 60 seconds to start the server.
|
|
106
|
+
# We don't try to start another one. We just wait for the existing one to be ready.
|
|
107
|
+
ASCIIColors.yellow("Could not acquire lock, another process is taking a long time to start the server. Waiting...")
|
|
108
|
+
self._wait_for_server(timeout=300) # Give it a longer timeout here just in case.
|
|
99
109
|
|
|
100
|
-
|
|
110
|
+
# A final verification to ensure we are connected.
|
|
111
|
+
if not self.is_server_running():
|
|
112
|
+
raise RuntimeError("Failed to start or connect to the Diffusers server after all attempts.")
|
|
101
113
|
|
|
102
114
|
def install_server_dependencies(self):
|
|
103
115
|
"""
|
|
@@ -191,6 +203,10 @@ class DiffusersBinding(LollmsTTIBinding):
|
|
|
191
203
|
"--models-path", str(self.models_path.resolve()) # Pass models_path to server
|
|
192
204
|
]
|
|
193
205
|
|
|
206
|
+
if self.extra_models_path:
|
|
207
|
+
resolved_extra_path = Path(self.extra_models_path).resolve()
|
|
208
|
+
command.extend(["--extra-models-path", str(resolved_extra_path)])
|
|
209
|
+
|
|
194
210
|
# Use DETACHED_PROCESS on Windows to allow the server to run independently of the parent process.
|
|
195
211
|
# On Linux/macOS, the process will be daemonized enough to not be killed with the worker.
|
|
196
212
|
creationflags = subprocess.DETACHED_PROCESS if sys.platform == "win32" else 0
|
|
@@ -273,11 +289,14 @@ class DiffusersBinding(LollmsTTIBinding):
|
|
|
273
289
|
pass
|
|
274
290
|
|
|
275
291
|
def generate_image(self, prompt: str, negative_prompt: str = "", **kwargs) -> bytes:
|
|
276
|
-
|
|
292
|
+
params = kwargs.copy()
|
|
293
|
+
if "model_name" not in params and self.config.get("model_name"):
|
|
294
|
+
params["model_name"] = self.config["model_name"]
|
|
295
|
+
|
|
277
296
|
response = self._post_json_request("/generate_image", data={
|
|
278
297
|
"prompt": prompt,
|
|
279
298
|
"negative_prompt": negative_prompt,
|
|
280
|
-
"params":
|
|
299
|
+
"params": params
|
|
281
300
|
})
|
|
282
301
|
return response.content
|
|
283
302
|
|
|
@@ -307,15 +326,19 @@ class DiffusersBinding(LollmsTTIBinding):
|
|
|
307
326
|
raise ValueError(f"Unsupported image type in edit_image: {type(img)}")
|
|
308
327
|
if not images_b64:
|
|
309
328
|
raise ValueError("No valid images were provided to the edit_image function.")
|
|
329
|
+
|
|
330
|
+
params = kwargs.copy()
|
|
331
|
+
if "model_name" not in params and self.config.get("model_name"):
|
|
332
|
+
params["model_name"] = self.config["model_name"]
|
|
310
333
|
|
|
311
334
|
# Translate "mask" to "mask_image" for server compatibility
|
|
312
|
-
if "mask" in
|
|
313
|
-
|
|
335
|
+
if "mask" in params and params["mask"]:
|
|
336
|
+
params["mask_image"] = params.pop("mask")
|
|
314
337
|
|
|
315
338
|
json_payload = {
|
|
316
339
|
"prompt": prompt,
|
|
317
340
|
"images_b64": images_b64,
|
|
318
|
-
"params":
|
|
341
|
+
"params": params
|
|
319
342
|
}
|
|
320
343
|
response = self._post_json_request("/edit_image", data=json_payload)
|
|
321
344
|
return response.content
|
|
@@ -351,4 +374,4 @@ class DiffusersBinding(LollmsTTIBinding):
|
|
|
351
374
|
def __del__(self):
|
|
352
375
|
# The client destructor does not stop the server,
|
|
353
376
|
# as it is a shared resource for all worker processes.
|
|
354
|
-
pass
|
|
377
|
+
pass
|
|
@@ -62,7 +62,7 @@ MODELS_PATH = Path("./models")
|
|
|
62
62
|
CIVITAI_MODELS = {
|
|
63
63
|
"realistic-vision-v6": {
|
|
64
64
|
"display_name": "Realistic Vision V6.0", "url": "https://civitai.com/api/download/models/501240?type=Model&format=SafeTensor&size=pruned&fp=fp16",
|
|
65
|
-
"filename": "realisticVisionV60_v60B1.
|
|
65
|
+
"filename": "realisticVisionV60_v60B1.safensors", "description": "Photorealistic SD1.5 checkpoint.", "owned_by": "civitai"
|
|
66
66
|
},
|
|
67
67
|
"absolute-reality": {
|
|
68
68
|
"display_name": "Absolute Reality", "url": "https://civitai.com/api/download/models/132760?type=Model&format=SafeTensor&size=pruned&fp=fp16",
|
|
@@ -122,19 +122,45 @@ CIVITAI_MODELS = {
|
|
|
122
122
|
},
|
|
123
123
|
}
|
|
124
124
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
125
|
+
HF_PUBLIC_MODELS = {
|
|
126
|
+
"General Purpose & SDXL": [
|
|
127
|
+
{"model_name": "stabilityai/stable-diffusion-xl-base-1.0", "display_name": "Stable Diffusion XL 1.0", "desc": "Official 1024x1024 text-to-image model from Stability AI."},
|
|
128
|
+
{"model_name": "stabilityai/sdxl-turbo", "display_name": "SDXL Turbo", "desc": "A fast, real-time text-to-image model based on SDXL."},
|
|
129
|
+
{"model_name": "kandinsky-community/kandinsky-3", "display_name": "Kandinsky 3", "desc": "A powerful multilingual model with strong prompt understanding and aesthetic quality."},
|
|
130
|
+
{"model_name": "playgroundai/playground-v2.5-1024px-aesthetic", "display_name": "Playground v2.5", "desc": "A high-quality model focused on aesthetic outputs."},
|
|
131
|
+
],
|
|
132
|
+
"Photorealistic": [
|
|
133
|
+
{"model_name": "emilianJR/epiCRealism", "display_name": "epiCRealism", "desc": "A popular community model for generating photorealistic images."},
|
|
134
|
+
{"model_name": "SG161222/Realistic_Vision_V5.1_noVAE", "display_name": "Realistic Vision 5.1", "desc": "One of the most popular realistic models, great for portraits and scenes."},
|
|
135
|
+
{"model_name": "Photon-v1", "display_name": "Photon", "desc": "A model known for high-quality, realistic images with good lighting and detail."},
|
|
136
|
+
],
|
|
137
|
+
"Anime & Illustration": [
|
|
138
|
+
{"model_name": "hakurei/waifu-diffusion", "display_name": "Waifu Diffusion 1.4", "desc": "A widely-used model for generating high-quality anime-style images."},
|
|
139
|
+
{"model_name": "gsdf/Counterfeit-V3.0", "display_name": "Counterfeit V3.0", "desc": "A strong model for illustrative and 2.5D anime styles."},
|
|
140
|
+
{"model_name": "cagliostrolab/animagine-xl-3.0", "display_name": "Animagine XL 3.0", "desc": "A state-of-the-art anime model on the SDXL architecture."},
|
|
141
|
+
],
|
|
142
|
+
"Artistic & Stylized": [
|
|
143
|
+
{"model_name": "wavymulder/Analog-Diffusion", "display_name": "Analog Diffusion", "desc": "Creates images with a vintage, analog film aesthetic."},
|
|
144
|
+
{"model_name": "dreamlike-art/dreamlike-photoreal-2.0", "display_name": "Dreamlike Photoreal 2.0", "desc": "Produces stunning, artistic, and photorealistic images."},
|
|
145
|
+
],
|
|
146
|
+
"Image Editing Tools": [
|
|
147
|
+
{"model_name": "stabilityai/stable-diffusion-xl-refiner-1.0", "display_name": "SDXL Refiner 1.0", "desc": "A dedicated refiner model to improve details in SDXL generations."},
|
|
148
|
+
{"model_name": "Qwen/Qwen-Image-Edit", "display_name": "Qwen Image Edit", "desc": "An instruction-based model for various image editing tasks."},
|
|
149
|
+
{"model_name": "Qwen/Qwen-Image-Edit-2509", "display_name": "Qwen Image Edit Plus", "desc": "Advanced multi-image editing, fusion, and pose transfer."},
|
|
150
|
+
],
|
|
151
|
+
"Legacy & Base Models": [
|
|
152
|
+
{"model_name": "runwayml/stable-diffusion-v1-5", "display_name": "Stable Diffusion 1.5", "desc": "The classic and versatile SD1.5 base model."},
|
|
153
|
+
{"model_name": "stabilityai/stable-diffusion-2-1", "display_name": "Stable Diffusion 2.1", "desc": "The 768x768 base model from the SD2.x series."},
|
|
154
|
+
]
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
HF_GATED_MODELS = {
|
|
158
|
+
"Next-Generation (Gated Access Required)": [
|
|
159
|
+
{"model_name": "stabilityai/stable-diffusion-3-medium-diffusers", "display_name": "Stable Diffusion 3 Medium", "desc": "State-of-the-art model with advanced prompt understanding. Requires free registration."},
|
|
160
|
+
{"model_name": "black-forest-labs/FLUX.1-schnell", "display_name": "FLUX.1 Schnell", "desc": "A powerful and extremely fast next-generation model. Requires access request."},
|
|
161
|
+
{"model_name": "black-forest-labs/FLUX.1-dev", "display_name": "FLUX.1 Dev", "desc": "The larger developer version of the FLUX.1 model. Requires access request."},
|
|
162
|
+
]
|
|
163
|
+
}
|
|
138
164
|
|
|
139
165
|
|
|
140
166
|
TORCH_DTYPE_MAP_STR_TO_OBJ = {
|
|
@@ -228,9 +254,25 @@ class ModelManager:
|
|
|
228
254
|
if not local_path.exists():
|
|
229
255
|
self._download_civitai_model(model_name)
|
|
230
256
|
return local_path
|
|
257
|
+
|
|
258
|
+
# Search in extra models path
|
|
259
|
+
if state.extra_models_path and state.extra_models_path.exists():
|
|
260
|
+
found_paths = list(state.extra_models_path.rglob(model_name))
|
|
261
|
+
if found_paths:
|
|
262
|
+
ASCIIColors.info(f"Found model in extra path: {found_paths[0]}")
|
|
263
|
+
return found_paths[0]
|
|
264
|
+
|
|
265
|
+
# Search in primary models path
|
|
266
|
+
found_paths = list(self.models_path.rglob(model_name))
|
|
267
|
+
if found_paths:
|
|
268
|
+
ASCIIColors.info(f"Found model in primary path: {found_paths[0]}")
|
|
269
|
+
return found_paths[0]
|
|
270
|
+
|
|
271
|
+
# Fallback for HF hub models that are folders, not single files.
|
|
231
272
|
local_path = self.models_path / model_name
|
|
232
273
|
if local_path.exists():
|
|
233
274
|
return local_path
|
|
275
|
+
|
|
234
276
|
return model_name
|
|
235
277
|
|
|
236
278
|
def _download_civitai_model(self, model_key: str):
|
|
@@ -535,9 +577,12 @@ class PipelineRegistry:
|
|
|
535
577
|
return list(self._managers.values())
|
|
536
578
|
|
|
537
579
|
class ServerState:
|
|
538
|
-
def __init__(self, models_path: Path):
|
|
580
|
+
def __init__(self, models_path: Path, extra_models_path: Optional[Path] = None):
|
|
539
581
|
self.models_path = models_path
|
|
582
|
+
self.extra_models_path = extra_models_path
|
|
540
583
|
self.models_path.mkdir(parents=True, exist_ok=True)
|
|
584
|
+
if self.extra_models_path:
|
|
585
|
+
self.extra_models_path.mkdir(parents=True, exist_ok=True)
|
|
541
586
|
self.config_path = self.models_path.parent / "diffusers_server_config.json"
|
|
542
587
|
self.registry = PipelineRegistry()
|
|
543
588
|
self.manager: Optional[ModelManager] = None
|
|
@@ -802,14 +847,49 @@ async def edit_image(request: EditRequestJSON):
|
|
|
802
847
|
|
|
803
848
|
@router.get("/list_models")
|
|
804
849
|
def list_models_endpoint():
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
850
|
+
huggingface_models = []
|
|
851
|
+
# Add public models, organized by category
|
|
852
|
+
for category, models in HF_PUBLIC_MODELS.items():
|
|
853
|
+
for model_info in models:
|
|
854
|
+
huggingface_models.append({
|
|
855
|
+
'model_name': model_info['model_name'],
|
|
856
|
+
'display_name': model_info['display_name'],
|
|
857
|
+
'description': f"({category}) {model_info['desc']}",
|
|
858
|
+
'owned_by': 'huggingface'
|
|
859
|
+
})
|
|
860
|
+
|
|
861
|
+
# Conditionally add gated models if an HF token is provided in the server config
|
|
862
|
+
if state.config.get("hf_token"):
|
|
863
|
+
ASCIIColors.info("HF token detected, including gated models in the list.")
|
|
864
|
+
for category, models in HF_GATED_MODELS.items():
|
|
865
|
+
for model_info in models:
|
|
866
|
+
huggingface_models.append({
|
|
867
|
+
'model_name': model_info['model_name'],
|
|
868
|
+
'display_name': model_info['display_name'],
|
|
869
|
+
'description': f"({category}) {model_info['desc']}",
|
|
870
|
+
'owned_by': 'huggingface'
|
|
871
|
+
})
|
|
872
|
+
else:
|
|
873
|
+
ASCIIColors.info("No HF token found, showing public models only.")
|
|
874
|
+
|
|
875
|
+
civitai_models = [{'model_name': key, 'display_name': info['display_name'], 'description': f"(Civitai) {info['description']}", 'owned_by': info['owned_by']} for key, info in CIVITAI_MODELS.items()]
|
|
876
|
+
|
|
877
|
+
local_files = list_local_models_endpoint()
|
|
878
|
+
local_models = [{'model_name': filename, 'display_name': Path(filename).stem, 'description': '(Local) Local safetensors file.', 'owned_by': 'local_user'} for filename in local_files]
|
|
879
|
+
|
|
880
|
+
return huggingface_models + civitai_models + local_models
|
|
809
881
|
|
|
810
882
|
@router.get("/list_local_models")
|
|
811
883
|
def list_local_models_endpoint():
|
|
812
|
-
|
|
884
|
+
local_models = set()
|
|
885
|
+
# Main models path
|
|
886
|
+
for f in state.models_path.glob("**/*.safetensors"):
|
|
887
|
+
local_models.add(f.name)
|
|
888
|
+
# Extra models path
|
|
889
|
+
if state.extra_models_path and state.extra_models_path.exists():
|
|
890
|
+
for f in state.extra_models_path.glob("**/*.safetensors"):
|
|
891
|
+
local_models.add(f.name)
|
|
892
|
+
return sorted(list(local_models))
|
|
813
893
|
|
|
814
894
|
@router.get("/list_available_models")
|
|
815
895
|
def list_available_models_endpoint():
|
|
@@ -866,14 +946,18 @@ if __name__ == "__main__":
|
|
|
866
946
|
parser.add_argument("--host", type=str, default="localhost", help="Host to bind to.")
|
|
867
947
|
parser.add_argument("--port", type=int, default=9630, help="Port to bind to.")
|
|
868
948
|
parser.add_argument("--models-path", type=str, required=True, help="Path to the models directory.")
|
|
949
|
+
parser.add_argument("--extra-models-path", type=str, default=None, help="Path to an extra models directory.")
|
|
869
950
|
args = parser.parse_args()
|
|
870
951
|
|
|
871
952
|
MODELS_PATH = Path(args.models_path)
|
|
872
|
-
|
|
953
|
+
EXTRA_MODELS_PATH = Path(args.extra_models_path) if args.extra_models_path else None
|
|
954
|
+
state = ServerState(MODELS_PATH, EXTRA_MODELS_PATH)
|
|
873
955
|
|
|
874
956
|
ASCIIColors.cyan(f"--- Diffusers TTI Server ---")
|
|
875
957
|
ASCIIColors.green(f"Starting server on http://{args.host}:{args.port}")
|
|
876
958
|
ASCIIColors.green(f"Serving models from: {MODELS_PATH.resolve()}")
|
|
959
|
+
if EXTRA_MODELS_PATH:
|
|
960
|
+
ASCIIColors.green(f"Serving extra models from: {EXTRA_MODELS_PATH.resolve()}")
|
|
877
961
|
if not DIFFUSERS_AVAILABLE:
|
|
878
962
|
ASCIIColors.error("Diffusers or its dependencies are not installed correctly in the server's environment!")
|
|
879
963
|
else:
|
|
@@ -1,38 +1,51 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
from pathlib import Path
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
4
3
|
import requests
|
|
5
4
|
import subprocess
|
|
6
|
-
import sys
|
|
7
5
|
import time
|
|
8
|
-
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Optional, List
|
|
9
8
|
|
|
10
|
-
#
|
|
9
|
+
# Ensure pipmaster is available.
|
|
10
|
+
try:
|
|
11
|
+
import pipmaster as pm
|
|
12
|
+
except ImportError:
|
|
13
|
+
print("FATAL: pipmaster is not installed. Please install it using: pip install pipmaster")
|
|
14
|
+
sys.exit(1)
|
|
15
|
+
|
|
16
|
+
# Ensure filelock is available for process-safe server startup.
|
|
11
17
|
try:
|
|
12
18
|
from filelock import FileLock, Timeout
|
|
13
19
|
except ImportError:
|
|
14
20
|
print("FATAL: The 'filelock' library is required. Please install it by running: pip install filelock")
|
|
15
21
|
sys.exit(1)
|
|
16
22
|
|
|
23
|
+
from lollms_client.lollms_tts_binding import LollmsTTSBinding
|
|
24
|
+
from ascii_colors import ASCIIColors
|
|
17
25
|
|
|
18
26
|
BindingName = "XTTSClientBinding"
|
|
19
27
|
|
|
20
28
|
class XTTSClientBinding(LollmsTTSBinding):
|
|
29
|
+
"""
|
|
30
|
+
Client binding for a dedicated, managed XTTS server.
|
|
31
|
+
This architecture prevents the heavy XTTS model from being loaded into memory
|
|
32
|
+
by multiple worker processes, solving potential OOM errors and speeding up TTS generation.
|
|
33
|
+
"""
|
|
21
34
|
def __init__(self,
|
|
22
|
-
host: str = "localhost",
|
|
23
|
-
port: int = 8081,
|
|
24
|
-
auto_start_server: bool = True,
|
|
25
35
|
**kwargs):
|
|
26
36
|
|
|
27
37
|
binding_name = "xtts"
|
|
28
38
|
super().__init__(binding_name=binding_name, **kwargs)
|
|
29
|
-
|
|
30
|
-
self.
|
|
31
|
-
self.
|
|
39
|
+
|
|
40
|
+
self.config = kwargs
|
|
41
|
+
self.host = kwargs.get("host", "localhost")
|
|
42
|
+
self.port = kwargs.get("port", 8081)
|
|
43
|
+
self.auto_start_server = kwargs.get("auto_start_server", True)
|
|
32
44
|
self.server_process = None
|
|
33
45
|
self.base_url = f"http://{self.host}:{self.port}"
|
|
34
46
|
self.binding_root = Path(__file__).parent
|
|
35
47
|
self.server_dir = self.binding_root / "server"
|
|
48
|
+
self.venv_dir = Path("./venv/tts_xtts_venv")
|
|
36
49
|
|
|
37
50
|
if self.auto_start_server:
|
|
38
51
|
self.ensure_server_is_running()
|
|
@@ -40,10 +53,10 @@ class XTTSClientBinding(LollmsTTSBinding):
|
|
|
40
53
|
def is_server_running(self) -> bool:
|
|
41
54
|
"""Checks if the server is already running and responsive."""
|
|
42
55
|
try:
|
|
43
|
-
response = requests.get(f"{self.base_url}/status", timeout=
|
|
56
|
+
response = requests.get(f"{self.base_url}/status", timeout=2)
|
|
44
57
|
if response.status_code == 200 and response.json().get("status") == "running":
|
|
45
58
|
return True
|
|
46
|
-
except requests.
|
|
59
|
+
except requests.exceptions.RequestException:
|
|
47
60
|
return False
|
|
48
61
|
return False
|
|
49
62
|
|
|
@@ -52,64 +65,69 @@ class XTTSClientBinding(LollmsTTSBinding):
|
|
|
52
65
|
Ensures the XTTS server is running. If not, it attempts to start it
|
|
53
66
|
in a process-safe manner using a file lock.
|
|
54
67
|
"""
|
|
68
|
+
self.server_dir.mkdir(exist_ok=True)
|
|
69
|
+
lock_path = self.server_dir / "xtts_server.lock"
|
|
70
|
+
lock = FileLock(lock_path)
|
|
71
|
+
|
|
72
|
+
ASCIIColors.info("Attempting to start or connect to the XTTS server...")
|
|
73
|
+
|
|
55
74
|
if self.is_server_running():
|
|
56
|
-
|
|
75
|
+
ASCIIColors.green("XTTS Server is already running and responsive.")
|
|
57
76
|
return
|
|
58
77
|
|
|
59
|
-
lock_path = self.server_dir / "xtts_server.lock"
|
|
60
|
-
lock = FileLock(lock_path, timeout=10) # Wait a maximum of 10 seconds for the lock
|
|
61
|
-
|
|
62
|
-
print("Attempting to start or wait for the XTTS server...")
|
|
63
78
|
try:
|
|
64
|
-
with lock:
|
|
65
|
-
# Double-check after acquiring the lock to handle race conditions
|
|
79
|
+
with lock.acquire(timeout=60):
|
|
66
80
|
if not self.is_server_running():
|
|
67
|
-
|
|
81
|
+
ASCIIColors.yellow("Lock acquired. Starting dedicated XTTS server...")
|
|
68
82
|
self.start_server()
|
|
83
|
+
self._wait_for_server()
|
|
69
84
|
else:
|
|
70
|
-
|
|
85
|
+
ASCIIColors.green("Server was started by another process while we waited. Connected successfully.")
|
|
71
86
|
except Timeout:
|
|
72
|
-
|
|
87
|
+
ASCIIColors.yellow("Could not acquire lock, another process is starting the server. Waiting...")
|
|
88
|
+
self._wait_for_server(timeout=180)
|
|
73
89
|
|
|
74
|
-
|
|
75
|
-
|
|
90
|
+
if not self.is_server_running():
|
|
91
|
+
raise RuntimeError("Failed to start or connect to the XTTS server after all attempts.")
|
|
76
92
|
|
|
77
|
-
def install(self, venv_path, requirements_file):
|
|
78
|
-
print(f"Ensuring virtual environment and dependencies in: {venv_path}")
|
|
79
|
-
pm_v = pm.PackageManager(venv_path=str(venv_path))
|
|
80
93
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
94
|
+
def install_server_dependencies(self):
|
|
95
|
+
"""
|
|
96
|
+
Installs the server's dependencies into a dedicated virtual environment
|
|
97
|
+
using pipmaster, which handles complex packages like PyTorch.
|
|
98
|
+
"""
|
|
99
|
+
ASCIIColors.info(f"Setting up virtual environment in: {self.venv_dir}")
|
|
100
|
+
pm_v = pm.PackageManager(venv_path=str(self.venv_dir))
|
|
101
|
+
|
|
102
|
+
requirements_file = self.server_dir / "requirements.txt"
|
|
103
|
+
|
|
104
|
+
ASCIIColors.info("Installing server dependencies from requirements.txt...")
|
|
105
|
+
success = pm_v.ensure_requirements(str(requirements_file), verbose=True)
|
|
85
106
|
|
|
86
107
|
if not success:
|
|
87
|
-
|
|
88
|
-
|
|
108
|
+
ASCIIColors.error("Failed to install server dependencies. Please check the console output for errors.")
|
|
109
|
+
raise RuntimeError("XTTS server dependency installation failed.")
|
|
110
|
+
|
|
111
|
+
ASCIIColors.green("Server dependencies are satisfied.")
|
|
89
112
|
|
|
90
|
-
print("Dependencies are satisfied. Proceeding to launch server...")
|
|
91
113
|
|
|
92
114
|
def start_server(self):
|
|
93
115
|
"""
|
|
94
|
-
Installs dependencies and launches the server as a background subprocess.
|
|
116
|
+
Installs dependencies and launches the FastAPI server as a background subprocess.
|
|
95
117
|
This method should only be called from within a file lock.
|
|
96
118
|
"""
|
|
97
|
-
requirements_file = self.server_dir / "requirements.txt"
|
|
98
119
|
server_script = self.server_dir / "main.py"
|
|
120
|
+
if not server_script.exists():
|
|
121
|
+
raise FileNotFoundError(f"Server script not found at {server_script}.")
|
|
99
122
|
|
|
100
|
-
|
|
101
|
-
|
|
123
|
+
if not self.venv_dir.exists():
|
|
124
|
+
self.install_server_dependencies()
|
|
102
125
|
|
|
103
|
-
if not venv_path.exists():
|
|
104
|
-
self.install(venv_path, requirements_file)
|
|
105
|
-
|
|
106
|
-
# 2. Get the python executable from the venv
|
|
107
126
|
if sys.platform == "win32":
|
|
108
|
-
python_executable =
|
|
127
|
+
python_executable = self.venv_dir / "Scripts" / "python.exe"
|
|
109
128
|
else:
|
|
110
|
-
python_executable =
|
|
129
|
+
python_executable = self.venv_dir / "bin" / "python"
|
|
111
130
|
|
|
112
|
-
# 3. Launch the server as a detached subprocess
|
|
113
131
|
command = [
|
|
114
132
|
str(python_executable),
|
|
115
133
|
str(server_script),
|
|
@@ -117,54 +135,61 @@ class XTTSClientBinding(LollmsTTSBinding):
|
|
|
117
135
|
"--port", str(self.port)
|
|
118
136
|
]
|
|
119
137
|
|
|
120
|
-
#
|
|
121
|
-
subprocess.
|
|
122
|
-
|
|
123
|
-
|
|
138
|
+
# Use DETACHED_PROCESS on Windows to allow the server to run independently.
|
|
139
|
+
creationflags = subprocess.DETACHED_PROCESS if sys.platform == "win32" else 0
|
|
140
|
+
|
|
141
|
+
self.server_process = subprocess.Popen(command, creationflags=creationflags)
|
|
142
|
+
ASCIIColors.info("XTTS server process launched in the background.")
|
|
124
143
|
|
|
125
|
-
def _wait_for_server(self, timeout=
|
|
126
|
-
|
|
144
|
+
def _wait_for_server(self, timeout=120):
|
|
145
|
+
"""Waits for the server to become responsive."""
|
|
146
|
+
ASCIIColors.info("Waiting for XTTS server to become available...")
|
|
127
147
|
start_time = time.time()
|
|
128
148
|
while time.time() - start_time < timeout:
|
|
129
149
|
if self.is_server_running():
|
|
130
|
-
|
|
150
|
+
ASCIIColors.green("XTTS Server is up and running.")
|
|
131
151
|
return
|
|
132
|
-
time.sleep(
|
|
133
|
-
|
|
152
|
+
time.sleep(2)
|
|
134
153
|
raise RuntimeError("Failed to connect to the XTTS server within the specified timeout.")
|
|
135
154
|
|
|
136
|
-
def stop_server(self):
|
|
137
|
-
"""
|
|
138
|
-
In a multi-worker setup, a single client instance should not stop the shared server.
|
|
139
|
-
The server will continue running until the main application is terminated.
|
|
140
|
-
"""
|
|
141
|
-
if self.server_process:
|
|
142
|
-
print("XTTS Client: An instance is shutting down, but the shared server will remain active for other workers.")
|
|
143
|
-
self.server_process = None
|
|
144
|
-
|
|
145
155
|
def __del__(self):
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
"""
|
|
156
|
+
# The client destructor does not stop the server,
|
|
157
|
+
# as it is a shared resource for other processes.
|
|
149
158
|
pass
|
|
150
159
|
|
|
151
160
|
def generate_audio(self, text: str, voice: Optional[str] = None, **kwargs) -> bytes:
|
|
152
161
|
"""Generate audio by calling the server's API"""
|
|
153
|
-
payload = {"text": text, "voice": voice
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
162
|
+
payload = {"text": text, "voice": voice}
|
|
163
|
+
# Pass other kwargs from the description file (language, split_sentences)
|
|
164
|
+
payload.update(kwargs)
|
|
165
|
+
|
|
166
|
+
try:
|
|
167
|
+
response = requests.post(f"{self.base_url}/generate_audio", json=payload, timeout=300)
|
|
168
|
+
response.raise_for_status()
|
|
169
|
+
return response.content
|
|
170
|
+
except requests.exceptions.RequestException as e:
|
|
171
|
+
ASCIIColors.error(f"Failed to communicate with XTTS server at {self.base_url}.")
|
|
172
|
+
ASCIIColors.error(f"Error details: {e}")
|
|
173
|
+
raise RuntimeError("Communication with the XTTS server failed.") from e
|
|
174
|
+
|
|
157
175
|
|
|
158
176
|
def list_voices(self, **kwargs) -> List[str]:
|
|
159
177
|
"""Get available voices from the server"""
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
178
|
+
try:
|
|
179
|
+
response = requests.get(f"{self.base_url}/list_voices")
|
|
180
|
+
response.raise_for_status()
|
|
181
|
+
return response.json().get("voices", [])
|
|
182
|
+
except requests.exceptions.RequestException as e:
|
|
183
|
+
ASCIIColors.error(f"Failed to get voices from XTTS server: {e}")
|
|
184
|
+
return []
|
|
163
185
|
|
|
164
186
|
|
|
165
|
-
def list_models(self) -> list:
|
|
166
|
-
"""Lists models"""
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
187
|
+
def list_models(self, **kwargs) -> list:
|
|
188
|
+
"""Lists models supported by the server"""
|
|
189
|
+
try:
|
|
190
|
+
response = requests.get(f"{self.base_url}/list_models")
|
|
191
|
+
response.raise_for_status()
|
|
192
|
+
return response.json().get("models", [])
|
|
193
|
+
except requests.exceptions.RequestException as e:
|
|
194
|
+
ASCIIColors.error(f"Failed to get models from XTTS server: {e}")
|
|
195
|
+
return []
|
|
@@ -13,24 +13,26 @@ try:
|
|
|
13
13
|
import wave
|
|
14
14
|
import numpy as np
|
|
15
15
|
import tempfile
|
|
16
|
+
|
|
17
|
+
# Use ascii_colors for logging
|
|
18
|
+
from ascii_colors import ASCIIColors
|
|
16
19
|
|
|
17
20
|
# --- XTTS Implementation ---
|
|
18
21
|
try:
|
|
19
|
-
|
|
22
|
+
ASCIIColors.info("Server: Loading XTTS dependencies...")
|
|
20
23
|
import torch
|
|
21
|
-
import torchaudio
|
|
22
24
|
from TTS.api import TTS
|
|
23
|
-
|
|
25
|
+
ASCIIColors.green("Server: XTTS dependencies loaded successfully")
|
|
24
26
|
|
|
25
27
|
# Check for CUDA availability
|
|
26
28
|
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
27
|
-
|
|
29
|
+
ASCIIColors.info(f"Server: Using device: {device}")
|
|
28
30
|
|
|
29
31
|
xtts_available = True
|
|
30
32
|
|
|
31
33
|
except Exception as e:
|
|
32
|
-
|
|
33
|
-
|
|
34
|
+
ASCIIColors.error(f"Server: Failed to load XTTS dependencies: {e}")
|
|
35
|
+
ASCIIColors.error(f"Server: Traceback:\n{traceback.format_exc()}")
|
|
34
36
|
xtts_available = False
|
|
35
37
|
|
|
36
38
|
# --- API Models ---
|
|
@@ -38,18 +40,21 @@ try:
|
|
|
38
40
|
text: str
|
|
39
41
|
voice: Optional[str] = None
|
|
40
42
|
language: Optional[str] = "en"
|
|
41
|
-
speaker_wav
|
|
43
|
+
# speaker_wav is kept for backward compatibility but voice is preferred
|
|
44
|
+
speaker_wav: Optional[str] = None
|
|
45
|
+
split_sentences: Optional[bool] = True
|
|
42
46
|
|
|
43
47
|
class XTTSServer:
|
|
44
48
|
def __init__(self):
|
|
45
49
|
self.model = None
|
|
46
50
|
self.model_loaded = False
|
|
47
51
|
self.model_loading = False # Flag to prevent concurrent loading
|
|
52
|
+
self.available_models = ["tts_models/multilingual/multi-dataset/xtts_v2"]
|
|
53
|
+
self.voices_dir = Path(__file__).parent / "voices"
|
|
54
|
+
self.voices_dir.mkdir(exist_ok=True)
|
|
48
55
|
self.available_voices = self._load_available_voices()
|
|
49
|
-
self.available_models = ["xtts_v2"]
|
|
50
56
|
|
|
51
|
-
|
|
52
|
-
print("Server: XTTS server initialized (model will be loaded on first request)")
|
|
57
|
+
ASCIIColors.info("Server: XTTS server initialized (model will be loaded on first request)")
|
|
53
58
|
|
|
54
59
|
async def _ensure_model_loaded(self):
|
|
55
60
|
"""Ensure the XTTS model is loaded (lazy loading)"""
|
|
@@ -63,171 +68,130 @@ try:
|
|
|
63
68
|
return
|
|
64
69
|
|
|
65
70
|
if not xtts_available:
|
|
66
|
-
raise RuntimeError("XTTS library not available")
|
|
71
|
+
raise RuntimeError("XTTS library not available. Please ensure all dependencies are installed correctly in the venv.")
|
|
67
72
|
|
|
68
73
|
try:
|
|
69
74
|
self.model_loading = True
|
|
70
|
-
|
|
75
|
+
ASCIIColors.yellow("Server: Loading XTTS model for the first time (this may take a few minutes)...")
|
|
71
76
|
|
|
72
77
|
# Initialize XTTS model
|
|
73
|
-
self.model = TTS(
|
|
78
|
+
self.model = TTS(self.available_models[0]).to(device)
|
|
74
79
|
|
|
75
80
|
self.model_loaded = True
|
|
76
|
-
|
|
81
|
+
ASCIIColors.green("Server: XTTS model loaded successfully")
|
|
77
82
|
|
|
78
83
|
except Exception as e:
|
|
79
|
-
|
|
80
|
-
|
|
84
|
+
ASCIIColors.error(f"Server: Error loading XTTS model: {e}")
|
|
85
|
+
ASCIIColors.error(f"Server: Traceback:\n{traceback.format_exc()}")
|
|
81
86
|
self.model_loaded = False
|
|
82
87
|
raise
|
|
83
88
|
finally:
|
|
84
89
|
self.model_loading = False
|
|
85
90
|
|
|
86
91
|
def _load_available_voices(self) -> List[str]:
|
|
87
|
-
"""Load and return available voices"""
|
|
92
|
+
"""Load and return available voices, ensuring 'default_voice' is always present."""
|
|
88
93
|
try:
|
|
89
|
-
|
|
90
|
-
voices_dir = Path(__file__).parent / "voices"
|
|
91
|
-
voices = []
|
|
94
|
+
self.voices_dir.mkdir(exist_ok=True)
|
|
92
95
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
voices.append(voice_file.stem)
|
|
96
|
+
# Scan for case-insensitive .wav and .mp3 files and get their stems
|
|
97
|
+
found_voices = {p.stem for p in self.voices_dir.glob("*.[wW][aA][vV]")}
|
|
98
|
+
found_voices.update({p.stem for p in self.voices_dir.glob("*.[mM][pP]3")})
|
|
97
99
|
|
|
98
|
-
#
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
100
|
+
# GUARANTEE 'default_voice' is in the list for UI consistency.
|
|
101
|
+
all_voices = {"default_voice"}.union(found_voices)
|
|
102
|
+
|
|
103
|
+
sorted_voices = sorted(list(all_voices))
|
|
104
|
+
ASCIIColors.info(f"Discovered voices: {sorted_voices}")
|
|
105
|
+
return sorted_voices
|
|
103
106
|
|
|
104
107
|
except Exception as e:
|
|
105
|
-
|
|
106
|
-
return
|
|
108
|
+
ASCIIColors.error(f"Server: Error scanning voices directory: {e}")
|
|
109
|
+
# If scanning fails, it's crucial to still return the default.
|
|
110
|
+
return ["default_voice"]
|
|
107
111
|
|
|
108
|
-
|
|
109
|
-
|
|
112
|
+
def _get_speaker_wav_path(self, voice_name: str) -> Optional[str]:
|
|
113
|
+
"""Find the path to a speaker wav/mp3 file from its name."""
|
|
114
|
+
if not voice_name:
|
|
115
|
+
return None
|
|
116
|
+
|
|
117
|
+
# Case 1: voice_name is an absolute path that exists
|
|
118
|
+
if os.path.isabs(voice_name) and os.path.exists(voice_name):
|
|
119
|
+
return voice_name
|
|
120
|
+
|
|
121
|
+
# Case 2: voice_name is a name in the voices directory (check for .mp3 then .wav)
|
|
122
|
+
mp3_path = self.voices_dir / f"{voice_name}.mp3"
|
|
123
|
+
if mp3_path.exists():
|
|
124
|
+
return str(mp3_path)
|
|
125
|
+
|
|
126
|
+
wav_path = self.voices_dir / f"{voice_name}.wav"
|
|
127
|
+
if wav_path.exists():
|
|
128
|
+
return str(wav_path)
|
|
129
|
+
|
|
130
|
+
return None
|
|
131
|
+
|
|
132
|
+
async def generate_audio(self, req: GenerationRequest) -> bytes:
|
|
110
133
|
"""Generate audio from text using XTTS"""
|
|
111
|
-
# Ensure model is loaded before proceeding
|
|
112
134
|
await self._ensure_model_loaded()
|
|
113
135
|
|
|
114
136
|
if not self.model_loaded or self.model is None:
|
|
115
|
-
raise RuntimeError("XTTS model failed to load")
|
|
137
|
+
raise RuntimeError("XTTS model failed to load or is not available.")
|
|
116
138
|
|
|
117
139
|
try:
|
|
118
|
-
|
|
119
|
-
|
|
140
|
+
text_to_generate = req.text
|
|
141
|
+
ASCIIColors.info(f"Server: Generating audio for: '{text_to_generate[:50]}{'...' if len(text_to_generate) > 50 else ''}'")
|
|
142
|
+
ASCIIColors.info(f"Server: Language: {req.language}, Requested Voice: {req.voice}")
|
|
143
|
+
|
|
144
|
+
# Determine which voice name to use. Priority: speaker_wav > voice > 'default_voice'
|
|
145
|
+
voice_to_find = req.speaker_wav or req.voice or "default_voice"
|
|
146
|
+
speaker_wav_path = self._get_speaker_wav_path(voice_to_find)
|
|
147
|
+
|
|
148
|
+
# If the chosen voice wasn't found and it wasn't the default, try the default as a fallback.
|
|
149
|
+
if not speaker_wav_path and voice_to_find != "default_voice":
|
|
150
|
+
ASCIIColors.warning(f"Voice '{voice_to_find}' not found. Falling back to 'default_voice'.")
|
|
151
|
+
speaker_wav_path = self._get_speaker_wav_path("default_voice")
|
|
152
|
+
|
|
153
|
+
# If still no path, it's a critical error because even the default is missing.
|
|
154
|
+
if not speaker_wav_path:
|
|
155
|
+
available = self._get_all_available_voice_files()
|
|
156
|
+
raise RuntimeError(
|
|
157
|
+
f"XTTS requires a speaker reference file, but none could be found.\n"
|
|
158
|
+
f"Attempted to use '{voice_to_find}' but it was not found, and the fallback 'default_voice.mp3' is also missing from the voices folder.\n"
|
|
159
|
+
f"Please add audio files to the '{self.voices_dir.resolve()}' directory. Available files: {available or 'None'}"
|
|
160
|
+
)
|
|
120
161
|
|
|
121
|
-
|
|
122
|
-
|
|
162
|
+
ASCIIColors.info(f"Server: Using speaker reference: {speaker_wav_path}")
|
|
163
|
+
|
|
164
|
+
# Generate audio using XTTS
|
|
165
|
+
wav_chunks = self.model.tts(
|
|
166
|
+
text=text_to_generate,
|
|
167
|
+
speaker_wav=speaker_wav_path,
|
|
168
|
+
language=req.language,
|
|
169
|
+
split_sentences=req.split_sentences
|
|
170
|
+
)
|
|
123
171
|
|
|
124
|
-
#
|
|
125
|
-
|
|
126
|
-
speaker_wav_path = speaker_wav
|
|
127
|
-
print(f"Server: Using provided speaker_wav: {speaker_wav_path}")
|
|
172
|
+
# Combine chunks into a single audio stream
|
|
173
|
+
audio_data = np.array(wav_chunks, dtype=np.float32)
|
|
128
174
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
else:
|
|
136
|
-
# Look for voice file in voices directory
|
|
137
|
-
voices_dir = Path(__file__).parent / "voices"
|
|
138
|
-
potential_voice_path = voices_dir / f"{voice}.wav"
|
|
139
|
-
if potential_voice_path.exists():
|
|
140
|
-
speaker_wav_path = str(potential_voice_path)
|
|
141
|
-
print(f"Server: Using custom voice file: {speaker_wav_path}")
|
|
142
|
-
else:
|
|
143
|
-
print(f"Server: Voice '{voice}' not found in voices directory")
|
|
144
|
-
else:
|
|
145
|
-
voice = "default_voice"
|
|
146
|
-
# Look for voice file in voices directory
|
|
147
|
-
voices_dir = Path(__file__).parent / "voices"
|
|
148
|
-
potential_voice_path = voices_dir / f"{voice}.mp3"
|
|
149
|
-
if potential_voice_path.exists():
|
|
150
|
-
speaker_wav_path = str(potential_voice_path)
|
|
151
|
-
print(f"Server: Using custom voice file: {speaker_wav_path}")
|
|
152
|
-
else:
|
|
153
|
-
print(f"Server: Voice '{voice}' not found in voices directory")
|
|
154
|
-
# Create a temporary file for output
|
|
155
|
-
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
|
|
156
|
-
temp_output_path = temp_file.name
|
|
175
|
+
buffer = io.BytesIO()
|
|
176
|
+
with wave.open(buffer, 'wb') as wf:
|
|
177
|
+
wf.setnchannels(1)
|
|
178
|
+
wf.setsampwidth(2) # 16-bit
|
|
179
|
+
wf.setframerate(self.model.synthesizer.output_sample_rate)
|
|
180
|
+
wf.writeframes((audio_data * 32767).astype(np.int16).tobytes())
|
|
157
181
|
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
self.model.tts_to_file(
|
|
163
|
-
text=text,
|
|
164
|
-
speaker_wav=speaker_wav_path,
|
|
165
|
-
language=language,
|
|
166
|
-
file_path=temp_output_path
|
|
167
|
-
)
|
|
168
|
-
else:
|
|
169
|
-
print("Server: No valid speaker reference found, trying default")
|
|
170
|
-
# For XTTS without speaker reference, try to find a default
|
|
171
|
-
default_speaker = self._get_default_speaker_file()
|
|
172
|
-
if default_speaker and os.path.exists(default_speaker):
|
|
173
|
-
print(f"Server: Using default speaker: {default_speaker}")
|
|
174
|
-
self.model.tts_to_file(
|
|
175
|
-
text=text,
|
|
176
|
-
speaker_wav=default_speaker,
|
|
177
|
-
language=language,
|
|
178
|
-
file_path=temp_output_path
|
|
179
|
-
)
|
|
180
|
-
else:
|
|
181
|
-
# Create a more helpful error message
|
|
182
|
-
available_voices = self._get_all_available_voice_files()
|
|
183
|
-
error_msg = f"No speaker reference available. XTTS requires a speaker reference file.\n"
|
|
184
|
-
error_msg += f"Attempted to use: {speaker_wav_path if speaker_wav_path else 'None'}\n"
|
|
185
|
-
error_msg += f"Available voice files: {available_voices}"
|
|
186
|
-
raise RuntimeError(error_msg)
|
|
187
|
-
|
|
188
|
-
# Read the generated audio file
|
|
189
|
-
with open(temp_output_path, 'rb') as f:
|
|
190
|
-
audio_bytes = f.read()
|
|
191
|
-
|
|
192
|
-
print(f"Server: Generated {len(audio_bytes)} bytes of audio")
|
|
193
|
-
return audio_bytes
|
|
194
|
-
|
|
195
|
-
finally:
|
|
196
|
-
# Clean up temporary file
|
|
197
|
-
if os.path.exists(temp_output_path):
|
|
198
|
-
os.unlink(temp_output_path)
|
|
182
|
+
audio_bytes = buffer.getvalue()
|
|
183
|
+
|
|
184
|
+
ASCIIColors.green(f"Server: Generated {len(audio_bytes)} bytes of audio.")
|
|
185
|
+
return audio_bytes
|
|
199
186
|
|
|
200
187
|
except Exception as e:
|
|
201
|
-
|
|
202
|
-
|
|
188
|
+
ASCIIColors.error(f"Server: Error generating audio: {e}")
|
|
189
|
+
ASCIIColors.error(f"Server: Traceback:\n{traceback.format_exc()}")
|
|
203
190
|
raise
|
|
204
191
|
|
|
205
192
|
def _get_all_available_voice_files(self) -> List[str]:
|
|
206
193
|
"""Get list of all available voice files for debugging"""
|
|
207
|
-
|
|
208
|
-
voice_files = []
|
|
209
|
-
|
|
210
|
-
if voices_dir.exists():
|
|
211
|
-
voice_files = [str(f) for f in voices_dir.glob("*.wav")]
|
|
212
|
-
|
|
213
|
-
return voice_files
|
|
214
|
-
|
|
215
|
-
def _get_default_speaker_file(self) -> Optional[str]:
|
|
216
|
-
"""Get path to default speaker file"""
|
|
217
|
-
voices_dir = Path(__file__).parent / "voices"
|
|
218
|
-
|
|
219
|
-
# Look for a default speaker file
|
|
220
|
-
for filename in ["default.wav", "speaker.wav", "reference.wav"]:
|
|
221
|
-
potential_path = voices_dir / filename
|
|
222
|
-
if potential_path.exists():
|
|
223
|
-
return str(potential_path)
|
|
224
|
-
|
|
225
|
-
# If no default found, look for any wav file
|
|
226
|
-
wav_files = list(voices_dir.glob("*.wav"))
|
|
227
|
-
if wav_files:
|
|
228
|
-
return str(wav_files[0])
|
|
229
|
-
|
|
230
|
-
return None
|
|
194
|
+
return [f.name for f in self.voices_dir.glob("*.*")]
|
|
231
195
|
|
|
232
196
|
def list_voices(self) -> List[str]:
|
|
233
197
|
"""Return list of available voices"""
|
|
@@ -241,47 +205,36 @@ try:
|
|
|
241
205
|
app = FastAPI(title="XTTS Server")
|
|
242
206
|
router = APIRouter()
|
|
243
207
|
xtts_server = XTTSServer()
|
|
244
|
-
model_lock = asyncio.Lock() # Ensure
|
|
208
|
+
model_lock = asyncio.Lock() # Ensure only one generation happens at a time on the model
|
|
245
209
|
|
|
246
210
|
# --- API Endpoints ---
|
|
247
211
|
@router.post("/generate_audio")
|
|
248
|
-
async def
|
|
212
|
+
async def api_generate_audio(request: GenerationRequest):
|
|
249
213
|
async with model_lock:
|
|
250
214
|
try:
|
|
251
|
-
print(f"request.language:{request.language}")
|
|
252
|
-
audio_bytes = await xtts_server.generate_audio(
|
|
253
|
-
text=request.text,
|
|
254
|
-
voice=request.voice,
|
|
255
|
-
language=request.language,
|
|
256
|
-
speaker_wav=request.speaker_wav
|
|
257
|
-
)
|
|
258
215
|
from fastapi.responses import Response
|
|
216
|
+
audio_bytes = await xtts_server.generate_audio(request)
|
|
259
217
|
return Response(content=audio_bytes, media_type="audio/wav")
|
|
260
218
|
except Exception as e:
|
|
261
|
-
|
|
262
|
-
print(f"Server: ERROR traceback:\n{traceback.format_exc()}")
|
|
219
|
+
ASCIIColors.error(f"Server: ERROR in generate_audio endpoint: {e}")
|
|
263
220
|
raise HTTPException(status_code=500, detail=str(e))
|
|
264
221
|
|
|
265
222
|
@router.get("/list_voices")
|
|
266
|
-
async def
|
|
223
|
+
async def api_list_voices():
|
|
267
224
|
try:
|
|
268
225
|
voices = xtts_server.list_voices()
|
|
269
|
-
print(f"Server: Returning {len(voices)} voices: {voices}")
|
|
270
226
|
return {"voices": voices}
|
|
271
227
|
except Exception as e:
|
|
272
|
-
|
|
273
|
-
print(f"Server: ERROR traceback:\n{traceback.format_exc()}")
|
|
228
|
+
ASCIIColors.error(f"Server: ERROR in list_voices endpoint: {e}")
|
|
274
229
|
raise HTTPException(status_code=500, detail=str(e))
|
|
275
230
|
|
|
276
231
|
@router.get("/list_models")
|
|
277
|
-
async def
|
|
232
|
+
async def api_list_models():
|
|
278
233
|
try:
|
|
279
234
|
models = xtts_server.list_models()
|
|
280
|
-
print(f"Server: Returning {len(models)} models: {models}")
|
|
281
235
|
return {"models": models}
|
|
282
236
|
except Exception as e:
|
|
283
|
-
|
|
284
|
-
print(f"Server: ERROR traceback:\n{traceback.format_exc()}")
|
|
237
|
+
ASCIIColors.error(f"Server: ERROR in list_models endpoint: {e}")
|
|
285
238
|
raise HTTPException(status_code=500, detail=str(e))
|
|
286
239
|
|
|
287
240
|
@router.get("/status")
|
|
@@ -290,41 +243,33 @@ try:
|
|
|
290
243
|
"status": "running",
|
|
291
244
|
"xtts_available": xtts_available,
|
|
292
245
|
"model_loaded": xtts_server.model_loaded,
|
|
293
|
-
"
|
|
294
|
-
"voices_count": len(xtts_server.available_voices),
|
|
295
|
-
"device": torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CPU"
|
|
246
|
+
"device": device if xtts_available else "N/A"
|
|
296
247
|
}
|
|
297
248
|
|
|
298
|
-
# Add a health check endpoint that responds immediately
|
|
299
|
-
@router.get("/health")
|
|
300
|
-
async def health_check():
|
|
301
|
-
return {"status": "healthy", "ready": True}
|
|
302
|
-
|
|
303
249
|
app.include_router(router)
|
|
304
250
|
|
|
305
251
|
# --- Server Startup ---
|
|
306
252
|
if __name__ == '__main__':
|
|
307
|
-
parser = argparse.ArgumentParser(description="XTTS
|
|
253
|
+
parser = argparse.ArgumentParser(description="LoLLMs XTTS Server")
|
|
308
254
|
parser.add_argument("--host", type=str, default="localhost", help="Host to bind the server to.")
|
|
309
|
-
parser.add_argument("--port", type=int, default=
|
|
255
|
+
parser.add_argument("--port", type=int, default=8081, help="Port to bind the server to.")
|
|
310
256
|
|
|
311
257
|
args = parser.parse_args()
|
|
312
258
|
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
print(f"Server: Available voices: {len(xtts_server.available_voices)}")
|
|
317
|
-
if xtts_available:
|
|
318
|
-
print(f"Server: Device: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU'}")
|
|
259
|
+
ASCIIColors.cyan("--- LoLLMs XTTS Server ---")
|
|
260
|
+
ASCIIColors.green(f"Starting server on http://{args.host}:{args.port}")
|
|
261
|
+
ASCIIColors.info(f"Voices directory: {xtts_server.voices_dir.resolve()}")
|
|
319
262
|
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
print(f"Server: CRITICAL ERROR running server: {e}")
|
|
328
|
-
print(f"Server: Traceback:\n{traceback.format_exc()}")
|
|
263
|
+
if not xtts_available:
|
|
264
|
+
ASCIIColors.red("Warning: XTTS dependencies not found. Server will run but generation will fail.")
|
|
265
|
+
else:
|
|
266
|
+
ASCIIColors.info(f"Detected device: {device}")
|
|
267
|
+
|
|
268
|
+
uvicorn.run(app, host=args.host, port=args.port)
|
|
269
|
+
|
|
329
270
|
except Exception as e:
|
|
330
|
-
|
|
271
|
+
# This will catch errors during initial imports
|
|
272
|
+
from ascii_colors import ASCIIColors
|
|
273
|
+
ASCIIColors.red(f"Server: CRITICAL ERROR during startup: {e}")
|
|
274
|
+
import traceback
|
|
275
|
+
ASCIIColors.red(f"Server: Traceback:\n{traceback.format_exc()}")```
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
lollms_client/__init__.py,sha256=
|
|
1
|
+
lollms_client/__init__.py,sha256=51YtCHNJCmroyA9htiIgjui1ZSFfkn_zhhe0USpE8nc,1146
|
|
2
2
|
lollms_client/lollms_agentic.py,sha256=pQiMEuB_XkG29-SW6u4KTaMFPr6eKqacInggcCuCW3k,13914
|
|
3
3
|
lollms_client/lollms_config.py,sha256=goEseDwDxYJf3WkYJ4IrLXwg3Tfw73CXV2Avg45M_hE,21876
|
|
4
4
|
lollms_client/lollms_core.py,sha256=Un74iLbnnn2yZYH6HBNRz1mTZ454NEMBEndS4nvh3ZI,244887
|
|
@@ -52,8 +52,8 @@ lollms_client/stt_bindings/lollms/__init__.py,sha256=9Vmn1sQQZKLGLe7nZnc-0LnNeSY
|
|
|
52
52
|
lollms_client/stt_bindings/whisper/__init__.py,sha256=1Ej67GdRKBy1bba14jMaYDYHiZkxJASkWm5eF07ztDQ,15363
|
|
53
53
|
lollms_client/stt_bindings/whispercpp/__init__.py,sha256=xSAQRjAhljak3vWCpkP0Vmdb6WmwTzPjXyaIB85KLGU,21439
|
|
54
54
|
lollms_client/tti_bindings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
55
|
-
lollms_client/tti_bindings/diffusers/__init__.py,sha256=
|
|
56
|
-
lollms_client/tti_bindings/diffusers/server/main.py,sha256=
|
|
55
|
+
lollms_client/tti_bindings/diffusers/__init__.py,sha256=esrcyy_z_6HVCFKMVXl1h_qY_pX3kMHwO81M2C8hSIg,17706
|
|
56
|
+
lollms_client/tti_bindings/diffusers/server/main.py,sha256=PQ3WXhkQzEzyT100k7nu1ZHQtkGphvpWNGl7Bcg26eY,49593
|
|
57
57
|
lollms_client/tti_bindings/gemini/__init__.py,sha256=eYGz6gnOxWGdJu2O0H-EwGG-Hg7Yo3Hzsgn4neqx29Q,12963
|
|
58
58
|
lollms_client/tti_bindings/leonardo_ai/__init__.py,sha256=pUbF1rKPZib1x0Kn2Bk1A7sTFWmZzNG02kmW6Iu1j2w,5885
|
|
59
59
|
lollms_client/tti_bindings/lollms/__init__.py,sha256=5Tnsn4b17djvieQkcjtIDBm3qf0pg5ZWWov-4_2wmo0,8762
|
|
@@ -76,13 +76,13 @@ lollms_client/tts_bindings/piper_tts/__init__.py,sha256=7LQUuWV8I3IEdacc65NRHmDf
|
|
|
76
76
|
lollms_client/tts_bindings/piper_tts/server/install_piper.py,sha256=g71Ne2T18wAytOPipfQ9DNeTAOD9PrII5qC-vr9DtLA,3256
|
|
77
77
|
lollms_client/tts_bindings/piper_tts/server/main.py,sha256=DMozfSR1aCbrlmOXltRFjtXhYhXajsGcNKQjsWgRwZk,17402
|
|
78
78
|
lollms_client/tts_bindings/piper_tts/server/setup_voices.py,sha256=UdHaPa5aNcw8dR-aRGkZr2OfSFFejH79lXgfwT0P3ss,1964
|
|
79
|
-
lollms_client/tts_bindings/xtts/__init__.py,sha256=
|
|
80
|
-
lollms_client/tts_bindings/xtts/server/main.py,sha256=
|
|
79
|
+
lollms_client/tts_bindings/xtts/__init__.py,sha256=lTlExBPZ97FPaf9DoqxE4ilwwO5y88dPOHeRaR5BCnc,8002
|
|
80
|
+
lollms_client/tts_bindings/xtts/server/main.py,sha256=JYKUzg4qFOGW8O_QDb9ChEdhcPRSccdwOlR3q-kJX7I,12306
|
|
81
81
|
lollms_client/tts_bindings/xtts/server/setup_voices.py,sha256=UdHaPa5aNcw8dR-aRGkZr2OfSFFejH79lXgfwT0P3ss,1964
|
|
82
82
|
lollms_client/ttv_bindings/__init__.py,sha256=UZ8o2izQOJLQgtZ1D1cXoNST7rzqW22rL2Vufc7ddRc,3141
|
|
83
83
|
lollms_client/ttv_bindings/lollms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
84
|
-
lollms_client-1.6.
|
|
85
|
-
lollms_client-1.6.
|
|
86
|
-
lollms_client-1.6.
|
|
87
|
-
lollms_client-1.6.
|
|
88
|
-
lollms_client-1.6.
|
|
84
|
+
lollms_client-1.6.6.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
|
|
85
|
+
lollms_client-1.6.6.dist-info/METADATA,sha256=i6Gb5wKrXNF6OPUCz41s5YbpBY5HEvLdAD5a6ONZV84,76835
|
|
86
|
+
lollms_client-1.6.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
87
|
+
lollms_client-1.6.6.dist-info/top_level.txt,sha256=Bk_kz-ri6Arwsk7YG-T5VsRorV66uVhcHGvb_g2WqgE,14
|
|
88
|
+
lollms_client-1.6.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|