lollms-client 1.3.4__py3-none-any.whl → 1.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of lollms-client might be problematic.
- lollms_client/__init__.py +1 -1
- lollms_client/llm_bindings/llamacpp/__init__.py +354 -233
- lollms_client/llm_bindings/lollms/__init__.py +152 -153
- lollms_client/lollms_core.py +162 -76
- lollms_client/lollms_discussion.py +2 -2
- lollms_client/lollms_llm_binding.py +3 -3
- lollms_client/lollms_tts_binding.py +80 -67
- lollms_client/tts_bindings/bark/__init__.py +110 -329
- lollms_client/tts_bindings/bark/server/install_bark.py +64 -0
- lollms_client/tts_bindings/bark/server/main.py +311 -0
- lollms_client/tts_bindings/piper_tts/__init__.py +115 -335
- lollms_client/tts_bindings/piper_tts/server/install_piper.py +92 -0
- lollms_client/tts_bindings/piper_tts/server/main.py +425 -0
- lollms_client/tts_bindings/piper_tts/server/setup_voices.py +67 -0
- lollms_client/tts_bindings/xtts/__init__.py +99 -305
- lollms_client/tts_bindings/xtts/server/main.py +314 -0
- lollms_client/tts_bindings/xtts/server/setup_voices.py +67 -0
- {lollms_client-1.3.4.dist-info → lollms_client-1.3.7.dist-info}/METADATA +1 -1
- {lollms_client-1.3.4.dist-info → lollms_client-1.3.7.dist-info}/RECORD +22 -15
- {lollms_client-1.3.4.dist-info → lollms_client-1.3.7.dist-info}/WHEEL +0 -0
- {lollms_client-1.3.4.dist-info → lollms_client-1.3.7.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-1.3.4.dist-info → lollms_client-1.3.7.dist-info}/top_level.txt +0 -0
lollms_client/tts_bindings/xtts/__init__.py
@@ -1,317 +1,111 @@
-# lollms_client/tts_bindings/xtts/__init__.py
-import io
-import os
-from pathlib import Path
-from typing import Optional, List, Union, Dict, Any
-
-from ascii_colors import trace_exception, ASCIIColors
-
-# --- Package Management and Conditional Imports ---
-_xtts_deps_installed_with_correct_torch = False
-_xtts_installation_error = ""
-try:
-    import pipmaster as pm
-    import platform
-
-    preferred_torch_device_for_install = "cpu"
-    if platform.system() == "Linux" or platform.system() == "Windows":
-        preferred_torch_device_for_install = "cuda"
-    elif platform.system() == "Darwin":
-        preferred_torch_device_for_install = "mps"
-
-    torch_pkgs = ["torch", "torchaudio"] # TTS often needs torchaudio
-    # Coqui-TTS has specific version requirements sometimes, ensure_packages handles this
-    xtts_core_pkgs = ["TTS"]
-    other_deps = ["scipy", "numpy", "soundfile"] # soundfile is often a TTS dependency
-
-    torch_index_url = None
-    if preferred_torch_device_for_install == "cuda":
-        torch_index_url = "https://download.pytorch.org/whl/cu126"
-        ASCIIColors.info(f"Attempting to ensure PyTorch with CUDA support (target index: {torch_index_url}) for XTTS binding.")
-        pm.ensure_packages(torch_pkgs, index_url=torch_index_url)
-        pm.ensure_packages(xtts_core_pkgs + other_deps)
-    else:
-        ASCIIColors.info("Ensuring PyTorch, Coqui-TTS, and dependencies using default PyPI index for XTTS binding.")
-        pm.ensure_packages(torch_pkgs + xtts_core_pkgs + other_deps)
-
-    import torch
-    from TTS.api import TTS # Main Coqui TTS class
-    import scipy.io.wavfile
-    import numpy as np
-    import soundfile as sf # For reading speaker_wav if not in standard wav
-
-    _xtts_deps_installed_with_correct_torch = True
-except ImportError as e_imp: # Catch ImportError specifically if TTS itself fails
-    _xtts_installation_error = f"ImportError: {e_imp}. Coqui TTS (TTS lib) might not be installed correctly or has missing dependencies."
-    TTS, torch, scipy, np, sf = None, None, None, None, None
-except Exception as e:
-    _xtts_installation_error = str(e)
-    TTS, torch, scipy, np, sf = None, None, None, None, None
-# --- End Package Management ---
-
+# File: lollms_client/tts_bindings/xtts/__init__.py
 from lollms_client.lollms_tts_binding import LollmsTTSBinding
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-class XTTSBinding(LollmsTTSBinding):
-    def __init__(self,
-                 model_name: str = "tts_models/multilingual/multi-dataset/xtts_v2", # Coqui TTS model identifier
-                 default_speaker_wav: Optional[Union[str, Path]] = None, # Path to a reference WAV for default voice
-                 default_language: str = "en",
-                 device: Optional[str] = None,
-                 # Standard LollmsTTSBinding args
-                 host_address: Optional[str] = None,
-                 service_key: Optional[str] = None,
-                 verify_ssl_certificate: bool = True,
-                 **kwargs): # Catch-all for future TTS API changes or specific params
-
-        super().__init__(binding_name="xtts")
-
-        if not _xtts_deps_installed_with_correct_torch:
-            raise ImportError(f"XTTS binding dependencies not met. Error: {_xtts_installation_error}")
-
-        self.device = device
-        if self.device is None:
-            if torch.cuda.is_available(): self.device = "cuda"; ASCIIColors.info("CUDA device detected by PyTorch for XTTS.")
-            elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available(): self.device = "mps"; ASCIIColors.info("MPS device detected for XTTS.")
-            else: self.device = "cpu"; ASCIIColors.info("No GPU (CUDA/MPS) by PyTorch, using CPU for XTTS.")
-        elif self.device == "cuda" and not torch.cuda.is_available(): self.device = "cpu"; ASCIIColors.warning("CUDA req, not avail. CPU for XTTS.")
-        elif self.device == "mps" and not (hasattr(torch.backends, 'mps') and torch.backends.mps.is_available()): self.device = "cpu"; ASCIIColors.warning("MPS req, not avail. CPU for XTTS.")
-
-        ASCIIColors.info(f"XTTSBinding: Using device '{self.device}'.")
-
-        self.xtts_model_id_or_path = model_name # Store the model identifier passed by user
-        self.loaded_xtts_model_id = None
-        self.tts_model: Optional[TTS] = None
-        self.default_speaker_wav = str(default_speaker_wav) if default_speaker_wav else None
-        self.default_language = default_language
+from typing import Optional, List
+from pathlib import Path
+import requests
+import subprocess
+import sys
+import time
+import pipmaster as pm
+
+BindingName = "XTTSClientBinding"
+
+class XTTSClientBinding(LollmsTTSBinding):
+    def __init__(self,
+                 host: str = "localhost",
+                 port: int = 8081,
+                 auto_start_server: bool = True,
+                 **kwargs):

-
-
-
-
-        self.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        if not speaker_wav_path:
-            raise ValueError("XTTS requires a 'speaker_wav' path for voice cloning. Provide it in the 'voice' argument or set 'default_speaker_wav' during initialization.")
+        binding_name = "xtts"
+        super().__init__(binding_name=binding_name, **kwargs)
+        self.host = host
+        self.port = port
+        self.auto_start_server = auto_start_server
+        self.server_process = None
+        self.base_url = f"http://{self.host}:{self.port}"
+
+        if self.auto_start_server:
+            self.start_server()
+
+    def start_server(self):
+        print("XTTS Client: Starting dedicated server...")
+        binding_root = Path(__file__).parent
+        server_dir = binding_root / "server"
+        requirements_file = server_dir / "requirements.txt"
+        server_script = server_dir / "main.py"
+
+        # 1. Ensure a virtual environment and dependencies
+        venv_path = server_dir / "venv"
+        pm_v = pm.PackageManager(venv_path=venv_path)
+        pm_v.ensure_requirements(str(requirements_file))
+
+        # 2. Get the python executable from the venv
+        if sys.platform == "win32":
+            python_executable = venv_path / "Scripts" / "python.exe"
+        else:
+            python_executable = venv_path / "bin" / "python"
+
+        # 3. Launch the server as a subprocess with stdout/stderr forwarded to console
+        command = [
+            str(python_executable),
+            str(server_script),
+            "--host", self.host,
+            "--port", str(self.port)
+        ]

-
-
-
+        # Forward stdout and stderr to the parent process console
+        self.server_process = subprocess.Popen(
+            command,
+            stdout=None, # Inherit parent's stdout (shows in console)
+            stderr=None, # Inherit parent's stderr (shows in console)
+        )

-
-
-                                f"Known supported: {XTTS_SUPPORTED_LANGUAGES}. Attempting anyway.")
+        # 4. Wait for the server to be ready
+        self._wait_for_server()

-
+    def _wait_for_server(self, timeout=60):
+        start_time = time.time()
+        while time.time() - start_time < timeout:
+            try:
+                response = requests.get(f"{self.base_url}/status")
+                if response.status_code == 200 and response.json().get("status") == "running":
+                    print("XTTS Server is up and running.")
+                    return
+            except requests.ConnectionError:
+                time.sleep(1)

-
-
-
-
-
-
-
-
-
-
-                speaker_wav=str(speaker_wav_path), # Must be a string path
-                language=effective_language,
-                # split_sentences=True, # Default True, good for longer texts
-                **kwargs # Pass other potential TTS lib args
-            )
-
-            if not wav_array_int_list: # Check if list is empty
-                raise RuntimeError("XTTS model returned empty audio data (list of ints was empty).")
-
-            # Convert list of ints to a NumPy array of int16
-            # The TTS library usually returns samples scaled appropriately for int16.
-            audio_array_np = np.array(wav_array_int_list, dtype=np.int16)
-
-
-            if audio_array_np.ndim == 0 or audio_array_np.size == 0: # Double check after conversion
-                raise RuntimeError("XTTS model resulted in empty NumPy audio array.")
-
-
-            buffer = io.BytesIO()
-            # Get sample rate from the loaded TTS model's config
-            sample_rate = self.tts_model.synthesizer.output_sample_rate if hasattr(self.tts_model, 'synthesizer') and hasattr(self.tts_model.synthesizer, 'output_sample_rate') else 24000 # XTTS v2 default is 24kHz
-
-            scipy.io.wavfile.write(buffer, rate=sample_rate, data=audio_array_np)
-            audio_bytes = buffer.getvalue()
-            buffer.close()
-
-            ASCIIColors.green("XTTS audio generation successful.")
-            return audio_bytes
-        except Exception as e:
-            ASCIIColors.error(f"XTTS audio generation failed: {e}"); trace_exception(e)
-            if "out of memory" in str(e).lower() and self.device == "cuda":
-                ASCIIColors.yellow("CUDA out of memory. Ensure GPU has sufficient VRAM for XTTS (can be several GB).")
-            raise RuntimeError(f"XTTS audio generation error: {e}") from e
-
-    def list_voices(self, **kwargs) -> List[str]:
-        """
-        For XTTS, voices are determined by the `speaker_wav` file.
-        This method returns a message or an empty list, as there are no predefined voices.
-        Optionally, one could implement scanning a user-defined directory of speaker WAVs.
-        """
-        # return ["Dynamic (provide 'speaker_wav' path to generate_audio)"]
-        ASCIIColors.info("XTTS voices are dynamic and determined by the 'speaker_wav' file provided during generation.")
-        ASCIIColors.info("You can provide a path to any reference WAV file for voice cloning.")
-        return [] # Or provide a helper message as above in a different way
-
-    def get_xtts_model_ids(self) -> List[str]:
-        """Helper to list known XTTS model identifiers for Coqui TTS library."""
-        return XTTS_MODELS.copy()
+        self.stop_server()
+        raise RuntimeError("Failed to start the XTTS server in the specified timeout.")
+
+    def stop_server(self):
+        if self.server_process:
+            print("XTTS Client: Stopping dedicated server...")
+            self.server_process.terminate()
+            self.server_process.wait()
+            self.server_process = None
+            print("Server stopped.")

-    def get_supported_languages(self) -> List[str]:
-        """Helper to list known supported languages for XTTS v2."""
-        return XTTS_SUPPORTED_LANGUAGES.copy()
-
-
     def __del__(self):
-
-
-        if torch and hasattr(torch, 'cuda') and torch.cuda.is_available():
-            torch.cuda.empty_cache()
-        loaded_name = getattr(self, 'loaded_xtts_model_id', None)
-        msg = f"XTTSBinding for model '{loaded_name}' destroyed." if loaded_name else "XTTSBinding destroyed."
-        ASCIIColors.info(msg)
+        # Ensure the server is stopped when the object is destroyed
+        self.stop_server()

-
-
-
-
-
+    def generate_audio(self, text: str, voice: Optional[str] = None, **kwargs) -> bytes:
+        """Generate audio by calling the server's API"""
+        payload = {"text": text, "voice": voice, **kwargs}
+        response = requests.post(f"{self.base_url}/generate_audio", json=payload)
+        response.raise_for_status()
+        return response.content

-
-
-
-
-
-
-    # --- IMPORTANT: Create or provide a speaker reference WAV file ---
-    # For this test to work, you need a short (~5-15 seconds) clean audio file of a voice.
-    # Name it 'speaker_ref.wav' and place it in the same directory as this script,
-    # or update the path below.
-    default_speaker_wav_path = Path(__file__).parent / "speaker_ref.wav" # Assumes it's next to this __init__.py
-
-    if not default_speaker_wav_path.exists():
-        ASCIIColors.warning(f"Reference speaker WAV file not found: {default_speaker_wav_path}")
-        ASCIIColors.warning("Please create/place a 'speaker_ref.wav' (clean, ~5-15s audio) in the "
-                            f"'{default_speaker_wav_path.parent}' directory for the test to run properly.")
-        # Attempt to create a very basic dummy if scipy available, NOT suitable for good cloning
-        try:
-            import numpy as np; import scipy.io.wavfile
-            samplerate = 22050; duration = 2; frequency = 440
-            t = np.linspace(0., duration, int(samplerate * duration), endpoint=False)
-            data = (np.iinfo(np.int16).max * 0.1 * np.sin(2. * np.pi * frequency * t)).astype(np.int16)
-            scipy.io.wavfile.write(default_speaker_wav_path, samplerate, data)
-            ASCIIColors.info(f"Created a VERY BASIC dummy 'speaker_ref.wav'. Replace with a real voice sample for good results.")
-        except Exception as e_dummy_spk:
-            ASCIIColors.error(f"Could not create dummy speaker_ref.wav: {e_dummy_spk}. Test will likely fail or use no speaker.")
-            default_speaker_wav_path = None # Ensure it's None if creation failed
-
-    tts_binding = None
-    try:
-        ASCIIColors.cyan(f"\n--- Initializing XTTSBinding (XTTS Model: '{test_xtts_model_id}') ---")
-        tts_binding = XTTSBinding(
-            model_name=test_xtts_model_id,
-            default_speaker_wav=str(default_speaker_wav_path) if default_speaker_wav_path else None,
-            default_language="en"
-        )
-
-        ASCIIColors.cyan("\n--- Listing XTTS 'voices' (dynamic, requires speaker_wav) ---")
-        voices = tts_binding.list_voices(); # This will print an informational message
-
-        ASCIIColors.cyan("\n--- Listing known XTTS model IDs for Coqui TTS library ---")
-        xtts_models = tts_binding.get_xtts_model_ids(); print(f"Known XTTS model IDs: {xtts_models}")
-        ASCIIColors.cyan("\n--- Listing known XTTS supported languages ---")
-        langs = tts_binding.get_supported_languages(); print(f"Supported languages (example): {langs[:5]}...")
-
-
-        texts_to_synthesize = [
-            ("english_greeting", "Hello, this is a test of the XTTS voice synthesis system. I hope you like my voice!", "en"),
-            ("spanish_question", "¿Cómo estás hoy? Espero que tengas un día maravilloso.", "es"),
-            # ("short_custom_voice", "This voice should sound like your reference audio.", "en", "path/to/your/custom_speaker.wav"), # Example for custom
-        ]
-        if not default_speaker_wav_path: # If no default speaker, we can't run text loop as is
-            ASCIIColors.error("No default_speaker_wav available. Skipping synthesis loop.")
-            texts_to_synthesize = []
-
-
-        for name, text, lang, *speaker_override_list in texts_to_synthesize:
-            speaker_to_use = speaker_override_list[0] if speaker_override_list else None # Uses binding default if None
-
-            ASCIIColors.cyan(f"\n--- Synthesizing TTS for: '{name}' (Lang: {lang}, Speaker: {speaker_to_use or tts_binding.default_speaker_wav}) ---")
-            print(f"Text: {text}")
-            try:
-                # XTTS tts() doesn't have as many direct generation params as Bark's generate()
-                # Control is more via the model config or specific methods if available.
-                audio_bytes = tts_binding.generate_audio(text, voice=speaker_to_use, language=lang)
-                if audio_bytes:
-                    output_filename = f"tts_{name}_{tts_binding.loaded_xtts_model_id.replace('/','_')}.wav"
-                    output_path = test_output_dir / output_filename
-                    with open(output_path, "wb") as f: f.write(audio_bytes)
-                    ASCIIColors.green(f"TTS for '{name}' saved to: {output_path} ({len(audio_bytes) / 1024:.2f} KB)")
-                else: ASCIIColors.error(f"TTS generation for '{name}' returned empty bytes.")
-            except Exception as e_gen: ASCIIColors.error(f"Failed to generate TTS for '{name}': {e_gen}")
-
-    except ImportError as e_imp: ASCIIColors.error(f"Import error: {e_imp}")
-    except RuntimeError as e_rt: ASCIIColors.error(f"Runtime error: {e_rt}")
-    except Exception as e: ASCIIColors.error(f"Unexpected error: {e}"); trace_exception(e)
-    finally:
-        if tts_binding: del tts_binding
-        ASCIIColors.info(f"Test TTS audio (if any) are in: {test_output_dir.resolve()}")
-        print(f"{ASCIIColors.YELLOW}Check the audio files in '{test_output_dir.resolve()}'!{ASCIIColors.RESET}")
-        # Clean up dummy speaker_ref.wav if we created it
-        if "samplerate" in locals() and default_speaker_wav_path and default_speaker_wav_path.name == "speaker_ref.wav" and "dummy" in str(default_speaker_wav_path).lower():
-            if default_speaker_wav_path.exists():
-                try: default_speaker_wav_path.unlink(); ASCIIColors.info("Removed dummy speaker_ref.wav")
-                except: pass
+    def list_voices(self, **kwargs) -> List[str]:
+        """Get available voices from the server"""
+        response = requests.get(f"{self.base_url}/list_voices")
+        response.raise_for_status()
+        return response.json().get("voices", [])

+    def list_models(self, **kwargs) -> List[str]:
+        """Get available models from the server"""
+        response = requests.get(f"{self.base_url}/list_models")
+        response.raise_for_status()
+        return response.json().get("models", [])

-    ASCIIColors.yellow("\n--- XTTSBinding Test Finished ---")