lollms-client 1.3.3__py3-none-any.whl → 1.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lollms-client might be problematic. Click here for more details.
- lollms_client/__init__.py +1 -1
- lollms_client/llm_bindings/llamacpp/__init__.py +354 -233
- lollms_client/llm_bindings/lollms/__init__.py +152 -153
- lollms_client/lollms_core.py +163 -75
- lollms_client/lollms_discussion.py +2 -2
- lollms_client/lollms_llm_binding.py +3 -3
- lollms_client/lollms_tts_binding.py +80 -67
- lollms_client/tts_bindings/bark/__init__.py +110 -329
- lollms_client/tts_bindings/bark/server/install_bark.py +64 -0
- lollms_client/tts_bindings/bark/server/main.py +311 -0
- lollms_client/tts_bindings/piper_tts/__init__.py +115 -335
- lollms_client/tts_bindings/piper_tts/server/install_piper.py +92 -0
- lollms_client/tts_bindings/piper_tts/server/main.py +425 -0
- lollms_client/tts_bindings/piper_tts/server/setup_voices.py +67 -0
- lollms_client/tts_bindings/xtts/__init__.py +99 -305
- lollms_client/tts_bindings/xtts/server/main.py +314 -0
- lollms_client/tts_bindings/xtts/server/setup_voices.py +67 -0
- {lollms_client-1.3.3.dist-info → lollms_client-1.3.6.dist-info}/METADATA +1 -1
- {lollms_client-1.3.3.dist-info → lollms_client-1.3.6.dist-info}/RECORD +22 -15
- {lollms_client-1.3.3.dist-info → lollms_client-1.3.6.dist-info}/WHEEL +0 -0
- {lollms_client-1.3.3.dist-info → lollms_client-1.3.6.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-1.3.3.dist-info → lollms_client-1.3.6.dist-info}/top_level.txt +0 -0
|
@@ -1,336 +1,117 @@
|
|
|
1
|
-
# lollms_client/tts_bindings/bark/__init__.py
|
|
2
|
-
import
|
|
3
|
-
import
|
|
1
|
+
# File: lollms_client/tts_bindings/bark/__init__.py
|
|
2
|
+
from lollms_client.lollms_tts_binding import LollmsTTSBinding
|
|
3
|
+
from typing import Optional, List
|
|
4
4
|
from pathlib import Path
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
preferred_torch_device_for_install = "cuda"
|
|
19
|
-
elif platform.system() == "Darwin":
|
|
20
|
-
preferred_torch_device_for_install = "mps"
|
|
21
|
-
|
|
22
|
-
torch_pkgs = ["torch", "torchaudio","xformers"]
|
|
23
|
-
bark_core_pkgs = ["transformers", "accelerate", "sentencepiece"]
|
|
24
|
-
other_deps = ["scipy", "numpy"]
|
|
25
|
-
|
|
26
|
-
torch_index_url = None
|
|
27
|
-
if preferred_torch_device_for_install == "cuda":
|
|
28
|
-
torch_index_url = "https://download.pytorch.org/whl/cu126"
|
|
29
|
-
ASCIIColors.info(f"Attempting to ensure PyTorch with CUDA support (target index: {torch_index_url}) for Bark TTS binding.")
|
|
30
|
-
pm.ensure_packages(torch_pkgs, index_url=torch_index_url)
|
|
31
|
-
pm.ensure_packages(bark_core_pkgs + other_deps)
|
|
32
|
-
else:
|
|
33
|
-
ASCIIColors.info("Ensuring PyTorch, Bark dependencies, and others using default PyPI index for Bark TTS binding.")
|
|
34
|
-
pm.ensure_packages(torch_pkgs + bark_core_pkgs + other_deps)
|
|
35
|
-
|
|
36
|
-
import torch
|
|
37
|
-
from transformers import AutoProcessor, BarkModel, GenerationConfig
|
|
38
|
-
import scipy.io.wavfile
|
|
39
|
-
import numpy as np
|
|
40
|
-
|
|
41
|
-
_bark_deps_installed_with_correct_torch = True
|
|
42
|
-
except Exception as e:
|
|
43
|
-
_bark_installation_error = str(e)
|
|
44
|
-
AutoProcessor, BarkModel, GenerationConfig, torch, scipy, np = None, None, None, None, None, None
|
|
45
|
-
# --- End Package Management ---
|
|
46
|
-
|
|
47
|
-
from lollms_client.lollms_tts_binding import LollmsTTSBinding # Changed base class
|
|
48
|
-
|
|
49
|
-
BindingName = "BarkTTSBinding" # Changed BindingName
|
|
50
|
-
|
|
51
|
-
# Bark model IDs (can be used as 'model_name' for this binding)
|
|
52
|
-
BARK_MODELS = [
|
|
53
|
-
"suno/bark", # Full model
|
|
54
|
-
"suno/bark-small", # Smaller, faster model
|
|
55
|
-
]
|
|
56
|
-
|
|
57
|
-
# Bark voice presets, used as the 'voice' argument in generate_audio
|
|
58
|
-
BARK_VOICE_PRESETS = [
|
|
59
|
-
"v2/en_speaker_0", "v2/en_speaker_1", "v2/en_speaker_2", "v2/en_speaker_3",
|
|
60
|
-
"v2/en_speaker_4", "v2/en_speaker_5", "v2/en_speaker_6", "v2/en_speaker_7",
|
|
61
|
-
"v2/en_speaker_8", "v2/en_speaker_9",
|
|
62
|
-
"v2/de_speaker_0", "v2/es_speaker_0", "v2/fr_speaker_0", "v2/hi_speaker_0",
|
|
63
|
-
"v2/it_speaker_0", "v2/ja_speaker_0", "v2/ko_speaker_0", "v2/pl_speaker_0",
|
|
64
|
-
"v2/pt_speaker_0", "v2/ru_speaker_0", "v2/tr_speaker_0", "v2/zh_speaker_0",
|
|
65
|
-
# Non-speech sounds (less relevant for pure TTS, but part of Bark's capabilities)
|
|
66
|
-
"[laughter]", "[laughs]", "[sighs]", "[music]", "[gasps]", "[clears throat]",
|
|
67
|
-
"♪", "...", "[MAN]", "[WOMAN]" # Special tokens
|
|
68
|
-
]
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
class BarkTTSBinding(LollmsTTSBinding): # Changed class name and base class
|
|
72
|
-
def __init__(self,
|
|
73
|
-
model_name: str = "suno/bark-small", # This is the Bark model ID
|
|
74
|
-
default_voice: Optional[str] = "v2/en_speaker_6", # This is the default voice_preset
|
|
75
|
-
device: Optional[str] = None,
|
|
76
|
-
enable_better_transformer: bool = True,
|
|
77
|
-
host_address: Optional[str] = None, # Unused for local binding
|
|
78
|
-
service_key: Optional[str] = None, # Unused for local binding
|
|
79
|
-
verify_ssl_certificate: bool = True, # Unused for local binding
|
|
5
|
+
import requests
|
|
6
|
+
import subprocess
|
|
7
|
+
import sys
|
|
8
|
+
import time
|
|
9
|
+
import pipmaster as pm
|
|
10
|
+
|
|
11
|
+
BindingName = "BarkClientBinding"
|
|
12
|
+
|
|
13
|
+
class BarkClientBinding(LollmsTTSBinding):
|
|
14
|
+
def __init__(self,
|
|
15
|
+
host: str = "localhost",
|
|
16
|
+
port: int = 8082,
|
|
17
|
+
auto_start_server: bool = True,
|
|
80
18
|
**kwargs):
|
|
81
|
-
|
|
82
|
-
super().__init__(binding_name="bark") # Call LollmsTTSBinding's init
|
|
83
|
-
|
|
84
|
-
if not _bark_deps_installed_with_correct_torch:
|
|
85
|
-
raise ImportError(f"Bark TTS binding dependencies not met. Error: {_bark_installation_error}")
|
|
86
|
-
|
|
87
|
-
self.device = device
|
|
88
|
-
if self.device is None:
|
|
89
|
-
if torch.cuda.is_available(): self.device = "cuda"; ASCIIColors.info("CUDA device detected by PyTorch for Bark TTS.")
|
|
90
|
-
elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available(): self.device = "mps"; ASCIIColors.info("MPS device detected for Bark TTS.")
|
|
91
|
-
else: self.device = "cpu"; ASCIIColors.info("No GPU (CUDA/MPS) by PyTorch, using CPU for Bark TTS.")
|
|
92
|
-
elif self.device == "cuda" and not torch.cuda.is_available(): self.device = "cpu"; ASCIIColors.warning("CUDA req, not avail. CPU for Bark TTS.")
|
|
93
|
-
elif self.device == "mps" and not (hasattr(torch.backends, 'mps') and torch.backends.mps.is_available()): self.device = "cpu"; ASCIIColors.warning("MPS req, not avail. CPU for Bark TTS.")
|
|
94
|
-
|
|
95
|
-
ASCIIColors.info(f"BarkTTSBinding: Using device '{self.device}'.")
|
|
96
|
-
|
|
97
|
-
self.bark_model_id = model_name # Store the actual Bark model ID separately
|
|
98
|
-
self.loaded_bark_model_id = None
|
|
99
|
-
self.model: Optional[BarkModel] = None
|
|
100
|
-
self.processor: Optional[AutoProcessor] = None
|
|
101
|
-
self.default_voice_preset = default_voice # Renamed for clarity in TTS context
|
|
102
|
-
self.enable_better_transformer = enable_better_transformer
|
|
103
19
|
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
self.
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
try: self.model.enable_model_cpu_offload(); ASCIIColors.info("Enabled model_cpu_offload for Bark.")
|
|
141
|
-
except Exception as e: ASCIIColors.warning(f"Could not enable model_cpu_offload: {e}")
|
|
142
|
-
elif hasattr(self.model, "enable_cpu_offload"):
|
|
143
|
-
try: self.model.enable_cpu_offload(); ASCIIColors.info("Enabled cpu_offload for Bark (older API).")
|
|
144
|
-
except Exception as e: ASCIIColors.warning(f"Could not enable cpu_offload (older API): {e}")
|
|
145
|
-
else: ASCIIColors.info("CPU offload not explicitly enabled.")
|
|
146
|
-
|
|
147
|
-
self.loaded_bark_model_id = model_id_to_load
|
|
148
|
-
ASCIIColors.green(f"Bark model '{model_id_to_load}' for TTS loaded successfully.")
|
|
149
|
-
except Exception as e:
|
|
150
|
-
self.model, self.processor, self.loaded_bark_model_id = None, None, None
|
|
151
|
-
ASCIIColors.error(f"Failed to load Bark model '{model_id_to_load}': {e}"); trace_exception(e)
|
|
152
|
-
raise RuntimeError(f"Failed to load Bark model '{model_id_to_load}'") from e
|
|
153
|
-
|
|
154
|
-
def generate_audio(self,
|
|
155
|
-
text: str,
|
|
156
|
-
voice: Optional[str] = None, # This will be the Bark voice_preset
|
|
157
|
-
do_sample: Optional[bool] = True, # Default to True for more natural speech
|
|
158
|
-
temperature: Optional[float] = 0.7, # General speech temperature
|
|
159
|
-
**kwargs) -> bytes:
|
|
160
|
-
if self.model is None or self.processor is None:
|
|
161
|
-
raise RuntimeError("Bark model or processor not loaded.")
|
|
162
|
-
|
|
163
|
-
effective_voice_preset = voice if voice is not None else self.default_voice_preset
|
|
164
|
-
if effective_voice_preset not in BARK_VOICE_PRESETS and not Path(effective_voice_preset).exists():
|
|
165
|
-
ASCIIColors.warning(f"Voice preset '{effective_voice_preset}' not in known presets. Bark will attempt to use it as is.")
|
|
20
|
+
binding_name = "bark"
|
|
21
|
+
super().__init__(binding_name=binding_name, **kwargs)
|
|
22
|
+
self.host = host
|
|
23
|
+
self.port = port
|
|
24
|
+
self.auto_start_server = auto_start_server
|
|
25
|
+
self.server_process = None
|
|
26
|
+
self.base_url = f"http://{self.host}:{self.port}"
|
|
27
|
+
|
|
28
|
+
if self.auto_start_server:
|
|
29
|
+
self.start_server()
|
|
30
|
+
|
|
31
|
+
def start_server(self):
|
|
32
|
+
print("Bark Client: Starting dedicated server...")
|
|
33
|
+
binding_root = Path(__file__).parent
|
|
34
|
+
server_dir = binding_root / "server"
|
|
35
|
+
requirements_file = server_dir / "requirements.txt"
|
|
36
|
+
server_script = server_dir / "main.py"
|
|
37
|
+
|
|
38
|
+
# 1. Ensure a virtual environment and dependencies
|
|
39
|
+
venv_path = server_dir / "venv"
|
|
40
|
+
pm_v = pm.PackageManager(venv_path=venv_path)
|
|
41
|
+
pm_v.ensure_requirements(str(requirements_file), verbose=True)
|
|
42
|
+
|
|
43
|
+
# 2. Get the python executable from the venv
|
|
44
|
+
if sys.platform == "win32":
|
|
45
|
+
python_executable = venv_path / "Scripts" / "python.exe"
|
|
46
|
+
else:
|
|
47
|
+
python_executable = venv_path / "bin" / "python"
|
|
48
|
+
|
|
49
|
+
# 3. Launch the server as a subprocess with stdout/stderr forwarded to console
|
|
50
|
+
command = [
|
|
51
|
+
str(python_executable),
|
|
52
|
+
str(server_script),
|
|
53
|
+
"--host", self.host,
|
|
54
|
+
"--port", str(self.port)
|
|
55
|
+
]
|
|
166
56
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
if 'attention_mask' not in inputs:
|
|
173
|
-
inputs['attention_mask'] = torch.ones_like(inputs['input_ids'])
|
|
174
|
-
|
|
175
|
-
if hasattr(self.model, 'generation_config') and self.model.generation_config is not None:
|
|
176
|
-
gen_config = GenerationConfig.from_dict(self.model.generation_config.to_dict())
|
|
177
|
-
else:
|
|
178
|
-
gen_config = GenerationConfig()
|
|
179
|
-
|
|
180
|
-
for key, value in self.default_generation_params.items():
|
|
181
|
-
if hasattr(gen_config, key): setattr(gen_config, key, value)
|
|
182
|
-
|
|
183
|
-
# For TTS, do_sample is usually True
|
|
184
|
-
gen_config.do_sample = do_sample if do_sample is not None else True
|
|
185
|
-
|
|
186
|
-
# Apply general temperature hint for TTS
|
|
187
|
-
if temperature is not None:
|
|
188
|
-
# Bark's main temperatures for speech quality are often coarse and fine.
|
|
189
|
-
# Semantic temperature can also play a role.
|
|
190
|
-
if 'semantic_temperature' not in kwargs and hasattr(gen_config, 'semantic_temperature'):
|
|
191
|
-
gen_config.semantic_temperature = kwargs.get("semantic_temperature", temperature)
|
|
192
|
-
if 'coarse_temperature' not in kwargs and hasattr(gen_config, 'coarse_temperature'):
|
|
193
|
-
gen_config.coarse_temperature = kwargs.get("coarse_temperature", temperature)
|
|
194
|
-
if 'fine_temperature' not in kwargs and hasattr(gen_config, 'fine_temperature'):
|
|
195
|
-
gen_config.fine_temperature = kwargs.get("fine_temperature", temperature * 0.8) # Fine is often lower
|
|
196
|
-
|
|
197
|
-
for key, value in kwargs.items():
|
|
198
|
-
if hasattr(gen_config, key): setattr(gen_config, key, value)
|
|
199
|
-
|
|
200
|
-
pad_token_id_to_set = None
|
|
201
|
-
# (pad_token_id logic remains the same)
|
|
202
|
-
if hasattr(self.model.config, 'semantic_config') and hasattr(self.model.config.semantic_config, 'pad_token_id'):
|
|
203
|
-
pad_token_id_to_set = self.model.config.semantic_config.pad_token_id
|
|
204
|
-
elif hasattr(self.model.config, 'text_config') and hasattr(self.model.config.text_config, 'pad_token_id'):
|
|
205
|
-
pad_token_id_to_set = self.model.config.text_config.pad_token_id
|
|
206
|
-
elif hasattr(self.processor, 'tokenizer') and self.processor.tokenizer and self.processor.tokenizer.pad_token_id is not None:
|
|
207
|
-
pad_token_id_to_set = self.processor.tokenizer.pad_token_id
|
|
208
|
-
|
|
209
|
-
if pad_token_id_to_set is not None:
|
|
210
|
-
gen_config.pad_token_id = pad_token_id_to_set
|
|
211
|
-
if hasattr(gen_config, 'eos_token_id') and gen_config.eos_token_id is None:
|
|
212
|
-
eos_id = getattr(getattr(self.model.config, 'semantic_config', None), 'eos_token_id', None)
|
|
213
|
-
if eos_id is not None: gen_config.eos_token_id = eos_id
|
|
214
|
-
else:
|
|
215
|
-
ASCIIColors.warning("Could not determine pad_token_id for Bark TTS. Using default in GenerationConfig.")
|
|
216
|
-
if gen_config.eos_token_id is not None and gen_config.pad_token_id is None:
|
|
217
|
-
gen_config.pad_token_id = gen_config.eos_token_id
|
|
218
|
-
elif gen_config.pad_token_id is None:
|
|
219
|
-
gen_config.pad_token_id = 0
|
|
220
|
-
|
|
221
|
-
ASCIIColors.debug(f"Bark TTS final generation_config: {gen_config.to_json_string()}")
|
|
222
|
-
|
|
223
|
-
with torch.no_grad():
|
|
224
|
-
output = self.model.generate(
|
|
225
|
-
input_ids=inputs['input_ids'],
|
|
226
|
-
attention_mask=inputs.get('attention_mask'),
|
|
227
|
-
generation_config=gen_config
|
|
228
|
-
)
|
|
229
|
-
|
|
230
|
-
if isinstance(output, torch.Tensor): speech_output_tensor = output
|
|
231
|
-
elif isinstance(output, dict) and ("audio_features" in output or "waveform" in output) :
|
|
232
|
-
speech_output_tensor = output.get("waveform", output.get("audio_features"))
|
|
233
|
-
else: raise TypeError(f"Unexpected output type from BarkModel.generate: {type(output)}. Content: {output}")
|
|
234
|
-
|
|
235
|
-
audio_array_np = speech_output_tensor.cpu().numpy().squeeze()
|
|
236
|
-
if audio_array_np.ndim == 0 or audio_array_np.size == 0:
|
|
237
|
-
raise RuntimeError("Bark model returned empty audio data.")
|
|
238
|
-
|
|
239
|
-
audio_int16 = (audio_array_np * 32767).astype(np.int16)
|
|
240
|
-
|
|
241
|
-
buffer = io.BytesIO()
|
|
242
|
-
sample_rate_to_use = int(self.model.generation_config.sample_rate if hasattr(self.model.generation_config, 'sample_rate') and self.model.generation_config.sample_rate else 24_000)
|
|
243
|
-
scipy.io.wavfile.write(buffer, rate=sample_rate_to_use, data=audio_int16)
|
|
244
|
-
audio_bytes = buffer.getvalue()
|
|
245
|
-
buffer.close()
|
|
246
|
-
|
|
247
|
-
ASCIIColors.green("Bark TTS audio generation successful.")
|
|
248
|
-
return audio_bytes
|
|
249
|
-
except Exception as e:
|
|
250
|
-
ASCIIColors.error(f"Bark TTS audio generation failed: {e}"); trace_exception(e)
|
|
251
|
-
if "out of memory" in str(e).lower() and self.device == "cuda":
|
|
252
|
-
ASCIIColors.yellow("CUDA out of memory. Consider using suno/bark-small or ensure GPU has sufficient VRAM.")
|
|
253
|
-
raise RuntimeError(f"Bark TTS audio generation error: {e}") from e
|
|
254
|
-
|
|
255
|
-
def list_voices(self, **kwargs) -> List[str]: # Renamed from list_models
|
|
256
|
-
"""Lists available Bark voice presets."""
|
|
257
|
-
return BARK_VOICE_PRESETS.copy()
|
|
258
|
-
|
|
259
|
-
def get_bark_model_ids(self) -> List[str]: # Helper to list actual Bark models
|
|
260
|
-
"""Lists available Bark underlying model IDs."""
|
|
261
|
-
return BARK_MODELS.copy()
|
|
262
|
-
|
|
263
|
-
def __del__(self):
|
|
264
|
-
if hasattr(self, 'model') and self.model is not None:
|
|
265
|
-
del self.model; self.model = None
|
|
266
|
-
if hasattr(self, 'processor') and self.processor is not None:
|
|
267
|
-
del self.processor; self.processor = None
|
|
268
|
-
if torch and hasattr(torch, 'cuda') and torch.cuda.is_available():
|
|
269
|
-
torch.cuda.empty_cache()
|
|
270
|
-
loaded_name = getattr(self, 'loaded_bark_model_id', None) # Use specific attribute
|
|
271
|
-
msg = f"BarkTTSBinding for model '{loaded_name}' destroyed." if loaded_name else "BarkTTSBinding destroyed."
|
|
272
|
-
ASCIIColors.info(msg)
|
|
273
|
-
|
|
274
|
-
# --- Main Test Block ---
|
|
275
|
-
if __name__ == '__main__':
|
|
276
|
-
if not _bark_deps_installed_with_correct_torch:
|
|
277
|
-
print(f"{ASCIIColors.RED}Bark TTS binding dependencies not met. Skipping tests. Error: {_bark_installation_error}{ASCIIColors.RESET}")
|
|
278
|
-
exit()
|
|
279
|
-
|
|
280
|
-
ASCIIColors.yellow("--- BarkTTSBinding Test ---")
|
|
281
|
-
# Use bark_model_id to specify the underlying Bark model
|
|
282
|
-
test_bark_model_id = "suno/bark-small"
|
|
283
|
-
test_output_dir = Path("./test_bark_tts_output")
|
|
284
|
-
test_output_dir.mkdir(exist_ok=True)
|
|
285
|
-
tts_binding = None
|
|
286
|
-
|
|
287
|
-
try:
|
|
288
|
-
ASCIIColors.cyan(f"\n--- Initializing BarkTTSBinding (Bark Model: '{test_bark_model_id}') ---")
|
|
289
|
-
tts_binding = BarkTTSBinding(
|
|
290
|
-
model_name=test_bark_model_id, # This is the Bark model ID from HF
|
|
291
|
-
default_voice="v2/en_speaker_3" # This is the default voice_preset
|
|
57
|
+
# Forward stdout and stderr to the parent process console
|
|
58
|
+
self.server_process = subprocess.Popen(
|
|
59
|
+
command,
|
|
60
|
+
stdout=None, # Inherit parent's stdout (shows in console)
|
|
61
|
+
stderr=None, # Inherit parent's stderr (shows in console)
|
|
292
62
|
)
|
|
63
|
+
|
|
64
|
+
# 4. Wait for the server to be ready
|
|
65
|
+
self._wait_for_server()
|
|
293
66
|
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
texts_to_synthesize = [
|
|
301
|
-
("hello_world_default_voice", "Hello world, this is a test of the Bark text to speech binding."),
|
|
302
|
-
("excited_greeting_spk6", "Wow! This is really cool! I can't believe it's working so well.", "v2/en_speaker_6"),
|
|
303
|
-
("question_spk1", "Can you generate different types of voices?", "v2/en_speaker_1"),
|
|
304
|
-
("german_example", "Hallo Welt, wie geht es dir heute?", "v2/de_speaker_0"),
|
|
305
|
-
("laughter_in_text", "This is so funny [laughter] I can't stop laughing.", "v2/en_speaker_0"), # Testing non-speech token
|
|
306
|
-
]
|
|
307
|
-
|
|
308
|
-
for name, text, *voice_arg in texts_to_synthesize:
|
|
309
|
-
voice_to_use = voice_arg[0] if voice_arg else None # Use specified voice or binding's default
|
|
310
|
-
ASCIIColors.cyan(f"\n--- Synthesizing TTS for: '{name}' (Voice: {voice_to_use or tts_binding.default_voice_preset}) ---")
|
|
311
|
-
print(f"Text: {text}")
|
|
67
|
+
def _wait_for_server(self, timeout=120): # Increased timeout for model loading
|
|
68
|
+
start_time = time.time()
|
|
69
|
+
print("Bark Client: Waiting for server to initialize (this may take a while for first run)...")
|
|
70
|
+
while time.time() - start_time < timeout:
|
|
312
71
|
try:
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
72
|
+
response = requests.get(f"{self.base_url}/status")
|
|
73
|
+
if response.status_code == 200 and response.json().get("status") == "running":
|
|
74
|
+
print("Bark Server is up and running.")
|
|
75
|
+
return
|
|
76
|
+
except requests.ConnectionError:
|
|
77
|
+
time.sleep(2)
|
|
78
|
+
|
|
79
|
+
self.stop_server()
|
|
80
|
+
raise RuntimeError("Failed to start the Bark server in the specified timeout.")
|
|
81
|
+
|
|
82
|
+
def stop_server(self):
|
|
83
|
+
if self.server_process:
|
|
84
|
+
print("Bark Client: Stopping dedicated server...")
|
|
85
|
+
self.server_process.terminate()
|
|
86
|
+
self.server_process.wait()
|
|
87
|
+
self.server_process = None
|
|
88
|
+
print("Server stopped.")
|
|
89
|
+
|
|
90
|
+
def __del__(self):
|
|
91
|
+
# Ensure the server is stopped when the object is destroyed
|
|
92
|
+
self.stop_server()
|
|
93
|
+
|
|
94
|
+
def generate_audio(self, text: str, voice: Optional[str] = None, **kwargs) -> bytes:
|
|
95
|
+
"""Generate audio by calling the server's API"""
|
|
96
|
+
payload = {"text": text, "voice": voice, **kwargs}
|
|
97
|
+
response = requests.post(f"{self.base_url}/generate_audio", json=payload, timeout=60)
|
|
98
|
+
response.raise_for_status()
|
|
99
|
+
return response.content
|
|
100
|
+
|
|
101
|
+
def list_voices(self, **kwargs) -> List[str]:
|
|
102
|
+
"""Get available voices from the server"""
|
|
103
|
+
response = requests.get(f"{self.base_url}/list_voices")
|
|
104
|
+
response.raise_for_status()
|
|
105
|
+
return response.json().get("voices", [])
|
|
106
|
+
|
|
107
|
+
def list_models(self, **kwargs) -> List[str]:
|
|
108
|
+
"""Get available models from the server"""
|
|
109
|
+
response = requests.get(f"{self.base_url}/list_models")
|
|
110
|
+
response.raise_for_status()
|
|
111
|
+
return response.json().get("models", [])
|
|
112
|
+
|
|
113
|
+
def set_voice(self, voice: str):
|
|
114
|
+
"""Set the default voice for future generations"""
|
|
115
|
+
response = requests.post(f"{self.base_url}/set_voice", json={"voice": voice})
|
|
116
|
+
response.raise_for_status()
|
|
117
|
+
return response.json()
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# File: lollms_client/tts_bindings/bark/server/install_bark.py
|
|
2
|
+
#!/usr/bin/env python3
|
|
3
|
+
"""
|
|
4
|
+
Bark installation script with GPU support detection
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import subprocess
|
|
8
|
+
import sys
|
|
9
|
+
import torch
|
|
10
|
+
|
|
11
|
+
def install_bark():
|
|
12
|
+
"""Install Bark with appropriate PyTorch version for GPU support"""
|
|
13
|
+
|
|
14
|
+
print("Checking for CUDA availability...")
|
|
15
|
+
cuda_available = torch.cuda.is_available()
|
|
16
|
+
|
|
17
|
+
if cuda_available:
|
|
18
|
+
print(f"CUDA detected! GPU: {torch.cuda.get_device_name(0)}")
|
|
19
|
+
print("Installing Bark with GPU support...")
|
|
20
|
+
else:
|
|
21
|
+
print("No CUDA detected, installing CPU-only version...")
|
|
22
|
+
|
|
23
|
+
try:
|
|
24
|
+
# Install Bark
|
|
25
|
+
print("Installing bark...")
|
|
26
|
+
subprocess.check_call([sys.executable, "-m", "pip", "install", "bark"])
|
|
27
|
+
|
|
28
|
+
print("Bark installation completed successfully!")
|
|
29
|
+
|
|
30
|
+
# Test installation
|
|
31
|
+
print("Testing Bark installation...")
|
|
32
|
+
try:
|
|
33
|
+
from bark import generate_audio, SAMPLE_RATE
|
|
34
|
+
print("✓ Bark imported successfully!")
|
|
35
|
+
|
|
36
|
+
# Quick test generation
|
|
37
|
+
print("Running quick test generation...")
|
|
38
|
+
audio = generate_audio("Hello, this is a test.", history_prompt="v2/en_speaker_6")
|
|
39
|
+
print(f"✓ Test generation successful! Generated {len(audio)} audio samples.")
|
|
40
|
+
|
|
41
|
+
except Exception as e:
|
|
42
|
+
print(f"✗ Bark test failed: {e}")
|
|
43
|
+
return False
|
|
44
|
+
|
|
45
|
+
return True
|
|
46
|
+
|
|
47
|
+
except subprocess.CalledProcessError as e:
|
|
48
|
+
print(f"✗ Installation failed: {e}")
|
|
49
|
+
return False
|
|
50
|
+
except Exception as e:
|
|
51
|
+
print(f"✗ Unexpected error: {e}")
|
|
52
|
+
return False
|
|
53
|
+
|
|
54
|
+
if __name__ == "__main__":
|
|
55
|
+
success = install_bark()
|
|
56
|
+
if success:
|
|
57
|
+
print("\n🎉 Bark TTS is ready to use!")
|
|
58
|
+
if torch.cuda.is_available():
|
|
59
|
+
print(f"🚀 GPU acceleration enabled with {torch.cuda.get_device_name(0)}")
|
|
60
|
+
else:
|
|
61
|
+
print("💻 Running on CPU (consider installing CUDA for better performance)")
|
|
62
|
+
else:
|
|
63
|
+
print("\n❌ Installation failed. Please check the error messages above.")
|
|
64
|
+
sys.exit(1)
|