lollms-client 1.3.4__py3-none-any.whl → 1.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lollms-client might be problematic.
- lollms_client/__init__.py +1 -1
- lollms_client/llm_bindings/llamacpp/__init__.py +354 -233
- lollms_client/llm_bindings/lollms/__init__.py +152 -153
- lollms_client/lollms_core.py +162 -76
- lollms_client/lollms_discussion.py +2 -2
- lollms_client/lollms_llm_binding.py +3 -3
- lollms_client/lollms_tts_binding.py +80 -67
- lollms_client/tts_bindings/bark/__init__.py +110 -329
- lollms_client/tts_bindings/bark/server/install_bark.py +64 -0
- lollms_client/tts_bindings/bark/server/main.py +311 -0
- lollms_client/tts_bindings/piper_tts/__init__.py +115 -335
- lollms_client/tts_bindings/piper_tts/server/install_piper.py +92 -0
- lollms_client/tts_bindings/piper_tts/server/main.py +425 -0
- lollms_client/tts_bindings/piper_tts/server/setup_voices.py +67 -0
- lollms_client/tts_bindings/xtts/__init__.py +99 -305
- lollms_client/tts_bindings/xtts/server/main.py +314 -0
- lollms_client/tts_bindings/xtts/server/setup_voices.py +67 -0
- {lollms_client-1.3.4.dist-info → lollms_client-1.3.7.dist-info}/METADATA +1 -1
- {lollms_client-1.3.4.dist-info → lollms_client-1.3.7.dist-info}/RECORD +22 -15
- {lollms_client-1.3.4.dist-info → lollms_client-1.3.7.dist-info}/WHEEL +0 -0
- {lollms_client-1.3.4.dist-info → lollms_client-1.3.7.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-1.3.4.dist-info → lollms_client-1.3.7.dist-info}/top_level.txt +0 -0
lollms_client/tts_bindings/bark/server/main.py (new file)
@@ -0,0 +1,311 @@
# File: lollms_client/tts_bindings/bark/server/main.py

import uvicorn
from fastapi import FastAPI, APIRouter, HTTPException
from pydantic import BaseModel
import argparse
import sys
from pathlib import Path
import asyncio
import traceback
import os
from typing import Optional, List
import io
import wave
import numpy as np

# --- Bark TTS Implementation ---
try:
    print("Server: Loading Bark dependencies...")
    import torch
    import torchaudio
    from bark import SAMPLE_RATE, generate_audio, preload_models
    from bark.generation import set_seed
    print("Server: Bark dependencies loaded successfully")

    # Check for CUDA availability
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Server: Using device: {device}")

    # Set environment variable for Bark to use GPU if available
    if device == "cuda":
        os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    bark_available = True

except Exception as e:
    print(f"Server: Failed to load Bark dependencies: {e}")
    print(f"Server: Traceback:\n{traceback.format_exc()}")
    bark_available = False
    SAMPLE_RATE = 24000  # Bark's default sample rate

# --- API Models ---
class GenerationRequest(BaseModel):
    text: str
    voice: Optional[str] = "v2/en_speaker_6"  # Default voice
    temperature: Optional[float] = 0.7
    silence_duration: Optional[float] = 0.25
    seed: Optional[int] = None

class VoiceRequest(BaseModel):
    voice: str

class BarkServer:
    def __init__(self):
        self.model_loaded = False
        self.current_voice = "v2/en_speaker_6"
        self.available_voices = self._get_available_voices()
        self.available_models = ["bark"]

        if bark_available:
            self._initialize_model()

    def _initialize_model(self):
        """Initialize the Bark model"""
        try:
            print("Server: Initializing Bark model (this may take a few minutes on first run)...")

            # Preload models for faster generation
            preload_models()

            self.model_loaded = True
            print("Server: Bark model loaded successfully")

        except Exception as e:
            print(f"Server: Error initializing Bark model: {e}")
            print(f"Server: Traceback:\n{traceback.format_exc()}")
            self.model_loaded = False

    def _get_available_voices(self) -> List[str]:
        """Return list of available Bark voices"""
        # Bark voice presets - these are the built-in speaker voices
        voices = [
            # English speakers
            "v2/en_speaker_0", "v2/en_speaker_1", "v2/en_speaker_2", "v2/en_speaker_3",
            "v2/en_speaker_4", "v2/en_speaker_5", "v2/en_speaker_6", "v2/en_speaker_7",
            "v2/en_speaker_8", "v2/en_speaker_9",

            # Chinese speakers
            "v2/zh_speaker_0", "v2/zh_speaker_1", "v2/zh_speaker_2", "v2/zh_speaker_3",
            "v2/zh_speaker_4", "v2/zh_speaker_5", "v2/zh_speaker_6", "v2/zh_speaker_7",
            "v2/zh_speaker_8", "v2/zh_speaker_9",

            # French speakers
            "v2/fr_speaker_0", "v2/fr_speaker_1", "v2/fr_speaker_2", "v2/fr_speaker_3",
            "v2/fr_speaker_4", "v2/fr_speaker_5", "v2/fr_speaker_6", "v2/fr_speaker_7",
            "v2/fr_speaker_8", "v2/fr_speaker_9",

            # German speakers
            "v2/de_speaker_0", "v2/de_speaker_1", "v2/de_speaker_2", "v2/de_speaker_3",
            "v2/de_speaker_4", "v2/de_speaker_5", "v2/de_speaker_6", "v2/de_speaker_7",
            "v2/de_speaker_8", "v2/de_speaker_9",

            # Hindi speakers
            "v2/hi_speaker_0", "v2/hi_speaker_1", "v2/hi_speaker_2", "v2/hi_speaker_3",
            "v2/hi_speaker_4", "v2/hi_speaker_5", "v2/hi_speaker_6", "v2/hi_speaker_7",
            "v2/hi_speaker_8", "v2/hi_speaker_9",

            # Italian speakers
            "v2/it_speaker_0", "v2/it_speaker_1", "v2/it_speaker_2", "v2/it_speaker_3",
            "v2/it_speaker_4", "v2/it_speaker_5", "v2/it_speaker_6", "v2/it_speaker_7",
            "v2/it_speaker_8", "v2/it_speaker_9",

            # Japanese speakers
            "v2/ja_speaker_0", "v2/ja_speaker_1", "v2/ja_speaker_2", "v2/ja_speaker_3",
            "v2/ja_speaker_4", "v2/ja_speaker_5", "v2/ja_speaker_6", "v2/ja_speaker_7",
            "v2/ja_speaker_8", "v2/ja_speaker_9",

            # Korean speakers
            "v2/ko_speaker_0", "v2/ko_speaker_1", "v2/ko_speaker_2", "v2/ko_speaker_3",
            "v2/ko_speaker_4", "v2/ko_speaker_5", "v2/ko_speaker_6", "v2/ko_speaker_7",
            "v2/ko_speaker_8", "v2/ko_speaker_9",

            # Polish speakers
            "v2/pl_speaker_0", "v2/pl_speaker_1", "v2/pl_speaker_2", "v2/pl_speaker_3",
            "v2/pl_speaker_4", "v2/pl_speaker_5", "v2/pl_speaker_6", "v2/pl_speaker_7",
            "v2/pl_speaker_8", "v2/pl_speaker_9",

            # Portuguese speakers
            "v2/pt_speaker_0", "v2/pt_speaker_1", "v2/pt_speaker_2", "v2/pt_speaker_3",
            "v2/pt_speaker_4", "v2/pt_speaker_5", "v2/pt_speaker_6", "v2/pt_speaker_7",
            "v2/pt_speaker_8", "v2/pt_speaker_9",

            # Russian speakers
            "v2/ru_speaker_0", "v2/ru_speaker_1", "v2/ru_speaker_2", "v2/ru_speaker_3",
            "v2/ru_speaker_4", "v2/ru_speaker_5", "v2/ru_speaker_6", "v2/ru_speaker_7",
            "v2/ru_speaker_8", "v2/ru_speaker_9",

            # Spanish speakers
            "v2/es_speaker_0", "v2/es_speaker_1", "v2/es_speaker_2", "v2/es_speaker_3",
            "v2/es_speaker_4", "v2/es_speaker_5", "v2/es_speaker_6", "v2/es_speaker_7",
            "v2/es_speaker_8", "v2/es_speaker_9",

            # Turkish speakers
            "v2/tr_speaker_0", "v2/tr_speaker_1", "v2/tr_speaker_2", "v2/tr_speaker_3",
            "v2/tr_speaker_4", "v2/tr_speaker_5", "v2/tr_speaker_6", "v2/tr_speaker_7",
            "v2/tr_speaker_8", "v2/tr_speaker_9",
        ]

        return voices

    def generate_audio(self, text: str, voice: Optional[str] = None, temperature: float = 0.7,
                       silence_duration: float = 0.25, seed: Optional[int] = None) -> bytes:
        """Generate audio from text using Bark"""
        if not bark_available:
            raise RuntimeError("Bark library not available")

        if not self.model_loaded:
            raise RuntimeError("Bark model not initialized")

        try:
            # Use provided voice or current default
            speaker_voice = voice or self.current_voice

            print(f"Server: Generating audio for: '{text[:50]}{'...' if len(text) > 50 else ''}'")
            print(f"Server: Using voice: {speaker_voice}")
            print(f"Server: Temperature: {temperature}, Seed: {seed}")

            # Set seed for reproducibility if provided
            if seed is not None:
                set_seed(seed)

            # Generate audio using Bark
            # Bark expects text prompts that can include special tokens for emotions, etc.
            audio_array = generate_audio(
                text,
                history_prompt=speaker_voice,
                text_temp=temperature,
                waveform_temp=temperature
            )

            # Add silence at the end if requested
            if silence_duration > 0:
                silence_samples = int(SAMPLE_RATE * silence_duration)
                silence = np.zeros(silence_samples, dtype=audio_array.dtype)
                audio_array = np.concatenate([audio_array, silence])

            # Convert to 16-bit PCM
            audio_array = (audio_array * 32767).astype(np.int16)

            # Convert to WAV bytes
            buffer = io.BytesIO()
            with wave.open(buffer, 'wb') as wav_file:
                wav_file.setnchannels(1)  # Mono
                wav_file.setsampwidth(2)  # 16-bit
                wav_file.setframerate(SAMPLE_RATE)
                wav_file.writeframes(audio_array.tobytes())

            audio_bytes = buffer.getvalue()
            print(f"Server: Generated {len(audio_bytes)} bytes of audio")
            return audio_bytes

        except Exception as e:
            print(f"Server: Error generating audio: {e}")
            print(f"Server: Traceback:\n{traceback.format_exc()}")
            raise

    def set_voice(self, voice: str) -> bool:
        """Set the current default voice"""
        if voice in self.available_voices:
            self.current_voice = voice
            print(f"Server: Voice changed to: {voice}")
            return True
        else:
            print(f"Server: Voice '{voice}' not found in available voices")
            return False

    def list_voices(self) -> List[str]:
        """Return list of available voices"""
        return self.available_voices

    def list_models(self) -> List[str]:
        """Return list of available models"""
        return self.available_models

# --- Globals ---
app = FastAPI(title="Bark TTS Server")
router = APIRouter()
bark_server = BarkServer()
model_lock = asyncio.Lock()  # Ensure thread-safe access

# --- API Endpoints ---
@router.post("/generate_audio")
async def generate_audio(request: GenerationRequest):
    async with model_lock:
        try:
            audio_bytes = bark_server.generate_audio(
                text=request.text,
                voice=request.voice,
                temperature=request.temperature,
                silence_duration=request.silence_duration,
                seed=request.seed
            )
            from fastapi.responses import Response
            return Response(content=audio_bytes, media_type="audio/wav")
        except Exception as e:
            print(f"Server: ERROR in generate_audio endpoint: {e}")
            print(f"Server: ERROR traceback:\n{traceback.format_exc()}")
            raise HTTPException(status_code=500, detail=str(e))

@router.post("/set_voice")
async def set_voice(request: VoiceRequest):
    try:
        success = bark_server.set_voice(request.voice)
        if success:
            return {"success": True, "message": f"Voice set to {request.voice}"}
        else:
            return {"success": False, "message": f"Voice {request.voice} not found"}
    except Exception as e:
        print(f"Server: ERROR in set_voice endpoint: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@router.get("/list_voices")
async def list_voices():
    try:
        voices = bark_server.list_voices()
        print(f"Server: Returning {len(voices)} voices")
        return {"voices": voices}
    except Exception as e:
        print(f"Server: ERROR in list_voices endpoint: {e}")
        print(f"Server: ERROR traceback:\n{traceback.format_exc()}")
        raise HTTPException(status_code=500, detail=str(e))

@router.get("/list_models")
async def list_models():
    try:
        models = bark_server.list_models()
        print(f"Server: Returning {len(models)} models: {models}")
        return {"models": models}
    except Exception as e:
        print(f"Server: ERROR in list_models endpoint: {e}")
        print(f"Server: ERROR traceback:\n{traceback.format_exc()}")
        raise HTTPException(status_code=500, detail=str(e))

@router.get("/status")
async def status():
    return {
        "status": "running",
        "bark_available": bark_available,
        "model_loaded": bark_server.model_loaded,
        "current_voice": bark_server.current_voice,
        "voices_count": len(bark_server.available_voices),
        "device": torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CPU"
    }

app.include_router(router)

# --- Server Startup ---
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Bark TTS Server")
    parser.add_argument("--host", type=str, default="localhost", help="Host to bind the server to.")
    parser.add_argument("--port", type=int, default=8082, help="Port to bind the server to.")

    args = parser.parse_args()

    print(f"Server: Starting Bark TTS server on {args.host}:{args.port}")
    print(f"Server: Bark available: {bark_available}")
    print(f"Server: Model loaded: {bark_server.model_loaded}")
    print(f"Server: Available voices: {len(bark_server.available_voices)}")
    print(f"Server: Device: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU'}")

    uvicorn.run(app, host=args.host, port=args.port)
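For context, the sketch below (not part of the package diff) shows how the endpoints added by this new server file could be exercised after starting it with its defaults (python main.py --host localhost --port 8082). The endpoint paths and JSON field names are taken from the router and GenerationRequest definitions above; the base URL, output filename, and sample payload values are illustrative assumptions.

# Hypothetical client for the Bark TTS server added in this release.
# Assumes the server is running on its default localhost:8082.
import requests

BASE_URL = "http://localhost:8082"  # assumption: argparse defaults from main.py

# Check server health, loaded model, and active device
print(requests.get(f"{BASE_URL}/status").json())

# List the built-in Bark voice presets exposed by /list_voices
voices = requests.get(f"{BASE_URL}/list_voices").json()["voices"]
print(f"{len(voices)} voices available")

# Generate speech; fields mirror the GenerationRequest model
payload = {
    "text": "Hello from the Bark TTS server.",
    "voice": "v2/en_speaker_6",
    "temperature": 0.7,
    "silence_duration": 0.25,
    "seed": 42,
}
response = requests.post(f"{BASE_URL}/generate_audio", json=payload)
response.raise_for_status()

# The endpoint returns raw WAV bytes (mono, 16-bit PCM at Bark's sample rate)
with open("bark_output.wav", "wb") as f:  # illustrative output path
    f.write(response.content)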