lollms-client 1.3.4__py3-none-any.whl → 1.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lollms-client might be problematic; consult the registry's security advisory page for more details.

@@ -0,0 +1,311 @@
1
+ # File: lollms_client/tts_bindings/bark/server/main.py
2
+
3
+ import uvicorn
4
+ from fastapi import FastAPI, APIRouter, HTTPException
5
+ from pydantic import BaseModel
6
+ import argparse
7
+ import sys
8
+ from pathlib import Path
9
+ import asyncio
10
+ import traceback
11
+ import os
12
+ from typing import Optional, List
13
+ import io
14
+ import wave
15
+ import numpy as np
16
+
17
# --- Bark TTS Implementation ---
# Optional import of the Bark TTS stack. On any failure the server still
# starts, but `bark_available` is False and the endpoints report / raise
# accordingly (degraded mode).
try:
    print("Server: Loading Bark dependencies...")
    import torch
    import torchaudio
    from bark import SAMPLE_RATE, generate_audio, preload_models
    from bark.generation import set_seed
    print("Server: Bark dependencies loaded successfully")

    # Check for CUDA availability
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Server: Using device: {device}")

    # Set environment variable for Bark to use GPU if available
    # NOTE(review): torch is already imported (and was just queried for CUDA)
    # at this point, so setting CUDA_VISIBLE_DEVICES here may have no effect
    # if a CUDA context already exists — confirm intended behavior.
    if device == "cuda":
        os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    bark_available = True

except Exception as e:
    # Any import/initialization error leaves the server in degraded mode.
    print(f"Server: Failed to load Bark dependencies: {e}")
    print(f"Server: Traceback:\n{traceback.format_exc()}")
    bark_available = False
    SAMPLE_RATE = 24000 # Bark's default sample rate
    # NOTE(review): `torch`, `device`, `set_seed`, `generate_audio` and
    # `preload_models` are all unbound on this path; any later unconditional
    # reference to them raises NameError.
41
+
42
# --- API Models ---
class GenerationRequest(BaseModel):
    """Request body for POST /generate_audio."""
    text: str                                      # Prompt text to synthesize
    voice: Optional[str] = "v2/en_speaker_6" # Default voice
    temperature: Optional[float] = 0.7             # Sampling temperature (text and waveform)
    silence_duration: Optional[float] = 0.25       # Trailing silence, in seconds
    seed: Optional[int] = None                     # Optional RNG seed for reproducibility
49
+
50
class VoiceRequest(BaseModel):
    """Request body for POST /set_voice."""
    voice: str  # Must be one of the presets returned by /list_voices
52
+
53
class BarkServer:
    """Holds Bark model state and performs text-to-speech generation.

    A single instance is shared by all endpoints; request handlers serialize
    access through the module-level asyncio lock.
    """

    # Language codes of the built-in "v2" speaker presets, in the exact order
    # the original hand-written 130-entry list enumerated them.
    _PRESET_LANGS = ("en", "zh", "fr", "de", "hi", "it", "ja",
                     "ko", "pl", "pt", "ru", "es", "tr")
    _SPEAKERS_PER_LANG = 10

    def __init__(self):
        self.model_loaded = False                         # True once preload_models() succeeds
        self.current_voice = "v2/en_speaker_6"            # Default speaker preset
        self.available_voices = self._get_available_voices()
        self.available_models = ["bark"]                  # Only one backend model

        if bark_available:
            self._initialize_model()

    def _initialize_model(self):
        """Preload the Bark models; sets ``self.model_loaded`` on success."""
        try:
            print("Server: Initializing Bark model (this may take a few minutes on first run)...")

            # Preload models for faster generation
            preload_models()

            self.model_loaded = True
            print("Server: Bark model loaded successfully")

        except Exception as e:
            print(f"Server: Error initializing Bark model: {e}")
            print(f"Server: Traceback:\n{traceback.format_exc()}")
            self.model_loaded = False

    def _get_available_voices(self) -> List[str]:
        """Return the built-in Bark "v2" speaker presets.

        Generated programmatically instead of a 130-entry literal; the order
        (language, then speaker index 0-9) matches the original list exactly.
        """
        return [
            f"v2/{lang}_speaker_{idx}"
            for lang in self._PRESET_LANGS
            for idx in range(self._SPEAKERS_PER_LANG)
        ]

    def generate_audio(self, text: str, voice: Optional[str] = None, temperature: float = 0.7,
                       silence_duration: float = 0.25, seed: Optional[int] = None) -> bytes:
        """Generate mono 16-bit WAV audio for *text* using Bark.

        Args:
            text: Prompt text (may contain Bark's special tokens).
            voice: History-prompt preset; defaults to ``self.current_voice``.
            temperature: Applied to both text and waveform sampling.
            silence_duration: Seconds of trailing silence to append.
            seed: Optional RNG seed for reproducible output.

        Returns:
            Complete WAV file content as bytes.

        Raises:
            RuntimeError: If the Bark library or model is unavailable.
        """
        if not bark_available:
            raise RuntimeError("Bark library not available")

        if not self.model_loaded:
            raise RuntimeError("Bark model not initialized")

        try:
            # Use provided voice or current default
            speaker_voice = voice or self.current_voice

            print(f"Server: Generating audio for: '{text[:50]}{'...' if len(text) > 50 else ''}'")
            print(f"Server: Using voice: {speaker_voice}")
            print(f"Server: Temperature: {temperature}, Seed: {seed}")

            # Set seed for reproducibility if provided
            if seed is not None:
                set_seed(seed)

            # Generate audio using Bark; prompts can include special tokens
            # for emotions etc.
            audio_array = generate_audio(
                text,
                history_prompt=speaker_voice,
                text_temp=temperature,
                waveform_temp=temperature
            )

            # Add silence at the end if requested
            if silence_duration > 0:
                silence_samples = int(SAMPLE_RATE * silence_duration)
                silence = np.zeros(silence_samples, dtype=audio_array.dtype)
                audio_array = np.concatenate([audio_array, silence])

            # Convert float audio to 16-bit PCM. Clip to [-1, 1] first:
            # generated samples can slightly exceed that range, and an
            # unclipped cast wraps around and produces loud artifacts.
            audio_array = np.clip(audio_array, -1.0, 1.0)
            audio_array = (audio_array * 32767).astype(np.int16)

            # Serialize as a WAV container in memory
            buffer = io.BytesIO()
            with wave.open(buffer, 'wb') as wav_file:
                wav_file.setnchannels(1)  # Mono
                wav_file.setsampwidth(2)  # 16-bit
                wav_file.setframerate(SAMPLE_RATE)
                wav_file.writeframes(audio_array.tobytes())

            audio_bytes = buffer.getvalue()
            print(f"Server: Generated {len(audio_bytes)} bytes of audio")
            return audio_bytes

        except Exception as e:
            print(f"Server: Error generating audio: {e}")
            print(f"Server: Traceback:\n{traceback.format_exc()}")
            raise

    def set_voice(self, voice: str) -> bool:
        """Set the current default voice; returns False if *voice* is unknown."""
        if voice in self.available_voices:
            self.current_voice = voice
            print(f"Server: Voice changed to: {voice}")
            return True
        else:
            print(f"Server: Voice '{voice}' not found in available voices")
            return False

    def list_voices(self) -> List[str]:
        """Return list of available voices"""
        return self.available_voices

    def list_models(self) -> List[str]:
        """Return list of available models"""
        return self.available_models
224
+
225
# --- Globals ---
app = FastAPI(title="Bark TTS Server")
router = APIRouter()
# Single shared TTS backend; constructed at import time (may take minutes on
# first run while Bark downloads/preloads its models).
bark_server = BarkServer()
model_lock = asyncio.Lock() # Ensure thread-safe access
230
+
231
+ # --- API Endpoints ---
232
@router.post("/generate_audio")
async def generate_audio_endpoint(request: GenerationRequest):
    """Synthesize speech for ``request.text`` and return it as WAV bytes.

    Renamed from ``generate_audio``: the original handler name rebound the
    module-level ``generate_audio`` imported from ``bark``, so
    ``BarkServer.generate_audio``'s call to that global resolved to this
    async endpoint at request time and TTS failed with a TypeError. The
    HTTP route ("/generate_audio") is unchanged.
    """
    async with model_lock:
        try:
            # Bark generation is CPU/GPU-bound and can take many seconds;
            # run it in a worker thread so the event loop stays responsive
            # for the other endpoints while audio is produced. The lock is
            # still held, so generations remain serialized.
            loop = asyncio.get_running_loop()
            audio_bytes = await loop.run_in_executor(
                None,
                lambda: bark_server.generate_audio(
                    text=request.text,
                    voice=request.voice,
                    temperature=request.temperature,
                    silence_duration=request.silence_duration,
                    seed=request.seed,
                ),
            )
            from fastapi.responses import Response
            return Response(content=audio_bytes, media_type="audio/wav")
        except Exception as e:
            print(f"Server: ERROR in generate_audio endpoint: {e}")
            print(f"Server: ERROR traceback:\n{traceback.format_exc()}")
            raise HTTPException(status_code=500, detail=str(e))
249
+
250
@router.post("/set_voice")
async def set_voice(request: VoiceRequest):
    """Change the server's default voice preset; reports success as JSON."""
    try:
        if bark_server.set_voice(request.voice):
            return {"success": True, "message": f"Voice set to {request.voice}"}
        return {"success": False, "message": f"Voice {request.voice} not found"}
    except Exception as e:
        print(f"Server: ERROR in set_voice endpoint: {e}")
        raise HTTPException(status_code=500, detail=str(e))
261
+
262
@router.get("/list_voices")
async def list_voices():
    """Return every available Bark voice preset."""
    try:
        available = bark_server.list_voices()
        print(f"Server: Returning {len(available)} voices")
        return {"voices": available}
    except Exception as e:
        print(f"Server: ERROR in list_voices endpoint: {e}")
        print(f"Server: ERROR traceback:\n{traceback.format_exc()}")
        raise HTTPException(status_code=500, detail=str(e))
272
+
273
@router.get("/list_models")
async def list_models():
    """Return the names of the models this server can serve (only "bark")."""
    try:
        known = bark_server.list_models()
        print(f"Server: Returning {len(known)} models: {known}")
        return {"models": known}
    except Exception as e:
        print(f"Server: ERROR in list_models endpoint: {e}")
        print(f"Server: ERROR traceback:\n{traceback.format_exc()}")
        raise HTTPException(status_code=500, detail=str(e))
283
+
284
@router.get("/status")
async def status():
    """Report server health, model state, and the compute device in use."""
    # `torch` is only bound when the Bark import succeeded; guard the device
    # lookup so /status keeps working in degraded mode instead of raising
    # NameError — exactly the situation this endpoint should report.
    if bark_available and torch.cuda.is_available():
        device_name = torch.cuda.get_device_name(0)
    else:
        device_name = "CPU"
    return {
        "status": "running",
        "bark_available": bark_available,
        "model_loaded": bark_server.model_loaded,
        "current_voice": bark_server.current_voice,
        "voices_count": len(bark_server.available_voices),
        "device": device_name
    }
294
+
295
app.include_router(router)

# --- Server Startup ---
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Bark TTS Server")
    parser.add_argument("--host", type=str, default="localhost", help="Host to bind the server to.")
    parser.add_argument("--port", type=int, default=8082, help="Port to bind the server to.")

    args = parser.parse_args()

    # `torch` is unbound when the Bark import failed; guard the lookup so the
    # startup banner does not crash with NameError in degraded mode.
    if bark_available and torch.cuda.is_available():
        device_name = torch.cuda.get_device_name(0)
    else:
        device_name = 'CPU'

    print(f"Server: Starting Bark TTS server on {args.host}:{args.port}")
    print(f"Server: Bark available: {bark_available}")
    print(f"Server: Model loaded: {bark_server.model_loaded}")
    print(f"Server: Available voices: {len(bark_server.available_voices)}")
    print(f"Server: Device: {device_name}")

    uvicorn.run(app, host=args.host, port=args.port)