lollms-client 0.15.2__py3-none-any.whl → 0.17.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/generate_and_speak/generate_and_speak.py +251 -0
- examples/generate_game_sfx/generate_game_fx.py +240 -0
- examples/simple_text_gen_with_image_test.py +8 -8
- examples/text_2_image.py +0 -1
- examples/text_gen.py +1 -1
- lollms_client/__init__.py +1 -1
- lollms_client/llm_bindings/llamacpp/__init__.py +61 -11
- lollms_client/llm_bindings/lollms/__init__.py +31 -24
- lollms_client/llm_bindings/ollama/__init__.py +47 -27
- lollms_client/llm_bindings/openai/__init__.py +62 -35
- lollms_client/llm_bindings/openllm/__init__.py +4 -1
- lollms_client/llm_bindings/pythonllamacpp/__init__.py +3 -0
- lollms_client/llm_bindings/tensor_rt/__init__.py +4 -1
- lollms_client/llm_bindings/transformers/__init__.py +3 -0
- lollms_client/llm_bindings/vllm/__init__.py +4 -1
- lollms_client/lollms_core.py +65 -33
- lollms_client/lollms_llm_binding.py +76 -22
- lollms_client/lollms_stt_binding.py +3 -15
- lollms_client/lollms_tti_binding.py +5 -29
- lollms_client/lollms_ttm_binding.py +5 -28
- lollms_client/lollms_tts_binding.py +4 -28
- lollms_client/lollms_ttv_binding.py +4 -28
- lollms_client/lollms_utilities.py +5 -3
- lollms_client/stt_bindings/lollms/__init__.py +5 -4
- lollms_client/stt_bindings/whisper/__init__.py +304 -0
- lollms_client/stt_bindings/whispercpp/__init__.py +380 -0
- lollms_client/tti_bindings/lollms/__init__.py +4 -6
- lollms_client/ttm_bindings/audiocraft/__init__.py +281 -0
- lollms_client/ttm_bindings/bark/__init__.py +339 -0
- lollms_client/tts_bindings/bark/__init__.py +336 -0
- lollms_client/tts_bindings/piper_tts/__init__.py +343 -0
- lollms_client/tts_bindings/xtts/__init__.py +317 -0
- lollms_client-0.17.0.dist-info/METADATA +183 -0
- lollms_client-0.17.0.dist-info/RECORD +65 -0
- lollms_client-0.15.2.dist-info/METADATA +0 -192
- lollms_client-0.15.2.dist-info/RECORD +0 -56
- {lollms_client-0.15.2.dist-info → lollms_client-0.17.0.dist-info}/WHEEL +0 -0
- {lollms_client-0.15.2.dist-info → lollms_client-0.17.0.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-0.15.2.dist-info → lollms_client-0.17.0.dist-info}/top_level.txt +0 -0
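The headline change in 0.17.0 is a set of new local audio bindings: Whisper and whisper.cpp for STT, Bark/Piper/XTTS for TTS, and AudioCraft/Bark for TTM. The new Whisper STT binding is shown in full below. As a quick orientation, here is a minimal usage sketch built only from the constructor and methods visible in that diff (the audio file name is illustrative):

    from lollms_client.stt_bindings.whisper import WhisperSTTBinding

    # Loads the "base" checkpoint on an auto-detected device (cuda/mps/cpu).
    stt = WhisperSTTBinding(model_name="base")
    print(stt.list_models())                      # standard Whisper size identifiers
    text = stt.transcribe_audio("recording.wav")  # requires ffmpeg on the system PATH
    print(text)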
@@ -0,0 +1,304 @@
# lollms_client/stt_bindings/whisper/__init__.py
import os
from pathlib import Path
from typing import Optional, List, Union, Dict, Any
from ascii_colors import trace_exception, ASCIIColors

# --- Package Management and Conditional Imports ---
_whisper_installed = False
_whisper_installation_error = ""

try:
    import pipmaster as pm
    import platform  # For OS detection for torch index

    # Determine initial device preference to guide torch installation
    preferred_torch_device_for_install = "cpu"  # Default assumption

    # Tentatively set preference based on OS, assuming the user might want GPU if available
    if platform.system() == "Linux" or platform.system() == "Windows":
        # On Linux/Windows, CUDA is the primary GPU acceleration for PyTorch,
        # so we try to install a CUDA build of PyTorch.
        preferred_torch_device_for_install = "cuda"
    elif platform.system() == "Darwin":
        # On macOS, MPS is the acceleration. A standard torch install usually handles this.
        preferred_torch_device_for_install = "mps"

    torch_pkgs = ["torch", "torchaudio", "xformers"]
    whisper_core_pkgs = ["openai-whisper"]

    torch_index_url = None
    if preferred_torch_device_for_install == "cuda":
        # Specify a common CUDA wheel index; pip should resolve the correct torch version.
        # Common recent targets are cu118/cu121/cu126; cu126 is used here.
        # Users with different CUDA setups may need to pre-install torch manually.
        torch_index_url = "https://download.pytorch.org/whl/cu126"
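        # For other CUDA toolchains, pre-install matching wheels before importing this
        # binding, e.g.: pip install torch torchaudio --index-url https://download.pytorch.org/whl/cu121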
        ASCIIColors.info(f"Attempting to ensure PyTorch with CUDA support (target index: {torch_index_url})")
        # Install torch and torchaudio first from the specific index
        pm.ensure_packages(torch_pkgs, index_url=torch_index_url)
        # Then install whisper; pip should reuse the already installed torch
        pm.ensure_packages(whisper_core_pkgs)
    else:
        # For CPU, MPS, or if no specific CUDA preference was determined for install
        ASCIIColors.info("Ensuring PyTorch, Whisper, and dependencies using the default PyPI index.")
        pm.ensure_packages(torch_pkgs + whisper_core_pkgs)

    import whisper
    import torch
    _whisper_installed = True
except Exception as e:
    _whisper_installation_error = str(e)
    whisper = None
    torch = None

# --- End Package Management ---

from lollms_client.lollms_stt_binding import LollmsSTTBinding

# Defines the binding name for the manager
BindingName = "WhisperSTTBinding"


class WhisperSTTBinding(LollmsSTTBinding):
    """
    LollmsSTTBinding implementation for OpenAI's Whisper model.
    This binding runs Whisper locally.
    Requires `ffmpeg` to be installed on the system.
    """

    # Standard Whisper model sizes
    WHISPER_MODEL_SIZES = ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-v1", "large-v2", "large-v3"]

    def __init__(self,
                 model_name: str = "base",      # Default Whisper model size
                 device: Optional[str] = None,  # "cpu", "cuda", "mps", or None for auto
                 **kwargs                       # To catch any other LollmsSTTBinding standard args
                 ):
        """
        Initialize the Whisper STT binding.

        Args:
            model_name (str): The Whisper model size to use (e.g., "tiny", "base", "small",
                "medium", "large", "large-v2", "large-v3"). Defaults to "base".
            device (Optional[str]): The device to run the model on ("cpu", "cuda", "mps").
                If None, `torch` will attempt to auto-detect. Defaults to None.
        """
        super().__init__(binding_name="whisper")

        if not _whisper_installed:
            raise ImportError(f"Whisper STT binding dependencies not met. Please ensure 'openai-whisper' and 'torch' are installed. Error: {_whisper_installation_error}")

        self.device = device
        if self.device is None:  # Auto-detect if not specified
            if torch.cuda.is_available():
                self.device = "cuda"
            elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():  # For Apple Silicon
                self.device = "mps"
            else:
                self.device = "cpu"

        ASCIIColors.info(f"WhisperSTTBinding: Using device '{self.device}'.")

        self.loaded_model_name = None
        self.model = None
        self._load_whisper_model(model_name)

    def _load_whisper_model(self, model_name_to_load: str):
        """Loads or reloads the Whisper model."""
        if model_name_to_load not in self.WHISPER_MODEL_SIZES:
            ASCIIColors.warning(f"'{model_name_to_load}' is not a standard Whisper model size. Attempting to load anyway. Known sizes: {self.WHISPER_MODEL_SIZES}")

        if self.model is not None and self.loaded_model_name == model_name_to_load:
            ASCIIColors.info(f"Whisper model '{model_name_to_load}' already loaded.")
            return

        ASCIIColors.info(f"Loading Whisper model: '{model_name_to_load}' on device '{self.device}'...")
        try:
            # Whisper's load_model may download the model if it is not already cached
            # (the cache is typically in ~/.cache/whisper).
            self.model = whisper.load_model(model_name_to_load, device=self.device)
            self.loaded_model_name = model_name_to_load
            self.model_name = model_name_to_load  # Update the binding's current model_name
            ASCIIColors.green(f"Whisper model '{model_name_to_load}' loaded successfully.")
        except Exception as e:
            self.model = None
            self.loaded_model_name = None
            ASCIIColors.error(f"Failed to load Whisper model '{model_name_to_load}': {e}")
            trace_exception(e)
            # Re-raise as a critical error for initialization or model switching
            raise RuntimeError(f"Failed to load Whisper model '{model_name_to_load}'") from e

    def transcribe_audio(self, audio_path: Union[str, Path], model: Optional[str] = None, **kwargs) -> str:
        """
        Transcribes the audio file at the given path using Whisper.

        Args:
            audio_path (Union[str, Path]): The path to the audio file to transcribe.
            model (Optional[str]): The specific Whisper model size to use.
                If None, uses the model loaded during initialization.
            **kwargs: Additional parameters for Whisper's transcribe method, e.g.:
                `language` (str): Language code (e.g., "en", "fr"). If None, Whisper auto-detects.
                `fp16` (bool): Whether to use fp16; defaults to True if CUDA is available.
                `task` (str): "transcribe" or "translate".

        Returns:
            str: The transcribed text.

        Raises:
            FileNotFoundError: If the audio file does not exist.
            RuntimeError: If the Whisper model is not loaded or transcription fails.
            Exception: For other errors during transcription.
        """
        audio_file = Path(audio_path)
        if not audio_file.exists():
            raise FileNotFoundError(f"Audio file not found at: {audio_path}")

        if model and model != self.loaded_model_name:
            ASCIIColors.info(f"Switching Whisper model to '{model}' for this transcription.")
            try:
                self._load_whisper_model(model)  # Attempt to load the new model
            except RuntimeError as e:
                # If switching fails, keep using the old model if available, or raise if none is loaded
                if self.model is None:
                    raise RuntimeError(f"Failed to switch to Whisper model '{model}' and no model currently loaded.") from e
                else:
                    ASCIIColors.warning(f"Failed to switch to Whisper model '{model}'. Using previously loaded model '{self.loaded_model_name}'. Error: {e}")

        if self.model is None:
            raise RuntimeError("Whisper model is not loaded. Cannot transcribe.")

        # Prepare Whisper-specific options from kwargs
        whisper_options = {}
        if "language" in kwargs:
            whisper_options["language"] = kwargs["language"]
        if "fp16" in kwargs:  # Typically handled by device selection, but allow override
            whisper_options["fp16"] = kwargs["fp16"]
        else:  # Default fp16 based on device
            whisper_options["fp16"] = (self.device == "cuda")
        if "task" in kwargs:  # "transcribe" or "translate"
            whisper_options["task"] = kwargs["task"]

        ASCIIColors.info(f"Transcribing '{audio_file.name}' with Whisper model '{self.loaded_model_name}' (options: {whisper_options})...")
        try:
            result = self.model.transcribe(str(audio_file), **whisper_options)
            transcribed_text = result.get("text", "")
            ASCIIColors.green("Transcription successful.")
            return transcribed_text.strip()
        except Exception as e:
            ASCIIColors.error(f"Whisper transcription failed for '{audio_file.name}': {e}")
            trace_exception(e)
            raise Exception(f"Whisper transcription error: {e}") from e

    def list_models(self, **kwargs) -> List[str]:
        """
        Lists the available standard Whisper model sizes.

        Args:
            **kwargs: Additional parameters (currently unused).

        Returns:
            List[str]: A list of available Whisper model size identifiers.
        """
        return self.WHISPER_MODEL_SIZES.copy()  # Return a copy

    def __del__(self):
        """Clean up: unload the model to free resources."""
        if self.model is not None:
            del self.model
            self.model = None
            if torch and hasattr(torch, 'cuda') and torch.cuda.is_available():
                torch.cuda.empty_cache()
            ASCIIColors.info(f"WhisperSTTBinding for model '{self.loaded_model_name}' destroyed and resources released.")


# --- Main Test Block (Example Usage) ---
if __name__ == '__main__':
    if not _whisper_installed:
        ASCIIColors.red(f"Whisper dependencies not met. Skipping tests. Error: {_whisper_installation_error}")
        ASCIIColors.yellow("Please ensure 'openai-whisper' and 'torch' are installed, and 'ffmpeg' is in your system PATH.")
        exit()

    ASCIIColors.yellow("--- WhisperSTTBinding Test ---")

    # --- Prerequisites for testing ---
    # Create a dummy WAV file for testing, or provide a path to a real one.
    # `scipy` is needed to create the dummy WAV easily; otherwise use an external tool.
    test_audio_file = Path("test_audio_for_whisper.wav")

    # Try to create a dummy file if it doesn't exist (requires scipy)
    if not test_audio_file.exists():
        try:
            import numpy as np
            from scipy.io.wavfile import write as write_wav

            samplerate = 44100
            fs = 100  # Tone frequency (Hz)
            t = np.linspace(0., 1., samplerate)
            amplitude = np.iinfo(np.int16).max
            data = amplitude * np.sin(2. * np.pi * fs * t)
            write_wav(test_audio_file, samplerate, data.astype(np.int16))
            ASCIIColors.green(f"Created dummy audio file: {test_audio_file}")
        except ImportError:
            ASCIIColors.warning("SciPy not installed. Cannot create dummy audio file.")
            ASCIIColors.warning(f"Please place a '{test_audio_file.name}' in the current directory or modify the path.")
        except Exception as e_dummy_audio:
            ASCIIColors.error(f"Could not create dummy audio file: {e_dummy_audio}")

    if not test_audio_file.exists():
        ASCIIColors.error(f"Test audio file '{test_audio_file}' not found. Skipping transcription test.")
    else:
        try:
            ASCIIColors.cyan("\n--- Initializing WhisperSTTBinding (model: 'tiny') ---")
            # Using the 'tiny' model for faster testing. Change to 'base' or 'small' for better quality.
            stt_binding = WhisperSTTBinding(model_name="tiny")

            ASCIIColors.cyan("\n--- Listing available Whisper models ---")
            models = stt_binding.list_models()
            print(f"Available models: {models}")

            ASCIIColors.cyan(f"\n--- Transcribing '{test_audio_file.name}' with 'tiny' model ---")
            transcription = stt_binding.transcribe_audio(test_audio_file)
            print(f"Transcription (tiny): '{transcription}'")

            # Test with a specific language hint (if your audio is not English, or for robustness):
            # transcription_lang_hint = stt_binding.transcribe_audio(test_audio_file, language="en")
            # print(f"Transcription (tiny, lang='en'): '{transcription_lang_hint}'")

            # Test switching the model dynamically (optional; will re-download/load if different):
            # transcription_base = stt_binding.transcribe_audio(test_audio_file, model="base")
            # print(f"Transcription (base): '{transcription_base}'")

        except ImportError as e_imp:
            ASCIIColors.error(f"Import error during test: {e_imp}")
            ASCIIColors.info("This might be due to `openai-whisper` or `torch` not being installed correctly.")
        except FileNotFoundError as e_fnf:
            ASCIIColors.error(f"File not found during test: {e_fnf}")
        except RuntimeError as e_rt:
            ASCIIColors.error(f"Runtime error during test (often a model load or ffmpeg issue): {e_rt}")
            if "ffmpeg" in str(e_rt).lower():
                ASCIIColors.yellow("This error often means 'ffmpeg' is not installed or not found in your system's PATH.")
                ASCIIColors.yellow("Please install ffmpeg: https://ffmpeg.org/download.html")
        except Exception as e:
            ASCIIColors.error(f"An unexpected error occurred during testing: {e}")
            trace_exception(e)
        finally:
            # The dummy audio file is intentionally not auto-deleted here;
            # remove test_audio_for_whisper.wav manually if you no longer need it.
            pass

    ASCIIColors.yellow("\n--- WhisperSTTBinding Test Finished ---")
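The transcribe-time kwargs documented in `transcribe_audio` map directly onto `whisper`'s own `transcribe()` options. A short sketch of the less obvious ones; the file names and model sizes here are illustrative, not part of the release:

    stt = WhisperSTTBinding(model_name="small", device="cuda")

    # task="translate" produces English text from non-English speech
    english = stt.transcribe_audio("interview_fr.wav", language="fr", task="translate")

    # Passing model= switches checkpoints for this call; if the switch fails,
    # the binding falls back to the previously loaded model
    careful = stt.transcribe_audio("noisy_call.wav", model="large-v3")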