lollms-client 0.15.1__py3-none-any.whl → 0.16.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lollms-client might be problematic. Click here for more details.

@@ -0,0 +1,304 @@
1
+ # lollms_client/stt_bindings/whisper/__init__.py
2
+ import os
3
+ from pathlib import Path
4
+ from typing import Optional, List, Union, Dict, Any
5
+ from ascii_colors import trace_exception, ASCIIColors
6
+
7
+ # --- Package Management and Conditional Imports ---
8
# Import-time state describing whether the Whisper stack could be made available.
# WhisperSTTBinding.__init__ reads both values and raises ImportError when the
# flag is still False, so a failure here is deferred rather than raised on import.
_whisper_installed = False
_whisper_installation_error = ""

try:
    import pipmaster as pm
    import platform  # For OS detection for torch index

    # Choose the torch flavour from the operating system alone; no GPU is probed
    # at this point, the choice only steers which wheel index is used below.
    _system_name = platform.system()
    if _system_name in ("Linux", "Windows"):
        # On Linux/Windows, CUDA is the primary GPU acceleration for PyTorch,
        # so a CUDA build of PyTorch is requested.
        preferred_torch_device_for_install = "cuda"
    elif _system_name == "Darwin":
        # On macOS the default PyPI torch wheel is used (MPS acceleration).
        preferred_torch_device_for_install = "mps"
    else:
        preferred_torch_device_for_install = "cpu"  # Default assumption

    torch_pkgs = ["torch", "torchaudio", "xformers"]
    whisper_core_pkgs = ["openai-whisper"]

    torch_index_url = None
    if preferred_torch_device_for_install == "cuda":
        # Wheel index for CUDA 12.6 builds. Users with a different CUDA setup
        # might need to pre-install torch manually.
        torch_index_url = "https://download.pytorch.org/whl/cu126"
        ASCIIColors.info(f"Attempting to ensure PyTorch with CUDA support (target index: {torch_index_url})")
        # Install the torch packages first from the specific index ...
        pm.ensure_packages(torch_pkgs, index_url=torch_index_url)
        # ... then Whisper from the default index; pip should reuse the torch
        # that is already installed.
        pm.ensure_packages(whisper_core_pkgs)
    else:
        # For CPU, MPS, or if no specific CUDA preference was determined for install
        ASCIIColors.info("Ensuring PyTorch, Whisper, and dependencies using default PyPI index.")
        pm.ensure_packages(torch_pkgs + whisper_core_pkgs)

    import whisper
    import torch
    _whisper_installed = True
except Exception as e:
    # Deliberately broad: a missing pipmaster, a failed installation or a broken
    # import must all leave the module importable. The message is surfaced later
    # by the binding constructor.
    _whisper_installation_error = str(e)
    whisper = None
    torch = None
53
+
54
+
55
+ # --- End Package Management ---
56
+
57
+ from lollms_client.lollms_stt_binding import LollmsSTTBinding
58
+
59
# Defines the binding name for the manager.
# NOTE(review): the value is identical to the class name declared below, so the
# earlier "changed to avoid conflict with class name" remark looks stale — confirm.
BindingName = "WhisperSTTBinding"


class WhisperSTTBinding(LollmsSTTBinding):
    """
    LollmsSTTBinding implementation for OpenAI's Whisper model.

    This binding runs Whisper locally.
    Requires `ffmpeg` to be installed on the system.
    """

    # Standard Whisper model sizes
    WHISPER_MODEL_SIZES = ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-v1", "large-v2", "large-v3"]

    def __init__(self,
                 model_name: str = "base",  # Default Whisper model size
                 device: Optional[str] = None,  # "cpu", "cuda", "mps", or None for auto
                 **kwargs  # To catch any other LollmsSTTBinding standard args
                 ):
        """
        Initialize the Whisper STT binding and load the requested model.

        Args:
            model_name (str): The Whisper model size to use (e.g., "tiny", "base", "small",
                "medium", "large", "large-v2", "large-v3"). Defaults to "base".
            device (Optional[str]): The device to run the model on ("cpu", "cuda", "mps").
                If None, CUDA is preferred, then MPS, then CPU. Defaults to None.
            **kwargs: Accepted for signature compatibility; not used here.

        Raises:
            ImportError: If the Whisper/torch dependencies could not be imported.
            RuntimeError: If the model cannot be loaded.
        """
        super().__init__(binding_name="whisper")

        if not _whisper_installed:
            raise ImportError(f"Whisper STT binding dependencies not met. Please ensure 'openai-whisper' and 'torch' are installed. Error: {_whisper_installation_error}")

        self.device = device
        if self.device is None:  # Auto-detect if not specified
            if torch.cuda.is_available():
                self.device = "cuda"
            elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():  # For Apple Silicon
                self.device = "mps"
            else:
                self.device = "cpu"

        ASCIIColors.info(f"WhisperSTTBinding: Using device '{self.device}'.")

        self.loaded_model_name = None
        self.model = None
        self._load_whisper_model(model_name)

    def _load_whisper_model(self, model_name_to_load: str):
        """
        Load (or switch to) the Whisper model named `model_name_to_load`.

        The new model is only committed to the instance once loading succeeded,
        so a failed load leaves any previously loaded model untouched. The old
        and the new model may therefore coexist in memory for a moment.

        Args:
            model_name_to_load (str): Whisper model identifier. Names outside
                WHISPER_MODEL_SIZES only trigger a warning and are still tried.

        Raises:
            RuntimeError: If `whisper.load_model` fails for any reason.
        """
        if model_name_to_load not in self.WHISPER_MODEL_SIZES:
            ASCIIColors.warning(f"'{model_name_to_load}' is not a standard Whisper model size. Attempting to load anyway. Known sizes: {self.WHISPER_MODEL_SIZES}")

        if self.model is not None and self.loaded_model_name == model_name_to_load:
            ASCIIColors.info(f"Whisper model '{model_name_to_load}' already loaded.")
            return

        ASCIIColors.info(f"Loading Whisper model: '{model_name_to_load}' on device '{self.device}'...")
        try:
            # Whisper's load_model might download the model if not already cached.
            new_model = whisper.load_model(model_name_to_load, device=self.device)
        except Exception as e:
            ASCIIColors.error(f"Failed to load Whisper model '{model_name_to_load}': {e}")
            trace_exception(e)
            # Re-raise critical error for initialization or model switching
            raise RuntimeError(f"Failed to load Whisper model '{model_name_to_load}'") from e

        self.model = new_model
        self.loaded_model_name = model_name_to_load
        self.model_name = model_name_to_load  # Update the binding's current model_name
        ASCIIColors.green(f"Whisper model '{model_name_to_load}' loaded successfully.")

    def transcribe_audio(self, audio_path: Union[str, Path], model: Optional[str] = None, **kwargs) -> str:
        """
        Transcribes the audio file at the given path using Whisper.

        Args:
            audio_path (Union[str, Path]): The path to the audio file to transcribe.
            model (Optional[str]): The specific Whisper model size to use. If it differs
                from the loaded one, the binding switches to it and keeps it loaded for
                later calls. If the switch fails, the previously loaded model is used.
                If None, uses the currently loaded model.
            **kwargs: Only the following keys are forwarded to Whisper's transcribe method:
                `language` (str): Language code (e.g., "en", "fr").
                `fp16` (bool): Whether to use fp16; defaults to True on a CUDA device.
                `task` (str): "transcribe" or "translate".

        Returns:
            str: The transcribed text, stripped of surrounding whitespace.

        Raises:
            FileNotFoundError: If the audio file does not exist.
            RuntimeError: If no Whisper model is loaded or transcription fails.
        """
        audio_file = Path(audio_path)
        if not audio_file.exists():
            raise FileNotFoundError(f"Audio file not found at: {audio_path}")

        if model and model != self.loaded_model_name:
            ASCIIColors.info(f"Switching Whisper model to '{model}' for this transcription.")
            try:
                self._load_whisper_model(model)  # Attempt to load the new model
            except RuntimeError as e:
                # A failed load leaves the previous model in place, so fall back
                # to it when there is one; otherwise there is nothing to run.
                if self.model is None:
                    raise RuntimeError(f"Failed to switch to Whisper model '{model}' and no model currently loaded.") from e
                ASCIIColors.warning(f"Failed to switch to Whisper model '{model}'. Using previously loaded model '{self.loaded_model_name}'. Error: {e}")

        if self.model is None:
            raise RuntimeError("Whisper model is not loaded. Cannot transcribe.")

        # Prepare Whisper-specific options from kwargs
        whisper_options = {}
        if "language" in kwargs:
            whisper_options["language"] = kwargs["language"]
        if "fp16" in kwargs:  # Explicit override wins
            whisper_options["fp16"] = kwargs["fp16"]
        else:
            # Default fp16 based on device; the prefix test also covers indexed
            # devices such as "cuda:0".
            whisper_options["fp16"] = str(self.device).startswith("cuda")
        if "task" in kwargs:  # "transcribe" or "translate"
            whisper_options["task"] = kwargs["task"]

        ASCIIColors.info(f"Transcribing '{audio_file.name}' with Whisper model '{self.loaded_model_name}' (options: {whisper_options})...")
        try:
            result = self.model.transcribe(str(audio_file), **whisper_options)
        except Exception as e:
            ASCIIColors.error(f"Whisper transcription failed for '{audio_file.name}': {e}")
            trace_exception(e)
            raise RuntimeError(f"Whisper transcription error: {e}") from e

        transcribed_text = result.get("text", "")
        ASCIIColors.green("Transcription successful.")
        return transcribed_text.strip()

    def list_models(self, **kwargs) -> List[str]:
        """
        Lists the available standard Whisper model sizes.

        Args:
            **kwargs: Additional parameters (currently unused).

        Returns:
            List[str]: A fresh list of the identifiers in WHISPER_MODEL_SIZES.
        """
        return list(self.WHISPER_MODEL_SIZES)  # Return a copy

    def __del__(self):
        """Clean up: Unload the model to free resources."""
        # __init__ may have raised before `model` was ever assigned, so the
        # attribute is looked up defensively to keep the finalizer silent.
        if getattr(self, "model", None) is None:
            return
        del self.model
        self.model = None
        if torch and hasattr(torch, 'cuda') and torch.cuda.is_available():
            torch.cuda.empty_cache()
        ASCIIColors.info(f"WhisperSTTBinding for model '{getattr(self, 'loaded_model_name', None)}' destroyed and resources released.")
217
+
218
+
219
+ # --- Main Test Block (Example Usage) ---
220
if __name__ == '__main__':
    # Manual smoke test: builds (or reuses) a short WAV file, loads the 'tiny'
    # model and prints the transcription. Needs the Whisper dependencies.
    if not _whisper_installed:
        ASCIIColors.error(f"Whisper dependencies not met. Skipping tests. Error: {_whisper_installation_error}")
        ASCIIColors.warning("Please ensure 'openai-whisper' and 'torch' are installed, and 'ffmpeg' is in your system PATH.")
        # `exit()` is only injected by the `site` module; SystemExit always works.
        raise SystemExit

    ASCIIColors.yellow("--- WhisperSTTBinding Test ---")

    # --- Prerequisites for testing ---
    # A WAV file is needed; an existing file at this path is reused as is.
    test_audio_file = Path("test_audio_for_whisper.wav")

    # Try to create a dummy file if it doesn't exist (requires numpy and scipy)
    if not test_audio_file.exists():
        try:
            import numpy as np
            from scipy.io.wavfile import write as write_wav
            samplerate = 44100
            tone_hz = 100
            # One second of a pure sine tone at full 16-bit amplitude.
            t = np.linspace(0., 1., samplerate)
            amplitude = np.iinfo(np.int16).max
            data = amplitude * np.sin(2. * np.pi * tone_hz * t)
            write_wav(test_audio_file, samplerate, data.astype(np.int16))
            ASCIIColors.green(f"Created dummy audio file: {test_audio_file}")
        except ImportError:
            ASCIIColors.warning("SciPy not installed. Cannot create dummy audio file.")
            ASCIIColors.warning(f"Please place a '{test_audio_file.name}' in the current directory or modify the path.")
        except Exception as e_dummy_audio:
            ASCIIColors.error(f"Could not create dummy audio file: {e_dummy_audio}")

    if not test_audio_file.exists():
        ASCIIColors.error(f"Test audio file '{test_audio_file}' not found. Skipping transcription test.")
    else:
        try:
            ASCIIColors.cyan("\n--- Initializing WhisperSTTBinding (model: 'tiny') ---")
            # Using 'tiny' model for faster testing. Change to 'base' or 'small' for better quality.
            stt_binding = WhisperSTTBinding(model_name="tiny")

            ASCIIColors.cyan("\n--- Listing available Whisper models ---")
            models = stt_binding.list_models()
            print(f"Available models: {models}")

            ASCIIColors.cyan(f"\n--- Transcribing '{test_audio_file.name}' with 'tiny' model ---")
            transcription = stt_binding.transcribe_audio(test_audio_file)
            print(f"Transcription (tiny): '{transcription}'")

            # Further manual checks: pass language="en" to transcribe_audio for a
            # language hint, or model="base" to exercise dynamic model switching
            # (the latter loads, and possibly downloads, the other model).
        except ImportError as e_imp:
            ASCIIColors.error(f"Import error during test: {e_imp}")
            ASCIIColors.info("This might be due to `openai-whisper` or `torch` not being installed correctly.")
        except FileNotFoundError as e_fnf:
            ASCIIColors.error(f"File not found during test: {e_fnf}")
        except RuntimeError as e_rt:
            ASCIIColors.error(f"Runtime error during test (often model load or ffmpeg issue): {e_rt}")
            if "ffmpeg" in str(e_rt).lower():
                ASCIIColors.yellow("This error often means 'ffmpeg' is not installed or not found in your system's PATH.")
                ASCIIColors.yellow("Please install ffmpeg: https://ffmpeg.org/download.html")
        except Exception as e:
            ASCIIColors.error(f"An unexpected error occurred during testing: {e}")
            trace_exception(e)
        # The audio file is intentionally left in place; remove it manually if unwanted.

    ASCIIColors.yellow("\n--- WhisperSTTBinding Test Finished ---")