npcsh 0.3.30__py3-none-any.whl → 0.3.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. npcsh/audio.py +540 -181
  2. npcsh/audio_gen.py +1 -0
  3. npcsh/cli.py +37 -19
  4. npcsh/conversation.py +14 -251
  5. npcsh/dataframes.py +13 -5
  6. npcsh/helpers.py +5 -0
  7. npcsh/image.py +2 -4
  8. npcsh/image_gen.py +38 -38
  9. npcsh/knowledge_graph.py +4 -4
  10. npcsh/llm_funcs.py +517 -349
  11. npcsh/npc_compiler.py +44 -23
  12. npcsh/npc_sysenv.py +5 -0
  13. npcsh/npc_team/npcsh.ctx +8 -2
  14. npcsh/npc_team/tools/generic_search.tool +9 -1
  15. npcsh/plonk.py +2 -2
  16. npcsh/response.py +131 -482
  17. npcsh/search.py +20 -9
  18. npcsh/serve.py +210 -203
  19. npcsh/shell.py +78 -80
  20. npcsh/shell_helpers.py +513 -102
  21. npcsh/stream.py +87 -554
  22. npcsh/video.py +5 -2
  23. npcsh/video_gen.py +69 -0
  24. {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/generic_search.tool +9 -1
  25. {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/npcsh.ctx +8 -2
  26. npcsh-0.3.32.dist-info/METADATA +779 -0
  27. npcsh-0.3.32.dist-info/RECORD +78 -0
  28. npcsh-0.3.30.dist-info/METADATA +0 -1862
  29. npcsh-0.3.30.dist-info/RECORD +0 -76
  30. {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/bash_executer.tool +0 -0
  31. {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/calculator.tool +0 -0
  32. {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/celona.npc +0 -0
  33. {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/code_executor.tool +0 -0
  34. {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/corca.npc +0 -0
  35. {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/eriane.npc +0 -0
  36. {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/foreman.npc +0 -0
  37. {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/image_generation.tool +0 -0
  38. {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/lineru.npc +0 -0
  39. {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/local_search.tool +0 -0
  40. {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/maurawa.npc +0 -0
  41. {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/npcsh_executor.tool +0 -0
  42. {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/raone.npc +0 -0
  43. {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/screen_cap.tool +0 -0
  44. {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/sibiji.npc +0 -0
  45. {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/slean.npc +0 -0
  46. {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/sql_executor.tool +0 -0
  47. {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/test_pipeline.py +0 -0
  48. {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/turnic.npc +0 -0
  49. {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/welxor.npc +0 -0
  50. {npcsh-0.3.30.dist-info → npcsh-0.3.32.dist-info}/WHEEL +0 -0
  51. {npcsh-0.3.30.dist-info → npcsh-0.3.32.dist-info}/entry_points.txt +0 -0
  52. {npcsh-0.3.30.dist-info → npcsh-0.3.32.dist-info}/licenses/LICENSE +0 -0
  53. {npcsh-0.3.30.dist-info → npcsh-0.3.32.dist-info}/top_level.txt +0 -0
npcsh/audio.py CHANGED
@@ -1,210 +1,569 @@
1
- # Move optional imports into try/except
1
+ import os
2
+ import numpy as np
3
+ import tempfile
4
+ import threading
5
+ import time
6
+ import queue
7
+ import re
8
+ import json
9
+
10
+ import subprocess
11
+
2
12
  try:
3
- import whisper
4
- from playsound import playsound
5
- from gtts import gTTS
13
+ import torch
6
14
  import pyaudio
7
- except Exception as e:
8
- print(f"Error importing audio dependencies: {e}")
15
+ import wave
16
+ from typing import Optional, List, Dict, Any
17
+ from gtts import gTTS
18
+ from faster_whisper import WhisperModel
19
+ import pygame
20
+
21
+ FORMAT = pyaudio.paInt16
22
+ CHANNELS = 1
23
+ RATE = 16000
24
+ CHUNK = 512
25
+
26
+ # State Management
27
+ is_speaking = False
28
+ should_stop_speaking = False
29
+ tts_sequence = 0
30
+ recording_data = []
31
+ buffer_data = []
32
+ is_recording = False
33
+ last_speech_time = 0
34
+ running = True
35
+
36
+ # Queues
37
+ audio_queue = queue.Queue()
38
+ tts_queue = queue.PriorityQueue()
39
+ cleanup_files = []
40
+
41
+ # Initialize pygame mixer
42
+ pygame.mixer.quit()
43
+ pygame.mixer.init(frequency=44100, size=-16, channels=2, buffer=512)
44
+
45
+ # Device selection
46
+ device = "cpu"
47
+ print(f"Using device: {device}")
48
+
49
+ # Load VAD model
50
+ print("Loading Silero VAD model...")
51
+ vad_model, _ = torch.hub.load(
52
+ repo_or_dir="snakers4/silero-vad",
53
+ model="silero_vad",
54
+ force_reload=False,
55
+ onnx=False,
56
+ verbose=False,
57
+ )
58
+ vad_model.to(device)
9
59
 
10
- import numpy as np
60
+ # Load Whisper model
61
+ print("Loading Whisper model...")
62
+ whisper_model = WhisperModel("base", device=device, compute_type="int8")
63
+
64
+ # Conversation History Management
65
+ history = []
66
+ max_history = 10
67
+ memory_file = "conversation_history.json"
68
+
69
+
70
+ except:
71
+ print("audio dependencies not installed")
72
+
73
+
74
def convert_mp3_to_wav(mp3_file, wav_file):
    """Transcode ``mp3_file`` into a mono, 44.1 kHz, 16-bit PCM WAV file.

    A pre-existing ``wav_file`` is deleted before the conversion, which is
    delegated to the ``ffmpeg`` executable.

    Args:
        mp3_file: Path of the MP3 input.
        wav_file: Path the WAV output is written to.

    Raises:
        subprocess.CalledProcessError: ffmpeg exited with a non-zero status
            (its stderr is printed first).
        Exception: any other failure is printed and re-raised unchanged.
    """
    ffmpeg_cmd = ["ffmpeg", "-y", "-i", mp3_file]
    ffmpeg_cmd += ["-acodec", "pcm_s16le", "-ac", "1", "-ar", "44100", wav_file]

    try:
        # Ensure the output file doesn't exist before conversion
        stale_output = os.path.exists(wav_file)
        if stale_output:
            os.remove(wav_file)

        subprocess.run(ffmpeg_cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as conversion_error:
        print(f"Error converting MP3 to WAV: {conversion_error.stderr}")
        raise
    except Exception as unexpected:
        print(f"Unexpected error during conversion: {unexpected}")
        raise
104
+
105
+
106
+ # Check if FFmpeg is available
107
def check_ffmpeg():
    """Report whether a working ``ffmpeg`` executable can be launched.

    Returns:
        bool: True only if ``ffmpeg -version`` starts and exits with status 0;
        False if the program is missing, cannot be executed, or reports an
        error.
    """
    try:
        result = subprocess.run(
            ["ffmpeg", "-version"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except (subprocess.SubprocessError, OSError):
        # OSError covers FileNotFoundError as well as PermissionError and
        # similar launch failures, so this probe can never raise.
        return False
    # A binary that starts but fails is not usable either.
    return result.returncode == 0
115
+
116
+
117
has_ffmpeg = check_ffmpeg()

# Device selection
device = "cpu"

# The guarded dependency block earlier in this module already loads the
# Silero VAD and Whisper models when the optional audio packages import
# cleanly. Repeating the load here doubled start-up work and, when those
# packages were missing, raised NameError on `torch` at import time. Reuse
# whatever was loaded and fall back to None placeholders otherwise.
try:
    vad_model
    whisper_model
except NameError:
    vad_model = None
    whisper_model = None

# Conversation History Management
history = []
max_history = 10
memory_file = "conversation_history.json"
142
+
143
+
144
+ # History Management Functions
145
def load_history():
    """Populate the module-level ``history`` from ``memory_file``.

    Nothing changes when the file does not exist. If reading or parsing it
    fails, the error is printed and ``history`` is reset to an empty list.
    """
    global history
    try:
        if not os.path.exists(memory_file):
            return
        with open(memory_file, "r") as stored:
            history = json.load(stored)
    except Exception as load_error:
        print(f"Error loading conversation history: {load_error}")
        history = []
154
+
155
+
156
def save_history():
    """Write the module-level ``history`` to ``memory_file`` as JSON.

    Failures are printed instead of being raised.
    """
    try:
        with open(memory_file, "w") as sink:
            json.dump(history, sink)
    except Exception as save_error:
        print(f"Error saving conversation history: {save_error}")
162
+
163
+
164
def add_exchange(user_input, assistant_response):
    """Append one user/assistant turn to ``history`` and persist it.

    The turn is stamped with ``time.time()``. When the list grows beyond
    ``max_history`` the oldest entry is dropped before saving.

    Args:
        user_input: What the user said.
        assistant_response: What the assistant answered.
    """
    global history
    history.append(
        dict(user=user_input, assistant=assistant_response, timestamp=time.time())
    )
    over_limit = len(history) > max_history
    if over_limit:
        del history[0]
    save_history()
175
+
176
+
177
def get_context_string():
    """Render ``history`` as newline-joined "User:" / "Assistant:" lines."""
    lines = [
        line
        for turn in history
        for line in (f"User: {turn['user']}", f"Assistant: {turn['assistant']}")
    ]
    return "\n".join(lines)
183
+
184
+
185
+ # Audio Management Functions
186
def cleanup_temp_files():
    """Delete every path queued in ``cleanup_files`` and forget it.

    A path is dropped from the list once it is gone (or was never there).
    If deleting it fails, the entry stays queued for a later attempt.
    """
    global cleanup_files
    pending = tuple(cleanup_files)  # snapshot: the list is mutated below
    for path in pending:
        try:
            if os.path.exists(path):
                os.remove(path)
            cleanup_files.remove(path)
        except Exception:
            continue
195
+
196
+
197
def interrupt_speech():
    """Stop current playback and discard all queued speech.

    Sets ``should_stop_speaking``, stops and unloads the pygame music
    channel, removes the file behind every entry still waiting in
    ``tts_queue`` and resets ``tts_sequence`` to 0. A file that cannot be
    deleted is added to ``cleanup_files`` instead.
    """
    global should_stop_speaking, tts_sequence
    should_stop_speaking = True
    pygame.mixer.music.stop()
    pygame.mixer.music.unload()

    # Drain until the queue reports empty.
    while True:
        try:
            _, temp_filename = tts_queue.get_nowait()
        except queue.Empty:
            break
        try:
            if os.path.exists(temp_filename):
                os.remove(temp_filename)
        except OSError:
            # Only file-system failures are expected here; a bare `except`
            # would also swallow KeyboardInterrupt and SystemExit.
            if temp_filename not in cleanup_files:
                cleanup_files.append(temp_filename)

    tts_sequence = 0
217
+
218
+
219
def audio_callback(in_data, frame_count, time_info, status):
    """Put the captured buffer on ``audio_queue`` and ask to keep streaming.

    Returns:
        tuple: ``(in_data, pyaudio.paContinue)``.
    """
    audio_queue.put(in_data)
    continue_flag = pyaudio.paContinue
    return in_data, continue_flag
222
+
223
+
224
def transcribe_recording(audio_data):
    """Transcribe a list of raw int16 audio buffers.

    The buffers are concatenated, converted to float32 and scaled by
    1/32768 before being handed to ``run_transcription``.

    Args:
        audio_data: Sequence of ``bytes`` chunks; falsy input yields None.

    Returns:
        Whatever ``run_transcription`` returns, or None for empty input.
    """
    if audio_data:
        raw_bytes = b"".join(audio_data)
        pcm = np.frombuffer(raw_bytes, dtype=np.int16)
        audio_np = pcm.astype(np.float32) / 32768.0
        return run_transcription(audio_np)
    return None
232
+
233
+
234
def run_transcription(audio_np):
    """Transcribe a float audio array with the module's Whisper model.

    The samples are written to a uniquely named temporary WAV file
    (``CHANNELS`` channels, 16-bit, ``RATE`` Hz), transcribed as English with
    beam size 5, and the segment texts are joined with spaces.

    Args:
        audio_np: NumPy array of samples scaled so that 32768 maps to 1.0.

    Returns:
        str | None: The stripped transcription, or None if anything failed
        (the error is printed).
    """
    temp_file = None
    try:
        # mkstemp gives a collision-free name; a timestamp with one-second
        # resolution could be reused by two recordings in the same second.
        fd, temp_file = tempfile.mkstemp(prefix="temp_recording_", suffix=".wav")
        os.close(fd)

        # Clip so a sample of exactly +1.0 maps to 32767 instead of wrapping
        # around to -32768 in the int16 conversion.
        pcm = (np.clip(audio_np, -1.0, 32767 / 32768) * 32768).astype(np.int16)
        with wave.open(temp_file, "wb") as wf:
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(2)
            wf.setframerate(RATE)
            wf.writeframes(pcm.tobytes())

        segments, _info = whisper_model.transcribe(
            temp_file, language="en", beam_size=5
        )
        transcription = " ".join(segment.text for segment in segments)
        return transcription.strip()

    except Exception as e:
        print(f"Transcription error: {str(e)}")
        return None

    finally:
        # Always remove the temp file, including on the error path, which
        # previously leaked it.
        if temp_file is not None:
            try:
                if os.path.exists(temp_file):
                    os.remove(temp_file)
            except OSError:
                if temp_file not in cleanup_files:
                    cleanup_files.append(temp_file)
260
+
261
+
262
+ # History Management Functions
263
def load_history():
    """Populate the module-level ``history`` from ``memory_file``.

    Nothing changes when the file does not exist. If reading or parsing it
    fails, the error is printed and ``history`` is reset to an empty list.
    """
    global history
    try:
        if not os.path.exists(memory_file):
            return
        with open(memory_file, "r") as stored:
            history = json.load(stored)
    except Exception as load_error:
        print(f"Error loading conversation history: {load_error}")
        history = []
272
+
273
+
274
def save_history():
    """Write the module-level ``history`` to ``memory_file`` as JSON.

    Failures are printed instead of being raised.
    """
    try:
        with open(memory_file, "w") as sink:
            json.dump(history, sink)
    except Exception as save_error:
        print(f"Error saving conversation history: {save_error}")
280
+
281
+
282
def add_exchange(user_input, assistant_response):
    """Append one user/assistant turn to ``history`` and persist it.

    The turn is stamped with ``time.time()``. When the list grows beyond
    ``max_history`` the oldest entry is dropped before saving.

    Args:
        user_input: What the user said.
        assistant_response: What the assistant answered.
    """
    global history
    history.append(
        dict(user=user_input, assistant=assistant_response, timestamp=time.time())
    )
    over_limit = len(history) > max_history
    if over_limit:
        del history[0]
    save_history()
293
+
294
+
295
def get_context_string():
    """Render ``history`` as newline-joined "User:" / "Assistant:" lines."""
    lines = [
        line
        for turn in history
        for line in (f"User: {turn['user']}", f"Assistant: {turn['assistant']}")
    ]
    return "\n".join(lines)
301
+
302
+
303
+ # Audio Management Functions
304
def cleanup_temp_files():
    """Delete every path queued in ``cleanup_files`` and forget it.

    A path is dropped from the list once it is gone (or was never there).
    If deleting it fails, the entry stays queued for a later attempt.
    """
    global cleanup_files
    pending = tuple(cleanup_files)  # snapshot: the list is mutated below
    for path in pending:
        try:
            if os.path.exists(path):
                os.remove(path)
            cleanup_files.remove(path)
        except Exception:
            continue
313
+
314
+
315
def interrupt_speech():
    """Stop current playback and discard all queued speech.

    Sets ``should_stop_speaking``, stops and unloads the pygame music
    channel, removes the file behind every entry still waiting in
    ``tts_queue``, then resets ``tts_sequence`` to 0 and ``is_speaking`` to
    False. A file that cannot be deleted is added to ``cleanup_files``.
    """
    # `response_generator` was declared global here but never read or
    # assigned, so it is no longer listed.
    global should_stop_speaking, is_speaking, tts_sequence
    should_stop_speaking = True
    pygame.mixer.music.stop()
    pygame.mixer.music.unload()

    # Drain until the queue reports empty.
    while True:
        try:
            _, temp_filename = tts_queue.get_nowait()
        except queue.Empty:
            break
        try:
            if os.path.exists(temp_filename):
                os.remove(temp_filename)
        except OSError:
            # Only file-system failures are expected here; a bare `except`
            # would also swallow KeyboardInterrupt and SystemExit.
            if temp_filename not in cleanup_files:
                cleanup_files.append(temp_filename)

    tts_sequence = 0
    is_speaking = False
335
+
336
+
337
def audio_callback(in_data, frame_count, time_info, status):
    """Put the captured buffer on ``audio_queue`` and ask to keep streaming.

    Returns:
        tuple: ``(in_data, pyaudio.paContinue)``.
    """
    audio_queue.put(in_data)
    continue_flag = pyaudio.paContinue
    return in_data, continue_flag
340
+
341
+
342
+ # Text-to-Speech Functions
343
def play_audio_from_queue():
    """Play queued TTS files in sequence order; loops forever.

    Each ``tts_queue`` entry is a ``(sequence, filename)`` pair. The head of
    the queue is only played when its sequence number equals the next one
    expected. When ``should_stop_speaking`` is set, playback is stopped, the
    queue is drained (deleting the files) and the flags and sequence counter
    are reset.

    NOTE(review): the block nesting below was reconstructed from a flattened
    diff; confirm the indentation of the trailing ``is_speaking = False``
    and ``time.sleep(0.05)`` against the real file.
    """
    global is_speaking, cleanup_files, should_stop_speaking
    next_sequence = 0

    while True:
        if should_stop_speaking:
            pygame.mixer.music.stop()
            pygame.mixer.music.unload()

            while not tts_queue.empty():
                try:
                    _, temp_filename = tts_queue.get_nowait()
                    try:
                        if os.path.exists(temp_filename):
                            os.remove(temp_filename)
                    except OSError:
                        # Narrowed from a bare `except`, which would also
                        # swallow KeyboardInterrupt and SystemExit.
                        if temp_filename not in cleanup_files:
                            cleanup_files.append(temp_filename)
                except queue.Empty:
                    break

            next_sequence = 0
            is_speaking = False
            should_stop_speaking = False
            time.sleep(0.1)
            continue

        try:
            if not tts_queue.empty():
                # Peek at the head without removing it.
                sequence, temp_filename = tts_queue.queue[0]

                if sequence == next_sequence:
                    sequence, temp_filename = tts_queue.get()
                    is_speaking = True

                    try:
                        if len(cleanup_files) > 0 and not pygame.mixer.music.get_busy():
                            cleanup_temp_files()

                        if should_stop_speaking:
                            # The `finally` below still deletes the file.
                            continue

                        pygame.mixer.music.load(temp_filename)
                        pygame.mixer.music.play()

                        while (
                            pygame.mixer.music.get_busy() and not should_stop_speaking
                        ):
                            pygame.time.wait(50)

                        pygame.mixer.music.unload()

                    except Exception as e:
                        print(f"Audio playback error: {str(e)}")
                    finally:
                        try:
                            if os.path.exists(temp_filename):
                                os.remove(temp_filename)
                        except OSError:
                            if temp_filename not in cleanup_files:
                                cleanup_files.append(temp_filename)

                    if not should_stop_speaking:
                        next_sequence += 1
                    is_speaking = False

            time.sleep(0.05)
        except Exception:
            # Best-effort loop: back off briefly and try again.
            time.sleep(0.05)
412
+
413
+
414
+ import pygame
415
+ from gtts import gTTS
11
416
  import tempfile
12
417
  import os
13
- import time
14
- from typing import Optional, List
15
- from .llm_funcs import get_llm_response
418
+ import logging
16
419
 
420
+ logging.basicConfig(level=logging.ERROR)
421
+ logger = logging.getLogger(__name__)
17
422
 
18
- def get_audio_level(audio_data):
19
- return np.max(np.abs(np.frombuffer(audio_data, dtype=np.int16)))
423
+ import pyaudio
424
+ import wave
425
+ from gtts import gTTS
426
+ import tempfile
427
+ import os
428
+ import logging
20
429
 
430
+ import tempfile
431
+ import uuid
21
432
 
22
- def calibrate_silence(sample_rate=16000, duration=2):
23
- """
24
- Function Description:
25
- This function calibrates the silence level for audio recording.
26
- Args:
27
- None
28
- Keyword Args:
29
- sample_rate: The sample rate for audio recording.
30
- duration: The duration in seconds for calibration.
31
- Returns:
32
- The silence threshold level.
33
- """
34
433
 
35
- p = pyaudio.PyAudio()
36
- stream = p.open(
37
- format=pyaudio.paInt16,
38
- channels=1,
39
- rate=sample_rate,
40
- input=True,
41
- frames_per_buffer=1024,
42
- )
434
+ def create_and_queue_audio(text, state):
435
+ """Create and queue audio with state awareness for TTS/recording coordination"""
436
+ # Set TTS speaking flag
437
+ state["tts_is_speaking"] = True
43
438
 
44
- print("Calibrating silence level. Please remain quiet...")
45
- levels = []
46
- for _ in range(int(sample_rate * duration / 1024)):
47
- data = stream.read(1024)
48
- levels.append(get_audio_level(data))
439
+ if not text.strip():
440
+ print("Empty text, skipping TTS")
441
+ state["tts_is_speaking"] = False
442
+ return
49
443
 
50
- stream.stop_stream()
51
- stream.close()
52
- p.terminate()
444
+ try:
445
+ unique_id = uuid.uuid4()
446
+ with tempfile.TemporaryDirectory() as temp_dir:
447
+ mp3_file = os.path.join(temp_dir, f"temp_{unique_id}.mp3")
448
+ wav_file = os.path.join(temp_dir, f"temp_{unique_id}.wav")
449
+
450
+ tts = gTTS(text=text, lang="en", slow=False)
451
+ tts.save(mp3_file)
53
452
 
54
- avg_level = np.mean(levels)
55
- silence_threshold = avg_level * 1.5 # Set threshold slightly above average
56
- print(f"Silence threshold set to: {silence_threshold}")
57
- return silence_threshold
58
-
59
-
60
- def is_silent(audio_data: bytes, threshold: float) -> bool:
61
- """
62
- Function Description:
63
- This function checks if audio data is silent based on a threshold.
64
- Args:
65
- audio_data: The audio data to check.
66
- threshold: The silence threshold level.
67
- Keyword Args:
68
- None
69
- Returns:
70
- A boolean indicating whether the audio is silent.
71
- """
72
-
73
- return get_audio_level(audio_data) < threshold
74
-
75
-
76
- def record_audio(
77
- sample_rate: int = 16000,
78
- max_duration: int = 10,
79
- silence_threshold: Optional[float] = None,
80
- ) -> bytes:
81
- """
82
- Function Description:
83
- This function records audio from the microphone.
84
- Args:
85
- None
86
- Keyword Args:
87
- sample_rate: The sample rate for audio recording.
88
- max_duration: The maximum duration in seconds.
89
- silence_threshold: The silence threshold level.
90
- Returns:
91
- The recorded audio data.
92
- """
93
-
94
- if silence_threshold is None:
95
- silence_threshold = calibrate_silence()
453
+ convert_mp3_to_wav(mp3_file, wav_file)
454
+
455
+ # Play audio and wait for completion
456
+ play_audio(wav_file, state)
457
+ except Exception as e:
458
+ print(f"Error in TTS process: {e}")
459
+ finally:
460
+ # Ensure flag is reset even if there's an error
461
+ state["tts_is_speaking"] = False
462
+ state["tts_just_finished"] = True
96
463
 
464
+ for file in [mp3_file, wav_file]:
465
+ try:
466
+ if os.path.exists(file):
467
+ os.remove(file)
468
+ except Exception as e:
469
+ print(f"Error removing temporary file {file}: {e}")
470
+
471
+
472
+ def play_audio(filename, state):
473
+ """Play audio with state awareness for TTS/recording coordination"""
474
+ CHUNK = 4096 # Increased chunk size
475
+
476
+ wf = wave.open(filename, "rb")
97
477
  p = pyaudio.PyAudio()
478
+
98
479
  stream = p.open(
99
- format=pyaudio.paInt16,
100
- channels=1,
101
- rate=sample_rate,
102
- input=True,
103
- frames_per_buffer=1024,
480
+ format=p.get_format_from_width(wf.getsampwidth()),
481
+ channels=wf.getnchannels(),
482
+ rate=wf.getframerate(),
483
+ output=True,
104
484
  )
105
485
 
106
- print("Listening... (speak now)")
107
- frames = []
108
- silent_chunks = 0
109
- has_speech = False
110
- max_silent_chunks = int(sample_rate * 3.0 / 1024) # 3.0 seconds of silence
111
- max_chunks = int(sample_rate * max_duration / 1024) # Maximum duration in chunks
112
-
113
- start_time = time.time()
114
- for _ in range(max_chunks):
115
- data = stream.read(1024)
116
- frames.append(data)
117
-
118
- if is_silent(data, silence_threshold):
119
- silent_chunks += 1
120
- if has_speech and silent_chunks > max_silent_chunks:
121
- break
122
- else:
123
- silent_chunks = 0
124
- has_speech = True
125
-
126
- if len(frames) % 10 == 0: # Print a dot every ~0.5 seconds
127
- print(".", end="", flush=True)
128
-
129
- if time.time() - start_time > max_duration:
130
- print("\nMax duration reached.")
131
- break
486
+ data = wf.readframes(CHUNK)
132
487
 
133
- print("\nProcessing...")
488
+ # This is blocking until audio is done playing
489
+ while data and state["running"]: # Check if system still running
490
+ stream.write(data)
491
+ data = wf.readframes(CHUNK)
134
492
 
135
493
  stream.stop_stream()
136
494
  stream.close()
137
495
  p.terminate()
138
496
 
139
- return b"".join(frames)
497
+ try:
498
+ os.unlink(filename)
499
+ except:
500
+ pass
501
+
502
+
503
def select_model():
    """Interactively pick one of the built-in model names.

    Prints the numbered choices, then prompts until the user enters a valid
    number or presses Enter for the default (the first model).

    Returns:
        str: The chosen model name. If reading input fails (for example on
        EOF), the first model is returned, or "gemma:2b" if the list is
        empty.
    """
    models = [
        "gpt-4o-mini",
        "claude-haiku-3-5-latest",
    ]

    # The prompt asks for a number, so show what the numbers refer to.
    print("\nAvailable models:")
    for number, name in enumerate(models, start=1):
        print(f"  {number}. {name}")

    while True:
        try:
            choice = input(
                "\nSelect a model number (or press Enter for default): "
            ).strip()
            if not choice:
                # `models` holds plain strings; indexing an entry with
                # ["name"] raised TypeError on every return path.
                return models[0]

            choice = int(choice)
            if 1 <= choice <= len(models):
                selected_model = models[choice - 1]
                print(f"Selected model: {selected_model}")
                return selected_model
            else:
                print(f"Please enter a number between 1 and {len(models)}")
        except ValueError:
            print("Please enter a valid number")
        except Exception as e:
            print(f"Error selecting model: {str(e)}")
            if models:
                return models[0]
            return "gemma:2b"
140
531
 
141
532
 
142
- def speak_text(text: str) -> None:
143
- """
144
- Function Description:
145
- This function converts text to speech and plays the audio.
146
- Args:
147
- text: The text to convert to speech.
148
- Keyword Args:
149
- None
150
- Returns:
151
- None
152
- """
533
def process_response_chunk(text_chunk, state=None):
    """Clean a chunk of response text and speak it.

    Whitespace-only chunks are ignored. Otherwise the text is passed through
    ``process_text_for_tts`` and handed to ``create_and_queue_audio``.

    Args:
        text_chunk: Raw text to speak.
        state: Shared coordination dict passed on to
            ``create_and_queue_audio``. It was previously omitted from that
            call, which raised TypeError because the argument is required.
            Defaults to a fresh ``{"running": True}``, the key the playback
            loop reads.
    """
    if not text_chunk.strip():
        return
    if state is None:
        state = {"running": True}
    processed_text = process_text_for_tts(text_chunk)
    create_and_queue_audio(processed_text, state)
153
538
 
154
- try:
155
- tts = gTTS(text=text, lang="en")
156
- with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
157
- tts.save(fp.name)
158
- playsound(fp.name)
159
- os.unlink(fp.name)
160
- except Exception as e:
161
- print(f"Text-to-speech error: {e}")
162
-
163
-
164
- def process_audio(file_path: str, table_name: str) -> List:
165
- """
166
- Function Description:
167
- This function is used to process an audio file.
168
- Args:
169
- file_path : str : The file path.
170
- table_name : str : The table name.
171
- Keyword Args:
172
- None
173
- Returns:
174
- List : The embeddings and texts.
175
- """
176
-
177
- embeddings = []
178
- texts = []
179
- try:
180
- audio, sr = librosa.load(file_path)
181
- # Transcribe audio using Whisper
182
- model = whisper.load_model("base") # Or a larger model if available
183
- result = model.transcribe(file_path)
184
- transcribed_text = result["text"].strip()
185
-
186
- # Split transcribed text into chunks (adjust chunk_size as needed)
187
- chunk_size = 1000
188
- for i in range(0, len(transcribed_text), chunk_size):
189
- chunk = transcribed_text[i : i + chunk_size]
190
- text_embedding_response = get_llm_response(
191
- f"Generate an embedding for: {chunk}",
192
- model="text-embedding-ada-002",
193
- provider="openai",
194
- ) # Use a text embedding model
195
- if (
196
- isinstance(text_embedding_response, dict)
197
- and "error" in text_embedding_response
198
- ):
199
- print(
200
- f"Error generating text embedding: {text_embedding_response['error']}"
201
- )
202
- else:
203
- embeddings.append(text_embedding_response) # Store the embedding
204
- texts.append(chunk) # Store the corresponding text chunk
205
539
 
206
- return embeddings, texts
540
def process_text_for_tts(text):
    """Normalise text before it is sent to the speech synthesiser.

    Steps, in order: delete the symbols ``* < > { } ( ) [ ] & % # @ ^ _ = + ~``,
    strip surrounding whitespace, rewrite dotted pairs such as ``a.b.`` as
    ``a b ``, and insert a space after ``.``, ``!`` or ``?`` when a word
    character follows directly.

    Args:
        text: The raw string.

    Returns:
        str: The cleaned string.
    """
    rewrites = (
        (r"(\w)\.(\w)\.", r"\1 \2 "),
        (r"([.!?])(\w)", r"\1 \2"),
    )
    cleaned = re.sub(r"[*<>{}()\[\]&%#@^_=+~]", "", text).strip()
    for pattern, replacement in rewrites:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned
207
546
 
208
- except Exception as e:
209
- print(f"Error processing audio: {e}")
210
- return [], [] # Return empty lists in case of error
547
+
548
+ """
549
+
550
+ To use this code, you'll need to have the following dependencies installed:
551
+
552
+ ```bash
553
+ pip install numpy torch torchaudio faster-whisper pygame pyaudio gtts ollama
554
+ ```
555
+
556
+ FFmpeg must also be installed (convert_mp3_to_wav uses it to turn the gTTS MP3 output into WAV):
557
+ ```bash
558
+ # On Ubuntu/Debian
559
+ sudo apt-get install ffmpeg
560
+
561
+ # On MacOS with Homebrew
562
+ brew install ffmpeg
563
+
564
+ # On Windows with Chocolatey
565
+ choco install ffmpeg
566
+ ```
567
+
568
+
569
+ """