npcsh 0.3.30__py3-none-any.whl → 0.3.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- npcsh/audio.py +540 -181
- npcsh/audio_gen.py +1 -0
- npcsh/cli.py +37 -19
- npcsh/conversation.py +14 -251
- npcsh/dataframes.py +13 -5
- npcsh/helpers.py +5 -0
- npcsh/image.py +2 -4
- npcsh/image_gen.py +38 -38
- npcsh/knowledge_graph.py +4 -4
- npcsh/llm_funcs.py +517 -349
- npcsh/npc_compiler.py +44 -23
- npcsh/npc_sysenv.py +5 -0
- npcsh/npc_team/npcsh.ctx +8 -2
- npcsh/npc_team/tools/generic_search.tool +9 -1
- npcsh/plonk.py +2 -2
- npcsh/response.py +131 -482
- npcsh/search.py +20 -9
- npcsh/serve.py +210 -203
- npcsh/shell.py +78 -80
- npcsh/shell_helpers.py +513 -102
- npcsh/stream.py +87 -554
- npcsh/video.py +5 -2
- npcsh/video_gen.py +69 -0
- {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/generic_search.tool +9 -1
- {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/npcsh.ctx +8 -2
- npcsh-0.3.32.dist-info/METADATA +779 -0
- npcsh-0.3.32.dist-info/RECORD +78 -0
- npcsh-0.3.30.dist-info/METADATA +0 -1862
- npcsh-0.3.30.dist-info/RECORD +0 -76
- {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/bash_executer.tool +0 -0
- {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/calculator.tool +0 -0
- {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/celona.npc +0 -0
- {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/code_executor.tool +0 -0
- {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/corca.npc +0 -0
- {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/eriane.npc +0 -0
- {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/foreman.npc +0 -0
- {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/image_generation.tool +0 -0
- {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/lineru.npc +0 -0
- {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/local_search.tool +0 -0
- {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/maurawa.npc +0 -0
- {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/npcsh_executor.tool +0 -0
- {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/raone.npc +0 -0
- {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/screen_cap.tool +0 -0
- {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/sibiji.npc +0 -0
- {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/slean.npc +0 -0
- {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/sql_executor.tool +0 -0
- {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/test_pipeline.py +0 -0
- {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/turnic.npc +0 -0
- {npcsh-0.3.30.data → npcsh-0.3.32.data}/data/npcsh/npc_team/welxor.npc +0 -0
- {npcsh-0.3.30.dist-info → npcsh-0.3.32.dist-info}/WHEEL +0 -0
- {npcsh-0.3.30.dist-info → npcsh-0.3.32.dist-info}/entry_points.txt +0 -0
- {npcsh-0.3.30.dist-info → npcsh-0.3.32.dist-info}/licenses/LICENSE +0 -0
- {npcsh-0.3.30.dist-info → npcsh-0.3.32.dist-info}/top_level.txt +0 -0
npcsh/audio.py
CHANGED
|
@@ -1,210 +1,569 @@
|
|
|
1
|
-
|
|
1
|
+
import os
|
|
2
|
+
import numpy as np
|
|
3
|
+
import tempfile
|
|
4
|
+
import threading
|
|
5
|
+
import time
|
|
6
|
+
import queue
|
|
7
|
+
import re
|
|
8
|
+
import json
|
|
9
|
+
|
|
10
|
+
import subprocess
|
|
11
|
+
|
|
2
12
|
try:
|
|
3
|
-
import
|
|
4
|
-
from playsound import playsound
|
|
5
|
-
from gtts import gTTS
|
|
13
|
+
import torch
|
|
6
14
|
import pyaudio
|
|
7
|
-
|
|
8
|
-
|
|
15
|
+
import wave
|
|
16
|
+
from typing import Optional, List, Dict, Any
|
|
17
|
+
from gtts import gTTS
|
|
18
|
+
from faster_whisper import WhisperModel
|
|
19
|
+
import pygame
|
|
20
|
+
|
|
21
|
+
FORMAT = pyaudio.paInt16
|
|
22
|
+
CHANNELS = 1
|
|
23
|
+
RATE = 16000
|
|
24
|
+
CHUNK = 512
|
|
25
|
+
|
|
26
|
+
# State Management
|
|
27
|
+
is_speaking = False
|
|
28
|
+
should_stop_speaking = False
|
|
29
|
+
tts_sequence = 0
|
|
30
|
+
recording_data = []
|
|
31
|
+
buffer_data = []
|
|
32
|
+
is_recording = False
|
|
33
|
+
last_speech_time = 0
|
|
34
|
+
running = True
|
|
35
|
+
|
|
36
|
+
# Queues
|
|
37
|
+
audio_queue = queue.Queue()
|
|
38
|
+
tts_queue = queue.PriorityQueue()
|
|
39
|
+
cleanup_files = []
|
|
40
|
+
|
|
41
|
+
# Initialize pygame mixer
|
|
42
|
+
pygame.mixer.quit()
|
|
43
|
+
pygame.mixer.init(frequency=44100, size=-16, channels=2, buffer=512)
|
|
44
|
+
|
|
45
|
+
# Device selection
|
|
46
|
+
device = "cpu"
|
|
47
|
+
print(f"Using device: {device}")
|
|
48
|
+
|
|
49
|
+
# Load VAD model
|
|
50
|
+
print("Loading Silero VAD model...")
|
|
51
|
+
vad_model, _ = torch.hub.load(
|
|
52
|
+
repo_or_dir="snakers4/silero-vad",
|
|
53
|
+
model="silero_vad",
|
|
54
|
+
force_reload=False,
|
|
55
|
+
onnx=False,
|
|
56
|
+
verbose=False,
|
|
57
|
+
)
|
|
58
|
+
vad_model.to(device)
|
|
9
59
|
|
|
10
|
-
|
|
60
|
+
# Load Whisper model
|
|
61
|
+
print("Loading Whisper model...")
|
|
62
|
+
whisper_model = WhisperModel("base", device=device, compute_type="int8")
|
|
63
|
+
|
|
64
|
+
# Conversation History Management
|
|
65
|
+
history = []
|
|
66
|
+
max_history = 10
|
|
67
|
+
memory_file = "conversation_history.json"
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
except:
|
|
71
|
+
print("audio dependencies not installed")
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def convert_mp3_to_wav(mp3_file, wav_file):
    """Convert an MP3 file to 16-bit mono 44.1 kHz WAV with the ffmpeg CLI.

    Args:
        mp3_file: Path of the MP3 file to read.
        wav_file: Path of the WAV file to write; an existing file is replaced.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    command = [
        "ffmpeg",
        "-y",
        "-i",
        mp3_file,
        "-acodec",
        "pcm_s16le",
        "-ac",
        "1",
        "-ar",
        "44100",
        wav_file,
    ]
    try:
        # Drop any stale output first. Attempting the removal directly avoids
        # the race between an exists() check and the remove() call.
        try:
            os.remove(wav_file)
        except FileNotFoundError:
            pass

        subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"Error converting MP3 to WAV: {e.stderr}")
        raise
    except FileNotFoundError:
        # subprocess.run raises this when the ffmpeg binary is not on PATH.
        print("Error converting MP3 to WAV: ffmpeg executable not found")
        raise
    except Exception as e:
        print(f"Unexpected error during conversion: {e}")
        raise
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
# Check if FFmpeg is available
|
|
107
|
+
def check_ffmpeg(executable="ffmpeg"):
    """Report whether the FFmpeg command-line tool can be run.

    Args:
        executable: Name or path of the binary to probe (defaults to
            ``"ffmpeg"`` looked up on PATH).

    Returns:
        True when ``<executable> -version`` starts and exits with status 0,
        False when the binary is missing, not executable, or exits non-zero.
    """
    try:
        subprocess.run(
            [executable, "-version"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True,  # a non-zero exit must not count as "available"
        )
    except (subprocess.SubprocessError, OSError):
        # OSError covers FileNotFoundError as well as PermissionError.
        return False
    return True
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
has_ffmpeg = check_ffmpeg()

# Device selection
device = "cpu"

# The guarded import block at the top of this module already attempts the same
# model setup. Only load here when that attempt did not leave both models
# behind: this avoids loading them twice, and reports a missing dependency
# instead of raising NameError on an unimported `torch` / `WhisperModel`.
if "vad_model" not in globals() or "whisper_model" not in globals():
    try:
        print(f"Using device: {device}")

        # Load VAD model
        print("Loading Silero VAD model...")
        vad_model, _ = torch.hub.load(
            repo_or_dir="snakers4/silero-vad",
            model="silero_vad",
            force_reload=False,
            onnx=False,
            verbose=False,
        )
        vad_model.to(device)

        # Load Whisper model
        print("Loading Whisper model...")
        whisper_model = WhisperModel("base", device=device, compute_type="int8")
    except Exception as e:
        print(f"audio models could not be loaded: {e}")

# Conversation History Management
# These do not depend on the optional audio packages, so they are always set.
history = []
max_history = 10
memory_file = "conversation_history.json"
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
# History Management Functions
|
|
145
|
+
def load_history():
    """Load the saved conversation from ``memory_file`` into ``history``.

    A missing file leaves ``history`` unchanged. An unreadable file, invalid
    JSON, or JSON that is not a list resets ``history`` to an empty list and
    prints the problem.

    NOTE: an identical ``load_history`` is defined again later in this module
    and shadows this one.
    """
    global history
    try:
        with open(memory_file, "r", encoding="utf-8") as f:
            loaded = json.load(f)
    except FileNotFoundError:
        return  # nothing has been saved yet
    except Exception as e:
        print(f"Error loading conversation history: {e}")
        history = []
        return

    # add_exchange() appends to and trims `history`, so it must be a list.
    if not isinstance(loaded, list):
        print(
            "Error loading conversation history: "
            f"expected a list, got {type(loaded).__name__}"
        )
        history = []
        return
    history = loaded
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def save_history():
    """Write ``history`` to ``memory_file`` as JSON.

    The data is written to a sibling ``.tmp`` file and then moved into place,
    so a failed dump (for example a non-serialisable entry) leaves the
    previously saved file intact instead of truncating it. Errors are printed,
    not raised.

    NOTE: an identical ``save_history`` is defined again later in this module
    and shadows this one.
    """
    tmp_path = f"{memory_file}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(history, f)
        os.replace(tmp_path, memory_file)
    except Exception as e:
        print(f"Error saving conversation history: {e}")
        # Best effort: do not leave the partial temp file behind.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def add_exchange(user_input, assistant_response):
    """Append one user/assistant exchange to ``history`` and persist it.

    Args:
        user_input: What the user said.
        assistant_response: What the assistant replied.

    The exchange is stamped with ``time.time()``. The oldest entries are then
    dropped until at most ``max_history`` remain, and the result is written
    out with ``save_history()``.

    NOTE: an identical ``add_exchange`` is defined again later in this module
    and shadows this one.
    """
    history.append(
        {
            "user": user_input,
            "assistant": assistant_response,
            "timestamp": time.time(),
        }
    )
    # Trim everything over the limit, not just one entry: a history loaded
    # from disk may already be longer than max_history.
    overflow = len(history) - max_history
    if overflow > 0:
        del history[:overflow]
    save_history()
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def get_context_string():
    """Render ``history`` as alternating ``User:`` / ``Assistant:`` lines.

    Returns:
        The lines joined with newlines, or an empty string when ``history``
        is empty. A missing ``"user"`` or ``"assistant"`` key renders as an
        empty value instead of raising, because ``history`` may have been
        loaded from a file with incomplete entries.

    NOTE: an identical ``get_context_string`` is defined again later in this
    module and shadows this one.
    """
    lines = []
    for exchange in history:
        lines.append(f"User: {exchange.get('user', '')}")
        lines.append(f"Assistant: {exchange.get('assistant', '')}")
    return "\n".join(lines)
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
# Audio Management Functions
|
|
186
|
+
def cleanup_temp_files():
    """Retry deletion of every path parked in ``cleanup_files``.

    A path is dropped from ``cleanup_files`` once it has been deleted or no
    longer exists. A path that still cannot be deleted stays in the list so a
    later call can try again.

    NOTE: an identical ``cleanup_temp_files`` is defined again later in this
    module and shadows this one.
    """
    for path in list(cleanup_files):  # iterate a copy; the list is mutated
        try:
            os.remove(path)
        except FileNotFoundError:
            pass  # already gone, nothing left to retry
        except (OSError, TypeError, ValueError):
            continue  # still not removable (or not a usable path): keep it
        try:
            cleanup_files.remove(path)
        except ValueError:
            pass  # another caller already dropped it
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def interrupt_speech():
    """Stop current playback and throw away every queued TTS clip.

    Sets ``should_stop_speaking``, stops and unloads the pygame music stream,
    drains ``tts_queue`` (deleting each queued file, or parking it in
    ``cleanup_files`` when deletion fails), then resets ``tts_sequence`` to 0
    and clears ``is_speaking``.

    NOTE: a second ``interrupt_speech`` later in this module shadows this
    one; clearing ``is_speaking`` here keeps the two copies consistent.
    """
    global should_stop_speaking, is_speaking, tts_sequence
    should_stop_speaking = True
    pygame.mixer.music.stop()
    pygame.mixer.music.unload()

    while True:
        try:
            _, temp_filename = tts_queue.get_nowait()
        except queue.Empty:
            break
        try:
            os.remove(temp_filename)
        except FileNotFoundError:
            pass  # nothing to delete
        except Exception:
            # Could not delete now: remember it for cleanup_temp_files().
            if temp_filename not in cleanup_files:
                cleanup_files.append(temp_filename)

    tts_sequence = 0
    is_speaking = False
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def audio_callback(in_data, frame_count, time_info, status):
    """Stream callback: push the captured chunk onto ``audio_queue``.

    Only ``in_data`` is used; the other parameters are accepted and ignored.
    Returns the same chunk together with ``pyaudio.paContinue``.
    """
    audio_queue.put(in_data)
    keep_streaming = pyaudio.paContinue
    return (in_data, keep_streaming)
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def transcribe_recording(audio_data):
    """Transcribe a list of raw 16-bit PCM chunks.

    Args:
        audio_data: Sequence of ``bytes`` chunks of int16 samples.

    Returns:
        Whatever ``run_transcription`` returns for the decoded samples, or
        None when ``audio_data`` is empty or None.
    """
    if not audio_data:
        return None

    raw = b"".join(audio_data)
    # int16 samples are two bytes wide; np.frombuffer raises on a buffer whose
    # size is not a multiple of that, so drop a dangling trailing byte.
    raw = raw[: len(raw) - (len(raw) % 2)]

    # Scale int16 samples to float32 in [-1.0, 1.0).
    audio_np = np.frombuffer(raw, dtype=np.int16).astype(np.float32) / 32768.0
    return run_transcription(audio_np)
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def run_transcription(audio_np):
    """Write float samples to a temporary WAV file and transcribe it.

    Args:
        audio_np: Float audio samples; they are scaled by 32768 and stored as
            int16 using the module's ``CHANNELS`` and ``RATE``.

    Returns:
        The stripped transcription text, or None when anything fails (the
        error is printed).
    """
    temp_file = None
    try:
        # mkstemp guarantees a unique name; a name built from int(time.time())
        # collides when two recordings finish within the same second.
        fd, temp_file = tempfile.mkstemp(prefix="temp_recording_", suffix=".wav")
        os.close(fd)

        # Clip before converting: +1.0 * 32768 does not fit in int16 and
        # would wrap around to -32768.
        pcm = (np.clip(audio_np, -1.0, 32767 / 32768) * 32768).astype(np.int16)
        with wave.open(temp_file, "wb") as wf:
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(2)  # 2 bytes per int16 sample
            wf.setframerate(RATE)
            wf.writeframes(pcm.tobytes())

        segments, _info = whisper_model.transcribe(
            temp_file, language="en", beam_size=5
        )
        transcription = " ".join(segment.text for segment in segments)
        return transcription.strip()

    except Exception as e:
        print(f"Transcription error: {str(e)}")
        return None

    finally:
        # Runs on success and on failure, so the temp file is never leaked.
        if temp_file is not None:
            try:
                os.remove(temp_file)
            except FileNotFoundError:
                pass
            except Exception:
                if temp_file not in cleanup_files:
                    cleanup_files.append(temp_file)
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
# History Management Functions
|
|
263
|
+
def load_history():
    """Load the saved conversation from ``memory_file`` into ``history``.

    A missing file leaves ``history`` unchanged. An unreadable file, invalid
    JSON, or JSON that is not a list resets ``history`` to an empty list and
    prints the problem.
    """
    global history
    try:
        with open(memory_file, "r", encoding="utf-8") as f:
            loaded = json.load(f)
    except FileNotFoundError:
        return  # nothing has been saved yet
    except Exception as e:
        print(f"Error loading conversation history: {e}")
        history = []
        return

    # add_exchange() appends to and trims `history`, so it must be a list.
    if not isinstance(loaded, list):
        print(
            "Error loading conversation history: "
            f"expected a list, got {type(loaded).__name__}"
        )
        history = []
        return
    history = loaded
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def save_history():
    """Write ``history`` to ``memory_file`` as JSON.

    The data is written to a sibling ``.tmp`` file and then moved into place,
    so a failed dump (for example a non-serialisable entry) leaves the
    previously saved file intact instead of truncating it. Errors are printed,
    not raised.
    """
    tmp_path = f"{memory_file}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(history, f)
        os.replace(tmp_path, memory_file)
    except Exception as e:
        print(f"Error saving conversation history: {e}")
        # Best effort: do not leave the partial temp file behind.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def add_exchange(user_input, assistant_response):
    """Append one user/assistant exchange to ``history`` and persist it.

    Args:
        user_input: What the user said.
        assistant_response: What the assistant replied.

    The exchange is stamped with ``time.time()``. The oldest entries are then
    dropped until at most ``max_history`` remain, and the result is written
    out with ``save_history()``.
    """
    history.append(
        {
            "user": user_input,
            "assistant": assistant_response,
            "timestamp": time.time(),
        }
    )
    # Trim everything over the limit, not just one entry: a history loaded
    # from disk may already be longer than max_history.
    overflow = len(history) - max_history
    if overflow > 0:
        del history[:overflow]
    save_history()
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def get_context_string():
    """Render ``history`` as alternating ``User:`` / ``Assistant:`` lines.

    Returns:
        The lines joined with newlines, or an empty string when ``history``
        is empty. A missing ``"user"`` or ``"assistant"`` key renders as an
        empty value instead of raising, because ``history`` may have been
        loaded from a file with incomplete entries.
    """
    lines = []
    for exchange in history:
        lines.append(f"User: {exchange.get('user', '')}")
        lines.append(f"Assistant: {exchange.get('assistant', '')}")
    return "\n".join(lines)
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
# Audio Management Functions
|
|
304
|
+
def cleanup_temp_files():
    """Retry deletion of every path parked in ``cleanup_files``.

    A path is dropped from ``cleanup_files`` once it has been deleted or no
    longer exists. A path that still cannot be deleted stays in the list so a
    later call can try again.
    """
    for path in list(cleanup_files):  # iterate a copy; the list is mutated
        try:
            os.remove(path)
        except FileNotFoundError:
            pass  # already gone, nothing left to retry
        except (OSError, TypeError, ValueError):
            continue  # still not removable (or not a usable path): keep it
        try:
            cleanup_files.remove(path)
        except ValueError:
            pass  # another caller already dropped it
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def interrupt_speech():
    """Stop current playback and throw away every queued TTS clip.

    Sets ``should_stop_speaking``, stops and unloads the pygame music stream,
    drains ``tts_queue`` (deleting each queued file, or parking it in
    ``cleanup_files`` when deletion fails), then resets ``tts_sequence`` to 0
    and clears ``is_speaking``.
    """
    # `response_generator` was declared global here but never assigned or
    # read in this function, so it is no longer listed.
    global should_stop_speaking, is_speaking, tts_sequence
    should_stop_speaking = True
    pygame.mixer.music.stop()
    pygame.mixer.music.unload()

    while True:
        try:
            _, temp_filename = tts_queue.get_nowait()
        except queue.Empty:
            break
        try:
            os.remove(temp_filename)
        except FileNotFoundError:
            pass  # nothing to delete
        except Exception:
            # Could not delete now: remember it for cleanup_temp_files().
            if temp_filename not in cleanup_files:
                cleanup_files.append(temp_filename)

    tts_sequence = 0
    is_speaking = False
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def audio_callback(in_data, frame_count, time_info, status):
    """Stream callback: push the captured chunk onto ``audio_queue``.

    Only ``in_data`` is used; the other parameters are accepted and ignored.
    Returns the same chunk together with ``pyaudio.paContinue``.
    """
    audio_queue.put(in_data)
    keep_streaming = pyaudio.paContinue
    return (in_data, keep_streaming)
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
# Text-to-Speech Functions
|
|
343
|
+
def play_audio_from_queue():
    """Loop forever, playing queued TTS clips in sequence order.

    ``tts_queue`` holds ``(sequence, filename)`` pairs. A clip is played only
    when its sequence equals the next expected number. When
    ``should_stop_speaking`` is set, playback stops, the queue is drained and
    the expected sequence restarts at 0. Played or discarded files are
    deleted; files that cannot be deleted are parked in ``cleanup_files``.

    NOTE(review): block nesting was reconstructed from a flattened view of
    this function - confirm against the real file.
    """
    global is_speaking, cleanup_files, should_stop_speaking

    def _discard(path):
        # Delete a clip now, or park it for cleanup_temp_files() on failure.
        try:
            if os.path.exists(path):
                os.remove(path)
        except BaseException:
            if path not in cleanup_files:
                cleanup_files.append(path)

    expected = 0

    while True:
        if should_stop_speaking:
            # Interrupt requested: silence the mixer and flush pending clips.
            pygame.mixer.music.stop()
            pygame.mixer.music.unload()

            while not tts_queue.empty():
                try:
                    _, pending = tts_queue.get_nowait()
                except queue.Empty:
                    break
                _discard(pending)

            expected = 0
            is_speaking = False
            should_stop_speaking = False
            time.sleep(0.1)
            continue

        try:
            if not tts_queue.empty():
                # Peek at the head without removing it.
                head_sequence, _head_path = tts_queue.queue[0]

                if head_sequence == expected:
                    _, clip_path = tts_queue.get()
                    is_speaking = True

                    try:
                        mixer_idle = not pygame.mixer.music.get_busy()
                        if len(cleanup_files) > 0 and mixer_idle:
                            cleanup_temp_files()

                        if should_stop_speaking:
                            continue

                        pygame.mixer.music.load(clip_path)
                        pygame.mixer.music.play()

                        # Poll until the clip ends or an interrupt arrives.
                        while not should_stop_speaking and pygame.mixer.music.get_busy():
                            pygame.time.wait(50)

                        pygame.mixer.music.unload()

                    except Exception as e:
                        print(f"Audio playback error: {str(e)}")
                    finally:
                        _discard(clip_path)

                        if not should_stop_speaking:
                            expected += 1
                        is_speaking = False

            time.sleep(0.05)
        except Exception:
            time.sleep(0.05)
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
import pygame
|
|
415
|
+
from gtts import gTTS
|
|
11
416
|
import tempfile
|
|
12
417
|
import os
|
|
13
|
-
import
|
|
14
|
-
from typing import Optional, List
|
|
15
|
-
from .llm_funcs import get_llm_response
|
|
418
|
+
import logging
|
|
16
419
|
|
|
420
|
+
logging.basicConfig(level=logging.ERROR)
|
|
421
|
+
logger = logging.getLogger(__name__)
|
|
17
422
|
|
|
18
|
-
|
|
19
|
-
|
|
423
|
+
import pyaudio
|
|
424
|
+
import wave
|
|
425
|
+
from gtts import gTTS
|
|
426
|
+
import tempfile
|
|
427
|
+
import os
|
|
428
|
+
import logging
|
|
20
429
|
|
|
430
|
+
import tempfile
|
|
431
|
+
import uuid
|
|
21
432
|
|
|
22
|
-
def calibrate_silence(sample_rate=16000, duration=2):
|
|
23
|
-
"""
|
|
24
|
-
Function Description:
|
|
25
|
-
This function calibrates the silence level for audio recording.
|
|
26
|
-
Args:
|
|
27
|
-
None
|
|
28
|
-
Keyword Args:
|
|
29
|
-
sample_rate: The sample rate for audio recording.
|
|
30
|
-
duration: The duration in seconds for calibration.
|
|
31
|
-
Returns:
|
|
32
|
-
The silence threshold level.
|
|
33
|
-
"""
|
|
34
433
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
rate=sample_rate,
|
|
40
|
-
input=True,
|
|
41
|
-
frames_per_buffer=1024,
|
|
42
|
-
)
|
|
434
|
+
def create_and_queue_audio(text, state):
|
|
435
|
+
"""Create and queue audio with state awareness for TTS/recording coordination"""
|
|
436
|
+
# Set TTS speaking flag
|
|
437
|
+
state["tts_is_speaking"] = True
|
|
43
438
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
levels.append(get_audio_level(data))
|
|
439
|
+
if not text.strip():
|
|
440
|
+
print("Empty text, skipping TTS")
|
|
441
|
+
state["tts_is_speaking"] = False
|
|
442
|
+
return
|
|
49
443
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
444
|
+
try:
|
|
445
|
+
unique_id = uuid.uuid4()
|
|
446
|
+
with tempfile.TemporaryDirectory() as temp_dir:
|
|
447
|
+
mp3_file = os.path.join(temp_dir, f"temp_{unique_id}.mp3")
|
|
448
|
+
wav_file = os.path.join(temp_dir, f"temp_{unique_id}.wav")
|
|
449
|
+
|
|
450
|
+
tts = gTTS(text=text, lang="en", slow=False)
|
|
451
|
+
tts.save(mp3_file)
|
|
53
452
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
Args:
|
|
65
|
-
audio_data: The audio data to check.
|
|
66
|
-
threshold: The silence threshold level.
|
|
67
|
-
Keyword Args:
|
|
68
|
-
None
|
|
69
|
-
Returns:
|
|
70
|
-
A boolean indicating whether the audio is silent.
|
|
71
|
-
"""
|
|
72
|
-
|
|
73
|
-
return get_audio_level(audio_data) < threshold
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
def record_audio(
|
|
77
|
-
sample_rate: int = 16000,
|
|
78
|
-
max_duration: int = 10,
|
|
79
|
-
silence_threshold: Optional[float] = None,
|
|
80
|
-
) -> bytes:
|
|
81
|
-
"""
|
|
82
|
-
Function Description:
|
|
83
|
-
This function records audio from the microphone.
|
|
84
|
-
Args:
|
|
85
|
-
None
|
|
86
|
-
Keyword Args:
|
|
87
|
-
sample_rate: The sample rate for audio recording.
|
|
88
|
-
max_duration: The maximum duration in seconds.
|
|
89
|
-
silence_threshold: The silence threshold level.
|
|
90
|
-
Returns:
|
|
91
|
-
The recorded audio data.
|
|
92
|
-
"""
|
|
93
|
-
|
|
94
|
-
if silence_threshold is None:
|
|
95
|
-
silence_threshold = calibrate_silence()
|
|
453
|
+
convert_mp3_to_wav(mp3_file, wav_file)
|
|
454
|
+
|
|
455
|
+
# Play audio and wait for completion
|
|
456
|
+
play_audio(wav_file, state)
|
|
457
|
+
except Exception as e:
|
|
458
|
+
print(f"Error in TTS process: {e}")
|
|
459
|
+
finally:
|
|
460
|
+
# Ensure flag is reset even if there's an error
|
|
461
|
+
state["tts_is_speaking"] = False
|
|
462
|
+
state["tts_just_finished"] = True
|
|
96
463
|
|
|
464
|
+
for file in [mp3_file, wav_file]:
|
|
465
|
+
try:
|
|
466
|
+
if os.path.exists(file):
|
|
467
|
+
os.remove(file)
|
|
468
|
+
except Exception as e:
|
|
469
|
+
print(f"Error removing temporary file {file}: {e}")
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
def play_audio(filename, state):
|
|
473
|
+
"""Play audio with state awareness for TTS/recording coordination"""
|
|
474
|
+
CHUNK = 4096 # Increased chunk size
|
|
475
|
+
|
|
476
|
+
wf = wave.open(filename, "rb")
|
|
97
477
|
p = pyaudio.PyAudio()
|
|
478
|
+
|
|
98
479
|
stream = p.open(
|
|
99
|
-
format=
|
|
100
|
-
channels=
|
|
101
|
-
rate=
|
|
102
|
-
|
|
103
|
-
frames_per_buffer=1024,
|
|
480
|
+
format=p.get_format_from_width(wf.getsampwidth()),
|
|
481
|
+
channels=wf.getnchannels(),
|
|
482
|
+
rate=wf.getframerate(),
|
|
483
|
+
output=True,
|
|
104
484
|
)
|
|
105
485
|
|
|
106
|
-
|
|
107
|
-
frames = []
|
|
108
|
-
silent_chunks = 0
|
|
109
|
-
has_speech = False
|
|
110
|
-
max_silent_chunks = int(sample_rate * 3.0 / 1024) # 3.0 seconds of silence
|
|
111
|
-
max_chunks = int(sample_rate * max_duration / 1024) # Maximum duration in chunks
|
|
112
|
-
|
|
113
|
-
start_time = time.time()
|
|
114
|
-
for _ in range(max_chunks):
|
|
115
|
-
data = stream.read(1024)
|
|
116
|
-
frames.append(data)
|
|
117
|
-
|
|
118
|
-
if is_silent(data, silence_threshold):
|
|
119
|
-
silent_chunks += 1
|
|
120
|
-
if has_speech and silent_chunks > max_silent_chunks:
|
|
121
|
-
break
|
|
122
|
-
else:
|
|
123
|
-
silent_chunks = 0
|
|
124
|
-
has_speech = True
|
|
125
|
-
|
|
126
|
-
if len(frames) % 10 == 0: # Print a dot every ~0.5 seconds
|
|
127
|
-
print(".", end="", flush=True)
|
|
128
|
-
|
|
129
|
-
if time.time() - start_time > max_duration:
|
|
130
|
-
print("\nMax duration reached.")
|
|
131
|
-
break
|
|
486
|
+
data = wf.readframes(CHUNK)
|
|
132
487
|
|
|
133
|
-
|
|
488
|
+
# This is blocking until audio is done playing
|
|
489
|
+
while data and state["running"]: # Check if system still running
|
|
490
|
+
stream.write(data)
|
|
491
|
+
data = wf.readframes(CHUNK)
|
|
134
492
|
|
|
135
493
|
stream.stop_stream()
|
|
136
494
|
stream.close()
|
|
137
495
|
p.terminate()
|
|
138
496
|
|
|
139
|
-
|
|
497
|
+
try:
|
|
498
|
+
os.unlink(filename)
|
|
499
|
+
except:
|
|
500
|
+
pass
|
|
501
|
+
|
|
502
|
+
|
|
503
|
+
def select_model():
    """Ask the user to pick a model by number and return its name.

    Prints the numbered list of models, then prompts until a valid choice is
    made. Pressing Enter selects the first model.

    Returns:
        The chosen model name. If reading input fails unexpectedly, the first
        model is returned (or ``"gemma:2b"`` if the list were empty).
    """
    # NOTE(review): "claude-haiku-3-5-latest" is kept exactly as it was;
    # confirm it is the identifier the provider expects.
    models = [
        "gpt-4o-mini",
        "claude-haiku-3-5-latest",
    ]

    print("\nAvailable models:")
    for number, model_name in enumerate(models, start=1):
        print(f"{number}. {model_name}")

    while True:
        try:
            choice = input(
                "\nSelect a model number (or press Enter for default): "
            ).strip()
            if not choice:
                # Entries are plain strings, not dicts with a "name" key.
                return models[0]

            index = int(choice)
            if 1 <= index <= len(models):
                selected_model = models[index - 1]
                print(f"Selected model: {selected_model}")
                return selected_model
            print(f"Please enter a number between 1 and {len(models)}")
        except ValueError:
            print("Please enter a valid number")
        except Exception as e:
            print(f"Error selecting model: {str(e)}")
            if models:
                return models[0]
            return "gemma:2b"
|
|
140
531
|
|
|
141
532
|
|
|
142
|
-
def
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
text: The text to convert to speech.
|
|
148
|
-
Keyword Args:
|
|
149
|
-
None
|
|
150
|
-
Returns:
|
|
151
|
-
None
|
|
152
|
-
"""
|
|
533
|
+
def process_response_chunk(text_chunk, state=None):
    """Clean one chunk of response text and speak it.

    Args:
        text_chunk: Text to speak; empty, whitespace-only or None is ignored.
        state: Shared coordination dict passed on to
            ``create_and_queue_audio``. When omitted, a fresh dict with
            ``"running"`` set to True is used so playback is not cut short.

    Returns:
        None.
    """
    if not text_chunk or not text_chunk.strip():
        return
    if state is None:
        # create_and_queue_audio requires a state dict; playback reads
        # state["running"] and the TTS flags are written into it.
        state = {
            "running": True,
            "tts_is_speaking": False,
            "tts_just_finished": False,
        }
    processed_text = process_text_for_tts(text_chunk)
    create_and_queue_audio(processed_text, state)
|
|
153
538
|
|
|
154
|
-
try:
|
|
155
|
-
tts = gTTS(text=text, lang="en")
|
|
156
|
-
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
|
|
157
|
-
tts.save(fp.name)
|
|
158
|
-
playsound(fp.name)
|
|
159
|
-
os.unlink(fp.name)
|
|
160
|
-
except Exception as e:
|
|
161
|
-
print(f"Text-to-speech error: {e}")
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
def process_audio(file_path: str, table_name: str) -> List:
|
|
165
|
-
"""
|
|
166
|
-
Function Description:
|
|
167
|
-
This function is used to process an audio file.
|
|
168
|
-
Args:
|
|
169
|
-
file_path : str : The file path.
|
|
170
|
-
table_name : str : The table name.
|
|
171
|
-
Keyword Args:
|
|
172
|
-
None
|
|
173
|
-
Returns:
|
|
174
|
-
List : The embeddings and texts.
|
|
175
|
-
"""
|
|
176
|
-
|
|
177
|
-
embeddings = []
|
|
178
|
-
texts = []
|
|
179
|
-
try:
|
|
180
|
-
audio, sr = librosa.load(file_path)
|
|
181
|
-
# Transcribe audio using Whisper
|
|
182
|
-
model = whisper.load_model("base") # Or a larger model if available
|
|
183
|
-
result = model.transcribe(file_path)
|
|
184
|
-
transcribed_text = result["text"].strip()
|
|
185
|
-
|
|
186
|
-
# Split transcribed text into chunks (adjust chunk_size as needed)
|
|
187
|
-
chunk_size = 1000
|
|
188
|
-
for i in range(0, len(transcribed_text), chunk_size):
|
|
189
|
-
chunk = transcribed_text[i : i + chunk_size]
|
|
190
|
-
text_embedding_response = get_llm_response(
|
|
191
|
-
f"Generate an embedding for: {chunk}",
|
|
192
|
-
model="text-embedding-ada-002",
|
|
193
|
-
provider="openai",
|
|
194
|
-
) # Use a text embedding model
|
|
195
|
-
if (
|
|
196
|
-
isinstance(text_embedding_response, dict)
|
|
197
|
-
and "error" in text_embedding_response
|
|
198
|
-
):
|
|
199
|
-
print(
|
|
200
|
-
f"Error generating text embedding: {text_embedding_response['error']}"
|
|
201
|
-
)
|
|
202
|
-
else:
|
|
203
|
-
embeddings.append(text_embedding_response) # Store the embedding
|
|
204
|
-
texts.append(chunk) # Store the corresponding text chunk
|
|
205
539
|
|
|
206
|
-
|
|
540
|
+
def process_text_for_tts(text):
    """Normalise text so it reads well when spoken.

    Steps, in order:
      1. Remove markup-like symbols (``* < > { } ( ) [ ] & % # @ ^ _ = + ~``).
      2. Rewrite two-letter dotted abbreviations (``U.S.`` becomes ``U S``).
      3. Insert a space after ``.``, ``!`` or ``?`` when a word follows
         directly, except inside a decimal number such as ``3.14``.
      4. Strip leading and trailing whitespace.

    Args:
        text: The raw text.

    Returns:
        The cleaned text.
    """
    text = re.sub(r"[*<>{}()\[\]&%#@^_=+~]", "", text)
    text = text.strip()
    # Letters only, so a sentence-final decimal like "1.5." is not split.
    text = re.sub(r"([^\W\d_])\.([^\W\d_])\.", r"\1 \2 ", text)
    # The nested lookbehind rejects "<digit>.<digit>" so decimals survive.
    text = re.sub(r"([.!?])(?=\w)(?!(?<=\d\.)\d)", r"\1 ", text)
    # The abbreviation rewrite can leave a trailing space behind.
    return text.strip()
|
|
207
546
|
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
547
|
+
|
|
548
|
+
"""
|
|
549
|
+
|
|
550
|
+
To use this code, you'll need to have the following dependencies installed:
|
|
551
|
+
|
|
552
|
+
```bash
|
|
553
|
+
pip install numpy torch torchaudio faster-whisper pygame pyaudio gtts ollama
|
|
554
|
+
```
|
|
555
|
+
|
|
556
|
+
FFmpeg must also be installed, because the generated MP3 speech is converted to WAV with it:
|
|
557
|
+
```bash
|
|
558
|
+
# On Ubuntu/Debian
|
|
559
|
+
sudo apt-get install ffmpeg
|
|
560
|
+
|
|
561
|
+
# On MacOS with Homebrew
|
|
562
|
+
brew install ffmpeg
|
|
563
|
+
|
|
564
|
+
# On Windows with Chocolatey
|
|
565
|
+
choco install ffmpeg
|
|
566
|
+
```
|
|
567
|
+
|
|
568
|
+
|
|
569
|
+
"""
|