npcsh 1.0.13__py3-none-any.whl → 1.0.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
npcsh/yap.py CHANGED
@@ -54,18 +54,20 @@ from npcpy.npc_compiler import (
54
54
  from npcpy.memory.command_history import CommandHistory, save_conversation_message,start_new_conversation
55
55
  from typing import Dict, Any, List
56
56
  def enter_yap_mode(
57
-
58
- model: str ,
59
- provider: str ,
60
- messages: list = None,
57
+ messages: list = None,
58
+ model: str = None,
59
+ provider: str = None ,
61
60
  npc = None,
62
- team= None,
61
+ team = None,
62
+ stream: bool = False,
63
+ api_url: str = None,
64
+ api_key: str=None,
65
+ conversation_id = None,
63
66
  tts_model="kokoro",
64
67
  voice="af_heart",
65
68
  files: List[str] = None,
66
69
  rag_similarity_threshold: float = 0.3,
67
- stream: bool = NPCSH_STREAM_OUTPUT,
68
- conversation_id = None,
70
+ **kwargs
69
71
  ) -> Dict[str, Any]:
70
72
  running = True
71
73
  is_recording = False
@@ -100,22 +102,20 @@ def enter_yap_mode(
100
102
  # Add conciseness instruction to the system message
101
103
  system_message = system_message + " " + concise_instruction
102
104
 
103
- if messages is None:
105
+ if messages is None or len(messages) == 0:
104
106
  messages = [{"role": "system", "content": system_message}]
105
107
  elif messages is not None and messages[0]['role'] != 'system':
106
108
  messages.insert(0, {"role": "system", "content": system_message})
107
109
 
108
110
  kokoro_pipeline = None
109
111
  if tts_model == "kokoro":
110
- try:
111
- from kokoro import KPipeline
112
- import soundfile as sf
112
+ from kokoro import KPipeline
113
+ import soundfile as sf
114
+
115
+ kokoro_pipeline = KPipeline(lang_code="a")
116
+ print("Kokoro TTS model initialized")
117
+
113
118
 
114
- kokoro_pipeline = KPipeline(lang_code="a")
115
- print("Kokoro TTS model initialized")
116
- except ImportError:
117
- print("Kokoro not installed, falling back to gTTS")
118
- tts_model = "gtts"
119
119
 
120
120
  # Initialize PyAudio
121
121
  pyaudio_instance = pyaudio.PyAudio()
@@ -134,43 +134,45 @@ def enter_yap_mode(
134
134
  nonlocal running, audio_stream
135
135
 
136
136
  while running and speech_thread_active.is_set():
137
- try:
138
- # Get next speech item from queue
139
- if not speech_queue.empty():
140
- text_to_speak = speech_queue.get(timeout=0.1)
141
-
142
- # Only process if there's text to speak
143
- if text_to_speak.strip():
144
- # IMPORTANT: Set is_speaking flag BEFORE starting audio output
145
- is_speaking.set()
146
-
147
- # Safely close the audio input stream before speaking
148
- current_audio_stream = audio_stream
149
- audio_stream = (
150
- None # Set to None to prevent capture thread from using it
151
- )
152
-
153
- if current_audio_stream and current_audio_stream.is_active():
154
- current_audio_stream.stop_stream()
155
- current_audio_stream.close()
156
-
157
- print(f"Speaking full response...")
158
-
159
- # Generate and play speech
160
- generate_and_play_speech(text_to_speak)
161
-
162
- # Delay after speech to prevent echo
163
- time.sleep(0.005 * len(text_to_speak))
164
- print(len(text_to_speak))
165
-
166
- # Clear the speaking flag to allow listening again
167
- is_speaking.clear()
168
- else:
169
- time.sleep(0.5)
170
- except Exception as e:
171
- print(f"Error in speech thread: {e}")
172
- is_speaking.clear() # Make sure to clear the flag if there's an error
173
- time.sleep(0.1)
137
+ #try:
138
+ # Get next speech item from queue
139
+ print('.', end='', flush=True)
140
+ if not speech_queue.empty():
141
+ print('\n')
142
+ text_to_speak = speech_queue.get(timeout=0.1)
143
+
144
+ # Only process if there's text to speak
145
+ if text_to_speak.strip():
146
+ # IMPORTANT: Set is_speaking flag BEFORE starting audio output
147
+ is_speaking.set()
148
+
149
+ # Safely close the audio input stream before speaking
150
+ current_audio_stream = audio_stream
151
+ audio_stream = (
152
+ None # Set to None to prevent capture thread from using it
153
+ )
154
+
155
+ if current_audio_stream and current_audio_stream.is_active():
156
+ current_audio_stream.stop_stream()
157
+ current_audio_stream.close()
158
+
159
+ print(f"Speaking full response...")
160
+ print(text_to_speak)
161
+ # Generate and play speech
162
+ generate_and_play_speech(text_to_speak)
163
+
164
+ # Delay after speech to prevent echo
165
+ time.sleep(0.005 * len(text_to_speak))
166
+ print(len(text_to_speak))
167
+
168
+ # Clear the speaking flag to allow listening again
169
+ is_speaking.clear()
170
+ else:
171
+ time.sleep(0.5)
172
+ #except Exception as e:
173
+ # print(f"Error in speech thread: {e}")
174
+ # is_speaking.clear() # Make sure to clear the flag if there's an error
175
+ # time.sleep(0.1)
174
176
 
175
177
  def safely_close_audio_stream(stream):
176
178
  """Safely close an audio stream with error handling"""
@@ -315,10 +317,9 @@ def enter_yap_mode(
315
317
  frames_per_buffer=CHUNK,
316
318
  )
317
319
 
318
- # Initialize or reset the recording variables
319
- is_recording = False
320
- recording_data = []
321
- buffer_data = []
320
+ # Add timeout counter
321
+ timeout_counter = 0
322
+ max_timeout = 100 # About 10 seconds at 0.1s intervals
322
323
 
323
324
  print("\nListening for speech...")
324
325
 
@@ -327,49 +328,63 @@ def enter_yap_mode(
327
328
  and audio_stream
328
329
  and audio_stream.is_active()
329
330
  and not is_speaking.is_set()
331
+ and timeout_counter < max_timeout
330
332
  ):
331
333
  try:
334
+ # Add non-blocking read with timeout
332
335
  data = audio_stream.read(CHUNK, exception_on_overflow=False)
333
- if data:
334
- audio_array = np.frombuffer(data, dtype=np.int16)
335
- audio_float = audio_array.astype(np.float32) / 32768.0
336
-
337
- tensor = torch.from_numpy(audio_float).to(device)
338
- speech_prob = vad_model(tensor, RATE).item()
339
- current_time = time.time()
340
-
341
- if speech_prob > 0.5: # VAD threshold
342
- last_speech_time = current_time
343
- if not is_recording:
344
- is_recording = True
345
- print("\nSpeech detected, listening...")
346
- recording_data.extend(buffer_data)
347
- buffer_data = []
348
- recording_data.append(data)
336
+
337
+ if not data:
338
+ timeout_counter += 1
339
+ time.sleep(0.1)
340
+ continue
341
+
342
+ # Reset timeout on successful read
343
+ timeout_counter = 0
344
+
345
+ audio_array = np.frombuffer(data, dtype=np.int16)
346
+ if len(audio_array) == 0:
347
+ continue
348
+
349
+ audio_float = audio_array.astype(np.float32) / 32768.0
350
+ tensor = torch.from_numpy(audio_float).to(device)
351
+
352
+ # Add timeout to VAD processing
353
+ speech_prob = vad_model(tensor, RATE).item()
354
+ current_time = time.time()
355
+
356
+ if speech_prob > 0.5: # VAD threshold
357
+ last_speech_time = current_time
358
+ if not is_recording:
359
+ is_recording = True
360
+ print("\nSpeech detected, listening...")
361
+ recording_data.extend(buffer_data)
362
+ buffer_data = []
363
+ recording_data.append(data)
364
+ else:
365
+ if is_recording:
366
+ if (
367
+ current_time - last_speech_time > 1
368
+ ): # silence duration
369
+ is_recording = False
370
+ print("Speech ended, transcribing...")
371
+
372
+ # Stop stream before transcribing
373
+ safely_close_audio_stream(audio_stream)
374
+ audio_stream = None
375
+
376
+ # Transcribe in this thread to avoid race conditions
377
+ transcription = transcribe_recording(recording_data)
378
+ if transcription:
379
+ transcription_queue.put(transcription)
380
+ recording_data = []
381
+ return True # Got speech
349
382
  else:
350
- if is_recording:
351
- if (
352
- current_time - last_speech_time > 1
353
- ): # silence duration
354
- is_recording = False
355
- print("Speech ended, transcribing...")
356
-
357
- # Stop stream before transcribing
358
- safely_close_audio_stream(audio_stream)
359
- audio_stream = None
360
-
361
- # Transcribe in this thread to avoid race conditions
362
- transcription = transcribe_recording(recording_data)
363
- if transcription:
364
- transcription_queue.put(transcription)
365
- recording_data = []
366
- return True # Got speech
367
- else:
368
- buffer_data.append(data)
369
- if len(buffer_data) > int(
370
- 0.65 * RATE / CHUNK
371
- ): # buffer duration
372
- buffer_data.pop(0)
383
+ buffer_data.append(data)
384
+ if len(buffer_data) > int(
385
+ 0.65 * RATE / CHUNK
386
+ ): # buffer duration
387
+ buffer_data.pop(0)
373
388
 
374
389
  # Check frequently if we need to stop capturing
375
390
  if is_speaking.is_set():
@@ -427,19 +442,14 @@ def enter_yap_mode(
427
442
 
428
443
 
429
444
  while running:
430
-
431
- # First check for typed input (non-blocking)
432
445
  import select
433
446
  import sys
434
-
435
- # Don't spam the console with prompts when speaking
436
447
  if not is_speaking.is_set():
437
448
  print(
438
449
  "🎤🎤🎤🎤\n Speak or type your message (or 'exit' to quit): ",
439
450
  end="",
440
451
  flush=True,
441
452
  )
442
-
443
453
  rlist, _, _ = select.select([sys.stdin], [], [], 0.1)
444
454
  if rlist:
445
455
  user_input = sys.stdin.readline().strip()
@@ -448,7 +458,7 @@ def enter_yap_mode(
448
458
  break
449
459
  if user_input:
450
460
  print(f"\nYou (typed): {user_input}")
451
- # Handle RAG context
461
+
452
462
  if loaded_content:
453
463
  context_content = ""
454
464
  for filename, content in loaded_content.items():
@@ -494,9 +504,8 @@ def enter_yap_mode(
494
504
 
495
505
 
496
506
  continue # Skip audio capture this cycle
497
-
498
- # Then try to capture some audio (if no typed input)
499
507
  if not is_speaking.is_set(): # Only capture if not currently speaking
508
+ print('capturing audio')
500
509
  got_speech = capture_audio()
501
510
 
502
511
  # If we got speech, process it
@@ -560,9 +569,9 @@ def main():
560
569
  provider = sibiji.provider
561
570
  # Enter spool mode
562
571
  enter_yap_mode(
563
- model,
564
- provider,
565
572
  messages=None,
573
+ model= model,
574
+ provider = provider,
566
575
  npc=sibiji,
567
576
  team = team,
568
577
  files=args.files,