npcsh 0.3.31-py3-none-any.whl → 0.3.32-py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in a supported public registry. It is provided for informational purposes only.
Files changed (50)
  1. npcsh/audio.py +540 -181
  2. npcsh/audio_gen.py +1 -0
  3. npcsh/cli.py +8 -10
  4. npcsh/conversation.py +14 -251
  5. npcsh/dataframes.py +13 -5
  6. npcsh/helpers.py +5 -0
  7. npcsh/image.py +2 -2
  8. npcsh/image_gen.py +38 -38
  9. npcsh/knowledge_graph.py +4 -4
  10. npcsh/llm_funcs.py +517 -349
  11. npcsh/npc_compiler.py +32 -23
  12. npcsh/npc_sysenv.py +5 -0
  13. npcsh/plonk.py +2 -2
  14. npcsh/response.py +131 -482
  15. npcsh/search.py +5 -1
  16. npcsh/serve.py +210 -203
  17. npcsh/shell.py +11 -25
  18. npcsh/shell_helpers.py +489 -99
  19. npcsh/stream.py +87 -554
  20. npcsh/video.py +5 -2
  21. npcsh/video_gen.py +69 -0
  22. npcsh-0.3.32.dist-info/METADATA +779 -0
  23. {npcsh-0.3.31.dist-info → npcsh-0.3.32.dist-info}/RECORD +49 -47
  24. npcsh-0.3.31.dist-info/METADATA +0 -1853
  25. {npcsh-0.3.31.data → npcsh-0.3.32.data}/data/npcsh/npc_team/bash_executer.tool +0 -0
  26. {npcsh-0.3.31.data → npcsh-0.3.32.data}/data/npcsh/npc_team/calculator.tool +0 -0
  27. {npcsh-0.3.31.data → npcsh-0.3.32.data}/data/npcsh/npc_team/celona.npc +0 -0
  28. {npcsh-0.3.31.data → npcsh-0.3.32.data}/data/npcsh/npc_team/code_executor.tool +0 -0
  29. {npcsh-0.3.31.data → npcsh-0.3.32.data}/data/npcsh/npc_team/corca.npc +0 -0
  30. {npcsh-0.3.31.data → npcsh-0.3.32.data}/data/npcsh/npc_team/eriane.npc +0 -0
  31. {npcsh-0.3.31.data → npcsh-0.3.32.data}/data/npcsh/npc_team/foreman.npc +0 -0
  32. {npcsh-0.3.31.data → npcsh-0.3.32.data}/data/npcsh/npc_team/generic_search.tool +0 -0
  33. {npcsh-0.3.31.data → npcsh-0.3.32.data}/data/npcsh/npc_team/image_generation.tool +0 -0
  34. {npcsh-0.3.31.data → npcsh-0.3.32.data}/data/npcsh/npc_team/lineru.npc +0 -0
  35. {npcsh-0.3.31.data → npcsh-0.3.32.data}/data/npcsh/npc_team/local_search.tool +0 -0
  36. {npcsh-0.3.31.data → npcsh-0.3.32.data}/data/npcsh/npc_team/maurawa.npc +0 -0
  37. {npcsh-0.3.31.data → npcsh-0.3.32.data}/data/npcsh/npc_team/npcsh.ctx +0 -0
  38. {npcsh-0.3.31.data → npcsh-0.3.32.data}/data/npcsh/npc_team/npcsh_executor.tool +0 -0
  39. {npcsh-0.3.31.data → npcsh-0.3.32.data}/data/npcsh/npc_team/raone.npc +0 -0
  40. {npcsh-0.3.31.data → npcsh-0.3.32.data}/data/npcsh/npc_team/screen_cap.tool +0 -0
  41. {npcsh-0.3.31.data → npcsh-0.3.32.data}/data/npcsh/npc_team/sibiji.npc +0 -0
  42. {npcsh-0.3.31.data → npcsh-0.3.32.data}/data/npcsh/npc_team/slean.npc +0 -0
  43. {npcsh-0.3.31.data → npcsh-0.3.32.data}/data/npcsh/npc_team/sql_executor.tool +0 -0
  44. {npcsh-0.3.31.data → npcsh-0.3.32.data}/data/npcsh/npc_team/test_pipeline.py +0 -0
  45. {npcsh-0.3.31.data → npcsh-0.3.32.data}/data/npcsh/npc_team/turnic.npc +0 -0
  46. {npcsh-0.3.31.data → npcsh-0.3.32.data}/data/npcsh/npc_team/welxor.npc +0 -0
  47. {npcsh-0.3.31.dist-info → npcsh-0.3.32.dist-info}/WHEEL +0 -0
  48. {npcsh-0.3.31.dist-info → npcsh-0.3.32.dist-info}/entry_points.txt +0 -0
  49. {npcsh-0.3.31.dist-info → npcsh-0.3.32.dist-info}/licenses/LICENSE +0 -0
  50. {npcsh-0.3.31.dist-info → npcsh-0.3.32.dist-info}/top_level.txt +0 -0
npcsh/shell_helpers.py CHANGED
@@ -1,6 +1,8 @@
 import os
 import pandas as pd
 
+import threading
+
 from typing import Dict, Any, List, Optional, Union
 import numpy as np
 import readline
@@ -25,12 +27,46 @@ import signal
 import platform
 import time
 
+import tempfile
+
+
+# Global variables
+running = True
+is_recording = False
+recording_data = []
+buffer_data = []
+last_speech_time = 0
+
 
 try:
     import whisper
-except:
+    from faster_whisper import WhisperModel
+    from gtts import gTTS
+    import torch
+    import pyaudio
+    import wave
+    import queue
+
+    from npcsh.audio import (
+        cleanup_temp_files,
+        FORMAT,
+        CHANNELS,
+        RATE,
+        device,
+        vad_model,
+        CHUNK,
+        whisper_model,
+        transcribe_recording,
+        convert_mp3_to_wav,
+    )
+
+
+except Exception as e:
     print(
-        "Could not load the whisper package. If you want to use tts/stt features, please run `pip install npcsh[audio]` and follow the instructions in the npcsh github readme to ensure your OS can handle the audio dependencies."
+        "Exception: "
+        + str(e)
+        + "\n"
+        + "Could not load the whisper package. If you want to use tts/stt features, please run `pip install npcsh[audio]` and follow the instructions in the npcsh github readme to ensure your OS can handle the audio dependencies."
     )
 try:
     from sentence_transformers import SentenceTransformer
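Note: the hunk above pulls the entire optional audio stack (whisper, faster-whisper, gTTS, torch, pyaudio, and the npcsh.audio helpers) into a single try/except, and the new handler prints the actual exception instead of silently swallowing it. A minimal sketch of the same guard idiom, with illustrative names:

```python
# Optional-dependency guard: if any audio import fails, report why and
# degrade gracefully instead of crashing at import time.
AUDIO_AVAILABLE = True
try:
    import pyaudio  # any of the optional audio extras may be missing
except Exception as e:
    AUDIO_AVAILABLE = False
    print(f"Audio features disabled: {e}")
```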
@@ -40,8 +76,15 @@ except:
         "Could not load the sentence-transformers package. If you want to use it or other local AI features, please run `pip install npcsh[local]` ."
     )
 
-from .load_data import load_pdf, load_csv, load_json, load_excel, load_txt, load_image
-from .npc_sysenv import (
+from npcsh.load_data import (
+    load_pdf,
+    load_csv,
+    load_json,
+    load_excel,
+    load_txt,
+    load_image,
+)
+from npcsh.npc_sysenv import (
     get_model_and_provider,
     get_available_models,
     get_system_message,
@@ -53,16 +96,18 @@ from .npc_sysenv import (
     NPCSH_VISION_PROVIDER,
     NPCSH_IMAGE_GEN_MODEL,
     NPCSH_IMAGE_GEN_PROVIDER,
+    NPCSH_VIDEO_GEN_MODEL,
+    NPCSH_VIDEO_GEN_PROVIDER,
 )
-from .command_history import (
+from npcsh.command_history import (
     CommandHistory,
     save_attachment_to_message,
     save_conversation_message,
     start_new_conversation,
 )
-from .embeddings import search_similar_texts, chroma_client
+from npcsh.embeddings import search_similar_texts, chroma_client
 
-from .llm_funcs import (
+from npcsh.llm_funcs import (
     execute_llm_command,
     execute_llm_question,
     get_stream,
@@ -70,13 +115,14 @@ from .llm_funcs import (
     get_llm_response,
     check_llm_command,
     generate_image,
+    generate_video,
     get_embeddings,
     get_stream,
 )
-from .plonk import plonk, action_space
-from .helpers import get_db_npcs, get_npc_path
+from npcsh.plonk import plonk, action_space
+from npcsh.helpers import get_db_npcs, get_npc_path
 
-from .npc_compiler import (
+from npcsh.npc_compiler import (
     NPCCompiler,
     NPC,
     load_npc_from_file,
@@ -86,10 +132,10 @@ from .npc_compiler import (
 )
 
 
-from .search import rag_search, search_web
-from .image import capture_screenshot, analyze_image
+from npcsh.search import rag_search, search_web
+from npcsh.image import capture_screenshot, analyze_image
 
-from .audio import calibrate_silence, record_audio, speak_text
+# from npcsh.audio import calibrate_silence, record_audio, speak_text
 from rich.console import Console
 from rich.markdown import Markdown
 from rich.syntax import Syntax
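Note: the import hunks above convert relative imports (`from .search import …`) to absolute ones (`from npcsh.search import …`). The absolute spelling does not depend on the module being loaded as part of the package, so it also works when the file is executed directly:

```python
# Absolute form (new): works whether the module is imported as
# npcsh.shell_helpers or the file is run as a script.
from npcsh.search import rag_search, search_web

# Relative form (old): raises "attempted relative import with no known
# parent package" when run outside the package context.
# from .search import rag_search, search_web
```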
@@ -566,11 +612,11 @@ def setup_readline() -> str:
 
     readline.set_history_length(1000)
     readline.parse_and_bind("set enable-bracketed-paste on")  # Enable paste mode
-    readline.parse_and_bind('"\e[A": history-search-backward')
-    readline.parse_and_bind('"\e[B": history-search-forward')
-    readline.parse_and_bind('"\C-r": reverse-search-history')
-    readline.parse_and_bind("\C-e: end-of-line")
-    readline.parse_and_bind("\C-a: beginning-of-line")
+    readline.parse_and_bind(r'"\e[A": history-search-backward')
+    readline.parse_and_bind(r'"\e[B": history-search-forward')
+    readline.parse_and_bind(r'"\C-r": reverse-search-history')
+    readline.parse_and_bind(r'\C-e: end-of-line')
+    readline.parse_and_bind(r'\C-a: beginning-of-line')
 
     return history_file
 
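Note: `\e` and `\C` are not valid Python escape sequences; in ordinary string literals they have triggered DeprecationWarning since Python 3.6 and SyntaxWarning since 3.12. The raw-string form adopted above passes the backslash through unchanged, which is what readline's init syntax expects:

```python
import readline

# Raw string: readline receives the literal two characters \e, as intended.
readline.parse_and_bind(r'"\e[A": history-search-backward')
# Plain string: Python itself tries to interpret "\e" and warns on newer versions.
# readline.parse_and_bind('"\e[A": history-search-backward')
```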
@@ -1788,8 +1834,8 @@ def execute_slash_command(
     log_action("Command Executed", command)
 
     command_parts = command.split()
-    command_name = command_parts[0]
-    args = command_parts[1:]
+    command_name = command_parts[0] if len(command_parts) >= 1 else None
+    args = command_parts[1:] if len(command_parts) >= 1 else []
 
     current_npc = npc
     if command_name in valid_npcs:
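Note: the guard above prevents an IndexError when the slash command is empty after splitting. Slicing an empty list never raises, so the second conditional is defensive symmetry rather than a correctness fix:

```python
# Behavior of the guarded split on an empty command (illustrative values):
command_parts = "".split()                                  # []
command_name = command_parts[0] if command_parts else None  # None, not IndexError
args = command_parts[1:]                                    # [] either way
```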
@@ -1972,6 +2018,7 @@ def execute_slash_command(
             command_parts, model=model, provider=provider, npc=npc, api_url=api_url
         )
     elif command_name == "help":  # New help command
+        print(get_help())
         return {
             "messages": messages,
             "output": get_help(),
@@ -2003,6 +2050,17 @@
         output = execute_rag_command(command, messages=messages)
         messages = output["messages"]
         output = output["output"]
+    elif command_name == "roll":
+
+        output = generate_video(
+            command,
+            model=NPCSH_VIDEO_GEN_MODEL,
+            provider=NPCSH_VIDEO_GEN_PROVIDER,
+            npc=npc,
+            messages=messages,
+        )
+        messages = output["messages"]
+        output = output["output"]
 
     elif command_name == "set":
         parts = command.split()
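Note: the new `/roll` branch follows the same contract as the other slash-command branches: each handler returns a dict carrying the updated `messages` list and an `output` payload. A minimal hypothetical handler in that shape (names illustrative, not part of the package):

```python
from typing import Any, Dict, List

def handle_example(command: str, messages: List[Dict[str, Any]]) -> Dict[str, Any]:
    # Real branches delegate to helpers like generate_video(); stubbed here.
    messages = messages + [{"role": "user", "content": command}]
    return {"messages": messages, "output": f"handled: {command}"}

result = handle_example("/roll a rolling ocean", [])
messages, output = result["messages"], result["output"]
```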
@@ -2073,13 +2131,15 @@
 
         files = None
         if len(command_parts) >= 2 and command_parts[1] == "reattach":
+            command_history = CommandHistory()
             last_conversation = command_history.get_last_conversation_by_path(
                 os.getcwd()
             )
             print(last_conversation)
             if last_conversation:
                 spool_context = [
-                    {"role": part[2], "content": part[3]} for part in last_conversation
+                    {"role": part["role"], "content": part["content"]}
+                    for part in last_conversation
                 ]
 
                 print(f"Reattached to previous conversation:\n\n")
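Note: the reattach fix above switches from positional tuple indexing to key access, so the code no longer depends on the column order of conversation rows (row shape below is illustrative):

```python
# Before: part[2], part[3] assumed a fixed column order.
# After: rows are read as mappings, immune to schema reordering.
row = {"id": 7, "timestamp": "2025-01-01T12:00:00", "role": "user", "content": "hi"}
message = {"role": row["role"], "content": row["content"]}
```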
@@ -2742,98 +2802,428 @@ def enter_whisper_mode(
     npc: Any = None,
     spool=False,
     continuous=False,
-    stream=False,
-) -> str:
-    """
-    Function Description:
-        This function is used to enter the whisper mode.
-    Args:
-    Keyword Args:
-        npc : Any : The NPC object.
-    Returns:
-        str : The output of the whisper mode.
-    """
+    stream=True,
+    tts_model="kokoro",
+    voice="af_heart",  # Default voice,
+) -> Dict[str, Any]:
+    # Initialize state
+    running = True
+    is_recording = False
+    recording_data = []
+    buffer_data = []
+    last_speech_time = 0
 
-    try:
-        model = whisper.load_model("base")
-    except Exception as e:
-        return f"Error: Unable to load Whisper model due to {str(e)}"
+    print("Entering whisper mode. Initializing...")
 
-    whisper_output = []
-    if npc:
-        npc_info = f" (NPC: {npc.name})"
-    else:
-        npc_info = ""
+    # Update the system message to encourage concise responses
+    concise_instruction = "Please provide brief responses of 1-2 sentences unless the user specifically asks for more detailed information. Keep responses clear and concise."
+
+    model = select_model() if npc is None else npc.model or NPCSH_CHAT_MODEL
+    provider = (
+        NPCSH_CHAT_PROVIDER if npc is None else npc.provider or NPCSH_CHAT_PROVIDER
+    )
+    api_url = NPCSH_API_URL if npc is None else npc.api_url or NPCSH_API_URL
+
+    print(f"\nUsing model: {model} with provider: {provider}")
+
+    system_message = get_system_message(npc) if npc else "You are a helpful assistant."
+
+    # Add conciseness instruction to the system message
+    system_message = system_message + " " + concise_instruction
 
     if messages is None:
-        messages = []  # Initialize messages list if not provided
+        messages = [{"role": "system", "content": system_message}]
+    elif messages and messages[0]["role"] == "system":
+        # Update the existing system message
+        messages[0]["content"] = messages[0]["content"] + " " + concise_instruction
+    else:
+        messages.insert(0, {"role": "system", "content": system_message})
 
-    # Begin whisper mode functionality
-    whisper_output.append(
-        f"Entering whisper mode{npc_info}. Calibrating silence level..."
-    )
+    kokoro_pipeline = None
+    if tts_model == "kokoro":
+        try:
+            from kokoro import KPipeline
+            import soundfile as sf
+
+            kokoro_pipeline = KPipeline(lang_code="a")
+            print("Kokoro TTS model initialized")
+        except ImportError:
+            print("Kokoro not installed, falling back to gTTS")
+            tts_model = "gtts"
+
+    # Initialize PyAudio
+    pyaudio_instance = pyaudio.PyAudio()
+    audio_stream = None  # We'll open and close as needed
+    transcription_queue = queue.Queue()
+
+    # Create and properly use the is_speaking event
+    is_speaking = threading.Event()
+    is_speaking.clear()  # Not speaking initially
+
+    speech_queue = queue.Queue(maxsize=20)
+    speech_thread_active = threading.Event()
+    speech_thread_active.set()
+
+    def speech_playback_thread():
+        nonlocal running, audio_stream
+
+        while running and speech_thread_active.is_set():
+            try:
+                # Get next speech item from queue
+                if not speech_queue.empty():
+                    text_to_speak = speech_queue.get(timeout=0.1)
+
+                    # Only process if there's text to speak
+                    if text_to_speak.strip():
+                        # IMPORTANT: Set is_speaking flag BEFORE starting audio output
+                        is_speaking.set()
+
+                        # Safely close the audio input stream before speaking
+                        current_audio_stream = audio_stream
+                        audio_stream = (
+                            None  # Set to None to prevent capture thread from using it
+                        )
+
+                        if current_audio_stream and current_audio_stream.is_active():
+                            current_audio_stream.stop_stream()
+                            current_audio_stream.close()
+
+                        print(f"Speaking full response...")
+
+                        # Generate and play speech
+                        generate_and_play_speech(text_to_speak)
+
+                        # Delay after speech to prevent echo
+                        time.sleep(0.005 * len(text_to_speak))
+                        print(len(text_to_speak))
+
+                        # Clear the speaking flag to allow listening again
+                        is_speaking.clear()
+                else:
+                    time.sleep(0.5)
+            except Exception as e:
+                print(f"Error in speech thread: {e}")
+                is_speaking.clear()  # Make sure to clear the flag if there's an error
+                time.sleep(0.1)
+
+    def safely_close_audio_stream(stream):
+        """Safely close an audio stream with error handling"""
+        if stream:
+            try:
+                if stream.is_active():
+                    stream.stop_stream()
+                stream.close()
+            except Exception as e:
+                print(f"Error closing audio stream: {e}")
+
+    # Start speech thread
+    speech_thread = threading.Thread(target=speech_playback_thread)
+    speech_thread.daemon = True
+    speech_thread.start()
+
+    def generate_and_play_speech(text):
+        try:
+            # Create a temporary file for audio
+            unique_id = str(time.time()).replace(".", "")
+            temp_dir = tempfile.gettempdir()
+            wav_file = os.path.join(temp_dir, f"temp_{unique_id}.wav")
+
+            # Generate speech based on selected TTS model
+            if tts_model == "kokoro" and kokoro_pipeline:
+                # Use Kokoro for generation
+                generator = kokoro_pipeline(text, voice=voice)
+
+                # Get the audio from the generator
+                for _, _, audio in generator:
+                    # Save audio to WAV file
+                    import soundfile as sf
+
+                    sf.write(wav_file, audio, 24000)
+                    break  # Just use the first chunk for now
+            else:
+                # Fall back to gTTS
+                mp3_file = os.path.join(temp_dir, f"temp_{unique_id}.mp3")
+                tts = gTTS(text=text, lang="en", slow=False)
+                tts.save(mp3_file)
+                convert_mp3_to_wav(mp3_file, wav_file)
+
+            # Play the audio
+            wf = wave.open(wav_file, "rb")
+            p = pyaudio.PyAudio()
+
+            stream = p.open(
+                format=p.get_format_from_width(wf.getsampwidth()),
+                channels=wf.getnchannels(),
+                rate=wf.getframerate(),
+                output=True,
+            )
+
+            data = wf.readframes(4096)
+            while data and running:
+                stream.write(data)
+                data = wf.readframes(4096)
+
+            stream.stop_stream()
+            stream.close()
+            p.terminate()
+
+            # Cleanup temp files
+            try:
+                if os.path.exists(wav_file):
+                    os.remove(wav_file)
+                if tts_model == "gtts" and "mp3_file" in locals():
+                    if os.path.exists(mp3_file):
+                        os.remove(mp3_file)
+            except Exception as e:
+                print(f"Error removing temp file: {e}")
+
+        except Exception as e:
+            print(f"Error in TTS process: {e}")
+
+    # Modified speak_text function that just queues text
+    def speak_text(text):
+        speech_queue.put(text)
+
+    def process_input(user_input):
+        nonlocal messages
+
+        # Add user message
+        messages.append({"role": "user", "content": user_input})
+
+        # Process with LLM and collect the ENTIRE response first
+        try:
+            full_response = ""
+
+            # Use get_stream for streaming response
+            check = check_llm_command(
+                user_input,
+                npc=npc,
+                messages=messages,
+                model=model,
+                provider=provider,
+                stream=True,
+                whisper=True,
+            )
+
+            # Collect the entire response first
+            for chunk in check:
+                if chunk:
+                    chunk_content = "".join(
+                        choice.delta.content
+                        for choice in chunk.choices
+                        if choice.delta.content is not None
+                    )
+
+                    full_response += chunk_content
+
+                    # Show progress in console
+                    print(chunk_content, end="", flush=True)
+
+            print("\n")  # End the progress display
+
+            # Process and speak the entire response at once
+            if full_response.strip():
+                processed_text = process_text_for_tts(full_response)
+                speak_text(processed_text)
+
+            # Add assistant's response to messages
+            messages.append({"role": "assistant", "content": full_response})
+
+        except Exception as e:
+            print(f"Error in LLM response: {e}")
+            speak_text("I'm sorry, there was an error processing your request.")
+
+    # Function to capture and process audio
+    def capture_audio():
+        nonlocal is_recording, recording_data, buffer_data, last_speech_time, running, is_speaking
+        nonlocal audio_stream, transcription_queue
+
+        # Don't try to record if we're speaking
+        if is_speaking.is_set():
+            return False
+
+        try:
+            # Only create a new audio stream if we don't have one
+            if audio_stream is None and not is_speaking.is_set():
+                audio_stream = pyaudio_instance.open(
+                    format=FORMAT,
+                    channels=CHANNELS,
+                    rate=RATE,
+                    input=True,
+                    frames_per_buffer=CHUNK,
+                )
+
+            # Initialize or reset the recording variables
+            is_recording = False
+            recording_data = []
+            buffer_data = []
+
+            print("\nListening for speech...")
+
+            while (
+                running
+                and audio_stream
+                and audio_stream.is_active()
+                and not is_speaking.is_set()
+            ):
+                try:
+                    data = audio_stream.read(CHUNK, exception_on_overflow=False)
+                    if data:
+                        audio_array = np.frombuffer(data, dtype=np.int16)
+                        audio_float = audio_array.astype(np.float32) / 32768.0
+
+                        tensor = torch.from_numpy(audio_float).to(device)
+                        speech_prob = vad_model(tensor, RATE).item()
+                        current_time = time.time()
+
+                        if speech_prob > 0.5:  # VAD threshold
+                            last_speech_time = current_time
+                            if not is_recording:
+                                is_recording = True
+                                print("\nSpeech detected, listening...")
+                                recording_data.extend(buffer_data)
+                                buffer_data = []
+                            recording_data.append(data)
+                        else:
+                            if is_recording:
+                                if (
+                                    current_time - last_speech_time > 1
+                                ):  # silence duration
+                                    is_recording = False
+                                    print("Speech ended, transcribing...")
+
+                                    # Stop stream before transcribing
+                                    safely_close_audio_stream(audio_stream)
+                                    audio_stream = None
+
+                                    # Transcribe in this thread to avoid race conditions
+                                    transcription = transcribe_recording(recording_data)
+                                    if transcription:
+                                        transcription_queue.put(transcription)
+                                    recording_data = []
+                                    return True  # Got speech
+                            else:
+                                buffer_data.append(data)
+                                if len(buffer_data) > int(
+                                    0.65 * RATE / CHUNK
+                                ):  # buffer duration
+                                    buffer_data.pop(0)
+
+                    # Check frequently if we need to stop capturing
+                    if is_speaking.is_set():
+                        safely_close_audio_stream(audio_stream)
+                        audio_stream = None
+                        return False
+
+                except Exception as e:
+                    print(f"Error processing audio frame: {e}")
+                    time.sleep(0.1)
+
+        except Exception as e:
+            print(f"Error in audio capture: {e}")
+
+        # Close stream if we exit without finding speech
+        safely_close_audio_stream(audio_stream)
+        audio_stream = None
+
+        return False
+
+    def process_text_for_tts(text):
+        # Remove special characters that might cause issues in TTS
+        text = re.sub(r"[*<>{}()\[\]&%#@^_=+~]", "", text)
+        text = text.strip()
+        # Add spaces after periods that are followed by words (for better pronunciation)
+        text = re.sub(r"(\w)\.(\w)\.", r"\1 \2 ", text)
+        text = re.sub(r"([.!?])(\w)", r"\1 \2", text)
+        return text
+
+    # Now that functions are defined, play welcome messages
+    speak_text("Entering whisper mode. Please wait.")
 
     try:
-        silence_threshold = calibrate_silence()
-    except Exception as e:
-        return f"Error: Unable to calibrate silence due to {str(e)}"
 
-    whisper_output.append(
-        "Ready. Speak after seeing 'Listening...'. Say 'exit' or type '/wq' to quit."
-    )
-    speak_text("Whisper mode activated. Ready for your input.")
+        while running:
 
-    while True:
-        audio_data = record_audio(silence_threshold=silence_threshold)
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
-            wf = wave.open(temp_audio.name, "wb")
-            wf.setnchannels(1)
-            wf.setsampwidth(2)
-            wf.setframerate(16000)
-            wf.writeframes(audio_data)
-            wf.close()
-
-            result = model.transcribe(temp_audio.name)
-            text = result["text"].strip()
-            print(f"You said: {text}")
-        os.unlink(temp_audio.name)
-
-        messages.append({"role": "user", "content": text})  # Add user message
-        if text.lower() in ["exit", "/wq"]:
-            whisper_output.append("Exiting whisper mode.")
-            speak_text("Exiting whisper mode. Goodbye!")
-            break
-        if not spool:
-            llm_response = check_llm_command(
-                text, npc=npc, messages=messages, stream=stream
-            )  # Use
+            # First check for typed input (non-blocking)
+            import select
+            import sys
 
-            messages = llm_response["messages"]
-            output = llm_response["output"]
-        else:
-            if stream:
-                messages = get_stream(
-                    messages,
-                    model=model,
-                    provider=provider,
-                    npc=npc,
+            # Don't spam the console with prompts when speaking
+            if not is_speaking.is_set():
+                print(
+                    "\Speak or type your message (or 'exit' to quit): ",
+                    end="",
+                    flush=True,
                 )
+
+            rlist, _, _ = select.select([sys.stdin], [], [], 0.1)
+            if rlist:
+                user_input = sys.stdin.readline().strip()
+                if user_input.lower() in ("exit", "quit", "goodbye"):
+                    print("\nExiting whisper mode.")
+                    break
+                if user_input:
+                    print(f"\nYou (typed): {user_input}")
+                    process_input(user_input)
+                    continue  # Skip audio capture this cycle
+
+            # Then try to capture some audio (if no typed input)
+            if not is_speaking.is_set():  # Only capture if not currently speaking
+                got_speech = capture_audio()
+
+                # If we got speech, process it
+                if got_speech:
+                    try:
+                        transcription = transcription_queue.get_nowait()
+                        print(f"\nYou (spoke): {transcription}")
+                        process_input(transcription)
+                    except queue.Empty:
+                        pass
             else:
-                messages = get_conversation(
-                    messages,
-                    model=model,
-                    provider=provider,
-                    npc=npc,
-                )
+                # If we're speaking, just wait a bit without spamming the console
+                time.sleep(0.1)
 
-        output = messages[-1]["content"]
-        print(output)
-        if not continuous:
-            inp = input("Press Enter to continue or type '/q' to quit: ")
-            if inp.lower() == "/q":
-                break
+    except KeyboardInterrupt:
+        print("\nInterrupted by user.")
+
+    finally:
+        # Set running to False to signal threads to exit
+        running = False
+        speech_thread_active.clear()
+
+        # Clean up audio resources
+        safely_close_audio_stream(audio_stream)
 
-    return messages
+        if pyaudio_instance:
+            pyaudio_instance.terminate()
+
+        print("\nExiting whisper mode.")
+        speak_text("Exiting whisper mode. Goodbye!")
+        time.sleep(1)
+        cleanup_temp_files()
+
+    return {"messages": messages, "output": "Whisper mode session ended."}
+
+
+def get_context_string(messages):
+    context = []
+    for message in messages[-5:]:  # Get last 5 messages for context
+        role = message.get("role", "")
+        content = message.get("content", "")
+        context.append(f"{role.capitalize()}: {content}")
+    return "\n".join(context)
+
+
+def input_with_timeout(prompt, timeout=0.1):
+    """Non-blocking input function with a timeout."""
+    import select
+    import sys
+
+    print(prompt, end="", flush=True)
+    rlist, _, _ = select.select([sys.stdin], [], [], timeout)
+    if rlist:
+        return sys.stdin.readline().strip()
+    return None
 
 
 def enter_notes_mode(npc: Any = None) -> None:
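Note: one detail worth calling out in the new capture loop: audio chunks seen before the VAD fires are kept in a short sliding buffer (about 0.65 s) and prepended to the recording, so the first syllable of an utterance is not clipped. A minimal self-contained sketch of that ring buffer (RATE and CHUNK values are assumptions; npcsh defines its own in npcsh.audio):

```python
RATE, CHUNK = 16000, 512               # sample rate, frames per read (assumed)
MAX_CHUNKS = int(0.65 * RATE / CHUNK)  # ~0.65 s of audio

buffer_data = []

def remember_chunk(data: bytes) -> None:
    """Keep only the most recent ~0.65 s of pre-speech audio."""
    buffer_data.append(data)
    if len(buffer_data) > MAX_CHUNKS:
        buffer_data.pop(0)  # drop the oldest chunk so the window slides
```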