abstractvoice-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,85 @@
#!/usr/bin/env python3
"""
AbstractVoice voice mode CLI launcher.

This module provides a direct entry point to start AbstractVoice in voice mode.
"""

import argparse
from abstractvoice.examples.cli_repl import VoiceREPL

def parse_args():
    """Parse command line arguments."""
    parser = argparse.ArgumentParser(description="AbstractVoice Voice Mode")
    parser.add_argument("--debug", action="store_true", help="Enable debug mode")
    parser.add_argument("--api", default="http://localhost:11434/api/chat",
                        help="LLM API URL")
    parser.add_argument("--model", default="granite3.3:2b",
                        help="LLM model name")
    parser.add_argument("--whisper", default="tiny",
                        help="Whisper model to use (tiny, base, small, medium, large)")
    parser.add_argument("--no-listening", action="store_true",
                        help="Disable speech-to-text (listening); TTS still works")
    parser.add_argument("--system",
                        help="Custom system prompt")
    parser.add_argument("--temperature", type=float, default=0.4,
                        help="Set temperature (0.0-2.0) for the LLM")
    parser.add_argument("--max-tokens", type=int, default=4096,
                        help="Set maximum tokens for the LLM response")
    return parser.parse_args()

def main():
    """Entry point for direct voice mode."""
    # Parse arguments before the try block so the exception handler
    # below can safely consult args.debug even if startup fails early.
    args = parse_args()

    try:
        print("Starting AbstractVoice voice interface...")

        # Initialize REPL
        repl = VoiceREPL(
            api_url=args.api,
            model=args.model,
            debug_mode=args.debug
        )

        # Set custom system prompt if provided
        if args.system:
            repl.system_prompt = args.system
            repl.messages = [{"role": "system", "content": args.system}]
            if args.debug:
                print(f"System prompt set to: {args.system}")

        # Set temperature and max_tokens
        repl.temperature = args.temperature
        repl.max_tokens = args.max_tokens
        if args.debug:
            print(f"Temperature: {args.temperature}")
            print(f"Max tokens: {args.max_tokens}")

        # Change Whisper model if specified
        if args.whisper and args.whisper != "tiny":
            if repl.voice_manager.set_whisper(args.whisper) and args.debug:
                print(f"Using Whisper model: {args.whisper}")

        # Start in voice mode automatically unless --no-listening is specified
        if not args.no_listening:
            print("Activating voice mode. Say 'stop' to exit voice mode.")
            # Use the existing voice mode method
            repl.do_voice("on")

        # Start the REPL
        repl.cmdloop()

    except KeyboardInterrupt:
        print("\nExiting AbstractVoice...")
    except Exception as e:
        print(f"Application error: {e}")
        if args.debug:
            import traceback
            traceback.print_exc()

if __name__ == "__main__":
    main()
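
For comparison, the same launch sequence can be driven programmatically; this minimal sketch uses only the VoiceREPL calls that appear in the launcher above:

from abstractvoice.examples.cli_repl import VoiceREPL

repl = VoiceREPL(api_url="http://localhost:11434/api/chat",
                 model="granite3.3:2b", debug_mode=False)
repl.do_voice("on")  # enable voice mode, exactly as the launcher does
repl.cmdloop()       # blocks until the user exits the REPL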
@@ -0,0 +1,214 @@
#!/usr/bin/env python3
"""
Web API example using AbstractVoice with Flask.

This example shows how to create a simple web API that exposes
AbstractVoice functionality to web applications.
"""

import argparse
import json
import os
import tempfile
import uuid
from flask import Flask, request, jsonify, send_file, render_template_string

# Import VoiceManager only when needed
# from abstractvoice import VoiceManager


# Initialize Flask app
app = Flask(__name__)

# Global voice manager
voice_manager = None

# Store active sessions (in a real app, use a database)
active_sessions = {}


# Simple HTML template for the home page.
# Backslashes in the curl examples are escaped as \\ so the rendered
# <pre> blocks show real line continuations instead of collapsing the
# command onto one line.
HOME_PAGE_TEMPLATE = """
<!DOCTYPE html>
<html>
<head>
    <title>AbstractVoice Web API</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            max-width: 800px;
            margin: 0 auto;
            padding: 20px;
            line-height: 1.6;
        }
        h1, h2 {
            color: #333;
        }
        pre {
            background-color: #f5f5f5;
            padding: 10px;
            border-radius: 4px;
            overflow-x: auto;
        }
        code {
            background-color: #f5f5f5;
            padding: 2px 4px;
            border-radius: 4px;
        }
        .endpoint {
            margin-bottom: 20px;
            border-bottom: 1px solid #eee;
            padding-bottom: 20px;
        }
    </style>
</head>
<body>
    <h1>AbstractVoice Web API</h1>
    <p>Welcome to the AbstractVoice Web API. Below are the available endpoints:</p>

    <div class="endpoint">
        <h2>GET /api/status</h2>
        <p>Get the status of the voice services.</p>
        <pre>curl http://{{ host }}:{{ port }}/api/status</pre>
    </div>

    <div class="endpoint">
        <h2>POST /api/tts</h2>
        <p>Convert text to speech and return an audio file.</p>
        <p><strong>Request Body:</strong></p>
        <pre>{
    "text": "Text to speak",
    "speed": 1.0 // Optional
}</pre>
        <p><strong>Example:</strong></p>
        <pre>curl -X POST http://{{ host }}:{{ port }}/api/tts \\
    -H "Content-Type: application/json" \\
    -d '{"text":"Hello, this is a test", "speed":1.0}' \\
    --output speech.wav</pre>
    </div>

    <div class="endpoint">
        <h2>POST /api/stt/transcribe</h2>
        <p>Transcribe audio from a file.</p>
        <p><strong>Example:</strong></p>
        <pre>curl -X POST http://{{ host }}:{{ port }}/api/stt/transcribe \\
    -F "audio_file=@/path/to/audio.wav"</pre>
    </div>

    <div class="endpoint">
        <h2>POST /api/stt/start</h2>
        <p>Start a listening session.</p>
        <pre>curl -X POST http://{{ host }}:{{ port }}/api/stt/start</pre>
    </div>

    <div class="endpoint">
        <h2>POST /api/stt/stop</h2>
        <p>Stop a listening session.</p>
        <p><strong>Request Body:</strong></p>
        <pre>{
    "session_id": "UUID of the session"
}</pre>
    </div>
</body>
</html>
"""


@app.route('/')
def home():
    """Serve the home page with API documentation."""
    host = request.host.split(':')[0]
    port = request.host.split(':')[1] if ':' in request.host else "5000"
    return render_template_string(HOME_PAGE_TEMPLATE, host=host, port=port)


@app.route('/api/test', methods=['GET'])
def test_api():
    """Simple test endpoint to verify the API is working."""
    return jsonify({
        "status": "ok",
        "message": "AbstractVoice Web API is running",
        "is_voice_manager_initialized": voice_manager is not None
    })


@app.route('/api/simpletest', methods=['GET'])
def simple_test():
    """A very simple test that doesn't require any initialization."""
    return jsonify({
        "status": "ok",
        "message": "Basic Flask API is working!",
        "timestamp": str(uuid.uuid4())
    })


# Simplified placeholder that doesn't actually load the VoiceManager
def lazy_initialize_voice_manager(debug_mode=False):
    """Initialize the voice manager only when needed."""
    print("This is a placeholder for VoiceManager initialization")
    print("For a full implementation, uncomment the VoiceManager import")
    return None


def parse_args():
    """Parse command line arguments."""
    parser = argparse.ArgumentParser(description="AbstractVoice Web API Example")
    parser.add_argument("--debug", action="store_true", help="Enable debug mode")
    parser.add_argument("--host", default="127.0.0.1", help="Host to listen on")
    parser.add_argument("--port", type=int, default=5000, help="Port to listen on")
    parser.add_argument("--simulate", action="store_true",
                        help="Simulate only, don't load models")
    return parser.parse_args()


def main():
    """Entry point for the application."""
    global voice_manager

    try:
        # Parse command line arguments
        args = parse_args()

        # Configure logging
        import logging
        log_level = logging.DEBUG if args.debug else logging.INFO
        logging.basicConfig(
            level=log_level,
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        )

        # Configure Flask for development
        if args.debug:
            app.debug = True
            app.logger.setLevel(logging.DEBUG)
        else:
            app.logger.setLevel(logging.INFO)

        # Print startup message
        print(f"Starting AbstractVoice Web API on {args.host}:{args.port}")

        if not args.simulate:
            print("Initializing VoiceManager (this may take a moment)...")
            # Initialize voice manager - for a real implementation, uncomment:
            # from abstractvoice import VoiceManager
            # voice_manager = VoiceManager(debug_mode=args.debug)
        else:
            print("Running in simulation mode (no models loaded)")

        # Run Flask app
        print(f"Server is ready at http://{args.host}:{args.port}")
        print("Try these test endpoints:")
        print(f"  http://{args.host}:{args.port}/")
        print(f"  http://{args.host}:{args.port}/api/simpletest")
        print("Press CTRL+C to quit")

        app.run(host=args.host, port=args.port)

    except KeyboardInterrupt:
        print("\nExiting...")
    except Exception as e:
        print(f"Application error: {e}")


if __name__ == "__main__":
    main()
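
Note that the home page documents /api/tts and /api/stt endpoints that this pared-down example never registers. A hypothetical sketch of one such handler follows; synthesize_to_file is a placeholder name, not a documented VoiceManager method:

@app.route('/api/tts', methods=['POST'])
def tts():
    data = request.get_json(silent=True)
    if not data or 'text' not in data:
        return jsonify({"error": "missing 'text' field"}), 400
    if voice_manager is None:
        return jsonify({"error": "voice manager not initialized"}), 503
    out_path = os.path.join(tempfile.gettempdir(), f"{uuid.uuid4()}.wav")
    # Hypothetical call -- 'synthesize_to_file' stands in for whatever TTS
    # method the real VoiceManager exposes:
    # voice_manager.synthesize_to_file(data['text'], out_path, speed=data.get('speed', 1.0))
    return send_file(out_path, mimetype='audio/wav')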
@@ -0,0 +1,252 @@
"""Voice recognition module that combines VAD and STT."""

import threading
import time
import pyaudio
from .vad import VoiceDetector
from .stt import Transcriber


class VoiceRecognizer:
    """Voice recognition with VAD and STT."""

    def __init__(self, transcription_callback, stop_callback=None,
                 vad_aggressiveness=1, min_speech_duration=600,
                 silence_timeout=1500, sample_rate=16000,
                 chunk_duration=30, whisper_model="tiny",
                 min_transcription_length=5, debug_mode=False):
        """Initialize voice recognizer.

        Args:
            transcription_callback: Function to call with transcribed text
            stop_callback: Function to call when "stop" is detected
            vad_aggressiveness: VAD aggressiveness (0-3)
            min_speech_duration: Min speech duration in ms to start recording
            silence_timeout: Silence timeout in ms to end recording
            sample_rate: Audio sample rate in Hz
            chunk_duration: Audio chunk duration in ms
            whisper_model: Whisper model name
            min_transcription_length: Min valid transcription length
            debug_mode: Enable debug output
        """
        self.debug_mode = debug_mode
        self.transcription_callback = transcription_callback
        self.stop_callback = stop_callback

        # Configuration: durations in ms are converted into chunk counts
        self.sample_rate = sample_rate
        self.chunk_duration = chunk_duration  # in ms
        self.chunk_size = int(sample_rate * chunk_duration / 1000)
        self.min_speech_chunks = int(min_speech_duration / chunk_duration)
        self.silence_timeout_chunks = int(silence_timeout / chunk_duration)

        # Initialize components
        self.voice_detector = VoiceDetector(
            aggressiveness=vad_aggressiveness,
            sample_rate=sample_rate,
            debug_mode=debug_mode
        )

        self.transcriber = Transcriber(
            model_name=whisper_model,
            min_transcription_length=min_transcription_length,
            debug_mode=debug_mode
        )

        # State
        self.is_running = False
        self.thread = None
        self.pyaudio = None
        self.stream = None
        self.tts_interrupt_callback = None
        self.tts_interrupt_enabled = True  # Can be disabled during TTS playback
        self.listening_paused = False  # Can be paused to completely stop processing audio

    def start(self, tts_interrupt_callback=None):
        """Start voice recognition in a separate thread.

        Args:
            tts_interrupt_callback: Function to call when user speech is
                detected (used to interrupt TTS playback)

        Returns:
            True if started, False if already running
        """
        if self.is_running:
            return False

        self.tts_interrupt_callback = tts_interrupt_callback
        self.is_running = True
        self.thread = threading.Thread(target=self._recognition_loop)
        self.thread.start()

        if self.debug_mode:
            print(" > Voice recognition started")
        return True

    def stop(self):
        """Stop voice recognition.

        Returns:
            True if stopped, False if not running
        """
        if not self.is_running:
            return False

        self.is_running = False
        # stop() may be invoked from the recognition thread itself (via
        # stop_callback); joining the current thread would raise RuntimeError.
        if self.thread and self.thread is not threading.current_thread():
            self.thread.join()

        if self.stream:
            self.stream.stop_stream()
            self.stream.close()

        if self.pyaudio:
            self.pyaudio.terminate()

        if self.debug_mode:
            print(" > Voice recognition stopped")
        return True

    def _recognition_loop(self):
        """Main recognition loop."""
        self.pyaudio = pyaudio.PyAudio()
        self.stream = self.pyaudio.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=self.sample_rate,
            input=True,
            frames_per_buffer=self.chunk_size
        )

        speech_buffer = []
        speech_count = 0
        silence_count = 0
        recording = False

        while self.is_running:
            try:
                # If listening is paused, sleep briefly and skip processing
                if self.listening_paused:
                    time.sleep(0.1)
                    continue

                # Read audio data
                audio_data = self.stream.read(self.chunk_size, exception_on_overflow=False)

                # Check for speech
                is_speech = self.voice_detector.is_speech(audio_data)

                if is_speech:
                    speech_buffer.append(audio_data)
                    speech_count += 1
                    silence_count = 0

                    # Trigger the TTS interrupt callback once enough speech is
                    # detected, but only while interruption is enabled (it is
                    # disabled during TTS playback to avoid self-interruption)
                    if (self.tts_interrupt_callback and
                            self.tts_interrupt_enabled and
                            speech_count >= self.min_speech_chunks and
                            not recording):
                        self.tts_interrupt_callback()
                        if self.debug_mode:
                            print(" > TTS interrupted by user speech")

                    # Start recording after minimum speech detected
                    if speech_count >= self.min_speech_chunks:
                        recording = True

                else:
                    # Handle silence during recording
                    if recording:
                        speech_buffer.append(audio_data)
                        silence_count += 1

                        # End of speech detected
                        if silence_count >= self.silence_timeout_chunks:
                            if self.debug_mode:
                                print(f" > Speech detected ({len(speech_buffer)} chunks), transcribing...")

                            audio_bytes = b''.join(speech_buffer)
                            text = self.transcriber.transcribe(audio_bytes)

                            if text:
                                # Check for stop command
                                if text.lower() == "stop":
                                    if self.stop_callback:
                                        self.stop_callback()
                                    else:
                                        # If no stop callback, invoke transcription callback anyway
                                        self.transcription_callback(text)
                                else:
                                    # Normal transcription
                                    self.transcription_callback(text)

                            # Reset state
                            speech_buffer = []
                            speech_count = 0
                            silence_count = 0
                            recording = False
                    else:
                        # No speech detected and not recording: decay the
                        # speech counter and discard the stale buffer
                        speech_count = max(0, speech_count - 1)
                        if speech_count == 0:
                            speech_buffer = []

            except Exception as e:
                if self.debug_mode:
                    print(f"Voice recognition error: {e}")

    def change_whisper_model(self, model_name):
        """Change the Whisper model.

        Args:
            model_name: New model name

        Returns:
            True if changed, False otherwise
        """
        return self.transcriber.change_model(model_name)

    def change_vad_aggressiveness(self, aggressiveness):
        """Change VAD aggressiveness.

        Args:
            aggressiveness: New aggressiveness level (0-3)

        Returns:
            True if changed, False otherwise
        """
        return self.voice_detector.set_aggressiveness(aggressiveness)

    def pause_tts_interrupt(self):
        """Temporarily disable TTS interruption (e.g., during TTS playback).

        This prevents the system from interrupting its own speech.
        """
        self.tts_interrupt_enabled = False
        if self.debug_mode:
            print(" > TTS interrupt paused")

    def resume_tts_interrupt(self):
        """Re-enable TTS interruption after it was paused."""
        self.tts_interrupt_enabled = True
        if self.debug_mode:
            print(" > TTS interrupt resumed")

    def pause_listening(self):
        """Temporarily pause audio processing entirely (e.g., during TTS in 'wait' mode).

        This completely stops processing audio input while keeping the thread alive.
        """
        self.listening_paused = True
        if self.debug_mode:
            print(" > Listening paused")

    def resume_listening(self):
        """Resume audio processing after it was paused."""
        self.listening_paused = False
        if self.debug_mode:
            print(" > Listening resumed")
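
To make the timing parameters above concrete: with the defaults (16 kHz audio in 30 ms chunks), chunk_size works out to 16000 * 30 / 1000 = 480 samples per read, min_speech_chunks to 600 / 30 = 20 chunks (~0.6 s of speech before recording starts), and silence_timeout_chunks to 1500 / 30 = 50 chunks (~1.5 s of silence before transcription). The 30 ms default also matches the 10/20/30 ms frame sizes a WebRTC-style VAD accepts, assuming VoiceDetector wraps one. A minimal lifecycle sketch, with illustrative callback bodies:

rec = VoiceRecognizer(
    transcription_callback=lambda text: print("heard:", text),
    stop_callback=lambda: print("stop command detected"),
    debug_mode=True,
)
rec.start(tts_interrupt_callback=lambda: print("user barged in"))

# While the assistant is speaking, keep it from transcribing its own audio:
rec.pause_tts_interrupt()   # barge-in disabled, microphone still monitored
# ... play TTS audio ...
rec.resume_tts_interrupt()

rec.stop()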
@@ -0,0 +1,5 @@
"""STT module for speech recognition using Whisper."""

from .transcriber import Transcriber

__all__ = ['Transcriber']
@@ -0,0 +1,138 @@
"""Speech-to-text transcription using OpenAI's Whisper."""

import whisper
import numpy as np
import os
import sys
import logging


class Transcriber:
    """Transcribes audio using OpenAI's Whisper model."""

    def __init__(self, model_name="tiny", min_transcription_length=5, debug_mode=False):
        """Initialize the Whisper transcriber.

        Args:
            model_name: Whisper model to use (tiny, base, etc.)
            min_transcription_length: Minimum length of text to consider valid
            debug_mode: Enable debug output
        """
        self.model_name = model_name
        self.min_transcription_length = min_transcription_length
        self.debug_mode = debug_mode

        # Suppress Whisper output unless in debug mode
        if not debug_mode:
            logging.getLogger('whisper').setLevel(logging.ERROR)

        if self.debug_mode:
            print(f" > Loading Whisper model: {model_name}")

        # Redirect stdout while loading the Whisper model in non-debug mode
        orig_stdout = None
        null_out = None
        try:
            if not debug_mode:
                orig_stdout = sys.stdout
                null_out = open(os.devnull, 'w')
                sys.stdout = null_out

            # Load the Whisper model
            self.model = whisper.load_model(model_name)
        finally:
            # Restore stdout if we redirected it
            if orig_stdout:
                sys.stdout = orig_stdout
            if null_out:
                null_out.close()

    def transcribe(self, audio_data):
        """Transcribe audio data to text.

        Args:
            audio_data: Audio data as bytes (16-bit PCM) or numpy array

        Returns:
            Transcribed text, or None if transcription failed or is too short
        """
        try:
            # Convert 16-bit PCM bytes to a float32 array in [-1.0, 1.0)
            if isinstance(audio_data, bytes):
                audio_np = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / 32768.0
            else:
                audio_np = audio_data

            # Redirect stdout for non-debug mode
            orig_stdout = None
            null_out = None
            if not self.debug_mode:
                orig_stdout = sys.stdout
                null_out = open(os.devnull, 'w')
                sys.stdout = null_out

            try:
                # Perform transcription
                result = self.model.transcribe(audio_np, fp16=False)
            finally:
                # Restore stdout if we redirected it
                if orig_stdout:
                    sys.stdout = orig_stdout
                if null_out:
                    null_out.close()

            # Extract and clean text
            text = result["text"].strip()

            # Skip short transcriptions (except the "stop" command)
            if len(text) < self.min_transcription_length and text.lower() != "stop":
                return None

            if self.debug_mode:
                print(f" > Transcribed: '{text}'")

            return text

        except Exception as e:
            if self.debug_mode:
                print(f"Transcription error: {e}")
            return None

    def change_model(self, model_name):
        """Change the Whisper model.

        Args:
            model_name: New model name (tiny, base, etc.)

        Returns:
            True if the model was changed, False otherwise
        """
        if model_name not in ["tiny", "base", "small", "medium", "large"]:
            if self.debug_mode:
                print(f" > Invalid model name: {model_name}")
            return False

        if self.debug_mode:
            print(f" > Changing Whisper model to {model_name}")

        # Redirect stdout for non-debug mode
        orig_stdout = None
        null_out = None
        if not self.debug_mode:
            orig_stdout = sys.stdout
            null_out = open(os.devnull, 'w')
            sys.stdout = null_out

        try:
            self.model = whisper.load_model(model_name)
            self.model_name = model_name
        finally:
            # Restore stdout if we redirected it
            if orig_stdout:
                sys.stdout = orig_stdout
            if null_out:
                null_out.close()

        if self.debug_mode:
            print(f" > Whisper model changed to {model_name}")
        return True
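
A minimal sketch of feeding recorded audio to the Transcriber above, assuming a 16 kHz mono 16-bit WAV (the capture format the recognizer uses); the import path is a guess from the stt package layout shown earlier, not confirmed by this diff:

import wave

from abstractvoice.audio.stt import Transcriber  # path assumed

with wave.open("speech.wav", "rb") as wf:
    # Whisper models expect 16 kHz mono input
    assert wf.getframerate() == 16000 and wf.getnchannels() == 1
    pcm = wf.readframes(wf.getnframes())  # raw 16-bit little-endian samples

transcriber = Transcriber(model_name="base", debug_mode=True)
print(transcriber.transcribe(pcm))  # bytes are converted to float32 internally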