abstractvoice 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ """VAD module for voice activity detection."""
2
+
3
+ from .voice_detector import VoiceDetector
4
+
5
+ __all__ = ['VoiceDetector']
@@ -0,0 +1,75 @@
1
+ """Voice activity detection using WebRTC VAD."""
2
+
3
+ import webrtcvad
4
+ import logging
5
+
6
+
7
class VoiceDetector:
    """Detects voice activity in audio streams.

    Wraps a ``webrtcvad.Vad`` instance together with the sample rate and
    aggressiveness it was configured with. ``is_speech`` and
    ``set_aggressiveness`` report failures by returning ``False`` rather
    than raising; only the constructor raises.
    """

    # Sample rates accepted by the constructor.
    _VALID_SAMPLE_RATES = (8000, 16000, 32000, 48000)

    def __init__(self, aggressiveness=1, sample_rate=16000, debug_mode=False):
        """Initialize the voice detector.

        Args:
            aggressiveness: VAD aggressiveness (0-3, higher is more strict)
            sample_rate: Audio sample rate (8000, 16000, 32000, 48000 Hz)
            debug_mode: Enable debug output (printed to stdout)

        Raises:
            ValueError: If ``sample_rate`` is not one of the supported rates.
            Exception: Whatever ``webrtcvad.Vad`` raises is re-raised as-is.
        """
        self.debug_mode = debug_mode
        self.sample_rate = sample_rate
        self.aggressiveness = aggressiveness

        # Check sample rate is valid before creating the underlying VAD.
        if sample_rate not in self._VALID_SAMPLE_RATES:
            raise ValueError("Sample rate must be 8000, 16000, 32000, or 48000 Hz")

        # Initialize WebRTC VAD; failures are reported (in debug mode) and
        # then propagated so the caller never gets a half-built detector.
        try:
            self.vad = webrtcvad.Vad(aggressiveness)
        except Exception as e:
            if self.debug_mode:
                print(f"VAD initialization error: {e}")
            raise
        else:
            if self.debug_mode:
                print(f" > VAD initialized with aggressiveness {aggressiveness}")

    def is_speech(self, audio_frame):
        """Check if audio frame contains speech.

        Args:
            audio_frame: Audio frame as bytes (must be 10, 20, or 30ms at sample_rate)

        Returns:
            True if speech detected, False otherwise. Any error raised while
            processing the frame is treated as "no speech" and yields False.
        """
        try:
            return self.vad.is_speech(audio_frame, self.sample_rate)
        except Exception as e:
            # Deliberate best-effort: a bad frame must not break the caller.
            if self.debug_mode:
                print(f"VAD processing error: {e}")
            return False

    def set_aggressiveness(self, aggressiveness):
        """Change VAD aggressiveness.

        Args:
            aggressiveness: New aggressiveness level (0-3)

        Returns:
            True if changed, False otherwise. Values outside 0-3, values that
            cannot be compared with integers (e.g. ``None`` or a string), and
            values rejected by the underlying VAD all yield False and leave
            ``self.aggressiveness`` untouched.
        """
        # The range test itself can raise for non-numeric input; treat that
        # as "invalid" so the documented True/False contract always holds.
        try:
            in_range = bool(0 <= aggressiveness <= 3)
        except (TypeError, ValueError):
            in_range = False

        if not in_range:
            if self.debug_mode:
                print(f" > Invalid aggressiveness: {aggressiveness}")
            return False

        try:
            self.vad.set_mode(aggressiveness)
        except Exception as e:
            if self.debug_mode:
                print(f"VAD aggressiveness change error: {e}")
            return False

        # Only record the new level once the VAD has accepted it.
        self.aggressiveness = aggressiveness
        if self.debug_mode:
            print(f" > VAD aggressiveness changed to {aggressiveness}")
        return True
@@ -0,0 +1,294 @@
1
+ """Main Voice Manager class for coordinating TTS and STT components."""
2
+
3
+ from .tts import TTSEngine
4
+ from .recognition import VoiceRecognizer
5
+
6
+
7
class VoiceManager:
    """Main class for voice interaction capabilities.

    Owns a ``TTSEngine`` for speech output and a ``VoiceRecognizer`` for
    speech input. The recognizer is created on the first ``listen()`` call.
    Playback start/end callbacks on the TTS engine are used to pause and
    resume recognition according to the current voice mode.
    """

    # Modes accepted by set_voice_mode().
    _VOICE_MODES = ("full", "wait", "stop", "ptt")
    # Modes in which listening is paused entirely while TTS is playing.
    _PAUSE_DURING_TTS_MODES = ("wait", "stop", "ptt")

    def __init__(self, tts_model="tts_models/en/ljspeech/vits",
                 whisper_model="tiny", debug_mode=False):
        """Initialize the Voice Manager.

        Args:
            tts_model: TTS model name to use
            whisper_model: Whisper model name to use
            debug_mode: Enable debug logging
        """
        self.debug_mode = debug_mode
        self.speed = 1.0

        # Initialize TTS engine
        self.tts_engine = TTSEngine(
            model_name=tts_model,
            debug_mode=debug_mode
        )

        # Set up callbacks to pause/resume voice recognition during TTS playback
        # This prevents the system from interrupting its own speech
        self.tts_engine.on_playback_start = self._on_tts_start
        self.tts_engine.on_playback_end = self._on_tts_end

        # Voice recognizer is initialized on demand
        self.voice_recognizer = None
        self.whisper_model = whisper_model

        # State tracking
        self._transcription_callback = None
        self._stop_callback = None
        self._voice_mode = "full"  # full, wait, stop, ptt

    def _on_tts_start(self):
        """Called when TTS playback starts - handle based on voice mode."""
        recognizer = self.voice_recognizer
        if not recognizer:
            return

        if self._voice_mode == "full":
            # Full mode: Keep listening but pause interrupt capability
            recognizer.pause_tts_interrupt()
        elif self._voice_mode in self._PAUSE_DURING_TTS_MODES:
            # Wait/Stop/PTT modes: Pause listening entirely during TTS
            recognizer.pause_listening()

    def _on_tts_end(self):
        """Called when TTS playback ends - handle based on voice mode."""
        recognizer = self.voice_recognizer
        if not recognizer:
            return

        if self._voice_mode == "full":
            # Full mode: Resume interrupt capability
            recognizer.resume_tts_interrupt()
        elif self._voice_mode in self._PAUSE_DURING_TTS_MODES:
            # Wait/Stop/PTT modes: Resume listening
            recognizer.resume_listening()

    def speak(self, text, speed=1.0, callback=None):
        """Convert text to speech and play audio.

        Args:
            text: Text to convert to speech
            speed: Speech speed (0.5-2.0). The default value 1.0 means
                "use the speed configured with set_speed()"; any other
                value overrides it for this call only.
            callback: Function to call when speech completes

        Returns:
            The result of ``TTSEngine.speak`` (documented by the original
            author as True if speech started, False otherwise).
        """
        # 1.0 doubles as the "not specified" marker, so an explicit 1.0 also
        # falls back to the manager-wide speed.
        effective_speed = speed if speed != 1.0 else self.speed
        return self.tts_engine.speak(text, effective_speed, callback)

    def stop_speaking(self):
        """Stop current speech playback.

        Returns:
            The result of ``TTSEngine.stop`` (documented as True if stopped,
            False if no playback was active).
        """
        return self.tts_engine.stop()

    def pause_speaking(self):
        """Pause current speech playback.

        Per the original documentation, pausing happens at chunk boundaries
        in streaming mode. Can be resumed with resume_speaking().

        Returns:
            The result of ``TTSEngine.pause`` (documented as True if paused,
            False if no playback was active).
        """
        return self.tts_engine.pause()

    def resume_speaking(self):
        """Resume paused speech playback.

        Returns:
            The result of ``TTSEngine.resume`` (documented as True if resumed,
            False if not paused or no playback active).
        """
        return self.tts_engine.resume()

    def is_paused(self):
        """Check if TTS is currently paused.

        Returns:
            The result of ``TTSEngine.is_paused``.
        """
        return self.tts_engine.is_paused()

    def is_speaking(self):
        """Check if TTS is currently active.

        Returns:
            The result of ``TTSEngine.is_active``.
        """
        return self.tts_engine.is_active()

    def listen(self, on_transcription, on_stop=None):
        """Start listening for speech with callbacks.

        The callbacks are stored on the manager, so calling listen() again
        with different callbacks re-targets the existing recognizer.

        Args:
            on_transcription: Callback for transcribed text
            on_stop: Callback when 'stop' command detected

        Returns:
            The result of ``VoiceRecognizer.start`` (documented by the
            original author as True if started, False if already listening).
        """
        # Store callbacks
        self._transcription_callback = on_transcription
        self._stop_callback = on_stop

        # Initialize recognizer if not already done
        if not self.voice_recognizer:
            # The handlers read the callbacks from self at call time instead
            # of capturing them, so later listen() calls can replace them.
            def _transcription_handler(text):
                if self._transcription_callback:
                    self._transcription_callback(text)

            def _stop_handler():
                # Stop listening
                self.stop_listening()
                # Call user's stop callback if provided
                if self._stop_callback:
                    self._stop_callback()

            self.voice_recognizer = VoiceRecognizer(
                transcription_callback=_transcription_handler,
                stop_callback=_stop_handler,
                whisper_model=self.whisper_model,
                debug_mode=self.debug_mode
            )

        # Start with TTS interrupt capability
        return self.voice_recognizer.start(
            tts_interrupt_callback=self.stop_speaking
        )

    def stop_listening(self):
        """Stop listening for speech.

        Returns:
            The result of ``VoiceRecognizer.stop`` when a recognizer exists,
            False if listen() was never called.
        """
        if self.voice_recognizer:
            return self.voice_recognizer.stop()
        return False

    def is_listening(self):
        """Check if currently listening for speech.

        Returns:
            True if a recognizer exists and its ``is_running`` is truthy,
            False otherwise (always a bool, never None).
        """
        # bool() keeps the documented return type; a bare `and` would hand
        # back None when no recognizer has been created yet.
        return bool(self.voice_recognizer and self.voice_recognizer.is_running)

    def set_voice_mode(self, mode):
        """Set the voice mode (full, wait, stop, ptt).

        Args:
            mode: Voice mode to use

        Returns:
            True if the mode was accepted, False if it is not one of the
            four known modes (the current mode is then left unchanged).
        """
        if mode in self._VOICE_MODES:
            self._voice_mode = mode
            return True
        return False

    def set_speed(self, speed):
        """Set the TTS speed.

        Args:
            speed: Speech speed multiplier (0.5-2.0). The value is stored
                as given; no range check is performed here.

        Returns:
            True if successful
        """
        self.speed = speed
        return True

    def get_speed(self):
        """Get the TTS speed.

        Returns:
            Current TTS speed multiplier
        """
        return self.speed

    def set_tts_model(self, model_name):
        """Change the TTS model.

        Stops any current speech, builds a new ``TTSEngine`` for
        ``model_name`` and re-attaches the playback callbacks.

        Example model names from the original documentation (availability
        depends on the TTS backend and cannot be confirmed from this file):
            - "tts_models/en/ljspeech/vits" (default used by the constructor)
            - "tts_models/en/ljspeech/fast_pitch"
            - "tts_models/en/ljspeech/glow-tts"
            - "tts_models/en/ljspeech/tacotron2-DDC"

        Args:
            model_name: TTS model name to use

        Returns:
            True if successful

        Example:
            vm.set_tts_model("tts_models/en/ljspeech/glow-tts")
        """
        # Stop any current speech
        self.stop_speaking()

        # Reinitialize TTS engine with new model
        self.tts_engine = TTSEngine(
            model_name=model_name,
            debug_mode=self.debug_mode
        )

        # Restore callbacks
        self.tts_engine.on_playback_start = self._on_tts_start
        self.tts_engine.on_playback_end = self._on_tts_end

        return True

    def set_whisper(self, model_name):
        """Set the Whisper model.

        Args:
            model_name: Whisper model name (tiny, base, etc.)

        Returns:
            The result of ``VoiceRecognizer.change_whisper_model`` when a
            recognizer exists; otherwise True, since the name is stored and
            passed to the recognizer when listen() creates it.
        """
        self.whisper_model = model_name
        if self.voice_recognizer:
            return self.voice_recognizer.change_whisper_model(model_name)
        return True

    def get_whisper(self):
        """Get the Whisper model.

        Returns:
            Current Whisper model name
        """
        return self.whisper_model

    def change_vad_aggressiveness(self, aggressiveness):
        """Change VAD aggressiveness.

        Args:
            aggressiveness: New aggressiveness level (0-3)

        Returns:
            The result of ``VoiceRecognizer.change_vad_aggressiveness`` when
            a recognizer exists, False otherwise.
        """
        if self.voice_recognizer:
            return self.voice_recognizer.change_vad_aggressiveness(aggressiveness)
        return False

    def cleanup(self):
        """Clean up resources.

        Stops the recognizer (if one was created) and any active speech.

        Returns:
            True if cleanup successful
        """
        if self.voice_recognizer:
            self.voice_recognizer.stop()

        self.stop_speaking()
        return True