abstractvoice 0.5.0__tar.gz → 0.5.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {abstractvoice-0.5.0 → abstractvoice-0.5.2}/PKG-INFO +47 -16
  2. {abstractvoice-0.5.0 → abstractvoice-0.5.2}/README.md +31 -0
  3. {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/__init__.py +1 -1
  4. {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/tts/tts_engine.py +31 -1
  5. {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/voice_manager.py +33 -1
  6. {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice.egg-info/PKG-INFO +47 -16
  7. {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice.egg-info/SOURCES.txt +4 -1
  8. {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice.egg-info/requires.txt +15 -15
  9. {abstractvoice-0.5.0 → abstractvoice-0.5.2}/pyproject.toml +15 -15
  10. abstractvoice-0.5.2/tests/test_callbacks.py +118 -0
  11. abstractvoice-0.5.2/tests/test_fresh_install.py +158 -0
  12. abstractvoice-0.5.2/tests/test_voice_switching.py +87 -0
  13. {abstractvoice-0.5.0 → abstractvoice-0.5.2}/LICENSE +0 -0
  14. {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/__main__.py +0 -0
  15. {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/dependency_check.py +0 -0
  16. {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/examples/__init__.py +0 -0
  17. {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/examples/cli_repl.py +0 -0
  18. {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/examples/voice_cli.py +0 -0
  19. {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/examples/web_api.py +0 -0
  20. {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/instant_setup.py +0 -0
  21. {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/recognition.py +0 -0
  22. {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/simple_model_manager.py +0 -0
  23. {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/stt/__init__.py +0 -0
  24. {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/stt/transcriber.py +0 -0
  25. {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/tts/__init__.py +0 -0
  26. {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/vad/__init__.py +0 -0
  27. {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/vad/voice_detector.py +0 -0
  28. {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice.egg-info/dependency_links.txt +0 -0
  29. {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice.egg-info/entry_points.txt +0 -0
  30. {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice.egg-info/top_level.txt +0 -0
  31. {abstractvoice-0.5.0 → abstractvoice-0.5.2}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: abstractvoice
3
- Version: 0.5.0
3
+ Version: 0.5.2
4
4
  Summary: A modular Python library for voice interactions with AI systems
5
5
  Author-email: Laurent-Philippe Albou <contact@abstractcore.ai>
6
6
  License-Expression: MIT
@@ -21,9 +21,9 @@ Requires-Dist: numpy>=1.24.0
21
21
  Requires-Dist: requests>=2.31.0
22
22
  Requires-Dist: appdirs>=1.4.0
23
23
  Requires-Dist: coqui-tts<0.30.0,>=0.27.0
24
- Requires-Dist: torch<2.4.0,>=2.0.0
25
- Requires-Dist: torchvision<0.19.0,>=0.15.0
26
- Requires-Dist: torchaudio<2.4.0,>=2.0.0
24
+ Requires-Dist: torch<2.9.0,>=2.1.0
25
+ Requires-Dist: torchvision<1.0.0,>=0.16.0
26
+ Requires-Dist: torchaudio<2.9.0,>=2.1.0
27
27
  Requires-Dist: librosa>=0.10.0
28
28
  Requires-Dist: sounddevice>=0.4.6
29
29
  Requires-Dist: soundfile>=0.12.1
@@ -34,9 +34,9 @@ Requires-Dist: PyAudio>=0.2.13; extra == "voice"
34
34
  Requires-Dist: soundfile>=0.12.1; extra == "voice"
35
35
  Provides-Extra: tts
36
36
  Requires-Dist: coqui-tts<0.30.0,>=0.27.0; extra == "tts"
37
- Requires-Dist: torch<2.4.0,>=2.0.0; extra == "tts"
38
- Requires-Dist: torchvision<0.19.0,>=0.15.0; extra == "tts"
39
- Requires-Dist: torchaudio<2.4.0,>=2.0.0; extra == "tts"
37
+ Requires-Dist: torch<2.9.0,>=2.1.0; extra == "tts"
38
+ Requires-Dist: torchvision<1.0.0,>=0.16.0; extra == "tts"
39
+ Requires-Dist: torchaudio<2.9.0,>=2.1.0; extra == "tts"
40
40
  Requires-Dist: librosa>=0.10.0; extra == "tts"
41
41
  Provides-Extra: stt
42
42
  Requires-Dist: openai-whisper>=20230314; extra == "stt"
@@ -49,9 +49,9 @@ Requires-Dist: webrtcvad>=2.0.10; extra == "all"
49
49
  Requires-Dist: PyAudio>=0.2.13; extra == "all"
50
50
  Requires-Dist: openai-whisper>=20230314; extra == "all"
51
51
  Requires-Dist: coqui-tts<0.30.0,>=0.27.0; extra == "all"
52
- Requires-Dist: torch<2.4.0,>=2.0.0; extra == "all"
53
- Requires-Dist: torchvision<0.19.0,>=0.15.0; extra == "all"
54
- Requires-Dist: torchaudio<2.4.0,>=2.0.0; extra == "all"
52
+ Requires-Dist: torch<2.9.0,>=2.1.0; extra == "all"
53
+ Requires-Dist: torchvision<1.0.0,>=0.16.0; extra == "all"
54
+ Requires-Dist: torchaudio<2.9.0,>=2.1.0; extra == "all"
55
55
  Requires-Dist: librosa>=0.10.0; extra == "all"
56
56
  Requires-Dist: soundfile>=0.12.1; extra == "all"
57
57
  Requires-Dist: flask>=2.0.0; extra == "all"
@@ -66,17 +66,17 @@ Requires-Dist: webrtcvad>=2.0.10; extra == "voice-full"
66
66
  Requires-Dist: PyAudio>=0.2.13; extra == "voice-full"
67
67
  Requires-Dist: openai-whisper>=20230314; extra == "voice-full"
68
68
  Requires-Dist: coqui-tts<0.30.0,>=0.27.0; extra == "voice-full"
69
- Requires-Dist: torch<2.4.0,>=2.0.0; extra == "voice-full"
70
- Requires-Dist: torchvision<0.19.0,>=0.15.0; extra == "voice-full"
71
- Requires-Dist: torchaudio<2.4.0,>=2.0.0; extra == "voice-full"
69
+ Requires-Dist: torch<2.9.0,>=2.1.0; extra == "voice-full"
70
+ Requires-Dist: torchvision<1.0.0,>=0.16.0; extra == "voice-full"
71
+ Requires-Dist: torchaudio<2.9.0,>=2.1.0; extra == "voice-full"
72
72
  Requires-Dist: librosa>=0.10.0; extra == "voice-full"
73
73
  Requires-Dist: soundfile>=0.12.1; extra == "voice-full"
74
74
  Requires-Dist: tiktoken>=0.6.0; extra == "voice-full"
75
75
  Provides-Extra: core-tts
76
76
  Requires-Dist: coqui-tts<0.30.0,>=0.27.0; extra == "core-tts"
77
- Requires-Dist: torch<2.4.0,>=2.0.0; extra == "core-tts"
78
- Requires-Dist: torchvision<0.19.0,>=0.15.0; extra == "core-tts"
79
- Requires-Dist: torchaudio<2.4.0,>=2.0.0; extra == "core-tts"
77
+ Requires-Dist: torch<2.9.0,>=2.1.0; extra == "core-tts"
78
+ Requires-Dist: torchvision<1.0.0,>=0.16.0; extra == "core-tts"
79
+ Requires-Dist: torchaudio<2.9.0,>=2.1.0; extra == "core-tts"
80
80
  Requires-Dist: librosa>=0.10.0; extra == "core-tts"
81
81
  Provides-Extra: core-stt
82
82
  Requires-Dist: openai-whisper>=20230314; extra == "core-stt"
@@ -615,6 +615,37 @@ manager.set_tts_model("tts_models/en/ljspeech/glow-tts")
615
615
  # - "tts_models/en/ljspeech/glow-tts" (alternative fallback)
616
616
  # - "tts_models/en/ljspeech/tacotron2-DDC" (legacy)
617
617
 
618
+ # === Audio Lifecycle Callbacks (v0.5.1+) ===
619
+
620
+ # NEW: Precise audio timing callbacks for visual status indicators
621
+ def on_synthesis_start():
622
+ print("🔴 Synthesis started - show thinking animation")
623
+
624
+ def on_audio_start():
625
+ print("🔵 Audio started - show speaking animation")
626
+
627
+ def on_audio_pause():
628
+ print("⏸️ Audio paused - show paused animation")
629
+
630
+ def on_audio_resume():
631
+ print("▶️ Audio resumed - continue speaking animation")
632
+
633
+ def on_audio_end():
634
+ print("🟢 Audio ended - show ready animation")
635
+
636
+ def on_synthesis_end():
637
+ print("✅ Synthesis complete")
638
+
639
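+ # Wire up callbacks (note: assigning tts_engine.on_playback_start/on_playback_end replaces the handlers VoiceManager installs on them at init)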
640
+ manager.tts_engine.on_playback_start = on_synthesis_start # Existing (synthesis phase)
641
+ manager.tts_engine.on_playback_end = on_synthesis_end # Existing (synthesis phase)
642
+ manager.on_audio_start = on_audio_start # NEW (actual audio playback)
643
+ manager.on_audio_end = on_audio_end # NEW (actual audio playback)
644
+ manager.on_audio_pause = on_audio_pause # NEW (pause events)
645
+ manager.on_audio_resume = on_audio_resume # NEW (resume events)
646
+
647
+ # Perfect for system tray icons, UI animations, or coordinating multiple audio streams
648
+
618
649
  # === STT (Speech-to-Text) ===
619
650
 
620
651
  def on_transcription(text):
@@ -525,6 +525,37 @@ manager.set_tts_model("tts_models/en/ljspeech/glow-tts")
525
525
  # - "tts_models/en/ljspeech/glow-tts" (alternative fallback)
526
526
  # - "tts_models/en/ljspeech/tacotron2-DDC" (legacy)
527
527
 
528
+ # === Audio Lifecycle Callbacks (v0.5.1+) ===
529
+
530
+ # NEW: Precise audio timing callbacks for visual status indicators
531
+ def on_synthesis_start():
532
+ print("🔴 Synthesis started - show thinking animation")
533
+
534
+ def on_audio_start():
535
+ print("🔵 Audio started - show speaking animation")
536
+
537
+ def on_audio_pause():
538
+ print("⏸️ Audio paused - show paused animation")
539
+
540
+ def on_audio_resume():
541
+ print("▶️ Audio resumed - continue speaking animation")
542
+
543
+ def on_audio_end():
544
+ print("🟢 Audio ended - show ready animation")
545
+
546
+ def on_synthesis_end():
547
+ print("✅ Synthesis complete")
548
+
549
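+ # Wire up callbacks (note: assigning tts_engine.on_playback_start/on_playback_end replaces the handlers VoiceManager installs on them at init)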
550
+ manager.tts_engine.on_playback_start = on_synthesis_start # Existing (synthesis phase)
551
+ manager.tts_engine.on_playback_end = on_synthesis_end # Existing (synthesis phase)
552
+ manager.on_audio_start = on_audio_start # NEW (actual audio playback)
553
+ manager.on_audio_end = on_audio_end # NEW (actual audio playback)
554
+ manager.on_audio_pause = on_audio_pause # NEW (pause events)
555
+ manager.on_audio_resume = on_audio_resume # NEW (resume events)
556
+
557
+ # Perfect for system tray icons, UI animations, or coordinating multiple audio streams
558
+
528
559
  # === STT (Speech-to-Text) ===
529
560
 
530
561
  def on_transcription(text):
@@ -32,5 +32,5 @@ from .voice_manager import VoiceManager
32
32
  # Import simple APIs for third-party applications
33
33
  from .simple_model_manager import list_models, download_model, get_status, is_ready
34
34
 
35
- __version__ = "0.5.0"
35
+ __version__ = "0.5.2"
36
36
  __all__ = ['VoiceManager', 'list_models', 'download_model', 'get_status', 'is_ready']
@@ -212,6 +212,13 @@ class NonBlockingAudioPlayer:
212
212
  self.current_position = 0
213
213
  self.playback_complete_callback = None
214
214
 
215
+ # NEW: Enhanced audio lifecycle callbacks
216
+ self.on_audio_start = None # Called when first audio sample plays
217
+ self.on_audio_end = None # Called when last audio sample finishes
218
+ self.on_audio_pause = None # Called when audio is paused
219
+ self.on_audio_resume = None # Called when audio is resumed
220
+ self._audio_started = False # Track if we've fired start callback
221
+
215
222
  def _audio_callback(self, outdata, frames, time, status):
216
223
  """Callback function for OutputStream - provides immediate pause/resume."""
217
224
  if status and self.debug_mode:
@@ -237,6 +244,12 @@ class NonBlockingAudioPlayer:
237
244
  outdata.fill(0)
238
245
  if self.is_playing:
239
246
  self.is_playing = False
247
+ self._audio_started = False # Reset for next playback
248
+
249
+ # Fire audio end callback
250
+ if self.on_audio_end:
251
+ threading.Thread(target=self.on_audio_end, daemon=True).start()
252
+
240
253
  if self.playback_complete_callback:
241
254
  # Call completion callback in a separate thread to avoid blocking
242
255
  threading.Thread(target=self.playback_complete_callback, daemon=True).start()
@@ -246,6 +259,12 @@ class NonBlockingAudioPlayer:
246
259
  remaining = len(self.current_audio) - self.current_position
247
260
  frames_to_output = min(frames, remaining)
248
261
 
262
+ # Fire audio start callback on first real audio output
263
+ if frames_to_output > 0 and not self._audio_started:
264
+ self._audio_started = True
265
+ if self.on_audio_start:
266
+ threading.Thread(target=self.on_audio_start, daemon=True).start()
267
+
249
268
  # Output the audio data
250
269
  if frames_to_output > 0:
251
270
  # Handle both mono and stereo output
@@ -344,6 +363,11 @@ class NonBlockingAudioPlayer:
344
363
  self.is_paused = True
345
364
  if self.debug_mode:
346
365
  print(" > Audio paused immediately")
366
+
367
+ # Fire audio pause callback
368
+ if self.on_audio_pause:
369
+ threading.Thread(target=self.on_audio_pause, daemon=True).start()
370
+
347
371
  return True
348
372
  return False
349
373
 
@@ -354,6 +378,11 @@ class NonBlockingAudioPlayer:
354
378
  self.is_paused = False
355
379
  if self.debug_mode:
356
380
  print(" > Audio resumed immediately")
381
+
382
+ # Fire audio resume callback
383
+ if self.on_audio_resume:
384
+ threading.Thread(target=self.on_audio_resume, daemon=True).start()
385
+
357
386
  return True
358
387
  return False
359
388
 
@@ -1264,4 +1293,5 @@ class TTSEngine:
1264
1293
  Returns:
1265
1294
  True if TTS is active, False otherwise
1266
1295
  """
1267
- return self.is_playing
1296
+ return self.is_playing
1297
+
@@ -241,6 +241,18 @@ class VoiceManager:
241
241
  self.tts_engine.on_playback_start = self._on_tts_start
242
242
  self.tts_engine.on_playback_end = self._on_tts_end
243
243
 
244
+ # NEW: Enhanced audio lifecycle callbacks (v0.5.1)
245
+ self.on_audio_start = None # Called when first audio sample plays
246
+ self.on_audio_end = None # Called when last audio sample finishes
247
+ self.on_audio_pause = None # Called when audio is paused
248
+ self.on_audio_resume = None # Called when audio is resumed
249
+
250
+ # Wire callbacks directly to audio player (skip TTSEngine layer)
251
+ self.tts_engine.audio_player.on_audio_start = self._on_audio_start
252
+ self.tts_engine.audio_player.on_audio_end = self._on_audio_end
253
+ self.tts_engine.audio_player.on_audio_pause = self._on_audio_pause
254
+ self.tts_engine.audio_player.on_audio_resume = self._on_audio_resume
255
+
244
256
  # Voice recognizer is initialized on demand
245
257
  self.voice_recognizer = None
246
258
  self.whisper_model = whisper_model
@@ -1030,4 +1042,24 @@ class VoiceManager:
1030
1042
  self.voice_recognizer.stop()
1031
1043
 
1032
1044
  self.stop_speaking()
1033
- return True
1045
+ return True
1046
+
1047
+ def _on_audio_start(self):
1048
+ """Called when audio actually starts playing."""
1049
+ if self.on_audio_start:
1050
+ self.on_audio_start()
1051
+
1052
+ def _on_audio_end(self):
1053
+ """Called when audio actually finishes playing."""
1054
+ if self.on_audio_end:
1055
+ self.on_audio_end()
1056
+
1057
+ def _on_audio_pause(self):
1058
+ """Called when audio is paused."""
1059
+ if self.on_audio_pause:
1060
+ self.on_audio_pause()
1061
+
1062
+ def _on_audio_resume(self):
1063
+ """Called when audio is resumed."""
1064
+ if self.on_audio_resume:
1065
+ self.on_audio_resume()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: abstractvoice
3
- Version: 0.5.0
3
+ Version: 0.5.2
4
4
  Summary: A modular Python library for voice interactions with AI systems
5
5
  Author-email: Laurent-Philippe Albou <contact@abstractcore.ai>
6
6
  License-Expression: MIT
@@ -21,9 +21,9 @@ Requires-Dist: numpy>=1.24.0
21
21
  Requires-Dist: requests>=2.31.0
22
22
  Requires-Dist: appdirs>=1.4.0
23
23
  Requires-Dist: coqui-tts<0.30.0,>=0.27.0
24
- Requires-Dist: torch<2.4.0,>=2.0.0
25
- Requires-Dist: torchvision<0.19.0,>=0.15.0
26
- Requires-Dist: torchaudio<2.4.0,>=2.0.0
24
+ Requires-Dist: torch<2.9.0,>=2.1.0
25
+ Requires-Dist: torchvision<1.0.0,>=0.16.0
26
+ Requires-Dist: torchaudio<2.9.0,>=2.1.0
27
27
  Requires-Dist: librosa>=0.10.0
28
28
  Requires-Dist: sounddevice>=0.4.6
29
29
  Requires-Dist: soundfile>=0.12.1
@@ -34,9 +34,9 @@ Requires-Dist: PyAudio>=0.2.13; extra == "voice"
34
34
  Requires-Dist: soundfile>=0.12.1; extra == "voice"
35
35
  Provides-Extra: tts
36
36
  Requires-Dist: coqui-tts<0.30.0,>=0.27.0; extra == "tts"
37
- Requires-Dist: torch<2.4.0,>=2.0.0; extra == "tts"
38
- Requires-Dist: torchvision<0.19.0,>=0.15.0; extra == "tts"
39
- Requires-Dist: torchaudio<2.4.0,>=2.0.0; extra == "tts"
37
+ Requires-Dist: torch<2.9.0,>=2.1.0; extra == "tts"
38
+ Requires-Dist: torchvision<1.0.0,>=0.16.0; extra == "tts"
39
+ Requires-Dist: torchaudio<2.9.0,>=2.1.0; extra == "tts"
40
40
  Requires-Dist: librosa>=0.10.0; extra == "tts"
41
41
  Provides-Extra: stt
42
42
  Requires-Dist: openai-whisper>=20230314; extra == "stt"
@@ -49,9 +49,9 @@ Requires-Dist: webrtcvad>=2.0.10; extra == "all"
49
49
  Requires-Dist: PyAudio>=0.2.13; extra == "all"
50
50
  Requires-Dist: openai-whisper>=20230314; extra == "all"
51
51
  Requires-Dist: coqui-tts<0.30.0,>=0.27.0; extra == "all"
52
- Requires-Dist: torch<2.4.0,>=2.0.0; extra == "all"
53
- Requires-Dist: torchvision<0.19.0,>=0.15.0; extra == "all"
54
- Requires-Dist: torchaudio<2.4.0,>=2.0.0; extra == "all"
52
+ Requires-Dist: torch<2.9.0,>=2.1.0; extra == "all"
53
+ Requires-Dist: torchvision<1.0.0,>=0.16.0; extra == "all"
54
+ Requires-Dist: torchaudio<2.9.0,>=2.1.0; extra == "all"
55
55
  Requires-Dist: librosa>=0.10.0; extra == "all"
56
56
  Requires-Dist: soundfile>=0.12.1; extra == "all"
57
57
  Requires-Dist: flask>=2.0.0; extra == "all"
@@ -66,17 +66,17 @@ Requires-Dist: webrtcvad>=2.0.10; extra == "voice-full"
66
66
  Requires-Dist: PyAudio>=0.2.13; extra == "voice-full"
67
67
  Requires-Dist: openai-whisper>=20230314; extra == "voice-full"
68
68
  Requires-Dist: coqui-tts<0.30.0,>=0.27.0; extra == "voice-full"
69
- Requires-Dist: torch<2.4.0,>=2.0.0; extra == "voice-full"
70
- Requires-Dist: torchvision<0.19.0,>=0.15.0; extra == "voice-full"
71
- Requires-Dist: torchaudio<2.4.0,>=2.0.0; extra == "voice-full"
69
+ Requires-Dist: torch<2.9.0,>=2.1.0; extra == "voice-full"
70
+ Requires-Dist: torchvision<1.0.0,>=0.16.0; extra == "voice-full"
71
+ Requires-Dist: torchaudio<2.9.0,>=2.1.0; extra == "voice-full"
72
72
  Requires-Dist: librosa>=0.10.0; extra == "voice-full"
73
73
  Requires-Dist: soundfile>=0.12.1; extra == "voice-full"
74
74
  Requires-Dist: tiktoken>=0.6.0; extra == "voice-full"
75
75
  Provides-Extra: core-tts
76
76
  Requires-Dist: coqui-tts<0.30.0,>=0.27.0; extra == "core-tts"
77
- Requires-Dist: torch<2.4.0,>=2.0.0; extra == "core-tts"
78
- Requires-Dist: torchvision<0.19.0,>=0.15.0; extra == "core-tts"
79
- Requires-Dist: torchaudio<2.4.0,>=2.0.0; extra == "core-tts"
77
+ Requires-Dist: torch<2.9.0,>=2.1.0; extra == "core-tts"
78
+ Requires-Dist: torchvision<1.0.0,>=0.16.0; extra == "core-tts"
79
+ Requires-Dist: torchaudio<2.9.0,>=2.1.0; extra == "core-tts"
80
80
  Requires-Dist: librosa>=0.10.0; extra == "core-tts"
81
81
  Provides-Extra: core-stt
82
82
  Requires-Dist: openai-whisper>=20230314; extra == "core-stt"
@@ -615,6 +615,37 @@ manager.set_tts_model("tts_models/en/ljspeech/glow-tts")
615
615
  # - "tts_models/en/ljspeech/glow-tts" (alternative fallback)
616
616
  # - "tts_models/en/ljspeech/tacotron2-DDC" (legacy)
617
617
 
618
+ # === Audio Lifecycle Callbacks (v0.5.1+) ===
619
+
620
+ # NEW: Precise audio timing callbacks for visual status indicators
621
+ def on_synthesis_start():
622
+ print("🔴 Synthesis started - show thinking animation")
623
+
624
+ def on_audio_start():
625
+ print("🔵 Audio started - show speaking animation")
626
+
627
+ def on_audio_pause():
628
+ print("⏸️ Audio paused - show paused animation")
629
+
630
+ def on_audio_resume():
631
+ print("▶️ Audio resumed - continue speaking animation")
632
+
633
+ def on_audio_end():
634
+ print("🟢 Audio ended - show ready animation")
635
+
636
+ def on_synthesis_end():
637
+ print("✅ Synthesis complete")
638
+
639
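+ # Wire up callbacks (note: assigning tts_engine.on_playback_start/on_playback_end replaces the handlers VoiceManager installs on them at init)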
640
+ manager.tts_engine.on_playback_start = on_synthesis_start # Existing (synthesis phase)
641
+ manager.tts_engine.on_playback_end = on_synthesis_end # Existing (synthesis phase)
642
+ manager.on_audio_start = on_audio_start # NEW (actual audio playback)
643
+ manager.on_audio_end = on_audio_end # NEW (actual audio playback)
644
+ manager.on_audio_pause = on_audio_pause # NEW (pause events)
645
+ manager.on_audio_resume = on_audio_resume # NEW (resume events)
646
+
647
+ # Perfect for system tray icons, UI animations, or coordinating multiple audio streams
648
+
618
649
  # === STT (Speech-to-Text) ===
619
650
 
620
651
  def on_transcription(text):
@@ -23,4 +23,7 @@ abstractvoice/stt/transcriber.py
23
23
  abstractvoice/tts/__init__.py
24
24
  abstractvoice/tts/tts_engine.py
25
25
  abstractvoice/vad/__init__.py
26
- abstractvoice/vad/voice_detector.py
26
+ abstractvoice/vad/voice_detector.py
27
+ tests/test_callbacks.py
28
+ tests/test_fresh_install.py
29
+ tests/test_voice_switching.py
@@ -2,9 +2,9 @@ numpy>=1.24.0
2
2
  requests>=2.31.0
3
3
  appdirs>=1.4.0
4
4
  coqui-tts<0.30.0,>=0.27.0
5
- torch<2.4.0,>=2.0.0
6
- torchvision<0.19.0,>=0.15.0
7
- torchaudio<2.4.0,>=2.0.0
5
+ torch<2.9.0,>=2.1.0
6
+ torchvision<1.0.0,>=0.16.0
7
+ torchaudio<2.9.0,>=2.1.0
8
8
  librosa>=0.10.0
9
9
  sounddevice>=0.4.6
10
10
  soundfile>=0.12.1
@@ -15,9 +15,9 @@ webrtcvad>=2.0.10
15
15
  PyAudio>=0.2.13
16
16
  openai-whisper>=20230314
17
17
  coqui-tts<0.30.0,>=0.27.0
18
- torch<2.4.0,>=2.0.0
19
- torchvision<0.19.0,>=0.15.0
20
- torchaudio<2.4.0,>=2.0.0
18
+ torch<2.9.0,>=2.1.0
19
+ torchvision<1.0.0,>=0.16.0
20
+ torchaudio<2.9.0,>=2.1.0
21
21
  librosa>=0.10.0
22
22
  soundfile>=0.12.1
23
23
  flask>=2.0.0
@@ -35,9 +35,9 @@ tiktoken>=0.6.0
35
35
 
36
36
  [core-tts]
37
37
  coqui-tts<0.30.0,>=0.27.0
38
- torch<2.4.0,>=2.0.0
39
- torchvision<0.19.0,>=0.15.0
40
- torchaudio<2.4.0,>=2.0.0
38
+ torch<2.9.0,>=2.1.0
39
+ torchvision<1.0.0,>=0.16.0
40
+ torchaudio<2.9.0,>=2.1.0
41
41
  librosa>=0.10.0
42
42
 
43
43
  [dev]
@@ -51,9 +51,9 @@ tiktoken>=0.6.0
51
51
 
52
52
  [tts]
53
53
  coqui-tts<0.30.0,>=0.27.0
54
- torch<2.4.0,>=2.0.0
55
- torchvision<0.19.0,>=0.15.0
56
- torchaudio<2.4.0,>=2.0.0
54
+ torch<2.9.0,>=2.1.0
55
+ torchvision<1.0.0,>=0.16.0
56
+ torchaudio<2.9.0,>=2.1.0
57
57
  librosa>=0.10.0
58
58
 
59
59
  [voice]
@@ -68,9 +68,9 @@ webrtcvad>=2.0.10
68
68
  PyAudio>=0.2.13
69
69
  openai-whisper>=20230314
70
70
  coqui-tts<0.30.0,>=0.27.0
71
- torch<2.4.0,>=2.0.0
72
- torchvision<0.19.0,>=0.15.0
73
- torchaudio<2.4.0,>=2.0.0
71
+ torch<2.9.0,>=2.1.0
72
+ torchvision<1.0.0,>=0.16.0
73
+ torchaudio<2.9.0,>=2.1.0
74
74
  librosa>=0.10.0
75
75
  soundfile>=0.12.1
76
76
  tiktoken>=0.6.0
@@ -29,9 +29,9 @@ dependencies = [
29
29
  "appdirs>=1.4.0",
30
30
  # Essential TTS dependencies for immediate functionality
31
31
  "coqui-tts>=0.27.0,<0.30.0",
32
- "torch>=2.0.0,<2.4.0",
33
- "torchvision>=0.15.0,<0.19.0",
34
- "torchaudio>=2.0.0,<2.4.0",
32
+ "torch>=2.1.0,<2.9.0",
33
+ "torchvision>=0.16.0,<1.0.0",
34
+ "torchaudio>=2.1.0,<2.9.0",
35
35
  "librosa>=0.10.0",
36
36
  "sounddevice>=0.4.6",
37
37
  "soundfile>=0.12.1",
@@ -49,9 +49,9 @@ voice = [
49
49
  # Text-to-Speech functionality
50
50
  tts = [
51
51
  "coqui-tts>=0.27.0,<0.30.0",
52
- "torch>=2.0.0,<2.4.0",
53
- "torchvision>=0.15.0,<0.19.0",
54
- "torchaudio>=2.0.0,<2.4.0",
52
+ "torch>=2.1.0,<2.9.0",
53
+ "torchvision>=0.16.0,<1.0.0",
54
+ "torchaudio>=2.1.0,<2.9.0",
55
55
  "librosa>=0.10.0",
56
56
  ]
57
57
 
@@ -73,9 +73,9 @@ all = [
73
73
  "PyAudio>=0.2.13",
74
74
  "openai-whisper>=20230314",
75
75
  "coqui-tts>=0.27.0,<0.30.0",
76
- "torch>=2.0.0,<2.4.0",
77
- "torchvision>=0.15.0,<0.19.0",
78
- "torchaudio>=2.0.0,<2.4.0",
76
+ "torch>=2.1.0,<2.9.0",
77
+ "torchvision>=0.16.0,<1.0.0",
78
+ "torchaudio>=2.1.0,<2.9.0",
79
79
  "librosa>=0.10.0",
80
80
  "soundfile>=0.12.1",
81
81
  "flask>=2.0.0",
@@ -96,9 +96,9 @@ voice-full = [
96
96
  "PyAudio>=0.2.13",
97
97
  "openai-whisper>=20230314",
98
98
  "coqui-tts>=0.27.0,<0.30.0",
99
- "torch>=2.0.0,<2.4.0",
100
- "torchvision>=0.15.0,<0.19.0",
101
- "torchaudio>=2.0.0,<2.4.0",
99
+ "torch>=2.1.0,<2.9.0",
100
+ "torchvision>=0.16.0,<1.0.0",
101
+ "torchaudio>=2.1.0,<2.9.0",
102
102
  "librosa>=0.10.0",
103
103
  "soundfile>=0.12.1",
104
104
  "tiktoken>=0.6.0",
@@ -107,9 +107,9 @@ voice-full = [
107
107
  # Core TTS-only (lightweight, no STT)
108
108
  core-tts = [
109
109
  "coqui-tts>=0.27.0,<0.30.0",
110
- "torch>=2.0.0,<2.4.0",
111
- "torchvision>=0.15.0,<0.19.0",
112
- "torchaudio>=2.0.0,<2.4.0",
110
+ "torch>=2.1.0,<2.9.0",
111
+ "torchvision>=0.16.0,<1.0.0",
112
+ "torchaudio>=2.1.0,<2.9.0",
113
113
  "librosa>=0.10.0",
114
114
  ]
115
115
 
@@ -0,0 +1,118 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Test script for the new audio lifecycle callbacks in AbstractVoice v0.5.1
4
+
5
+ This script demonstrates the precise timing of the new callback system.
6
+ """
7
+
8
+ import time
9
+ from abstractvoice import VoiceManager
10
+
11
+ def test_audio_callbacks():
12
+ """Test the new audio lifecycle callbacks."""
13
+
14
+ print("🧪 Testing AbstractVoice v0.5.1 Audio Lifecycle Callbacks")
15
+ print("=" * 60)
16
+
17
+ # Callback tracking
18
+ events = []
19
+
20
+ def on_synthesis_start():
21
+ events.append(("synthesis_start", time.time()))
22
+ print("🔴 Synthesis started - thinking phase")
23
+
24
+ def on_audio_start():
25
+ events.append(("audio_start", time.time()))
26
+ print("🔵 Audio started - speaking phase")
27
+
28
+ def on_audio_pause():
29
+ events.append(("audio_pause", time.time()))
30
+ print("⏸️ Audio paused")
31
+
32
+ def on_audio_resume():
33
+ events.append(("audio_resume", time.time()))
34
+ print("▶️ Audio resumed")
35
+
36
+ def on_audio_end():
37
+ events.append(("audio_end", time.time()))
38
+ print("🟢 Audio ended - ready phase")
39
+
40
+ def on_synthesis_end():
41
+ events.append(("synthesis_end", time.time()))
42
+ print("✅ Synthesis complete")
43
+
44
+ # Initialize VoiceManager
45
+ print("Initializing VoiceManager...")
46
+ vm = VoiceManager(debug_mode=True)
47
+
48
+ # Wire up callbacks
49
+ vm.tts_engine.on_playback_start = on_synthesis_start
50
+ vm.tts_engine.on_playback_end = on_synthesis_end
51
+ vm.on_audio_start = on_audio_start
52
+ vm.on_audio_end = on_audio_end
53
+ vm.on_audio_pause = on_audio_pause
54
+ vm.on_audio_resume = on_audio_resume
55
+
56
+ print("\n📢 Starting TTS with callback monitoring...")
57
+
58
+ # Test basic playback
59
+ vm.speak("This is a test of the new audio lifecycle callbacks in AbstractVoice version zero point five point one.")
60
+
61
+ # Wait a bit, then pause
62
+ time.sleep(2)
63
+ print("\n⏸️ Pausing audio...")
64
+ success = vm.pause_speaking()
65
+ if success:
66
+ print("✓ Pause successful")
67
+
68
+ # Wait, then resume
69
+ time.sleep(2)
70
+ print("\n▶️ Resuming audio...")
71
+ success = vm.resume_speaking()
72
+ if success:
73
+ print("✓ Resume successful")
74
+
75
+ # Wait for completion
76
+ while vm.is_speaking() or vm.is_paused():
77
+ time.sleep(0.1)
78
+
79
+ # Analyze timing
80
+ print("\n📊 Callback Timing Analysis:")
81
+ print("-" * 40)
82
+
83
+ if len(events) >= 2:
84
+ start_time = events[0][1]
85
+ for event_name, event_time in events:
86
+ offset = (event_time - start_time) * 1000 # Convert to milliseconds
87
+ print(f"{event_name:15} | +{offset:6.1f}ms")
88
+
89
+ # Calculate key intervals
90
+ synthesis_to_audio = None
91
+ audio_duration = None
92
+
93
+ for i, (event_name, event_time) in enumerate(events):
94
+ if event_name == "synthesis_start":
95
+ synthesis_start = event_time
96
+ elif event_name == "audio_start" and 'synthesis_start' in locals():
97
+ synthesis_to_audio = (event_time - synthesis_start) * 1000
98
+ elif event_name == "audio_end":
99
+ audio_end = event_time
100
+ # Find corresponding audio_start
101
+ for j in range(i-1, -1, -1):
102
+ if events[j][0] == "audio_start":
103
+ audio_duration = (audio_end - events[j][1]) * 1000
104
+ break
105
+
106
+ print("-" * 40)
107
+ if synthesis_to_audio:
108
+ print(f"Synthesis → Audio: {synthesis_to_audio:.1f}ms")
109
+ if audio_duration:
110
+ print(f"Audio Duration: {audio_duration:.1f}ms")
111
+
112
+ # Cleanup
113
+ vm.cleanup()
114
+ print("\n✅ Test completed successfully!")
115
+ print("🎯 The new callbacks provide precise timing for visual status indicators.")
116
+
117
+ if __name__ == "__main__":
118
+ test_audio_callbacks()
@@ -0,0 +1,158 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Test script that simulates fresh install behavior
4
+ Tests language switching and voice selection with download requirements
5
+ """
6
+
7
+ def test_language_switching():
8
+ """Test language switching with download behavior."""
9
+ from abstractvoice import VoiceManager
10
+
11
+ print("🧪 Testing Language Switching (Fresh Install Simulation)")
12
+ print("=" * 60)
13
+
14
+ vm = VoiceManager(debug_mode=True)
15
+
16
+ # Test languages
17
+ test_languages = [
18
+ ('fr', 'Bonjour, ceci est un test.', 'French'),
19
+ ('es', 'Hola, esta es una prueba.', 'Spanish'),
20
+ ('de', 'Hallo, das ist ein Test.', 'German'),
21
+ ('it', 'Ciao, questo è un test.', 'Italian'),
22
+ ('en', 'Back to English.', 'English'),
23
+ ]
24
+
25
+ for lang, text, name in test_languages:
26
+ print(f"\n🌍 Testing {name} ({lang})...")
27
+ success = vm.set_language(lang)
28
+
29
+ if success:
30
+ print(f"✅ {name}: Successfully loaded")
31
+ vm.speak(text, speed=1.0)
32
+ else:
33
+ print(f"❌ {name}: Failed to load")
34
+ print(f" Run: abstractvoice download-models --language {lang}")
35
+
36
+ vm.cleanup()
37
+ print("\n✅ Language switching test complete!")
38
+
39
+
40
+ def test_voice_switching():
41
+ """Test voice switching with download behavior."""
42
+ from abstractvoice import VoiceManager
43
+
44
+ print("\n🎭 Testing Voice Switching (Fresh Install Simulation)")
45
+ print("=" * 60)
46
+
47
+ vm = VoiceManager(debug_mode=True)
48
+
49
+ # Test different voices
50
+ test_voices = [
51
+ ('en', 'tacotron2', 'This is Linda voice.'),
52
+ ('en', 'jenny', 'This is Jenny voice.'),
53
+ ('en', 'ek1', 'This is Edward voice.'),
54
+ ('fr', 'css10_vits', 'Voix française.'),
55
+ ]
56
+
57
+ for lang, voice_id, text in test_voices:
58
+ print(f"\n🎤 Testing {lang}.{voice_id}...")
59
+ success = vm.set_voice(lang, voice_id)
60
+
61
+ if success:
62
+ print(f"✅ {voice_id}: Successfully loaded")
63
+ vm.speak(text)
64
+ else:
65
+ print(f"❌ {voice_id}: Failed to load")
66
+
67
+ vm.cleanup()
68
+ print("\n✅ Voice switching test complete!")
69
+
70
+
71
+ def test_cli_commands():
72
+ """Test CLI commands for model management."""
73
+ from abstractvoice.examples.cli_repl import VoiceREPL
74
+
75
+ print("\n💻 Testing CLI Commands")
76
+ print("=" * 60)
77
+
78
+ cli = VoiceREPL()
79
+
80
+ # Test /language command
81
+ print("\n📝 Testing /language fr")
82
+ cli.onecmd('/language fr')
83
+
84
+ # Test /setvoice command
85
+ print("\n📝 Testing /setvoice en.jenny")
86
+ cli.onecmd('/setvoice en.jenny')
87
+
88
+ print("\n✅ CLI commands test complete!")
89
+
90
+
91
+ def test_download_status():
92
+ """Test model download and status checking."""
93
+ from abstractvoice import is_ready, get_status
94
+ import json
95
+
96
+ print("\n📦 Testing Model Status")
97
+ print("=" * 60)
98
+
99
+ # Check if ready
100
+ ready = is_ready()
101
+ print(f"System ready: {ready}")
102
+
103
+ # Get detailed status
104
+ status = json.loads(get_status())
105
+ print(f"Total cached models: {status.get('total_cached', 0)}")
106
+ print(f"Cache size: {status.get('total_size_mb', 0):.1f} MB")
107
+ print(f"Ready for offline: {status.get('ready_for_offline', False)}")
108
+
109
+ # List cached models
110
+ if 'cached_models' in status:
111
+ print("\nCached models:")
112
+ for model in status['cached_models'][:5]: # Show first 5
113
+ print(f" • {model}")
114
+ if len(status['cached_models']) > 5:
115
+ print(f" ... and {len(status['cached_models']) - 5} more")
116
+
117
+ print("\n✅ Model status test complete!")
118
+
119
+
120
+ def main():
121
+ """Run all tests."""
122
+ import sys
123
+
124
+ print("🚀 AbstractVoice Fresh Install Simulation")
125
+ print("=" * 60)
126
+ print("This tests how the system behaves on a fresh install")
127
+ print("when models need to be downloaded.\n")
128
+
129
+ # Check command line arguments
130
+ if len(sys.argv) > 1:
131
+ if sys.argv[1] == '--language':
132
+ test_language_switching()
133
+ elif sys.argv[1] == '--voice':
134
+ test_voice_switching()
135
+ elif sys.argv[1] == '--cli':
136
+ test_cli_commands()
137
+ elif sys.argv[1] == '--status':
138
+ test_download_status()
139
+ else:
140
+ print("Usage: python test_fresh_install.py [--language|--voice|--cli|--status]")
141
+ else:
142
+ # Run all tests
143
+ test_download_status()
144
+ test_language_switching()
145
+ test_voice_switching()
146
+ test_cli_commands()
147
+
148
+ print("\n" + "=" * 60)
149
+ print("🎉 All fresh install tests complete!")
150
+ print("\nKey findings:")
151
+ print(" • Language switching downloads models if needed")
152
+ print(" • Voice switching downloads models if needed")
153
+ print(" • Clear error messages when downloads fail")
154
+ print(" • CLI commands properly handle missing models")
155
+
156
+
157
+ if __name__ == "__main__":
158
+ main()
@@ -0,0 +1,87 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Test script to verify voice switching works correctly.
4
+ This will help us validate the fixes to the voice switching system.
5
+ """
6
+
7
+ from abstractvoice import VoiceManager
8
+ import time
9
+
10
+ def test_voice_switching():
11
+ """Test voice switching to ensure different models actually load."""
12
+ print("🧪 Testing Voice Switching Fixes...")
13
+ print("=" * 50)
14
+
15
+ vm = VoiceManager(debug_mode=True)
16
+ print("✅ VoiceManager initialized")
17
+
18
+ # Test voices that should sound different
19
+ test_voices = [
20
+ ("en", "tacotron2"), # Female LJSpeech
21
+ ("en", "jenny"), # Different female
22
+ ("en", "ek1"), # Male British
23
+ ]
24
+
25
+ print(f"\n🎭 Testing {len(test_voices)} different voices...")
26
+
27
+ for i, (lang, voice) in enumerate(test_voices):
28
+ print(f"\n--- Test {i+1}: {lang}.{voice} ---")
29
+
30
+ try:
31
+ # Set the voice and ensure it actually loads the requested model
32
+ success = vm.set_voice(lang, voice)
33
+
34
+ if success:
35
+ print(f"✅ Voice {voice} set successfully")
36
+
37
+ # Test speech with this voice
38
+ test_text = f"Hello, this is voice {voice}."
39
+ vm.speak(test_text, speed=1.0)
40
+
41
+ # Small delay between voice tests
42
+ time.sleep(1.0)
43
+
44
+ else:
45
+ print(f"❌ Failed to set voice {voice}")
46
+
47
+ except Exception as e:
48
+ print(f"💥 Exception with voice {voice}: {e}")
49
+
50
+ # Test language switching (should work without crashes)
51
+ print(f"\n🌍 Testing language switching...")
52
+
53
+ languages = ["en", "fr", "de"]
54
+ for lang in languages:
55
+ try:
56
+ success = vm.set_language(lang)
57
+ if success:
58
+ print(f"✅ Language {lang}: OK")
59
+ vm.speak(f"Testing {lang}", speed=1.0)
60
+ time.sleep(0.5)
61
+ else:
62
+ print(f"❌ Language {lang}: Failed")
63
+ except Exception as e:
64
+ print(f"💥 Language {lang}: Exception - {e}")
65
+
66
+ # Test Italian (the crash-prone one)
67
+ print(f"\n🇮🇹 Testing Italian models (crash safety)...")
68
+
69
+ italian_voices = ["mai_male_vits", "mai_female_vits"]
70
+ for voice in italian_voices:
71
+ try:
72
+ print(f"Testing it.{voice}...")
73
+ success = vm.set_voice("it", voice)
74
+ if success:
75
+ print(f"✅ Italian {voice}: Safe!")
76
+ vm.speak("Ciao, test italiano.", speed=0.8) # Slower for Italian
77
+ time.sleep(0.5)
78
+ else:
79
+ print(f"⚠️ Italian {voice}: Skipped (safety)")
80
+ except Exception as e:
81
+ print(f"💥 Italian {voice}: Exception handled - {e}")
82
+
83
+ vm.cleanup()
84
+ print(f"\n🎉 Voice switching test complete!")
85
+
86
+ if __name__ == "__main__":
87
+ test_voice_switching()
File without changes
File without changes