abstractvoice 0.5.0__tar.gz → 0.5.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {abstractvoice-0.5.0 → abstractvoice-0.5.2}/PKG-INFO +47 -16
- {abstractvoice-0.5.0 → abstractvoice-0.5.2}/README.md +31 -0
- {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/__init__.py +1 -1
- {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/tts/tts_engine.py +31 -1
- {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/voice_manager.py +33 -1
- {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice.egg-info/PKG-INFO +47 -16
- {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice.egg-info/SOURCES.txt +4 -1
- {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice.egg-info/requires.txt +15 -15
- {abstractvoice-0.5.0 → abstractvoice-0.5.2}/pyproject.toml +15 -15
- abstractvoice-0.5.2/tests/test_callbacks.py +118 -0
- abstractvoice-0.5.2/tests/test_fresh_install.py +158 -0
- abstractvoice-0.5.2/tests/test_voice_switching.py +87 -0
- {abstractvoice-0.5.0 → abstractvoice-0.5.2}/LICENSE +0 -0
- {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/__main__.py +0 -0
- {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/dependency_check.py +0 -0
- {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/examples/__init__.py +0 -0
- {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/examples/cli_repl.py +0 -0
- {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/examples/voice_cli.py +0 -0
- {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/examples/web_api.py +0 -0
- {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/instant_setup.py +0 -0
- {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/recognition.py +0 -0
- {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/simple_model_manager.py +0 -0
- {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/stt/__init__.py +0 -0
- {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/stt/transcriber.py +0 -0
- {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/tts/__init__.py +0 -0
- {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/vad/__init__.py +0 -0
- {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice/vad/voice_detector.py +0 -0
- {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice.egg-info/dependency_links.txt +0 -0
- {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice.egg-info/entry_points.txt +0 -0
- {abstractvoice-0.5.0 → abstractvoice-0.5.2}/abstractvoice.egg-info/top_level.txt +0 -0
- {abstractvoice-0.5.0 → abstractvoice-0.5.2}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: abstractvoice
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.5.2
|
|
4
4
|
Summary: A modular Python library for voice interactions with AI systems
|
|
5
5
|
Author-email: Laurent-Philippe Albou <contact@abstractcore.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -21,9 +21,9 @@ Requires-Dist: numpy>=1.24.0
|
|
|
21
21
|
Requires-Dist: requests>=2.31.0
|
|
22
22
|
Requires-Dist: appdirs>=1.4.0
|
|
23
23
|
Requires-Dist: coqui-tts<0.30.0,>=0.27.0
|
|
24
|
-
Requires-Dist: torch<2.
|
|
25
|
-
Requires-Dist: torchvision<0.
|
|
26
|
-
Requires-Dist: torchaudio<2.
|
|
24
|
+
Requires-Dist: torch<2.9.0,>=2.1.0
|
|
25
|
+
Requires-Dist: torchvision<1.0.0,>=0.16.0
|
|
26
|
+
Requires-Dist: torchaudio<2.9.0,>=2.1.0
|
|
27
27
|
Requires-Dist: librosa>=0.10.0
|
|
28
28
|
Requires-Dist: sounddevice>=0.4.6
|
|
29
29
|
Requires-Dist: soundfile>=0.12.1
|
|
@@ -34,9 +34,9 @@ Requires-Dist: PyAudio>=0.2.13; extra == "voice"
|
|
|
34
34
|
Requires-Dist: soundfile>=0.12.1; extra == "voice"
|
|
35
35
|
Provides-Extra: tts
|
|
36
36
|
Requires-Dist: coqui-tts<0.30.0,>=0.27.0; extra == "tts"
|
|
37
|
-
Requires-Dist: torch<2.
|
|
38
|
-
Requires-Dist: torchvision<0.
|
|
39
|
-
Requires-Dist: torchaudio<2.
|
|
37
|
+
Requires-Dist: torch<2.9.0,>=2.1.0; extra == "tts"
|
|
38
|
+
Requires-Dist: torchvision<1.0.0,>=0.16.0; extra == "tts"
|
|
39
|
+
Requires-Dist: torchaudio<2.9.0,>=2.1.0; extra == "tts"
|
|
40
40
|
Requires-Dist: librosa>=0.10.0; extra == "tts"
|
|
41
41
|
Provides-Extra: stt
|
|
42
42
|
Requires-Dist: openai-whisper>=20230314; extra == "stt"
|
|
@@ -49,9 +49,9 @@ Requires-Dist: webrtcvad>=2.0.10; extra == "all"
|
|
|
49
49
|
Requires-Dist: PyAudio>=0.2.13; extra == "all"
|
|
50
50
|
Requires-Dist: openai-whisper>=20230314; extra == "all"
|
|
51
51
|
Requires-Dist: coqui-tts<0.30.0,>=0.27.0; extra == "all"
|
|
52
|
-
Requires-Dist: torch<2.
|
|
53
|
-
Requires-Dist: torchvision<0.
|
|
54
|
-
Requires-Dist: torchaudio<2.
|
|
52
|
+
Requires-Dist: torch<2.9.0,>=2.1.0; extra == "all"
|
|
53
|
+
Requires-Dist: torchvision<1.0.0,>=0.16.0; extra == "all"
|
|
54
|
+
Requires-Dist: torchaudio<2.9.0,>=2.1.0; extra == "all"
|
|
55
55
|
Requires-Dist: librosa>=0.10.0; extra == "all"
|
|
56
56
|
Requires-Dist: soundfile>=0.12.1; extra == "all"
|
|
57
57
|
Requires-Dist: flask>=2.0.0; extra == "all"
|
|
@@ -66,17 +66,17 @@ Requires-Dist: webrtcvad>=2.0.10; extra == "voice-full"
|
|
|
66
66
|
Requires-Dist: PyAudio>=0.2.13; extra == "voice-full"
|
|
67
67
|
Requires-Dist: openai-whisper>=20230314; extra == "voice-full"
|
|
68
68
|
Requires-Dist: coqui-tts<0.30.0,>=0.27.0; extra == "voice-full"
|
|
69
|
-
Requires-Dist: torch<2.
|
|
70
|
-
Requires-Dist: torchvision<0.
|
|
71
|
-
Requires-Dist: torchaudio<2.
|
|
69
|
+
Requires-Dist: torch<2.9.0,>=2.1.0; extra == "voice-full"
|
|
70
|
+
Requires-Dist: torchvision<1.0.0,>=0.16.0; extra == "voice-full"
|
|
71
|
+
Requires-Dist: torchaudio<2.9.0,>=2.1.0; extra == "voice-full"
|
|
72
72
|
Requires-Dist: librosa>=0.10.0; extra == "voice-full"
|
|
73
73
|
Requires-Dist: soundfile>=0.12.1; extra == "voice-full"
|
|
74
74
|
Requires-Dist: tiktoken>=0.6.0; extra == "voice-full"
|
|
75
75
|
Provides-Extra: core-tts
|
|
76
76
|
Requires-Dist: coqui-tts<0.30.0,>=0.27.0; extra == "core-tts"
|
|
77
|
-
Requires-Dist: torch<2.
|
|
78
|
-
Requires-Dist: torchvision<0.
|
|
79
|
-
Requires-Dist: torchaudio<2.
|
|
77
|
+
Requires-Dist: torch<2.9.0,>=2.1.0; extra == "core-tts"
|
|
78
|
+
Requires-Dist: torchvision<1.0.0,>=0.16.0; extra == "core-tts"
|
|
79
|
+
Requires-Dist: torchaudio<2.9.0,>=2.1.0; extra == "core-tts"
|
|
80
80
|
Requires-Dist: librosa>=0.10.0; extra == "core-tts"
|
|
81
81
|
Provides-Extra: core-stt
|
|
82
82
|
Requires-Dist: openai-whisper>=20230314; extra == "core-stt"
|
|
@@ -615,6 +615,37 @@ manager.set_tts_model("tts_models/en/ljspeech/glow-tts")
|
|
|
615
615
|
# - "tts_models/en/ljspeech/glow-tts" (alternative fallback)
|
|
616
616
|
# - "tts_models/en/ljspeech/tacotron2-DDC" (legacy)
|
|
617
617
|
|
|
618
|
+
# === Audio Lifecycle Callbacks (v0.5.1+) ===
|
|
619
|
+
|
|
620
|
+
# NEW: Precise audio timing callbacks for visual status indicators
|
|
621
|
+
def on_synthesis_start():
|
|
622
|
+
print("🔴 Synthesis started - show thinking animation")
|
|
623
|
+
|
|
624
|
+
def on_audio_start():
|
|
625
|
+
print("🔵 Audio started - show speaking animation")
|
|
626
|
+
|
|
627
|
+
def on_audio_pause():
|
|
628
|
+
print("⏸️ Audio paused - show paused animation")
|
|
629
|
+
|
|
630
|
+
def on_audio_resume():
|
|
631
|
+
print("▶️ Audio resumed - continue speaking animation")
|
|
632
|
+
|
|
633
|
+
def on_audio_end():
|
|
634
|
+
print("🟢 Audio ended - show ready animation")
|
|
635
|
+
|
|
636
|
+
def on_synthesis_end():
|
|
637
|
+
print("✅ Synthesis complete")
|
|
638
|
+
|
|
639
|
+
# Wire up callbacks
|
|
640
|
+
manager.tts_engine.on_playback_start = on_synthesis_start # Existing (synthesis phase)
|
|
641
|
+
manager.tts_engine.on_playback_end = on_synthesis_end # Existing (synthesis phase)
|
|
642
|
+
manager.on_audio_start = on_audio_start # NEW (actual audio playback)
|
|
643
|
+
manager.on_audio_end = on_audio_end # NEW (actual audio playback)
|
|
644
|
+
manager.on_audio_pause = on_audio_pause # NEW (pause events)
|
|
645
|
+
manager.on_audio_resume = on_audio_resume # NEW (resume events)
|
|
646
|
+
|
|
647
|
+
# Perfect for system tray icons, UI animations, or coordinating multiple audio streams
|
|
648
|
+
|
|
618
649
|
# === STT (Speech-to-Text) ===
|
|
619
650
|
|
|
620
651
|
def on_transcription(text):
|
|
@@ -525,6 +525,37 @@ manager.set_tts_model("tts_models/en/ljspeech/glow-tts")
|
|
|
525
525
|
# - "tts_models/en/ljspeech/glow-tts" (alternative fallback)
|
|
526
526
|
# - "tts_models/en/ljspeech/tacotron2-DDC" (legacy)
|
|
527
527
|
|
|
528
|
+
# === Audio Lifecycle Callbacks (v0.5.1+) ===
|
|
529
|
+
|
|
530
|
+
# NEW: Precise audio timing callbacks for visual status indicators
|
|
531
|
+
def on_synthesis_start():
|
|
532
|
+
print("🔴 Synthesis started - show thinking animation")
|
|
533
|
+
|
|
534
|
+
def on_audio_start():
|
|
535
|
+
print("🔵 Audio started - show speaking animation")
|
|
536
|
+
|
|
537
|
+
def on_audio_pause():
|
|
538
|
+
print("⏸️ Audio paused - show paused animation")
|
|
539
|
+
|
|
540
|
+
def on_audio_resume():
|
|
541
|
+
print("▶️ Audio resumed - continue speaking animation")
|
|
542
|
+
|
|
543
|
+
def on_audio_end():
|
|
544
|
+
print("🟢 Audio ended - show ready animation")
|
|
545
|
+
|
|
546
|
+
def on_synthesis_end():
|
|
547
|
+
print("✅ Synthesis complete")
|
|
548
|
+
|
|
549
|
+
# Wire up callbacks
|
|
550
|
+
manager.tts_engine.on_playback_start = on_synthesis_start # Existing (synthesis phase)
|
|
551
|
+
manager.tts_engine.on_playback_end = on_synthesis_end # Existing (synthesis phase)
|
|
552
|
+
manager.on_audio_start = on_audio_start # NEW (actual audio playback)
|
|
553
|
+
manager.on_audio_end = on_audio_end # NEW (actual audio playback)
|
|
554
|
+
manager.on_audio_pause = on_audio_pause # NEW (pause events)
|
|
555
|
+
manager.on_audio_resume = on_audio_resume # NEW (resume events)
|
|
556
|
+
|
|
557
|
+
# Perfect for system tray icons, UI animations, or coordinating multiple audio streams
|
|
558
|
+
|
|
528
559
|
# === STT (Speech-to-Text) ===
|
|
529
560
|
|
|
530
561
|
def on_transcription(text):
|
|
@@ -32,5 +32,5 @@ from .voice_manager import VoiceManager
|
|
|
32
32
|
# Import simple APIs for third-party applications
|
|
33
33
|
from .simple_model_manager import list_models, download_model, get_status, is_ready
|
|
34
34
|
|
|
35
|
-
__version__ = "0.5.0"
|
|
35
|
+
__version__ = "0.5.2"
|
|
36
36
|
__all__ = ['VoiceManager', 'list_models', 'download_model', 'get_status', 'is_ready']
|
|
@@ -212,6 +212,13 @@ class NonBlockingAudioPlayer:
|
|
|
212
212
|
self.current_position = 0
|
|
213
213
|
self.playback_complete_callback = None
|
|
214
214
|
|
|
215
|
+
# NEW: Enhanced audio lifecycle callbacks
|
|
216
|
+
self.on_audio_start = None # Called when first audio sample plays
|
|
217
|
+
self.on_audio_end = None # Called when last audio sample finishes
|
|
218
|
+
self.on_audio_pause = None # Called when audio is paused
|
|
219
|
+
self.on_audio_resume = None # Called when audio is resumed
|
|
220
|
+
self._audio_started = False # Track if we've fired start callback
|
|
221
|
+
|
|
215
222
|
def _audio_callback(self, outdata, frames, time, status):
|
|
216
223
|
"""Callback function for OutputStream - provides immediate pause/resume."""
|
|
217
224
|
if status and self.debug_mode:
|
|
@@ -237,6 +244,12 @@ class NonBlockingAudioPlayer:
|
|
|
237
244
|
outdata.fill(0)
|
|
238
245
|
if self.is_playing:
|
|
239
246
|
self.is_playing = False
|
|
247
|
+
self._audio_started = False # Reset for next playback
|
|
248
|
+
|
|
249
|
+
# Fire audio end callback
|
|
250
|
+
if self.on_audio_end:
|
|
251
|
+
threading.Thread(target=self.on_audio_end, daemon=True).start()
|
|
252
|
+
|
|
240
253
|
if self.playback_complete_callback:
|
|
241
254
|
# Call completion callback in a separate thread to avoid blocking
|
|
242
255
|
threading.Thread(target=self.playback_complete_callback, daemon=True).start()
|
|
@@ -246,6 +259,12 @@ class NonBlockingAudioPlayer:
|
|
|
246
259
|
remaining = len(self.current_audio) - self.current_position
|
|
247
260
|
frames_to_output = min(frames, remaining)
|
|
248
261
|
|
|
262
|
+
# Fire audio start callback on first real audio output
|
|
263
|
+
if frames_to_output > 0 and not self._audio_started:
|
|
264
|
+
self._audio_started = True
|
|
265
|
+
if self.on_audio_start:
|
|
266
|
+
threading.Thread(target=self.on_audio_start, daemon=True).start()
|
|
267
|
+
|
|
249
268
|
# Output the audio data
|
|
250
269
|
if frames_to_output > 0:
|
|
251
270
|
# Handle both mono and stereo output
|
|
@@ -344,6 +363,11 @@ class NonBlockingAudioPlayer:
|
|
|
344
363
|
self.is_paused = True
|
|
345
364
|
if self.debug_mode:
|
|
346
365
|
print(" > Audio paused immediately")
|
|
366
|
+
|
|
367
|
+
# Fire audio pause callback
|
|
368
|
+
if self.on_audio_pause:
|
|
369
|
+
threading.Thread(target=self.on_audio_pause, daemon=True).start()
|
|
370
|
+
|
|
347
371
|
return True
|
|
348
372
|
return False
|
|
349
373
|
|
|
@@ -354,6 +378,11 @@ class NonBlockingAudioPlayer:
|
|
|
354
378
|
self.is_paused = False
|
|
355
379
|
if self.debug_mode:
|
|
356
380
|
print(" > Audio resumed immediately")
|
|
381
|
+
|
|
382
|
+
# Fire audio resume callback
|
|
383
|
+
if self.on_audio_resume:
|
|
384
|
+
threading.Thread(target=self.on_audio_resume, daemon=True).start()
|
|
385
|
+
|
|
357
386
|
return True
|
|
358
387
|
return False
|
|
359
388
|
|
|
@@ -1264,4 +1293,5 @@ class TTSEngine:
|
|
|
1264
1293
|
Returns:
|
|
1265
1294
|
True if TTS is active, False otherwise
|
|
1266
1295
|
"""
|
|
1267
|
-
return self.is_playing
|
|
1296
|
+
return self.is_playing
|
|
1297
|
+
|
|
@@ -241,6 +241,18 @@ class VoiceManager:
|
|
|
241
241
|
self.tts_engine.on_playback_start = self._on_tts_start
|
|
242
242
|
self.tts_engine.on_playback_end = self._on_tts_end
|
|
243
243
|
|
|
244
|
+
# NEW: Enhanced audio lifecycle callbacks (v0.5.1)
|
|
245
|
+
self.on_audio_start = None # Called when first audio sample plays
|
|
246
|
+
self.on_audio_end = None # Called when last audio sample finishes
|
|
247
|
+
self.on_audio_pause = None # Called when audio is paused
|
|
248
|
+
self.on_audio_resume = None # Called when audio is resumed
|
|
249
|
+
|
|
250
|
+
# Wire callbacks directly to audio player (skip TTSEngine layer)
|
|
251
|
+
self.tts_engine.audio_player.on_audio_start = self._on_audio_start
|
|
252
|
+
self.tts_engine.audio_player.on_audio_end = self._on_audio_end
|
|
253
|
+
self.tts_engine.audio_player.on_audio_pause = self._on_audio_pause
|
|
254
|
+
self.tts_engine.audio_player.on_audio_resume = self._on_audio_resume
|
|
255
|
+
|
|
244
256
|
# Voice recognizer is initialized on demand
|
|
245
257
|
self.voice_recognizer = None
|
|
246
258
|
self.whisper_model = whisper_model
|
|
@@ -1030,4 +1042,24 @@ class VoiceManager:
|
|
|
1030
1042
|
self.voice_recognizer.stop()
|
|
1031
1043
|
|
|
1032
1044
|
self.stop_speaking()
|
|
1033
|
-
return True
|
|
1045
|
+
return True
|
|
1046
|
+
|
|
1047
|
+
def _on_audio_start(self):
|
|
1048
|
+
"""Called when audio actually starts playing."""
|
|
1049
|
+
if self.on_audio_start:
|
|
1050
|
+
self.on_audio_start()
|
|
1051
|
+
|
|
1052
|
+
def _on_audio_end(self):
|
|
1053
|
+
"""Called when audio actually finishes playing."""
|
|
1054
|
+
if self.on_audio_end:
|
|
1055
|
+
self.on_audio_end()
|
|
1056
|
+
|
|
1057
|
+
def _on_audio_pause(self):
|
|
1058
|
+
"""Called when audio is paused."""
|
|
1059
|
+
if self.on_audio_pause:
|
|
1060
|
+
self.on_audio_pause()
|
|
1061
|
+
|
|
1062
|
+
def _on_audio_resume(self):
|
|
1063
|
+
"""Called when audio is resumed."""
|
|
1064
|
+
if self.on_audio_resume:
|
|
1065
|
+
self.on_audio_resume()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: abstractvoice
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.5.2
|
|
4
4
|
Summary: A modular Python library for voice interactions with AI systems
|
|
5
5
|
Author-email: Laurent-Philippe Albou <contact@abstractcore.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -21,9 +21,9 @@ Requires-Dist: numpy>=1.24.0
|
|
|
21
21
|
Requires-Dist: requests>=2.31.0
|
|
22
22
|
Requires-Dist: appdirs>=1.4.0
|
|
23
23
|
Requires-Dist: coqui-tts<0.30.0,>=0.27.0
|
|
24
|
-
Requires-Dist: torch<2.
|
|
25
|
-
Requires-Dist: torchvision<0.
|
|
26
|
-
Requires-Dist: torchaudio<2.
|
|
24
|
+
Requires-Dist: torch<2.9.0,>=2.1.0
|
|
25
|
+
Requires-Dist: torchvision<1.0.0,>=0.16.0
|
|
26
|
+
Requires-Dist: torchaudio<2.9.0,>=2.1.0
|
|
27
27
|
Requires-Dist: librosa>=0.10.0
|
|
28
28
|
Requires-Dist: sounddevice>=0.4.6
|
|
29
29
|
Requires-Dist: soundfile>=0.12.1
|
|
@@ -34,9 +34,9 @@ Requires-Dist: PyAudio>=0.2.13; extra == "voice"
|
|
|
34
34
|
Requires-Dist: soundfile>=0.12.1; extra == "voice"
|
|
35
35
|
Provides-Extra: tts
|
|
36
36
|
Requires-Dist: coqui-tts<0.30.0,>=0.27.0; extra == "tts"
|
|
37
|
-
Requires-Dist: torch<2.
|
|
38
|
-
Requires-Dist: torchvision<0.
|
|
39
|
-
Requires-Dist: torchaudio<2.
|
|
37
|
+
Requires-Dist: torch<2.9.0,>=2.1.0; extra == "tts"
|
|
38
|
+
Requires-Dist: torchvision<1.0.0,>=0.16.0; extra == "tts"
|
|
39
|
+
Requires-Dist: torchaudio<2.9.0,>=2.1.0; extra == "tts"
|
|
40
40
|
Requires-Dist: librosa>=0.10.0; extra == "tts"
|
|
41
41
|
Provides-Extra: stt
|
|
42
42
|
Requires-Dist: openai-whisper>=20230314; extra == "stt"
|
|
@@ -49,9 +49,9 @@ Requires-Dist: webrtcvad>=2.0.10; extra == "all"
|
|
|
49
49
|
Requires-Dist: PyAudio>=0.2.13; extra == "all"
|
|
50
50
|
Requires-Dist: openai-whisper>=20230314; extra == "all"
|
|
51
51
|
Requires-Dist: coqui-tts<0.30.0,>=0.27.0; extra == "all"
|
|
52
|
-
Requires-Dist: torch<2.
|
|
53
|
-
Requires-Dist: torchvision<0.
|
|
54
|
-
Requires-Dist: torchaudio<2.
|
|
52
|
+
Requires-Dist: torch<2.9.0,>=2.1.0; extra == "all"
|
|
53
|
+
Requires-Dist: torchvision<1.0.0,>=0.16.0; extra == "all"
|
|
54
|
+
Requires-Dist: torchaudio<2.9.0,>=2.1.0; extra == "all"
|
|
55
55
|
Requires-Dist: librosa>=0.10.0; extra == "all"
|
|
56
56
|
Requires-Dist: soundfile>=0.12.1; extra == "all"
|
|
57
57
|
Requires-Dist: flask>=2.0.0; extra == "all"
|
|
@@ -66,17 +66,17 @@ Requires-Dist: webrtcvad>=2.0.10; extra == "voice-full"
|
|
|
66
66
|
Requires-Dist: PyAudio>=0.2.13; extra == "voice-full"
|
|
67
67
|
Requires-Dist: openai-whisper>=20230314; extra == "voice-full"
|
|
68
68
|
Requires-Dist: coqui-tts<0.30.0,>=0.27.0; extra == "voice-full"
|
|
69
|
-
Requires-Dist: torch<2.
|
|
70
|
-
Requires-Dist: torchvision<0.
|
|
71
|
-
Requires-Dist: torchaudio<2.
|
|
69
|
+
Requires-Dist: torch<2.9.0,>=2.1.0; extra == "voice-full"
|
|
70
|
+
Requires-Dist: torchvision<1.0.0,>=0.16.0; extra == "voice-full"
|
|
71
|
+
Requires-Dist: torchaudio<2.9.0,>=2.1.0; extra == "voice-full"
|
|
72
72
|
Requires-Dist: librosa>=0.10.0; extra == "voice-full"
|
|
73
73
|
Requires-Dist: soundfile>=0.12.1; extra == "voice-full"
|
|
74
74
|
Requires-Dist: tiktoken>=0.6.0; extra == "voice-full"
|
|
75
75
|
Provides-Extra: core-tts
|
|
76
76
|
Requires-Dist: coqui-tts<0.30.0,>=0.27.0; extra == "core-tts"
|
|
77
|
-
Requires-Dist: torch<2.
|
|
78
|
-
Requires-Dist: torchvision<0.
|
|
79
|
-
Requires-Dist: torchaudio<2.
|
|
77
|
+
Requires-Dist: torch<2.9.0,>=2.1.0; extra == "core-tts"
|
|
78
|
+
Requires-Dist: torchvision<1.0.0,>=0.16.0; extra == "core-tts"
|
|
79
|
+
Requires-Dist: torchaudio<2.9.0,>=2.1.0; extra == "core-tts"
|
|
80
80
|
Requires-Dist: librosa>=0.10.0; extra == "core-tts"
|
|
81
81
|
Provides-Extra: core-stt
|
|
82
82
|
Requires-Dist: openai-whisper>=20230314; extra == "core-stt"
|
|
@@ -615,6 +615,37 @@ manager.set_tts_model("tts_models/en/ljspeech/glow-tts")
|
|
|
615
615
|
# - "tts_models/en/ljspeech/glow-tts" (alternative fallback)
|
|
616
616
|
# - "tts_models/en/ljspeech/tacotron2-DDC" (legacy)
|
|
617
617
|
|
|
618
|
+
# === Audio Lifecycle Callbacks (v0.5.1+) ===
|
|
619
|
+
|
|
620
|
+
# NEW: Precise audio timing callbacks for visual status indicators
|
|
621
|
+
def on_synthesis_start():
|
|
622
|
+
print("🔴 Synthesis started - show thinking animation")
|
|
623
|
+
|
|
624
|
+
def on_audio_start():
|
|
625
|
+
print("🔵 Audio started - show speaking animation")
|
|
626
|
+
|
|
627
|
+
def on_audio_pause():
|
|
628
|
+
print("⏸️ Audio paused - show paused animation")
|
|
629
|
+
|
|
630
|
+
def on_audio_resume():
|
|
631
|
+
print("▶️ Audio resumed - continue speaking animation")
|
|
632
|
+
|
|
633
|
+
def on_audio_end():
|
|
634
|
+
print("🟢 Audio ended - show ready animation")
|
|
635
|
+
|
|
636
|
+
def on_synthesis_end():
|
|
637
|
+
print("✅ Synthesis complete")
|
|
638
|
+
|
|
639
|
+
# Wire up callbacks
|
|
640
|
+
manager.tts_engine.on_playback_start = on_synthesis_start # Existing (synthesis phase)
|
|
641
|
+
manager.tts_engine.on_playback_end = on_synthesis_end # Existing (synthesis phase)
|
|
642
|
+
manager.on_audio_start = on_audio_start # NEW (actual audio playback)
|
|
643
|
+
manager.on_audio_end = on_audio_end # NEW (actual audio playback)
|
|
644
|
+
manager.on_audio_pause = on_audio_pause # NEW (pause events)
|
|
645
|
+
manager.on_audio_resume = on_audio_resume # NEW (resume events)
|
|
646
|
+
|
|
647
|
+
# Perfect for system tray icons, UI animations, or coordinating multiple audio streams
|
|
648
|
+
|
|
618
649
|
# === STT (Speech-to-Text) ===
|
|
619
650
|
|
|
620
651
|
def on_transcription(text):
|
|
@@ -23,4 +23,7 @@ abstractvoice/stt/transcriber.py
|
|
|
23
23
|
abstractvoice/tts/__init__.py
|
|
24
24
|
abstractvoice/tts/tts_engine.py
|
|
25
25
|
abstractvoice/vad/__init__.py
|
|
26
|
-
abstractvoice/vad/voice_detector.py
|
|
26
|
+
abstractvoice/vad/voice_detector.py
|
|
27
|
+
tests/test_callbacks.py
|
|
28
|
+
tests/test_fresh_install.py
|
|
29
|
+
tests/test_voice_switching.py
|
|
@@ -2,9 +2,9 @@ numpy>=1.24.0
|
|
|
2
2
|
requests>=2.31.0
|
|
3
3
|
appdirs>=1.4.0
|
|
4
4
|
coqui-tts<0.30.0,>=0.27.0
|
|
5
|
-
torch<2.
|
|
6
|
-
torchvision<0.
|
|
7
|
-
torchaudio<2.
|
|
5
|
+
torch<2.9.0,>=2.1.0
|
|
6
|
+
torchvision<1.0.0,>=0.16.0
|
|
7
|
+
torchaudio<2.9.0,>=2.1.0
|
|
8
8
|
librosa>=0.10.0
|
|
9
9
|
sounddevice>=0.4.6
|
|
10
10
|
soundfile>=0.12.1
|
|
@@ -15,9 +15,9 @@ webrtcvad>=2.0.10
|
|
|
15
15
|
PyAudio>=0.2.13
|
|
16
16
|
openai-whisper>=20230314
|
|
17
17
|
coqui-tts<0.30.0,>=0.27.0
|
|
18
|
-
torch<2.
|
|
19
|
-
torchvision<0.
|
|
20
|
-
torchaudio<2.
|
|
18
|
+
torch<2.9.0,>=2.1.0
|
|
19
|
+
torchvision<1.0.0,>=0.16.0
|
|
20
|
+
torchaudio<2.9.0,>=2.1.0
|
|
21
21
|
librosa>=0.10.0
|
|
22
22
|
soundfile>=0.12.1
|
|
23
23
|
flask>=2.0.0
|
|
@@ -35,9 +35,9 @@ tiktoken>=0.6.0
|
|
|
35
35
|
|
|
36
36
|
[core-tts]
|
|
37
37
|
coqui-tts<0.30.0,>=0.27.0
|
|
38
|
-
torch<2.
|
|
39
|
-
torchvision<0.
|
|
40
|
-
torchaudio<2.
|
|
38
|
+
torch<2.9.0,>=2.1.0
|
|
39
|
+
torchvision<1.0.0,>=0.16.0
|
|
40
|
+
torchaudio<2.9.0,>=2.1.0
|
|
41
41
|
librosa>=0.10.0
|
|
42
42
|
|
|
43
43
|
[dev]
|
|
@@ -51,9 +51,9 @@ tiktoken>=0.6.0
|
|
|
51
51
|
|
|
52
52
|
[tts]
|
|
53
53
|
coqui-tts<0.30.0,>=0.27.0
|
|
54
|
-
torch<2.
|
|
55
|
-
torchvision<0.
|
|
56
|
-
torchaudio<2.
|
|
54
|
+
torch<2.9.0,>=2.1.0
|
|
55
|
+
torchvision<1.0.0,>=0.16.0
|
|
56
|
+
torchaudio<2.9.0,>=2.1.0
|
|
57
57
|
librosa>=0.10.0
|
|
58
58
|
|
|
59
59
|
[voice]
|
|
@@ -68,9 +68,9 @@ webrtcvad>=2.0.10
|
|
|
68
68
|
PyAudio>=0.2.13
|
|
69
69
|
openai-whisper>=20230314
|
|
70
70
|
coqui-tts<0.30.0,>=0.27.0
|
|
71
|
-
torch<2.
|
|
72
|
-
torchvision<0.
|
|
73
|
-
torchaudio<2.
|
|
71
|
+
torch<2.9.0,>=2.1.0
|
|
72
|
+
torchvision<1.0.0,>=0.16.0
|
|
73
|
+
torchaudio<2.9.0,>=2.1.0
|
|
74
74
|
librosa>=0.10.0
|
|
75
75
|
soundfile>=0.12.1
|
|
76
76
|
tiktoken>=0.6.0
|
|
@@ -29,9 +29,9 @@ dependencies = [
|
|
|
29
29
|
"appdirs>=1.4.0",
|
|
30
30
|
# Essential TTS dependencies for immediate functionality
|
|
31
31
|
"coqui-tts>=0.27.0,<0.30.0",
|
|
32
|
-
"torch>=2.
|
|
33
|
-
"torchvision>=0.
|
|
34
|
-
"torchaudio>=2.
|
|
32
|
+
"torch>=2.1.0,<2.9.0",
|
|
33
|
+
"torchvision>=0.16.0,<1.0.0",
|
|
34
|
+
"torchaudio>=2.1.0,<2.9.0",
|
|
35
35
|
"librosa>=0.10.0",
|
|
36
36
|
"sounddevice>=0.4.6",
|
|
37
37
|
"soundfile>=0.12.1",
|
|
@@ -49,9 +49,9 @@ voice = [
|
|
|
49
49
|
# Text-to-Speech functionality
|
|
50
50
|
tts = [
|
|
51
51
|
"coqui-tts>=0.27.0,<0.30.0",
|
|
52
|
-
"torch>=2.
|
|
53
|
-
"torchvision>=0.
|
|
54
|
-
"torchaudio>=2.
|
|
52
|
+
"torch>=2.1.0,<2.9.0",
|
|
53
|
+
"torchvision>=0.16.0,<1.0.0",
|
|
54
|
+
"torchaudio>=2.1.0,<2.9.0",
|
|
55
55
|
"librosa>=0.10.0",
|
|
56
56
|
]
|
|
57
57
|
|
|
@@ -73,9 +73,9 @@ all = [
|
|
|
73
73
|
"PyAudio>=0.2.13",
|
|
74
74
|
"openai-whisper>=20230314",
|
|
75
75
|
"coqui-tts>=0.27.0,<0.30.0",
|
|
76
|
-
"torch>=2.
|
|
77
|
-
"torchvision>=0.
|
|
78
|
-
"torchaudio>=2.
|
|
76
|
+
"torch>=2.1.0,<2.9.0",
|
|
77
|
+
"torchvision>=0.16.0,<1.0.0",
|
|
78
|
+
"torchaudio>=2.1.0,<2.9.0",
|
|
79
79
|
"librosa>=0.10.0",
|
|
80
80
|
"soundfile>=0.12.1",
|
|
81
81
|
"flask>=2.0.0",
|
|
@@ -96,9 +96,9 @@ voice-full = [
|
|
|
96
96
|
"PyAudio>=0.2.13",
|
|
97
97
|
"openai-whisper>=20230314",
|
|
98
98
|
"coqui-tts>=0.27.0,<0.30.0",
|
|
99
|
-
"torch>=2.
|
|
100
|
-
"torchvision>=0.
|
|
101
|
-
"torchaudio>=2.
|
|
99
|
+
"torch>=2.1.0,<2.9.0",
|
|
100
|
+
"torchvision>=0.16.0,<1.0.0",
|
|
101
|
+
"torchaudio>=2.1.0,<2.9.0",
|
|
102
102
|
"librosa>=0.10.0",
|
|
103
103
|
"soundfile>=0.12.1",
|
|
104
104
|
"tiktoken>=0.6.0",
|
|
@@ -107,9 +107,9 @@ voice-full = [
|
|
|
107
107
|
# Core TTS-only (lightweight, no STT)
|
|
108
108
|
core-tts = [
|
|
109
109
|
"coqui-tts>=0.27.0,<0.30.0",
|
|
110
|
-
"torch>=2.
|
|
111
|
-
"torchvision>=0.
|
|
112
|
-
"torchaudio>=2.
|
|
110
|
+
"torch>=2.1.0,<2.9.0",
|
|
111
|
+
"torchvision>=0.16.0,<1.0.0",
|
|
112
|
+
"torchaudio>=2.1.0,<2.9.0",
|
|
113
113
|
"librosa>=0.10.0",
|
|
114
114
|
]
|
|
115
115
|
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Test script for the new audio lifecycle callbacks in AbstractVoice v0.5.1
|
|
4
|
+
|
|
5
|
+
This script demonstrates the precise timing of the new callback system.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import time
|
|
9
|
+
from abstractvoice import VoiceManager
|
|
10
|
+
|
|
11
|
+
def test_audio_callbacks():
|
|
12
|
+
"""Test the new audio lifecycle callbacks."""
|
|
13
|
+
|
|
14
|
+
print("🧪 Testing AbstractVoice v0.5.1 Audio Lifecycle Callbacks")
|
|
15
|
+
print("=" * 60)
|
|
16
|
+
|
|
17
|
+
# Callback tracking
|
|
18
|
+
events = []
|
|
19
|
+
|
|
20
|
+
def on_synthesis_start():
|
|
21
|
+
events.append(("synthesis_start", time.time()))
|
|
22
|
+
print("🔴 Synthesis started - thinking phase")
|
|
23
|
+
|
|
24
|
+
def on_audio_start():
|
|
25
|
+
events.append(("audio_start", time.time()))
|
|
26
|
+
print("🔵 Audio started - speaking phase")
|
|
27
|
+
|
|
28
|
+
def on_audio_pause():
|
|
29
|
+
events.append(("audio_pause", time.time()))
|
|
30
|
+
print("⏸️ Audio paused")
|
|
31
|
+
|
|
32
|
+
def on_audio_resume():
|
|
33
|
+
events.append(("audio_resume", time.time()))
|
|
34
|
+
print("▶️ Audio resumed")
|
|
35
|
+
|
|
36
|
+
def on_audio_end():
|
|
37
|
+
events.append(("audio_end", time.time()))
|
|
38
|
+
print("🟢 Audio ended - ready phase")
|
|
39
|
+
|
|
40
|
+
def on_synthesis_end():
|
|
41
|
+
events.append(("synthesis_end", time.time()))
|
|
42
|
+
print("✅ Synthesis complete")
|
|
43
|
+
|
|
44
|
+
# Initialize VoiceManager
|
|
45
|
+
print("Initializing VoiceManager...")
|
|
46
|
+
vm = VoiceManager(debug_mode=True)
|
|
47
|
+
|
|
48
|
+
# Wire up callbacks
|
|
49
|
+
vm.tts_engine.on_playback_start = on_synthesis_start
|
|
50
|
+
vm.tts_engine.on_playback_end = on_synthesis_end
|
|
51
|
+
vm.on_audio_start = on_audio_start
|
|
52
|
+
vm.on_audio_end = on_audio_end
|
|
53
|
+
vm.on_audio_pause = on_audio_pause
|
|
54
|
+
vm.on_audio_resume = on_audio_resume
|
|
55
|
+
|
|
56
|
+
print("\n📢 Starting TTS with callback monitoring...")
|
|
57
|
+
|
|
58
|
+
# Test basic playback
|
|
59
|
+
vm.speak("This is a test of the new audio lifecycle callbacks in AbstractVoice version zero point five point one.")
|
|
60
|
+
|
|
61
|
+
# Wait a bit, then pause
|
|
62
|
+
time.sleep(2)
|
|
63
|
+
print("\n⏸️ Pausing audio...")
|
|
64
|
+
success = vm.pause_speaking()
|
|
65
|
+
if success:
|
|
66
|
+
print("✓ Pause successful")
|
|
67
|
+
|
|
68
|
+
# Wait, then resume
|
|
69
|
+
time.sleep(2)
|
|
70
|
+
print("\n▶️ Resuming audio...")
|
|
71
|
+
success = vm.resume_speaking()
|
|
72
|
+
if success:
|
|
73
|
+
print("✓ Resume successful")
|
|
74
|
+
|
|
75
|
+
# Wait for completion
|
|
76
|
+
while vm.is_speaking() or vm.is_paused():
|
|
77
|
+
time.sleep(0.1)
|
|
78
|
+
|
|
79
|
+
# Analyze timing
|
|
80
|
+
print("\n📊 Callback Timing Analysis:")
|
|
81
|
+
print("-" * 40)
|
|
82
|
+
|
|
83
|
+
if len(events) >= 2:
|
|
84
|
+
start_time = events[0][1]
|
|
85
|
+
for event_name, event_time in events:
|
|
86
|
+
offset = (event_time - start_time) * 1000 # Convert to milliseconds
|
|
87
|
+
print(f"{event_name:15} | +{offset:6.1f}ms")
|
|
88
|
+
|
|
89
|
+
# Calculate key intervals
|
|
90
|
+
synthesis_to_audio = None
|
|
91
|
+
audio_duration = None
|
|
92
|
+
|
|
93
|
+
for i, (event_name, event_time) in enumerate(events):
|
|
94
|
+
if event_name == "synthesis_start":
|
|
95
|
+
synthesis_start = event_time
|
|
96
|
+
elif event_name == "audio_start" and 'synthesis_start' in locals():
|
|
97
|
+
synthesis_to_audio = (event_time - synthesis_start) * 1000
|
|
98
|
+
elif event_name == "audio_end":
|
|
99
|
+
audio_end = event_time
|
|
100
|
+
# Find corresponding audio_start
|
|
101
|
+
for j in range(i-1, -1, -1):
|
|
102
|
+
if events[j][0] == "audio_start":
|
|
103
|
+
audio_duration = (audio_end - events[j][1]) * 1000
|
|
104
|
+
break
|
|
105
|
+
|
|
106
|
+
print("-" * 40)
|
|
107
|
+
if synthesis_to_audio:
|
|
108
|
+
print(f"Synthesis → Audio: {synthesis_to_audio:.1f}ms")
|
|
109
|
+
if audio_duration:
|
|
110
|
+
print(f"Audio Duration: {audio_duration:.1f}ms")
|
|
111
|
+
|
|
112
|
+
# Cleanup
|
|
113
|
+
vm.cleanup()
|
|
114
|
+
print("\n✅ Test completed successfully!")
|
|
115
|
+
print("🎯 The new callbacks provide precise timing for visual status indicators.")
|
|
116
|
+
|
|
117
|
+
if __name__ == "__main__":
|
|
118
|
+
test_callbacks()
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Test script that simulates fresh install behavior
|
|
4
|
+
Tests language switching and voice selection with download requirements
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
def test_language_switching():
|
|
8
|
+
"""Test language switching with download behavior."""
|
|
9
|
+
from abstractvoice import VoiceManager
|
|
10
|
+
|
|
11
|
+
print("🧪 Testing Language Switching (Fresh Install Simulation)")
|
|
12
|
+
print("=" * 60)
|
|
13
|
+
|
|
14
|
+
vm = VoiceManager(debug_mode=True)
|
|
15
|
+
|
|
16
|
+
# Test languages
|
|
17
|
+
test_languages = [
|
|
18
|
+
('fr', 'Bonjour, ceci est un test.', 'French'),
|
|
19
|
+
('es', 'Hola, esta es una prueba.', 'Spanish'),
|
|
20
|
+
('de', 'Hallo, das ist ein Test.', 'German'),
|
|
21
|
+
('it', 'Ciao, questo è un test.', 'Italian'),
|
|
22
|
+
('en', 'Back to English.', 'English'),
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
for lang, text, name in test_languages:
|
|
26
|
+
print(f"\n🌍 Testing {name} ({lang})...")
|
|
27
|
+
success = vm.set_language(lang)
|
|
28
|
+
|
|
29
|
+
if success:
|
|
30
|
+
print(f"✅ {name}: Successfully loaded")
|
|
31
|
+
vm.speak(text, speed=1.0)
|
|
32
|
+
else:
|
|
33
|
+
print(f"❌ {name}: Failed to load")
|
|
34
|
+
print(f" Run: abstractvoice download-models --language {lang}")
|
|
35
|
+
|
|
36
|
+
vm.cleanup()
|
|
37
|
+
print("\n✅ Language switching test complete!")
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def test_voice_switching():
    """Select several voices in turn and speak a sample phrase with each.

    For every ``(language code, voice id, sample text)`` entry the function
    calls ``VoiceManager.set_voice``. When that call reports success the
    sample text is spoken; otherwise a failure line is printed.

    Progress is reported with ``print`` only; nothing is returned and no
    assertion is made. ``cleanup()`` is always called on the manager, even
    when a voice switch or a ``speak`` call raises.
    """
    from abstractvoice import VoiceManager

    print("\n🎭 Testing Voice Switching (Fresh Install Simulation)")
    print("=" * 60)

    # Constructed outside the try: if construction itself fails there is
    # no manager to clean up.
    vm = VoiceManager(debug_mode=True)

    # Test different voices
    test_voices = [
        ('en', 'tacotron2', 'This is Linda voice.'),
        ('en', 'jenny', 'This is Jenny voice.'),
        ('en', 'ek1', 'This is Edward voice.'),
        ('fr', 'css10_vits', 'Voix française.'),
    ]

    try:
        for lang, voice_id, text in test_voices:
            print(f"\n🎤 Testing {lang}.{voice_id}...")
            success = vm.set_voice(lang, voice_id)

            if success:
                print(f"✅ {voice_id}: Successfully loaded")
                vm.speak(text)
            else:
                print(f"❌ {voice_id}: Failed to load")
    finally:
        # Guarantee cleanup even if one of the calls above raised.
        vm.cleanup()

    print("\n✅ Voice switching test complete!")
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def test_cli_commands():
    """Feed the ``/language`` and ``/setvoice`` commands to a ``VoiceREPL``.

    Each command is announced with ``print`` and then passed to the REPL's
    ``onecmd`` method. The return value of ``onecmd`` is ignored and nothing
    is returned.
    """
    from abstractvoice.examples.cli_repl import VoiceREPL

    print("\n💻 Testing CLI Commands")
    print("=" * 60)

    repl = VoiceREPL()

    # Commands are issued in this order: language switch first, then voice.
    for command in ('/language fr', '/setvoice en.jenny'):
        print(f"\n📝 Testing {command}")
        repl.onecmd(command)

    print("\n✅ CLI commands test complete!")
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def test_download_status():
    """Print the readiness flag and a summary of the model status report.

    Calls ``is_ready()`` and prints its result, then parses the string
    returned by ``get_status()`` as JSON and prints the ``total_cached``,
    ``total_size_mb`` and ``ready_for_offline`` entries (with defaults of
    0, 0 and False when a key is absent). If the report has a
    ``cached_models`` entry, its first five items are listed followed by a
    count of any remaining ones. Nothing is returned.
    """
    from abstractvoice import is_ready, get_status
    import json

    print("\n📦 Testing Model Status")
    print("=" * 60)

    # Readiness flag
    print(f"System ready: {is_ready()}")

    # Detailed report, decoded from its JSON text form
    report = json.loads(get_status())
    print(f"Total cached models: {report.get('total_cached', 0)}")
    print(f"Cache size: {report.get('total_size_mb', 0):.1f} MB")
    print(f"Ready for offline: {report.get('ready_for_offline', False)}")

    # Preview of the cached model list (at most five entries)
    if 'cached_models' in report:
        models = report['cached_models']
        print("\nCached models:")
        for entry in models[:5]:
            print(f" • {entry}")
        remaining = len(models) - 5
        if remaining > 0:
            print(f" ... and {remaining} more")

    print("\n✅ Model status test complete!")
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def main():
    """Run the fresh-install simulation, optionally limited to one test.

    The first command-line argument selects a single test:
    ``--language``, ``--voice``, ``--cli`` or ``--status``. With no
    argument all four tests run (status, language, voice, CLI, in that
    order). Arguments after the first are ignored.

    An unrecognised option prints the usage line and returns immediately,
    without printing the completion summary, because no test was run.
    """
    import sys

    print("🚀 AbstractVoice Fresh Install Simulation")
    print("=" * 60)
    print("This tests how the system behaves on a fresh install")
    print("when models need to be downloaded.\n")

    # Check command line arguments
    if len(sys.argv) > 1:
        option = sys.argv[1]
        if option == '--language':
            test_language_switching()
        elif option == '--voice':
            test_voice_switching()
        elif option == '--cli':
            test_cli_commands()
        elif option == '--status':
            test_download_status()
        else:
            print("Usage: python test_fresh_install.py [--language|--voice|--cli|--status]")
            # Nothing was executed, so do not claim the tests completed.
            return
    else:
        # Run all tests
        test_download_status()
        test_language_switching()
        test_voice_switching()
        test_cli_commands()

    print("\n" + "=" * 60)
    print("🎉 All fresh install tests complete!")
    print("\nKey findings:")
    print(" • Language switching downloads models if needed")
    print(" • Voice switching downloads models if needed")
    print(" • Clear error messages when downloads fail")
    print(" • CLI commands properly handle missing models")
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
# Run the simulation only when this file is executed directly as a script.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Test script to verify voice switching works correctly.
|
|
4
|
+
This will help us validate the fixes to the voice switching system.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from abstractvoice import VoiceManager
|
|
8
|
+
import time
|
|
9
|
+
|
|
10
|
+
def test_voice_switching():
    """Exercise voice and language switching on a single ``VoiceManager``.

    Runs three phases against one manager instance:

    1. Three English voices are selected with ``set_voice`` and each speaks
       a short sentence, with a one-second pause afterwards.
    2. The languages ``en``, ``fr`` and ``de`` are selected with
       ``set_language`` and each speaks a short phrase.
    3. Two Italian voices are selected with ``set_voice`` and each speaks
       at speed 0.8.

    Any ``Exception`` raised during an individual attempt is caught and
    printed, so the remaining attempts still run. ``cleanup()`` is called
    at the end. Nothing is returned and no assertion is made.
    """
    print("🧪 Testing Voice Switching Fixes...")
    print("=" * 50)

    manager = VoiceManager(debug_mode=True)
    print("✅ VoiceManager initialized")

    # Test voices that should sound different
    voice_cases = [
        ("en", "tacotron2"),  # Female LJSpeech
        ("en", "jenny"),      # Different female
        ("en", "ek1"),        # Male British
    ]

    print(f"\n🎭 Testing {len(voice_cases)} different voices...")

    for number, (lang, voice) in enumerate(voice_cases, start=1):
        print(f"\n--- Test {number}: {lang}.{voice} ---")

        try:
            # Set the voice and ensure it actually loads the requested model
            if not manager.set_voice(lang, voice):
                print(f"❌ Failed to set voice {voice}")
                continue

            print(f"✅ Voice {voice} set successfully")

            # Test speech with this voice
            manager.speak(f"Hello, this is voice {voice}.", speed=1.0)

            # Small delay between voice tests
            time.sleep(1.0)
        except Exception as err:
            print(f"💥 Exception with voice {voice}: {err}")

    # Test language switching (should work without crashes)
    print("\n🌍 Testing language switching...")

    for lang in ("en", "fr", "de"):
        try:
            if not manager.set_language(lang):
                print(f"❌ Language {lang}: Failed")
                continue

            print(f"✅ Language {lang}: OK")
            manager.speak(f"Testing {lang}", speed=1.0)
            time.sleep(0.5)
        except Exception as err:
            print(f"💥 Language {lang}: Exception - {err}")

    # Test Italian (the crash-prone one)
    print("\n🇮🇹 Testing Italian models (crash safety)...")

    for voice in ("mai_male_vits", "mai_female_vits"):
        try:
            print(f"Testing it.{voice}...")
            if not manager.set_voice("it", voice):
                print(f"⚠️ Italian {voice}: Skipped (safety)")
                continue

            print(f"✅ Italian {voice}: Safe!")
            manager.speak("Ciao, test italiano.", speed=0.8)  # Slower for Italian
            time.sleep(0.5)
        except Exception as err:
            print(f"💥 Italian {voice}: Exception handled - {err}")

    manager.cleanup()
    print("\n🎉 Voice switching test complete!")
|
|
85
|
+
|
|
86
|
+
# Run the voice switching test only when this file is executed directly.
if __name__ == "__main__":
    test_voice_switching()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|