abstractvoice 0.3.1__py3-none-any.whl → 0.4.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractvoice/__init__.py +5 -2
- abstractvoice/examples/cli_repl.py +81 -44
- abstractvoice/examples/voice_cli.py +56 -20
- abstractvoice/instant_setup.py +83 -0
- abstractvoice/simple_model_manager.py +500 -0
- abstractvoice/tts/tts_engine.py +253 -23
- abstractvoice/voice_manager.py +176 -21
- {abstractvoice-0.3.1.dist-info → abstractvoice-0.4.6.dist-info}/METADATA +125 -19
- abstractvoice-0.4.6.dist-info/RECORD +23 -0
- abstractvoice-0.3.1.dist-info/RECORD +0 -21
- {abstractvoice-0.3.1.dist-info → abstractvoice-0.4.6.dist-info}/WHEEL +0 -0
- {abstractvoice-0.3.1.dist-info → abstractvoice-0.4.6.dist-info}/entry_points.txt +0 -0
- {abstractvoice-0.3.1.dist-info → abstractvoice-0.4.6.dist-info}/licenses/LICENSE +0 -0
- {abstractvoice-0.3.1.dist-info → abstractvoice-0.4.6.dist-info}/top_level.txt +0 -0
abstractvoice/tts/tts_engine.py
CHANGED
@@ -300,11 +300,24 @@ class NonBlockingAudioPlayer:
                print(f"Error stopping audio stream: {e}")
            finally:
                self.stream = None
-
+
        self.is_playing = False
        with self.pause_lock:
            self.is_paused = False
        self.clear_queue()
+
+    def cleanup(self):
+        """Cleanup resources to prevent memory conflicts."""
+        try:
+            self.stop_stream()
+            # Clear any remaining references
+            self.current_audio = None
+            self.playback_complete_callback = None
+            if self.debug_mode:
+                print(" > Audio player cleaned up")
+        except Exception as e:
+            if self.debug_mode:
+                print(f"Audio cleanup warning: {e}")

    def play_audio(self, audio_array):
        """Add audio to the playback queue."""
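The cleanup() added above follows a defensive teardown pattern: stop the stream first, drop the remaining references, and never let teardown itself raise. A minimal, self-contained sketch of that pattern (illustrative only, not the packaged class):

    class Player:
        """Toy stand-in for NonBlockingAudioPlayer, showing the teardown order."""
        def __init__(self, debug_mode=False):
            self.debug_mode = debug_mode
            self.stream = object()            # placeholder for the real audio stream
            self.current_audio = b"pcm data"
            self.playback_complete_callback = print

        def stop_stream(self):
            self.stream = None                # the real method also stops/closes the device

        def cleanup(self):
            try:
                self.stop_stream()
                self.current_audio = None     # drop references so they can be collected
                self.playback_complete_callback = None
            except Exception as e:
                if self.debug_mode:           # warn only; teardown never raises
                    print(f"Audio cleanup warning: {e}")

    Player(debug_mode=True).cleanup()         # safe even when nothing was ever played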
@@ -466,34 +479,21 @@ class TTSEngine:
        try:
            if self.debug_mode:
                print(f" > Loading TTS model: {model_name}")
-
-            # Try
+
+            # Try simple, effective initialization strategy
            try:
                TTS = _import_tts()
-
+                success, final_model = self._load_with_simple_fallback(TTS, model_name, debug_mode)
+                if not success:
+                    # If all fails, provide actionable guidance
+                    self._handle_model_load_failure(debug_mode)
+                elif self.debug_mode and final_model != model_name:
+                    print(f" > Loaded fallback model: {final_model}")
            except Exception as e:
                error_msg = str(e).lower()
                # Check if this is an espeak-related error
                if ("espeak" in error_msg or "phoneme" in error_msg):
-
-                    if not debug_mode:
-                        sys.stdout = sys.__stdout__
-
-                    print("\n" + "="*70)
-                    print("⚠️ VITS Model Requires espeak-ng (Not Found)")
-                    print("="*70)
-                    print("\nFor BEST voice quality, install espeak-ng:")
-                    print(" • macOS: brew install espeak-ng")
-                    print(" • Linux: sudo apt-get install espeak-ng")
-                    print(" • Windows: conda install espeak-ng (or see README)")
-                    print("\nFalling back to fast_pitch (lower quality, but works)")
-                    print("="*70 + "\n")
-
-                    if not debug_mode:
-                        sys.stdout = null_out
-
-                    # Fallback to fast_pitch
-                    self.tts = TTS(model_name="tts_models/en/ljspeech/fast_pitch", progress_bar=self.debug_mode)
+                    self._handle_espeak_fallback(debug_mode)
                else:
                    # Different error, re-raise
                    raise
@@ -520,6 +520,236 @@ class TTSEngine:
        # Pause/resume state
        self.pause_lock = threading.Lock()  # Thread-safe pause operations
        self.is_paused_state = False  # Explicit paused state tracking
+
+    def _load_with_simple_fallback(self, TTS, preferred_model: str, debug_mode: bool) -> tuple[bool, str]:
+        """Load TTS model with bulletproof compatibility-first strategy."""
+        from ..simple_model_manager import get_model_manager
+
+        model_manager = get_model_manager(debug_mode=debug_mode)
+
+        # Step 1: Check espeak availability for smart model filtering
+        espeak_available = self._check_espeak_available()
+        if debug_mode and not espeak_available:
+            print(" > espeak-ng not found, will skip VITS models")
+
+        # Step 2: Try the REQUESTED model first if it's cached
+        cached_models = model_manager.get_cached_models()
+        if cached_models and debug_mode:
+            print(f" > Found {len(cached_models)} cached models")
+
+        # FORCE USER'S CHOICE: Try the specifically requested model first
+        if preferred_model in cached_models:
+            try:
+                if debug_mode:
+                    print(f" > LOADING REQUESTED MODEL: {preferred_model}")
+
+                # Safety check for Italian VITS models that might crash
+                if "it/" in preferred_model and "vits" in preferred_model:
+                    if debug_mode:
+                        print(f" > Italian VITS model detected - using safe loading...")
+
+                self.tts = TTS(model_name=preferred_model, progress_bar=self.debug_mode)
+
+                if debug_mode:
+                    print(f" > ✅ SUCCESS: Loaded requested model: {preferred_model}")
+                return True, preferred_model
+
+            except Exception as e:
+                error_msg = str(e).lower()
+                if debug_mode:
+                    print(f" > ❌ Requested model failed: {e}")
+
+                # Special handling for Italian model crashes
+                if "it/" in preferred_model and ("segmentation" in error_msg or "crash" in error_msg):
+                    if debug_mode:
+                        print(f" > Italian model caused crash - marking as incompatible")
+                    # Force fallback for crashed Italian models
+                    pass
+
+                # Only fall back if the model actually failed to load, not due to dependencies
+
+        # Step 3: Only fall back to compatibility order if requested model failed
+        if debug_mode:
+            print(" > Requested model unavailable, trying fallback models...")
+
+        # Compatibility-first fallback order
+        fallback_models = [
+            "tts_models/en/ljspeech/tacotron2-DDC",  # Most reliable (Linda)
+            "tts_models/en/jenny/jenny",             # Different female speaker (Jenny)
+            "tts_models/en/ek1/tacotron2",           # Male British accent (Edward)
+            "tts_models/en/sam/tacotron-DDC",        # Different male voice (Sam)
+            "tts_models/en/ljspeech/fast_pitch",     # Lightweight alternative
+            "tts_models/en/ljspeech/glow-tts",       # Another alternative
+            "tts_models/en/vctk/vits",               # Multi-speaker (requires espeak)
+            "tts_models/en/ljspeech/vits",           # Premium (requires espeak)
+        ]
+
+        # Remove the preferred model from fallbacks to avoid duplicate attempts
+        fallback_models = [m for m in fallback_models if m != preferred_model]
+
+        # Try fallback models
+        for model in fallback_models:
+            if model in cached_models:
+                # Skip VITS models if no espeak
+                if "vits" in model and not espeak_available:
+                    if debug_mode:
+                        print(f" > Skipping {model} (requires espeak-ng)")
+                    continue
+
+                try:
+                    if debug_mode:
+                        print(f" > Trying fallback model: {model}")
+                    self.tts = TTS(model_name=model, progress_bar=self.debug_mode)
+                    if debug_mode:
+                        print(f" > ✅ Successfully loaded fallback: {model}")
+                    return True, model
+                except Exception as e:
+                    if debug_mode:
+                        print(f" > ❌ Fallback {model} failed: {e}")
+
+        # Step 4: If no cached models work, try downloading requested model first
+        if debug_mode:
+            print(" > No cached models worked, attempting downloads...")
+
+        # Try downloading the requested model first
+        if "vits" not in preferred_model or espeak_available:
+            try:
+                if debug_mode:
+                    print(f" > Downloading requested model: {preferred_model}...")
+                success = model_manager.download_model(preferred_model)
+                if success:
+                    self.tts = TTS(model_name=preferred_model, progress_bar=self.debug_mode)
+                    if debug_mode:
+                        print(f" > ✅ Downloaded and loaded requested: {preferred_model}")
+                    return True, preferred_model
+                elif debug_mode:
+                    print(f" > ❌ Download failed for requested model: {preferred_model}")
+            except Exception as e:
+                if debug_mode:
+                    print(f" > ❌ Failed to download/load requested model: {e}")
+
+        # Step 5: If requested model download failed, try fallback downloads
+        for model in fallback_models:
+            # Skip VITS models if no espeak
+            if "vits" in model and not espeak_available:
+                continue
+
+            try:
+                if debug_mode:
+                    print(f" > Downloading fallback: {model}...")
+
+                # First try to download
+                success = model_manager.download_model(model)
+                if success:
+                    # Then try to load
+                    self.tts = TTS(model_name=model, progress_bar=self.debug_mode)
+                    if debug_mode:
+                        print(f" > ✅ Downloaded and loaded fallback: {model}")
+                    return True, model
+                elif debug_mode:
+                    print(f" > ❌ Download failed for {model}")
+
+            except Exception as e:
+                if debug_mode:
+                    print(f" > ❌ Failed to load {model}: {e}")
+
+        return False, None
+
+    def _check_espeak_available(self) -> bool:
+        """Check if espeak-ng is available on the system."""
+        import subprocess
+        try:
+            subprocess.run(['espeak-ng', '--version'],
+                           capture_output=True, check=True, timeout=5)
+            return True
+        except (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired):
+            # Try alternative espeak command
+            try:
+                subprocess.run(['espeak', '--version'],
+                               capture_output=True, check=True, timeout=5)
+                return True
+            except:
+                return False
+
+    def _handle_espeak_fallback(self, debug_mode: bool):
+        """Handle espeak-related errors with fallback to non-phoneme models."""
+        # Restore stdout to show user-friendly message
+        if not debug_mode:
+            sys.stdout = sys.__stdout__
+
+        print("\n" + "="*70)
+        print("⚠️ VITS Model Requires espeak-ng (Not Found)")
+        print("="*70)
+        print("\nFor BEST voice quality, install espeak-ng:")
+        print(" • macOS: brew install espeak-ng")
+        print(" • Linux: sudo apt-get install espeak-ng")
+        print(" • Windows: conda install espeak-ng (or see README)")
+        print("\nFalling back to compatible models (no espeak dependency)")
+        print("="*70 + "\n")
+
+        if not debug_mode:
+            import os
+            null_out = open(os.devnull, 'w')
+            sys.stdout = null_out
+
+        # Try non-phoneme models that don't require espeak (compatibility-first order)
+        from TTS.api import TTS
+        fallback_models = [
+            "tts_models/en/ljspeech/tacotron2-DDC",  # Most reliable (Linda)
+            "tts_models/en/jenny/jenny",             # Different female speaker (Jenny)
+            "tts_models/en/ek1/tacotron2",           # Male British accent (Edward)
+            "tts_models/en/sam/tacotron-DDC",        # Different male voice (Sam)
+            "tts_models/en/ljspeech/fast_pitch",     # Lightweight alternative
+            "tts_models/en/ljspeech/glow-tts"        # Another alternative
+        ]
+
+        tts_loaded = False
+        for fallback_model in fallback_models:
+            try:
+                if debug_mode:
+                    print(f"Trying fallback model: {fallback_model}")
+                self.tts = TTS(model_name=fallback_model, progress_bar=self.debug_mode)
+                tts_loaded = True
+                break
+            except Exception as fallback_error:
+                if debug_mode:
+                    print(f"Fallback {fallback_model} failed: {fallback_error}")
+                continue
+
+        if not tts_loaded:
+            self._handle_model_load_failure(debug_mode)
+
+    def _handle_model_load_failure(self, debug_mode: bool):
+        """Handle complete model loading failure with actionable guidance."""
+        # Restore stdout to show user-friendly message
+        if not debug_mode:
+            sys.stdout = sys.__stdout__
+
+        print("\n" + "="*70)
+        print("❌ TTS Model Loading Failed")
+        print("="*70)
+        print("\nNo TTS models could be loaded (offline or online).")
+        print("\nQuick fixes:")
+        print(" 1. Download essential models:")
+        print("    abstractvoice download-models")
+        print(" 2. Check internet connectivity")
+        print(" 3. Clear corrupted cache:")
+        print("    rm -rf ~/.cache/tts ~/.local/share/tts")
+        print(" 4. Reinstall TTS:")
+        print("    pip install --force-reinstall coqui-tts")
+        print(" 5. Use text-only mode:")
+        print("    abstractvoice --no-tts")
+        print("="*70)
+
+        raise RuntimeError(
+            "❌ Failed to load any TTS model.\n"
+            "This typically means:\n"
+            " • No models cached locally AND no internet connection\n"
+            " • Corrupted model cache\n"
+            " • Insufficient disk space\n"
+            " • Network firewall blocking downloads\n\n"
+            "Run 'abstractvoice download-models' when you have internet access."
+        )

    def _on_playback_complete(self):
        """Callback when audio playback completes."""
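The loader above keys several decisions off whether espeak-ng is installed (VITS models are skipped without it). A standalone sketch of that probe, mirroring _check_espeak_available from this diff rather than calling the packaged code:

    import subprocess

    def espeak_available() -> bool:
        """Return True if either the espeak-ng or espeak binary answers --version."""
        for cmd in ("espeak-ng", "espeak"):
            try:
                subprocess.run([cmd, "--version"],
                               capture_output=True, check=True, timeout=5)
                return True
            except (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired):
                continue
        return False

    if __name__ == "__main__":
        if espeak_available():
            print("espeak found: VITS models are usable")
        else:
            print("espeak missing: the loader will skip VITS models")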
abstractvoice/voice_manager.py
CHANGED
@@ -38,8 +38,8 @@ class VoiceManager:
    # Smart language configuration - high quality stable defaults
    LANGUAGES = {
        'en': {
-            'default': 'tts_models/en/ljspeech/
-            'premium': 'tts_models/en/ljspeech/vits', #
+            'default': 'tts_models/en/ljspeech/tacotron2-DDC',  # Reliable, compatible voice
+            'premium': 'tts_models/en/ljspeech/vits',  # High quality (requires espeak)
            'name': 'English'
        },
        'fr': {
@@ -70,15 +70,39 @@ class VoiceManager:
    # Complete voice catalog with metadata
    VOICE_CATALOG = {
        'en': {
-            '
-                'model': 'tts_models/en/ljspeech/
-                'quality': '
+            'tacotron2': {
+                'model': 'tts_models/en/ljspeech/tacotron2-DDC',
+                'quality': 'good',
                'gender': 'female',
                'accent': 'US English',
                'license': 'Open source (LJSpeech)',
-                'requires': '
+                'requires': 'none'
+            },
+            'jenny': {
+                'model': 'tts_models/en/jenny/jenny',
+                'quality': 'excellent',
+                'gender': 'female',
+                'accent': 'US English',
+                'license': 'Open source (Jenny)',
+                'requires': 'none'
+            },
+            'ek1': {
+                'model': 'tts_models/en/ek1/tacotron2',
+                'quality': 'excellent',
+                'gender': 'male',
+                'accent': 'British English',
+                'license': 'Open source (EK1)',
+                'requires': 'none'
            },
-            '
+            'sam': {
+                'model': 'tts_models/en/sam/tacotron-DDC',
+                'quality': 'good',
+                'gender': 'male',
+                'accent': 'US English',
+                'license': 'Open source (Sam)',
+                'requires': 'none'
+            },
+            'fast_pitch': {
                'model': 'tts_models/en/ljspeech/fast_pitch',
                'quality': 'good',
                'gender': 'female',
@@ -86,12 +110,12 @@ class VoiceManager:
                'license': 'Open source (LJSpeech)',
                'requires': 'none'
            },
-            '
-                'model': 'tts_models/en/
+            'vits': {
+                'model': 'tts_models/en/ljspeech/vits',
                'quality': 'premium',
-                'gender': '
-                'accent': '
-                'license': 'Open source (
+                'gender': 'female',
+                'accent': 'US English',
+                'license': 'Open source (LJSpeech)',
                'requires': 'espeak-ng'
            }
        },
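With the expanded VOICE_CATALOG above, every entry now records what it requires, so callers can pick a voice that works without espeak-ng. A small sketch, assuming VoiceManager is importable from abstractvoice.voice_manager as the file list suggests:

    from abstractvoice.voice_manager import VoiceManager  # import path assumed

    def voices_without_espeak(lang="en"):
        """Voice IDs from VOICE_CATALOG that declare no extra system dependency."""
        catalog = VoiceManager.VOICE_CATALOG.get(lang, {})
        return {vid: info["model"] for vid, info in catalog.items()
                if info.get("requires") == "none"}

    print(voices_without_espeak())  # expected: tacotron2, jenny, ek1, sam, fast_pitch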
@@ -191,6 +215,20 @@ class VoiceManager:
            lang_name = self.LANGUAGES[self.language]['name']
            print(f"🌍 Using {lang_name} voice: {tts_model}")

+        # Initialize TTS engine with instant setup for new users
+        from .instant_setup import ensure_instant_tts, get_instant_model, is_model_cached
+
+        # If using default VITS model but it's not cached, use instant setup
+        if tts_model == "tts_models/en/ljspeech/vits" and not is_model_cached(tts_model):
+            if debug_mode:
+                print("🚀 First-time setup: ensuring instant TTS availability...")
+
+            # Try instant setup with lightweight model
+            if ensure_instant_tts():
+                tts_model = get_instant_model()  # Use fast_pitch instead
+                if debug_mode:
+                    print(f"✅ Using essential model: {tts_model}")
+
        # Initialize TTS engine using lazy import
        TTSEngine = _import_tts_engine()
        self.tts_engine = TTSEngine(
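The block above wires the new instant_setup helpers into VoiceManager start-up: if the premium VITS default is not cached yet, a lightweight model is used so first-run speech works immediately. A sketch of that decision in isolation, using the same three helpers the diff imports (the abstractvoice.instant_setup module path is taken from the file list):

    from abstractvoice.instant_setup import ensure_instant_tts, get_instant_model, is_model_cached

    requested = "tts_models/en/ljspeech/vits"
    model = requested
    if not is_model_cached(requested) and ensure_instant_tts():
        model = get_instant_model()  # lightweight model, ready without a long download
    print(f"Model to load: {model}")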
@@ -391,32 +429,68 @@ class VoiceManager:
        return self.speed

    def set_tts_model(self, model_name):
-        """Change the TTS model.
-
+        """Change the TTS model safely without memory conflicts.
+
        Available models (all pure Python, cross-platform):
        - "tts_models/en/ljspeech/fast_pitch" (default, recommended)
        - "tts_models/en/ljspeech/glow-tts" (alternative)
        - "tts_models/en/ljspeech/tacotron2-DDC" (legacy)
-
+
        Args:
            model_name: TTS model name to use
-
+
        Returns:
            True if successful
-
+
        Example:
            vm.set_tts_model("tts_models/en/ljspeech/glow-tts")
        """
        # Stop any current speech
        self.stop_speaking()
-
+
+        # CRITICAL: Crash-safe cleanup of old TTS engine
+        if hasattr(self, 'tts_engine') and self.tts_engine:
+            try:
+                # Stop all audio and cleanup player
+                if hasattr(self.tts_engine, 'audio_player') and self.tts_engine.audio_player:
+                    # Try stop method if available
+                    if hasattr(self.tts_engine.audio_player, 'stop'):
+                        self.tts_engine.audio_player.stop()
+                    self.tts_engine.audio_player.cleanup()
+
+                # Force cleanup of TTS object and release GPU memory
+                if hasattr(self.tts_engine, 'tts') and self.tts_engine.tts:
+                    # Clear CUDA cache if using GPU
+                    try:
+                        import torch
+                        if torch.cuda.is_available():
+                            torch.cuda.empty_cache()
+                    except:
+                        pass
+
+                    del self.tts_engine.tts
+
+                # Clear the engine itself
+                del self.tts_engine
+                self.tts_engine = None
+
+                # Force garbage collection to prevent memory leaks
+                import gc
+                gc.collect()
+
+            except Exception as e:
+                if self.debug_mode:
+                    print(f"Warning: TTS cleanup issue: {e}")
+                # Force clear even if cleanup failed
+                self.tts_engine = None
+
        # Reinitialize TTS engine with new model using lazy import
        TTSEngine = _import_tts_engine()
        self.tts_engine = TTSEngine(
            model_name=model_name,
            debug_mode=self.debug_mode
        )
-
+
        # Restore callbacks
        self.tts_engine.on_playback_start = self._on_tts_start
        self.tts_engine.on_playback_end = self._on_tts_end
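With the cleanup added above, set_tts_model() can now be called repeatedly at runtime: the old engine is stopped, its audio player cleaned up, the CUDA cache cleared when present, and the engine rebuilt. A usage sketch (the top-level VoiceManager import is assumed; speak() and cleanup() appear elsewhere in this diff):

    from abstractvoice import VoiceManager  # top-level export assumed

    vm = VoiceManager()
    vm.speak("Hello from the default voice.")
    vm.set_tts_model("tts_models/en/jenny/jenny")  # tears down the old engine, loads Jenny
    vm.speak("Hello from Jenny.")
    vm.cleanup()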
@@ -823,14 +897,95 @@ class VoiceManager:
            return self.voice_recognizer.change_vad_aggressiveness(aggressiveness)
        return False

+    # ===== SIMPLE MODEL MANAGEMENT METHODS =====
+    # Clean, simple APIs for both CLI and third-party applications
+
+    def list_available_models(self, language: str = None) -> dict:
+        """Get available models with metadata.
+
+        Args:
+            language: Optional language filter
+
+        Returns:
+            dict: Model information with cache status
+
+        Example:
+            >>> vm = VoiceManager()
+            >>> models = vm.list_available_models('en')
+            >>> print(json.dumps(models, indent=2))
+        """
+        from .simple_model_manager import get_model_manager
+        manager = get_model_manager(self.debug_mode)
+        return manager.list_available_models(language)
+
+    def download_model(self, model_name: str, progress_callback=None) -> bool:
+        """Download a specific model.
+
+        Args:
+            model_name: Model name or voice ID (e.g., 'en.vits' or full model path)
+            progress_callback: Optional function(model_name, success)
+
+        Returns:
+            bool: True if successful
+
+        Example:
+            >>> vm = VoiceManager()
+            >>> vm.download_model('en.vits')  # or 'tts_models/en/ljspeech/vits'
+        """
+        from .simple_model_manager import download_model
+        return download_model(model_name, progress_callback)
+
+    def is_model_ready(self) -> bool:
+        """Check if essential model is ready for immediate use.
+
+        Returns:
+            bool: True if can speak immediately without download
+        """
+        from .simple_model_manager import is_ready
+        return is_ready()
+
+    def ensure_ready(self, auto_download: bool = True) -> bool:
+        """Ensure TTS is ready for immediate use.
+
+        Args:
+            auto_download: Whether to download essential model if needed
+
+        Returns:
+            bool: True if TTS is ready
+
+        Example:
+            >>> vm = VoiceManager()
+            >>> if vm.ensure_ready():
+            ...     vm.speak("Ready to go!")
+        """
+        if self.is_model_ready():
+            return True
+
+        if not auto_download:
+            return False
+
+        from .simple_model_manager import get_model_manager
+        manager = get_model_manager(self.debug_mode)
+        return manager.download_essential_model()
+
+    def get_cache_status(self) -> dict:
+        """Get model cache status.
+
+        Returns:
+            dict: Cache information including total models, sizes, etc.
+        """
+        from .simple_model_manager import get_model_manager
+        manager = get_model_manager(self.debug_mode)
+        return manager.get_status()
+
    def cleanup(self):
        """Clean up resources.
-
+
        Returns:
            True if cleanup successful
        """
        if self.voice_recognizer:
            self.voice_recognizer.stop()
-
+
        self.stop_speaking()
        return True