abstractvoice 0.3.1-py3-none-any.whl → 0.4.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -300,11 +300,24 @@ class NonBlockingAudioPlayer:
                 print(f"Error stopping audio stream: {e}")
             finally:
                 self.stream = None
-
+
         self.is_playing = False
         with self.pause_lock:
             self.is_paused = False
         self.clear_queue()
+
+    def cleanup(self):
+        """Cleanup resources to prevent memory conflicts."""
+        try:
+            self.stop_stream()
+            # Clear any remaining references
+            self.current_audio = None
+            self.playback_complete_callback = None
+            if self.debug_mode:
+                print(" > Audio player cleaned up")
+        except Exception as e:
+            if self.debug_mode:
+                print(f"Audio cleanup warning: {e}")
 
     def play_audio(self, audio_array):
         """Add audio to the playback queue."""
@@ -466,34 +479,21 @@ class TTSEngine:
         try:
             if self.debug_mode:
                 print(f" > Loading TTS model: {model_name}")
-
-            # Try to initialize TTS using lazy import
+
+            # Try simple, effective initialization strategy
             try:
                 TTS = _import_tts()
-                self.tts = TTS(model_name=model_name, progress_bar=self.debug_mode)
+                success, final_model = self._load_with_simple_fallback(TTS, model_name, debug_mode)
+                if not success:
+                    # If all fails, provide actionable guidance
+                    self._handle_model_load_failure(debug_mode)
+                elif self.debug_mode and final_model != model_name:
+                    print(f" > Loaded fallback model: {final_model}")
             except Exception as e:
                 error_msg = str(e).lower()
                 # Check if this is an espeak-related error
                 if ("espeak" in error_msg or "phoneme" in error_msg):
-                    # Restore stdout to show user-friendly message
-                    if not debug_mode:
-                        sys.stdout = sys.__stdout__
-
-                    print("\n" + "="*70)
-                    print("⚠️ VITS Model Requires espeak-ng (Not Found)")
-                    print("="*70)
-                    print("\nFor BEST voice quality, install espeak-ng:")
-                    print(" • macOS: brew install espeak-ng")
-                    print(" • Linux: sudo apt-get install espeak-ng")
-                    print(" • Windows: conda install espeak-ng (or see README)")
-                    print("\nFalling back to fast_pitch (lower quality, but works)")
-                    print("="*70 + "\n")
-
-                    if not debug_mode:
-                        sys.stdout = null_out
-
-                    # Fallback to fast_pitch
-                    self.tts = TTS(model_name="tts_models/en/ljspeech/fast_pitch", progress_bar=self.debug_mode)
+                    self._handle_espeak_fallback(debug_mode)
                 else:
                     # Different error, re-raise
                     raise
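Usage note (not part of the diff): when every strategy fails, the new path surfaces to callers as a RuntimeError rather than a silent pass. A hedged sketch of defensive construction; the keyword arguments mirror the TTSEngine call shown later in VoiceManager.set_tts_model, and the module path is an assumption:

from abstractvoice.tts.tts_engine import TTSEngine  # module path assumed

try:
    engine = TTSEngine(
        model_name="tts_models/en/ljspeech/tacotron2-DDC",
        debug_mode=True,
    )
except RuntimeError as exc:
    # Raised by _handle_model_load_failure when no model loads, offline or online
    print(f"TTS unavailable, continuing in text-only mode: {exc}")
    engine = None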
@@ -520,6 +520,236 @@ class TTSEngine:
         # Pause/resume state
         self.pause_lock = threading.Lock() # Thread-safe pause operations
         self.is_paused_state = False # Explicit paused state tracking
+
+    def _load_with_simple_fallback(self, TTS, preferred_model: str, debug_mode: bool) -> tuple[bool, str]:
+        """Load TTS model with bulletproof compatibility-first strategy."""
+        from ..simple_model_manager import get_model_manager
+
+        model_manager = get_model_manager(debug_mode=debug_mode)
+
+        # Step 1: Check espeak availability for smart model filtering
+        espeak_available = self._check_espeak_available()
+        if debug_mode and not espeak_available:
+            print(" > espeak-ng not found, will skip VITS models")
+
+        # Step 2: Try the REQUESTED model first if it's cached
+        cached_models = model_manager.get_cached_models()
+        if cached_models and debug_mode:
+            print(f" > Found {len(cached_models)} cached models")
+
+        # FORCE USER'S CHOICE: Try the specifically requested model first
+        if preferred_model in cached_models:
+            try:
+                if debug_mode:
+                    print(f" > LOADING REQUESTED MODEL: {preferred_model}")
+
+                # Safety check for Italian VITS models that might crash
+                if "it/" in preferred_model and "vits" in preferred_model:
+                    if debug_mode:
+                        print(f" > Italian VITS model detected - using safe loading...")
+
+                self.tts = TTS(model_name=preferred_model, progress_bar=self.debug_mode)
+
+                if debug_mode:
+                    print(f" > ✅ SUCCESS: Loaded requested model: {preferred_model}")
+                return True, preferred_model
+
+            except Exception as e:
+                error_msg = str(e).lower()
+                if debug_mode:
+                    print(f" > ❌ Requested model failed: {e}")
+
+                # Special handling for Italian model crashes
+                if "it/" in preferred_model and ("segmentation" in error_msg or "crash" in error_msg):
+                    if debug_mode:
+                        print(f" > Italian model caused crash - marking as incompatible")
+                    # Force fallback for crashed Italian models
+                    pass
+
+                # Only fall back if the model actually failed to load, not due to dependencies
+
+        # Step 3: Only fall back to compatibility order if requested model failed
+        if debug_mode:
+            print(" > Requested model unavailable, trying fallback models...")
+
+        # Compatibility-first fallback order
+        fallback_models = [
+            "tts_models/en/ljspeech/tacotron2-DDC", # Most reliable (Linda)
+            "tts_models/en/jenny/jenny", # Different female speaker (Jenny)
+            "tts_models/en/ek1/tacotron2", # Male British accent (Edward)
+            "tts_models/en/sam/tacotron-DDC", # Different male voice (Sam)
+            "tts_models/en/ljspeech/fast_pitch", # Lightweight alternative
+            "tts_models/en/ljspeech/glow-tts", # Another alternative
+            "tts_models/en/vctk/vits", # Multi-speaker (requires espeak)
+            "tts_models/en/ljspeech/vits", # Premium (requires espeak)
+        ]
+
+        # Remove the preferred model from fallbacks to avoid duplicate attempts
+        fallback_models = [m for m in fallback_models if m != preferred_model]
+
+        # Try fallback models
+        for model in fallback_models:
+            if model in cached_models:
+                # Skip VITS models if no espeak
+                if "vits" in model and not espeak_available:
+                    if debug_mode:
+                        print(f" > Skipping {model} (requires espeak-ng)")
+                    continue
+
+                try:
+                    if debug_mode:
+                        print(f" > Trying fallback model: {model}")
+                    self.tts = TTS(model_name=model, progress_bar=self.debug_mode)
+                    if debug_mode:
+                        print(f" > ✅ Successfully loaded fallback: {model}")
+                    return True, model
+                except Exception as e:
+                    if debug_mode:
+                        print(f" > ❌ Fallback {model} failed: {e}")
+
+        # Step 4: If no cached models work, try downloading requested model first
+        if debug_mode:
+            print(" > No cached models worked, attempting downloads...")
+
+        # Try downloading the requested model first
+        if "vits" not in preferred_model or espeak_available:
+            try:
+                if debug_mode:
+                    print(f" > Downloading requested model: {preferred_model}...")
+                success = model_manager.download_model(preferred_model)
+                if success:
+                    self.tts = TTS(model_name=preferred_model, progress_bar=self.debug_mode)
+                    if debug_mode:
+                        print(f" > ✅ Downloaded and loaded requested: {preferred_model}")
+                    return True, preferred_model
+                elif debug_mode:
+                    print(f" > ❌ Download failed for requested model: {preferred_model}")
+            except Exception as e:
+                if debug_mode:
+                    print(f" > ❌ Failed to download/load requested model: {e}")
+
+        # Step 5: If requested model download failed, try fallback downloads
+        for model in fallback_models:
+            # Skip VITS models if no espeak
+            if "vits" in model and not espeak_available:
+                continue
+
+            try:
+                if debug_mode:
+                    print(f" > Downloading fallback: {model}...")
+
+                # First try to download
+                success = model_manager.download_model(model)
+                if success:
+                    # Then try to load
+                    self.tts = TTS(model_name=model, progress_bar=self.debug_mode)
+                    if debug_mode:
+                        print(f" > ✅ Downloaded and loaded fallback: {model}")
+                    return True, model
+                elif debug_mode:
+                    print(f" > ❌ Download failed for {model}")
+
+            except Exception as e:
+                if debug_mode:
+                    print(f" > ❌ Failed to load {model}: {e}")
+
+        return False, None
+
+    def _check_espeak_available(self) -> bool:
+        """Check if espeak-ng is available on the system."""
+        import subprocess
+        try:
+            subprocess.run(['espeak-ng', '--version'],
+                           capture_output=True, check=True, timeout=5)
+            return True
+        except (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired):
+            # Try alternative espeak command
+            try:
+                subprocess.run(['espeak', '--version'],
+                               capture_output=True, check=True, timeout=5)
+                return True
+            except:
+                return False
+
+    def _handle_espeak_fallback(self, debug_mode: bool):
+        """Handle espeak-related errors with fallback to non-phoneme models."""
+        # Restore stdout to show user-friendly message
+        if not debug_mode:
+            sys.stdout = sys.__stdout__
+
+        print("\n" + "="*70)
+        print("⚠️ VITS Model Requires espeak-ng (Not Found)")
+        print("="*70)
+        print("\nFor BEST voice quality, install espeak-ng:")
+        print(" • macOS: brew install espeak-ng")
+        print(" • Linux: sudo apt-get install espeak-ng")
+        print(" • Windows: conda install espeak-ng (or see README)")
+        print("\nFalling back to compatible models (no espeak dependency)")
+        print("="*70 + "\n")
+
+        if not debug_mode:
+            import os
+            null_out = open(os.devnull, 'w')
+            sys.stdout = null_out
+
+        # Try non-phoneme models that don't require espeak (compatibility-first order)
+        from TTS.api import TTS
+        fallback_models = [
+            "tts_models/en/ljspeech/tacotron2-DDC", # Most reliable (Linda)
+            "tts_models/en/jenny/jenny", # Different female speaker (Jenny)
+            "tts_models/en/ek1/tacotron2", # Male British accent (Edward)
+            "tts_models/en/sam/tacotron-DDC", # Different male voice (Sam)
+            "tts_models/en/ljspeech/fast_pitch", # Lightweight alternative
+            "tts_models/en/ljspeech/glow-tts" # Another alternative
+        ]
+
+        tts_loaded = False
+        for fallback_model in fallback_models:
+            try:
+                if debug_mode:
+                    print(f"Trying fallback model: {fallback_model}")
+                self.tts = TTS(model_name=fallback_model, progress_bar=self.debug_mode)
+                tts_loaded = True
+                break
+            except Exception as fallback_error:
+                if debug_mode:
+                    print(f"Fallback {fallback_model} failed: {fallback_error}")
+                continue
+
+        if not tts_loaded:
+            self._handle_model_load_failure(debug_mode)
+
+    def _handle_model_load_failure(self, debug_mode: bool):
+        """Handle complete model loading failure with actionable guidance."""
+        # Restore stdout to show user-friendly message
+        if not debug_mode:
+            sys.stdout = sys.__stdout__
+
+        print("\n" + "="*70)
+        print("❌ TTS Model Loading Failed")
+        print("="*70)
+        print("\nNo TTS models could be loaded (offline or online).")
+        print("\nQuick fixes:")
+        print(" 1. Download essential models:")
+        print(" abstractvoice download-models")
+        print(" 2. Check internet connectivity")
+        print(" 3. Clear corrupted cache:")
+        print(" rm -rf ~/.cache/tts ~/.local/share/tts")
+        print(" 4. Reinstall TTS:")
+        print(" pip install --force-reinstall coqui-tts")
+        print(" 5. Use text-only mode:")
+        print(" abstractvoice --no-tts")
+        print("="*70)
+
+        raise RuntimeError(
+            "❌ Failed to load any TTS model.\n"
+            "This typically means:\n"
+            " • No models cached locally AND no internet connection\n"
+            " • Corrupted model cache\n"
+            " • Insufficient disk space\n"
+            " • Network firewall blocking downloads\n\n"
+            "Run 'abstractvoice download-models' when you have internet access."
+        )
 
     def _on_playback_complete(self):
         """Callback when audio playback completes."""
@@ -38,8 +38,8 @@ class VoiceManager:
     # Smart language configuration - high quality stable defaults
     LANGUAGES = {
         'en': {
-            'default': 'tts_models/en/ljspeech/vits', # High quality premium voice
-            'premium': 'tts_models/en/ljspeech/vits', # Use same stable model
+            'default': 'tts_models/en/ljspeech/tacotron2-DDC', # Reliable, compatible voice
+            'premium': 'tts_models/en/ljspeech/vits', # High quality (requires espeak)
             'name': 'English'
         },
         'fr': {
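Usage note (not part of the diff): the default/premium split lets callers opt into the VITS voice only when espeak-ng is present. A hedged sketch of one way to choose between them; the top-level VoiceManager export is an assumption, and shutil.which stands in for the package's own espeak check:

import shutil

from abstractvoice import VoiceManager  # top-level export assumed

lang_cfg = VoiceManager.LANGUAGES['en']
tier = 'premium' if shutil.which('espeak-ng') else 'default'
print(f"Selecting {lang_cfg['name']} model: {lang_cfg[tier]}")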
@@ -70,15 +70,39 @@ class VoiceManager:
     # Complete voice catalog with metadata
     VOICE_CATALOG = {
         'en': {
-            'vits_premium': {
-                'model': 'tts_models/en/ljspeech/vits',
-                'quality': 'premium',
+            'tacotron2': {
+                'model': 'tts_models/en/ljspeech/tacotron2-DDC',
+                'quality': 'good',
                 'gender': 'female',
                 'accent': 'US English',
                 'license': 'Open source (LJSpeech)',
-                'requires': 'espeak-ng'
+                'requires': 'none'
+            },
+            'jenny': {
+                'model': 'tts_models/en/jenny/jenny',
+                'quality': 'excellent',
+                'gender': 'female',
+                'accent': 'US English',
+                'license': 'Open source (Jenny)',
+                'requires': 'none'
+            },
+            'ek1': {
+                'model': 'tts_models/en/ek1/tacotron2',
+                'quality': 'excellent',
+                'gender': 'male',
+                'accent': 'British English',
+                'license': 'Open source (EK1)',
+                'requires': 'none'
             },
-            'fast_pitch_reliable': {
+            'sam': {
+                'model': 'tts_models/en/sam/tacotron-DDC',
+                'quality': 'good',
+                'gender': 'male',
+                'accent': 'US English',
+                'license': 'Open source (Sam)',
+                'requires': 'none'
+            },
+            'fast_pitch': {
                 'model': 'tts_models/en/ljspeech/fast_pitch',
                 'quality': 'good',
                 'gender': 'female',
@@ -86,12 +110,12 @@ class VoiceManager:
                 'license': 'Open source (LJSpeech)',
                 'requires': 'none'
             },
-            'vctk_multi': {
-                'model': 'tts_models/en/vctk/vits',
+            'vits': {
+                'model': 'tts_models/en/ljspeech/vits',
                 'quality': 'premium',
-                'gender': 'multiple',
-                'accent': 'British English',
-                'license': 'Open source (VCTK)',
+                'gender': 'female',
+                'accent': 'US English',
+                'license': 'Open source (LJSpeech)',
                 'requires': 'espeak-ng'
             }
         },
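Usage note (not part of the diff): since every catalog entry now carries a 'requires' field, dependency-free voices can be selected programmatically. A brief sketch, again assuming VOICE_CATALOG is reachable as a class attribute via a top-level VoiceManager export:

from abstractvoice import VoiceManager  # top-level export assumed

no_dep_voices = {
    voice_id: info['model']
    for voice_id, info in VoiceManager.VOICE_CATALOG['en'].items()
    if info['requires'] == 'none'
}
print(no_dep_voices)  # expected keys: tacotron2, jenny, ek1, sam, fast_pitch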
@@ -191,6 +215,20 @@ class VoiceManager:
         lang_name = self.LANGUAGES[self.language]['name']
         print(f"🌍 Using {lang_name} voice: {tts_model}")
 
+        # Initialize TTS engine with instant setup for new users
+        from .instant_setup import ensure_instant_tts, get_instant_model, is_model_cached
+
+        # If using default VITS model but it's not cached, use instant setup
+        if tts_model == "tts_models/en/ljspeech/vits" and not is_model_cached(tts_model):
+            if debug_mode:
+                print("🚀 First-time setup: ensuring instant TTS availability...")
+
+            # Try instant setup with lightweight model
+            if ensure_instant_tts():
+                tts_model = get_instant_model() # Use fast_pitch instead
+                if debug_mode:
+                    print(f"✅ Using essential model: {tts_model}")
+
         # Initialize TTS engine using lazy import
         TTSEngine = _import_tts_engine()
         self.tts_engine = TTSEngine(
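Usage note (not part of the diff): the same instant-setup helpers can be called on their own, for example from a warm-up script. A hedged sketch, assuming the module path abstractvoice.instant_setup and the call signatures shown above:

from abstractvoice.instant_setup import ensure_instant_tts, get_instant_model, is_model_cached  # path assumed

if not is_model_cached("tts_models/en/ljspeech/vits"):
    if ensure_instant_tts():  # prepares the lightweight model
        print("Ready with:", get_instant_model())
    else:
        print("Instant setup failed; TTS will fall back at engine init")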
@@ -391,32 +429,68 @@ class VoiceManager:
         return self.speed
 
     def set_tts_model(self, model_name):
-        """Change the TTS model.
-
+        """Change the TTS model safely without memory conflicts.
+
         Available models (all pure Python, cross-platform):
         - "tts_models/en/ljspeech/fast_pitch" (default, recommended)
         - "tts_models/en/ljspeech/glow-tts" (alternative)
         - "tts_models/en/ljspeech/tacotron2-DDC" (legacy)
-
+
         Args:
             model_name: TTS model name to use
-
+
         Returns:
             True if successful
-
+
         Example:
             vm.set_tts_model("tts_models/en/ljspeech/glow-tts")
         """
         # Stop any current speech
         self.stop_speaking()
-
+
+        # CRITICAL: Crash-safe cleanup of old TTS engine
+        if hasattr(self, 'tts_engine') and self.tts_engine:
+            try:
+                # Stop all audio and cleanup player
+                if hasattr(self.tts_engine, 'audio_player') and self.tts_engine.audio_player:
+                    # Try stop method if available
+                    if hasattr(self.tts_engine.audio_player, 'stop'):
+                        self.tts_engine.audio_player.stop()
+                    self.tts_engine.audio_player.cleanup()
+
+                # Force cleanup of TTS object and release GPU memory
+                if hasattr(self.tts_engine, 'tts') and self.tts_engine.tts:
+                    # Clear CUDA cache if using GPU
+                    try:
+                        import torch
+                        if torch.cuda.is_available():
+                            torch.cuda.empty_cache()
+                    except:
+                        pass
+
+                    del self.tts_engine.tts
+
+                # Clear the engine itself
+                del self.tts_engine
+                self.tts_engine = None
+
+                # Force garbage collection to prevent memory leaks
+                import gc
+                gc.collect()
+
+            except Exception as e:
+                if self.debug_mode:
+                    print(f"Warning: TTS cleanup issue: {e}")
+                # Force clear even if cleanup failed
+                self.tts_engine = None
+
         # Reinitialize TTS engine with new model using lazy import
         TTSEngine = _import_tts_engine()
         self.tts_engine = TTSEngine(
             model_name=model_name,
             debug_mode=self.debug_mode
         )
-
+
         # Restore callbacks
         self.tts_engine.on_playback_start = self._on_tts_start
         self.tts_engine.on_playback_end = self._on_tts_end
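Usage note (not part of the diff): switching voices at runtime now goes through the crash-safe teardown above. A brief usage sketch, assuming VoiceManager is exported at the package top level and speak() is the synthesis entry point shown in the docstrings below:

from abstractvoice import VoiceManager  # top-level export assumed

vm = VoiceManager()
vm.speak("Hello from the default voice.")
if vm.set_tts_model("tts_models/en/ljspeech/glow-tts"):
    vm.speak("And now from glow-tts, after the old engine was released.")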
@@ -823,14 +897,95 @@ class VoiceManager:
             return self.voice_recognizer.change_vad_aggressiveness(aggressiveness)
         return False
 
+    # ===== SIMPLE MODEL MANAGEMENT METHODS =====
+    # Clean, simple APIs for both CLI and third-party applications
+
+    def list_available_models(self, language: str = None) -> dict:
+        """Get available models with metadata.
+
+        Args:
+            language: Optional language filter
+
+        Returns:
+            dict: Model information with cache status
+
+        Example:
+            >>> vm = VoiceManager()
+            >>> models = vm.list_available_models('en')
+            >>> print(json.dumps(models, indent=2))
+        """
+        from .simple_model_manager import get_model_manager
+        manager = get_model_manager(self.debug_mode)
+        return manager.list_available_models(language)
+
+    def download_model(self, model_name: str, progress_callback=None) -> bool:
+        """Download a specific model.
+
+        Args:
+            model_name: Model name or voice ID (e.g., 'en.vits' or full model path)
+            progress_callback: Optional function(model_name, success)
+
+        Returns:
+            bool: True if successful
+
+        Example:
+            >>> vm = VoiceManager()
+            >>> vm.download_model('en.vits') # or 'tts_models/en/ljspeech/vits'
+        """
+        from .simple_model_manager import download_model
+        return download_model(model_name, progress_callback)
+
+    def is_model_ready(self) -> bool:
+        """Check if essential model is ready for immediate use.
+
+        Returns:
+            bool: True if can speak immediately without download
+        """
+        from .simple_model_manager import is_ready
+        return is_ready()
+
+    def ensure_ready(self, auto_download: bool = True) -> bool:
+        """Ensure TTS is ready for immediate use.
+
+        Args:
+            auto_download: Whether to download essential model if needed
+
+        Returns:
+            bool: True if TTS is ready
+
+        Example:
+            >>> vm = VoiceManager()
+            >>> if vm.ensure_ready():
+            ...     vm.speak("Ready to go!")
+        """
+        if self.is_model_ready():
+            return True
+
+        if not auto_download:
+            return False
+
+        from .simple_model_manager import get_model_manager
+        manager = get_model_manager(self.debug_mode)
+        return manager.download_essential_model()
+
+    def get_cache_status(self) -> dict:
+        """Get model cache status.
+
+        Returns:
+            dict: Cache information including total models, sizes, etc.
+        """
+        from .simple_model_manager import get_model_manager
+        manager = get_model_manager(self.debug_mode)
+        return manager.get_status()
+
     def cleanup(self):
         """Clean up resources.
-
+
         Returns:
             True if cleanup successful
         """
         if self.voice_recognizer:
             self.voice_recognizer.stop()
-
+
         self.stop_speaking()
         return True
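Usage note (not part of the diff): taken together, the new model-management surface gives third-party code a download, check, speak loop. A closing sketch that reuses only the methods added above, with the top-level VoiceManager export assumed:

import json

from abstractvoice import VoiceManager  # top-level export assumed

vm = VoiceManager()
if vm.ensure_ready():                 # downloads the essential model if needed
    print(json.dumps(vm.get_cache_status(), indent=2))
    vm.download_model('en.vits')      # optional: fetch the premium voice
    vm.speak("Model management is ready.")
vm.cleanup()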