abstractvoice 0.3.1__py3-none-any.whl → 0.4.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractvoice/__init__.py +5 -2
- abstractvoice/examples/cli_repl.py +81 -44
- abstractvoice/examples/voice_cli.py +56 -20
- abstractvoice/instant_setup.py +83 -0
- abstractvoice/simple_model_manager.py +500 -0
- abstractvoice/tts/tts_engine.py +253 -23
- abstractvoice/voice_manager.py +176 -21
- {abstractvoice-0.3.1.dist-info → abstractvoice-0.4.6.dist-info}/METADATA +125 -19
- abstractvoice-0.4.6.dist-info/RECORD +23 -0
- abstractvoice-0.3.1.dist-info/RECORD +0 -21
- {abstractvoice-0.3.1.dist-info → abstractvoice-0.4.6.dist-info}/WHEEL +0 -0
- {abstractvoice-0.3.1.dist-info → abstractvoice-0.4.6.dist-info}/entry_points.txt +0 -0
- {abstractvoice-0.3.1.dist-info → abstractvoice-0.4.6.dist-info}/licenses/LICENSE +0 -0
- {abstractvoice-0.3.1.dist-info → abstractvoice-0.4.6.dist-info}/top_level.txt +0 -0
abstractvoice/tts/tts_engine.py
CHANGED
@@ -300,11 +300,24 @@ class NonBlockingAudioPlayer:
                print(f"Error stopping audio stream: {e}")
            finally:
                self.stream = None
-
+
        self.is_playing = False
        with self.pause_lock:
            self.is_paused = False
        self.clear_queue()
+
+    def cleanup(self):
+        """Cleanup resources to prevent memory conflicts."""
+        try:
+            self.stop_stream()
+            # Clear any remaining references
+            self.current_audio = None
+            self.playback_complete_callback = None
+            if self.debug_mode:
+                print(" > Audio player cleaned up")
+        except Exception as e:
+            if self.debug_mode:
+                print(f"Audio cleanup warning: {e}")

    def play_audio(self, audio_array):
        """Add audio to the playback queue."""
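The cleanup() added above follows a defensive teardown pattern: stop the stream first, drop the remaining references, and never let teardown itself raise. A minimal, self-contained sketch of that pattern (illustrative only, not the packaged class):

    class Player:
        """Toy stand-in for NonBlockingAudioPlayer, showing the teardown order."""
        def __init__(self, debug_mode=False):
            self.debug_mode = debug_mode
            self.stream = object()            # placeholder for the real audio stream
            self.current_audio = b"pcm data"
            self.playback_complete_callback = print

        def stop_stream(self):
            self.stream = None                # the real method also stops/closes the device

        def cleanup(self):
            try:
                self.stop_stream()
                self.current_audio = None     # drop references so they can be collected
                self.playback_complete_callback = None
            except Exception as e:
                if self.debug_mode:           # warn only; teardown never raises
                    print(f"Audio cleanup warning: {e}")

    Player(debug_mode=True).cleanup()         # safe even when nothing was ever played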
@@ -466,34 +479,21 @@ class TTSEngine:
        try:
            if self.debug_mode:
                print(f" > Loading TTS model: {model_name}")
-
-            # Try
+
+            # Try simple, effective initialization strategy
            try:
                TTS = _import_tts()
-
+                success, final_model = self._load_with_simple_fallback(TTS, model_name, debug_mode)
+                if not success:
+                    # If all fails, provide actionable guidance
+                    self._handle_model_load_failure(debug_mode)
+                elif self.debug_mode and final_model != model_name:
+                    print(f" > Loaded fallback model: {final_model}")
            except Exception as e:
                error_msg = str(e).lower()
                # Check if this is an espeak-related error
                if ("espeak" in error_msg or "phoneme" in error_msg):
-
-                    if not debug_mode:
-                        sys.stdout = sys.__stdout__
-
-                    print("\n" + "="*70)
-                    print("⚠️ VITS Model Requires espeak-ng (Not Found)")
-                    print("="*70)
-                    print("\nFor BEST voice quality, install espeak-ng:")
-                    print(" • macOS: brew install espeak-ng")
-                    print(" • Linux: sudo apt-get install espeak-ng")
-                    print(" • Windows: conda install espeak-ng (or see README)")
-                    print("\nFalling back to fast_pitch (lower quality, but works)")
-                    print("="*70 + "\n")
-
-                    if not debug_mode:
-                        sys.stdout = null_out
-
-                    # Fallback to fast_pitch
-                    self.tts = TTS(model_name="tts_models/en/ljspeech/fast_pitch", progress_bar=self.debug_mode)
+                    self._handle_espeak_fallback(debug_mode)
                else:
                    # Different error, re-raise
                    raise
@@ -520,6 +520,236 @@ class TTSEngine:
        # Pause/resume state
        self.pause_lock = threading.Lock()  # Thread-safe pause operations
        self.is_paused_state = False  # Explicit paused state tracking
+
+    def _load_with_simple_fallback(self, TTS, preferred_model: str, debug_mode: bool) -> tuple[bool, str]:
+        """Load TTS model with bulletproof compatibility-first strategy."""
+        from ..simple_model_manager import get_model_manager
+
+        model_manager = get_model_manager(debug_mode=debug_mode)
+
+        # Step 1: Check espeak availability for smart model filtering
+        espeak_available = self._check_espeak_available()
+        if debug_mode and not espeak_available:
+            print(" > espeak-ng not found, will skip VITS models")
+
+        # Step 2: Try the REQUESTED model first if it's cached
+        cached_models = model_manager.get_cached_models()
+        if cached_models and debug_mode:
+            print(f" > Found {len(cached_models)} cached models")
+
+        # FORCE USER'S CHOICE: Try the specifically requested model first
+        if preferred_model in cached_models:
+            try:
+                if debug_mode:
+                    print(f" > LOADING REQUESTED MODEL: {preferred_model}")
+
+                # Safety check for Italian VITS models that might crash
+                if "it/" in preferred_model and "vits" in preferred_model:
+                    if debug_mode:
+                        print(f" > Italian VITS model detected - using safe loading...")
+
+                self.tts = TTS(model_name=preferred_model, progress_bar=self.debug_mode)
+
+                if debug_mode:
+                    print(f" > ✅ SUCCESS: Loaded requested model: {preferred_model}")
+                return True, preferred_model
+
+            except Exception as e:
+                error_msg = str(e).lower()
+                if debug_mode:
+                    print(f" > ❌ Requested model failed: {e}")
+
+                # Special handling for Italian model crashes
+                if "it/" in preferred_model and ("segmentation" in error_msg or "crash" in error_msg):
+                    if debug_mode:
+                        print(f" > Italian model caused crash - marking as incompatible")
+                    # Force fallback for crashed Italian models
+                    pass
+
+                # Only fall back if the model actually failed to load, not due to dependencies
+
+        # Step 3: Only fall back to compatibility order if requested model failed
+        if debug_mode:
+            print(" > Requested model unavailable, trying fallback models...")
+
+        # Compatibility-first fallback order
+        fallback_models = [
+            "tts_models/en/ljspeech/tacotron2-DDC",  # Most reliable (Linda)
+            "tts_models/en/jenny/jenny",             # Different female speaker (Jenny)
+            "tts_models/en/ek1/tacotron2",           # Male British accent (Edward)
+            "tts_models/en/sam/tacotron-DDC",        # Different male voice (Sam)
+            "tts_models/en/ljspeech/fast_pitch",     # Lightweight alternative
+            "tts_models/en/ljspeech/glow-tts",       # Another alternative
+            "tts_models/en/vctk/vits",               # Multi-speaker (requires espeak)
+            "tts_models/en/ljspeech/vits",           # Premium (requires espeak)
+        ]
+
+        # Remove the preferred model from fallbacks to avoid duplicate attempts
+        fallback_models = [m for m in fallback_models if m != preferred_model]
+
+        # Try fallback models
+        for model in fallback_models:
+            if model in cached_models:
+                # Skip VITS models if no espeak
+                if "vits" in model and not espeak_available:
+                    if debug_mode:
+                        print(f" > Skipping {model} (requires espeak-ng)")
+                    continue
+
+                try:
+                    if debug_mode:
+                        print(f" > Trying fallback model: {model}")
+                    self.tts = TTS(model_name=model, progress_bar=self.debug_mode)
+                    if debug_mode:
+                        print(f" > ✅ Successfully loaded fallback: {model}")
+                    return True, model
+                except Exception as e:
+                    if debug_mode:
+                        print(f" > ❌ Fallback {model} failed: {e}")
+
+        # Step 4: If no cached models work, try downloading requested model first
+        if debug_mode:
+            print(" > No cached models worked, attempting downloads...")
+
+        # Try downloading the requested model first
+        if "vits" not in preferred_model or espeak_available:
+            try:
+                if debug_mode:
+                    print(f" > Downloading requested model: {preferred_model}...")
+                success = model_manager.download_model(preferred_model)
+                if success:
+                    self.tts = TTS(model_name=preferred_model, progress_bar=self.debug_mode)
+                    if debug_mode:
+                        print(f" > ✅ Downloaded and loaded requested: {preferred_model}")
+                    return True, preferred_model
+                elif debug_mode:
+                    print(f" > ❌ Download failed for requested model: {preferred_model}")
+            except Exception as e:
+                if debug_mode:
+                    print(f" > ❌ Failed to download/load requested model: {e}")
+
+        # Step 5: If requested model download failed, try fallback downloads
+        for model in fallback_models:
+            # Skip VITS models if no espeak
+            if "vits" in model and not espeak_available:
+                continue
+
+            try:
+                if debug_mode:
+                    print(f" > Downloading fallback: {model}...")
+
+                # First try to download
+                success = model_manager.download_model(model)
+                if success:
+                    # Then try to load
+                    self.tts = TTS(model_name=model, progress_bar=self.debug_mode)
+                    if debug_mode:
+                        print(f" > ✅ Downloaded and loaded fallback: {model}")
+                    return True, model
+                elif debug_mode:
+                    print(f" > ❌ Download failed for {model}")
+
+            except Exception as e:
+                if debug_mode:
+                    print(f" > ❌ Failed to load {model}: {e}")
+
+        return False, None
+
+    def _check_espeak_available(self) -> bool:
+        """Check if espeak-ng is available on the system."""
+        import subprocess
+        try:
+            subprocess.run(['espeak-ng', '--version'],
+                           capture_output=True, check=True, timeout=5)
+            return True
+        except (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired):
+            # Try alternative espeak command
+            try:
+                subprocess.run(['espeak', '--version'],
+                               capture_output=True, check=True, timeout=5)
+                return True
+            except:
+                return False
+
+    def _handle_espeak_fallback(self, debug_mode: bool):
+        """Handle espeak-related errors with fallback to non-phoneme models."""
+        # Restore stdout to show user-friendly message
+        if not debug_mode:
+            sys.stdout = sys.__stdout__
+
+        print("\n" + "="*70)
+        print("⚠️ VITS Model Requires espeak-ng (Not Found)")
+        print("="*70)
+        print("\nFor BEST voice quality, install espeak-ng:")
+        print(" • macOS: brew install espeak-ng")
+        print(" • Linux: sudo apt-get install espeak-ng")
+        print(" • Windows: conda install espeak-ng (or see README)")
+        print("\nFalling back to compatible models (no espeak dependency)")
+        print("="*70 + "\n")
+
+        if not debug_mode:
+            import os
+            null_out = open(os.devnull, 'w')
+            sys.stdout = null_out
+
+        # Try non-phoneme models that don't require espeak (compatibility-first order)
+        from TTS.api import TTS
+        fallback_models = [
+            "tts_models/en/ljspeech/tacotron2-DDC",  # Most reliable (Linda)
+            "tts_models/en/jenny/jenny",             # Different female speaker (Jenny)
+            "tts_models/en/ek1/tacotron2",           # Male British accent (Edward)
+            "tts_models/en/sam/tacotron-DDC",        # Different male voice (Sam)
+            "tts_models/en/ljspeech/fast_pitch",     # Lightweight alternative
+            "tts_models/en/ljspeech/glow-tts"        # Another alternative
+        ]
+
+        tts_loaded = False
+        for fallback_model in fallback_models:
+            try:
+                if debug_mode:
+                    print(f"Trying fallback model: {fallback_model}")
+                self.tts = TTS(model_name=fallback_model, progress_bar=self.debug_mode)
+                tts_loaded = True
+                break
+            except Exception as fallback_error:
+                if debug_mode:
+                    print(f"Fallback {fallback_model} failed: {fallback_error}")
+                continue
+
+        if not tts_loaded:
+            self._handle_model_load_failure(debug_mode)
+
+    def _handle_model_load_failure(self, debug_mode: bool):
+        """Handle complete model loading failure with actionable guidance."""
+        # Restore stdout to show user-friendly message
+        if not debug_mode:
+            sys.stdout = sys.__stdout__
+
+        print("\n" + "="*70)
+        print("❌ TTS Model Loading Failed")
+        print("="*70)
+        print("\nNo TTS models could be loaded (offline or online).")
+        print("\nQuick fixes:")
+        print(" 1. Download essential models:")
+        print("    abstractvoice download-models")
+        print(" 2. Check internet connectivity")
+        print(" 3. Clear corrupted cache:")
+        print("    rm -rf ~/.cache/tts ~/.local/share/tts")
+        print(" 4. Reinstall TTS:")
+        print("    pip install --force-reinstall coqui-tts")
+        print(" 5. Use text-only mode:")
+        print("    abstractvoice --no-tts")
+        print("="*70)
+
+        raise RuntimeError(
+            "❌ Failed to load any TTS model.\n"
+            "This typically means:\n"
+            " • No models cached locally AND no internet connection\n"
+            " • Corrupted model cache\n"
+            " • Insufficient disk space\n"
+            " • Network firewall blocking downloads\n\n"
+            "Run 'abstractvoice download-models' when you have internet access."
+        )

    def _on_playback_complete(self):
        """Callback when audio playback completes."""
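The loader above keys several decisions off whether espeak-ng is installed (VITS models are skipped without it). A standalone sketch of that probe, mirroring _check_espeak_available from this diff rather than calling the packaged code:

    import subprocess

    def espeak_available() -> bool:
        """Return True if either the espeak-ng or espeak binary answers --version."""
        for cmd in ("espeak-ng", "espeak"):
            try:
                subprocess.run([cmd, "--version"],
                               capture_output=True, check=True, timeout=5)
                return True
            except (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired):
                continue
        return False

    if __name__ == "__main__":
        if espeak_available():
            print("espeak found: VITS models are usable")
        else:
            print("espeak missing: the loader will skip VITS models")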
abstractvoice/voice_manager.py
CHANGED
@@ -38,8 +38,8 @@ class VoiceManager:
    # Smart language configuration - high quality stable defaults
    LANGUAGES = {
        'en': {
-            'default': 'tts_models/en/ljspeech/
-            'premium': 'tts_models/en/ljspeech/vits', #
+            'default': 'tts_models/en/ljspeech/tacotron2-DDC',  # Reliable, compatible voice
+            'premium': 'tts_models/en/ljspeech/vits',  # High quality (requires espeak)
            'name': 'English'
        },
        'fr': {
@@ -70,15 +70,39 @@ class VoiceManager:
    # Complete voice catalog with metadata
    VOICE_CATALOG = {
        'en': {
-            '
-                'model': 'tts_models/en/ljspeech/
-                'quality': '
+            'tacotron2': {
+                'model': 'tts_models/en/ljspeech/tacotron2-DDC',
+                'quality': 'good',
                'gender': 'female',
                'accent': 'US English',
                'license': 'Open source (LJSpeech)',
-                'requires': '
+                'requires': 'none'
+            },
+            'jenny': {
+                'model': 'tts_models/en/jenny/jenny',
+                'quality': 'excellent',
+                'gender': 'female',
+                'accent': 'US English',
+                'license': 'Open source (Jenny)',
+                'requires': 'none'
+            },
+            'ek1': {
+                'model': 'tts_models/en/ek1/tacotron2',
+                'quality': 'excellent',
+                'gender': 'male',
+                'accent': 'British English',
+                'license': 'Open source (EK1)',
+                'requires': 'none'
            },
-            '
+            'sam': {
+                'model': 'tts_models/en/sam/tacotron-DDC',
+                'quality': 'good',
+                'gender': 'male',
+                'accent': 'US English',
+                'license': 'Open source (Sam)',
+                'requires': 'none'
+            },
+            'fast_pitch': {
                'model': 'tts_models/en/ljspeech/fast_pitch',
                'quality': 'good',
                'gender': 'female',
@@ -86,12 +110,12 @@ class VoiceManager:
                'license': 'Open source (LJSpeech)',
                'requires': 'none'
            },
-            '
-                'model': 'tts_models/en/
+            'vits': {
+                'model': 'tts_models/en/ljspeech/vits',
                'quality': 'premium',
-                'gender': '
-                'accent': '
-                'license': 'Open source (
+                'gender': 'female',
+                'accent': 'US English',
+                'license': 'Open source (LJSpeech)',
                'requires': 'espeak-ng'
            }
        },
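With the expanded VOICE_CATALOG above, every entry now records what it requires, so callers can pick a voice that works without espeak-ng. A small sketch, assuming VoiceManager is importable from abstractvoice.voice_manager as the file list suggests:

    from abstractvoice.voice_manager import VoiceManager  # import path assumed

    def voices_without_espeak(lang="en"):
        """Voice IDs from VOICE_CATALOG that declare no extra system dependency."""
        catalog = VoiceManager.VOICE_CATALOG.get(lang, {})
        return {vid: info["model"] for vid, info in catalog.items()
                if info.get("requires") == "none"}

    print(voices_without_espeak())  # expected: tacotron2, jenny, ek1, sam, fast_pitch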
@@ -191,6 +215,20 @@ class VoiceManager:
            lang_name = self.LANGUAGES[self.language]['name']
            print(f"🌍 Using {lang_name} voice: {tts_model}")

+        # Initialize TTS engine with instant setup for new users
+        from .instant_setup import ensure_instant_tts, get_instant_model, is_model_cached
+
+        # If using default VITS model but it's not cached, use instant setup
+        if tts_model == "tts_models/en/ljspeech/vits" and not is_model_cached(tts_model):
+            if debug_mode:
+                print("🚀 First-time setup: ensuring instant TTS availability...")
+
+            # Try instant setup with lightweight model
+            if ensure_instant_tts():
+                tts_model = get_instant_model()  # Use fast_pitch instead
+                if debug_mode:
+                    print(f"✅ Using essential model: {tts_model}")
+
        # Initialize TTS engine using lazy import
        TTSEngine = _import_tts_engine()
        self.tts_engine = TTSEngine(
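The block above wires the new instant_setup helpers into VoiceManager start-up: if the premium VITS default is not cached yet, a lightweight model is used so first-run speech works immediately. A sketch of that decision in isolation, using the same three helpers the diff imports (the abstractvoice.instant_setup module path is taken from the file list):

    from abstractvoice.instant_setup import ensure_instant_tts, get_instant_model, is_model_cached

    requested = "tts_models/en/ljspeech/vits"
    model = requested
    if not is_model_cached(requested) and ensure_instant_tts():
        model = get_instant_model()  # lightweight model, ready without a long download
    print(f"Model to load: {model}")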
@@ -391,32 +429,68 @@ class VoiceManager:
        return self.speed

    def set_tts_model(self, model_name):
-        """Change the TTS model.
-
+        """Change the TTS model safely without memory conflicts.
+
        Available models (all pure Python, cross-platform):
        - "tts_models/en/ljspeech/fast_pitch" (default, recommended)
        - "tts_models/en/ljspeech/glow-tts" (alternative)
        - "tts_models/en/ljspeech/tacotron2-DDC" (legacy)
-
+
        Args:
            model_name: TTS model name to use
-
+
        Returns:
            True if successful
-
+
        Example:
            vm.set_tts_model("tts_models/en/ljspeech/glow-tts")
        """
        # Stop any current speech
        self.stop_speaking()
-
+
+        # CRITICAL: Crash-safe cleanup of old TTS engine
+        if hasattr(self, 'tts_engine') and self.tts_engine:
+            try:
+                # Stop all audio and cleanup player
+                if hasattr(self.tts_engine, 'audio_player') and self.tts_engine.audio_player:
+                    # Try stop method if available
+                    if hasattr(self.tts_engine.audio_player, 'stop'):
+                        self.tts_engine.audio_player.stop()
+                    self.tts_engine.audio_player.cleanup()
+
+                # Force cleanup of TTS object and release GPU memory
+                if hasattr(self.tts_engine, 'tts') and self.tts_engine.tts:
+                    # Clear CUDA cache if using GPU
+                    try:
+                        import torch
+                        if torch.cuda.is_available():
+                            torch.cuda.empty_cache()
+                    except:
+                        pass
+
+                    del self.tts_engine.tts
+
+                # Clear the engine itself
+                del self.tts_engine
+                self.tts_engine = None
+
+                # Force garbage collection to prevent memory leaks
+                import gc
+                gc.collect()
+
+            except Exception as e:
+                if self.debug_mode:
+                    print(f"Warning: TTS cleanup issue: {e}")
+                # Force clear even if cleanup failed
+                self.tts_engine = None
+
        # Reinitialize TTS engine with new model using lazy import
        TTSEngine = _import_tts_engine()
        self.tts_engine = TTSEngine(
            model_name=model_name,
            debug_mode=self.debug_mode
        )
-
+
        # Restore callbacks
        self.tts_engine.on_playback_start = self._on_tts_start
        self.tts_engine.on_playback_end = self._on_tts_end
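With the cleanup added above, set_tts_model() can now be called repeatedly at runtime: the old engine is stopped, its audio player cleaned up, the CUDA cache cleared when present, and the engine rebuilt. A usage sketch (the top-level VoiceManager import is assumed; speak() and cleanup() appear elsewhere in this diff):

    from abstractvoice import VoiceManager  # top-level export assumed

    vm = VoiceManager()
    vm.speak("Hello from the default voice.")
    vm.set_tts_model("tts_models/en/jenny/jenny")  # tears down the old engine, loads Jenny
    vm.speak("Hello from Jenny.")
    vm.cleanup()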
@@ -823,14 +897,95 @@ class VoiceManager:
            return self.voice_recognizer.change_vad_aggressiveness(aggressiveness)
        return False

+    # ===== SIMPLE MODEL MANAGEMENT METHODS =====
+    # Clean, simple APIs for both CLI and third-party applications
+
+    def list_available_models(self, language: str = None) -> dict:
+        """Get available models with metadata.
+
+        Args:
+            language: Optional language filter
+
+        Returns:
+            dict: Model information with cache status
+
+        Example:
+            >>> vm = VoiceManager()
+            >>> models = vm.list_available_models('en')
+            >>> print(json.dumps(models, indent=2))
+        """
+        from .simple_model_manager import get_model_manager
+        manager = get_model_manager(self.debug_mode)
+        return manager.list_available_models(language)
+
+    def download_model(self, model_name: str, progress_callback=None) -> bool:
+        """Download a specific model.
+
+        Args:
+            model_name: Model name or voice ID (e.g., 'en.vits' or full model path)
+            progress_callback: Optional function(model_name, success)
+
+        Returns:
+            bool: True if successful
+
+        Example:
+            >>> vm = VoiceManager()
+            >>> vm.download_model('en.vits')  # or 'tts_models/en/ljspeech/vits'
+        """
+        from .simple_model_manager import download_model
+        return download_model(model_name, progress_callback)
+
+    def is_model_ready(self) -> bool:
+        """Check if essential model is ready for immediate use.
+
+        Returns:
+            bool: True if can speak immediately without download
+        """
+        from .simple_model_manager import is_ready
+        return is_ready()
+
+    def ensure_ready(self, auto_download: bool = True) -> bool:
+        """Ensure TTS is ready for immediate use.
+
+        Args:
+            auto_download: Whether to download essential model if needed
+
+        Returns:
+            bool: True if TTS is ready
+
+        Example:
+            >>> vm = VoiceManager()
+            >>> if vm.ensure_ready():
+            ...     vm.speak("Ready to go!")
+        """
+        if self.is_model_ready():
+            return True
+
+        if not auto_download:
+            return False
+
+        from .simple_model_manager import get_model_manager
+        manager = get_model_manager(self.debug_mode)
+        return manager.download_essential_model()
+
+    def get_cache_status(self) -> dict:
+        """Get model cache status.
+
+        Returns:
+            dict: Cache information including total models, sizes, etc.
+        """
+        from .simple_model_manager import get_model_manager
+        manager = get_model_manager(self.debug_mode)
+        return manager.get_status()
+
    def cleanup(self):
        """Clean up resources.
-
+
        Returns:
            True if cleanup successful
        """
        if self.voice_recognizer:
            self.voice_recognizer.stop()
-
+
        self.stop_speaking()
        return True