PyPI - abstractvoice - Versions diffs - 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl - Mend

abstractvoice 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

abstractvoice/voice_manager.py CHANGED Viewed

@@ -38,8 +38,8 @@ class VoiceManager:
     # Smart language configuration - high quality stable defaults
     LANGUAGES = {
         'en': {
-            'default': 'tts_models/en/ljspeech/vits',          # High quality premium voice
-            'premium': 'tts_models/en/ljspeech/vits',          # Use same stable model
+            'default': 'tts_models/en/ljspeech/tacotron2-DDC', # Reliable, compatible voice
+            'premium': 'tts_models/en/ljspeech/vits',          # High quality (requires espeak)
             'name': 'English'
         },
         'fr': {
@@ -70,15 +70,39 @@ class VoiceManager:
     # Complete voice catalog with metadata
     VOICE_CATALOG = {
         'en': {
-            'vits_premium': {
-                'model': 'tts_models/en/ljspeech/vits',
-                'quality': 'premium',
+            'tacotron2': {
+                'model': 'tts_models/en/ljspeech/tacotron2-DDC',
+                'quality': 'good',
                 'gender': 'female',
                 'accent': 'US English',
                 'license': 'Open source (LJSpeech)',
-                'requires': 'espeak-ng'
+                'requires': 'none'
+            },
+            'jenny': {
+                'model': 'tts_models/en/jenny/jenny',
+                'quality': 'excellent',
+                'gender': 'female',
+                'accent': 'US English',
+                'license': 'Open source (Jenny)',
+                'requires': 'none'
+            },
+            'ek1': {
+                'model': 'tts_models/en/ek1/tacotron2',
+                'quality': 'excellent',
+                'gender': 'male',
+                'accent': 'British English',
+                'license': 'Open source (EK1)',
+                'requires': 'none'
+            },
+            'sam': {
+                'model': 'tts_models/en/sam/tacotron-DDC',
+                'quality': 'good',
+                'gender': 'male',
+                'accent': 'US English',
+                'license': 'Open source (Sam)',
+                'requires': 'none'
             },
-            'fast_pitch_reliable': {
+            'fast_pitch': {
                 'model': 'tts_models/en/ljspeech/fast_pitch',
                 'quality': 'good',
                 'gender': 'female',
@@ -86,12 +110,12 @@ class VoiceManager:
                 'license': 'Open source (LJSpeech)',
                 'requires': 'none'
             },
-            'vctk_multi': {
-                'model': 'tts_models/en/vctk/vits',
+            'vits': {
+                'model': 'tts_models/en/ljspeech/vits',
                 'quality': 'premium',
-                'gender': 'multiple',
-                'accent': 'British English',
-                'license': 'Open source (VCTK)',
+                'gender': 'female',
+                'accent': 'US English',
+                'license': 'Open source (LJSpeech)',
                 'requires': 'espeak-ng'
             }
         },
@@ -191,6 +215,20 @@ class VoiceManager:
                 lang_name = self.LANGUAGES[self.language]['name']
                 print(f"🌍 Using {lang_name} voice: {tts_model}")
+        # Initialize TTS engine with instant setup for new users
+        from .instant_setup import ensure_instant_tts, get_instant_model, is_model_cached
+        # If using default VITS model but it's not cached, use instant setup
+        if tts_model == "tts_models/en/ljspeech/vits" and not is_model_cached(tts_model):
+            if debug_mode:
+                print("🚀 First-time setup: ensuring instant TTS availability...")
+            # Try instant setup with lightweight model
+            if ensure_instant_tts():
+                tts_model = get_instant_model()  # Use fast_pitch instead
+                if debug_mode:
+                    print(f"✅ Using essential model: {tts_model}")
         # Initialize TTS engine using lazy import
         TTSEngine = _import_tts_engine()
         self.tts_engine = TTSEngine(
@@ -391,32 +429,68 @@ class VoiceManager:
         return self.speed
     def set_tts_model(self, model_name):
-        """Change the TTS model.
+        """Change the TTS model safely without memory conflicts.
         Available models (all pure Python, cross-platform):
         - "tts_models/en/ljspeech/fast_pitch" (default, recommended)
         - "tts_models/en/ljspeech/glow-tts" (alternative)
         - "tts_models/en/ljspeech/tacotron2-DDC" (legacy)
         Args:
             model_name: TTS model name to use
         Returns:
             True if successful
         Example:
             vm.set_tts_model("tts_models/en/ljspeech/glow-tts")
         """
         # Stop any current speech
         self.stop_speaking()
+        # CRITICAL: Crash-safe cleanup of old TTS engine
+        if hasattr(self, 'tts_engine') and self.tts_engine:
+            try:
+                # Stop all audio and cleanup player
+                if hasattr(self.tts_engine, 'audio_player') and self.tts_engine.audio_player:
+                    # Try stop method if available
+                    if hasattr(self.tts_engine.audio_player, 'stop'):
+                        self.tts_engine.audio_player.stop()
+                    self.tts_engine.audio_player.cleanup()
+                # Force cleanup of TTS object and release GPU memory
+                if hasattr(self.tts_engine, 'tts') and self.tts_engine.tts:
+                    # Clear CUDA cache if using GPU
+                    try:
+                        import torch
+                        if torch.cuda.is_available():
+                            torch.cuda.empty_cache()
+                    except:
+                        pass
+                    del self.tts_engine.tts
+                # Clear the engine itself
+                del self.tts_engine
+                self.tts_engine = None
+                # Force garbage collection to prevent memory leaks
+                import gc
+                gc.collect()
+            except Exception as e:
+                if self.debug_mode:
+                    print(f"Warning: TTS cleanup issue: {e}")
+                # Force clear even if cleanup failed
+                self.tts_engine = None
         # Reinitialize TTS engine with new model using lazy import
         TTSEngine = _import_tts_engine()
         self.tts_engine = TTSEngine(
             model_name=model_name,
             debug_mode=self.debug_mode
         )
         # Restore callbacks
         self.tts_engine.on_playback_start = self._on_tts_start
         self.tts_engine.on_playback_end = self._on_tts_end
@@ -474,13 +548,37 @@ class VoiceManager:
         # Select best model for this language
         selected_model = self._select_best_model(language)
-        models_to_try = [selected_model, self.SAFE_FALLBACK]
+        # CRITICAL FIX: Check if model is available, download if not
+        from .instant_setup import is_model_cached
+        from .simple_model_manager import download_model
+        if not is_model_cached(selected_model):
+            if self.debug_mode:
+                print(f"📥 Model {selected_model} not cached, downloading...")
+            # Try to download the model
+            success = download_model(selected_model)
+            if not success:
+                if self.debug_mode:
+                    print(f"❌ Failed to download {selected_model}")
+                # If download fails and it's not English, we have a problem
+                if language != 'en':
+                    print(f"❌ Cannot switch to {self.LANGUAGES[language]['name']}: Model download failed")
+                    print(f"   Try: abstractvoice download-models --language {language}")
+                    return False
+        models_to_try = [selected_model]
+        # Only add fallback if it's different from selected
+        if selected_model != self.SAFE_FALLBACK:
+            models_to_try.append(self.SAFE_FALLBACK)
         for model_name in models_to_try:
             try:
                 if self.debug_mode:
                     lang_name = self.LANGUAGES[language]['name']
-                    print(f"🌍 Switching to {lang_name} voice: {model_name}")
+                    print(f"🌍 Loading {lang_name} voice: {model_name}")
                 # Reinitialize TTS engine
                 TTSEngine = _import_tts_engine()
@@ -505,12 +603,16 @@ class VoiceManager:
             except Exception as e:
                 if self.debug_mode:
-                    print(f"⚠️ Model {model_name} failed: {e}")
+                    print(f"⚠️ Model {model_name} failed to load: {e}")
+                # Don't silently continue - report the failure
+                if model_name == selected_model and language != 'en':
+                    print(f"❌ Failed to load {lang_name} voice model")
+                    print(f"   The model might be corrupted. Try:")
+                    print(f"   abstractvoice download-models --language {language}")
                 continue
         # All models failed
-        if self.debug_mode:
-            print(f"❌ All models failed for language '{language}'")
+        print(f"❌ Cannot switch to {self.LANGUAGES[language]['name']}: No working models")
         return False
     def get_language(self):
@@ -782,15 +884,29 @@ class VoiceManager:
             return False
         voice_info = self.VOICE_CATALOG[language][voice_id]
+        model_name = voice_info['model']
+        # CRITICAL FIX: Download model if not cached
+        from .instant_setup import is_model_cached
+        from .simple_model_manager import download_model
-        # Check compatibility
-        if voice_info['requires'] == 'espeak-ng' and not self._test_model_compatibility(voice_info['model']):
+        if not is_model_cached(model_name):
+            print(f"📥 Voice model '{voice_id}' not cached, downloading...")
+            success = download_model(model_name)
+            if not success:
+                print(f"❌ Failed to download voice '{voice_id}'")
+                print(f"   Check your internet connection and try again")
+                return False
+            print(f"✅ Voice model '{voice_id}' downloaded successfully")
+        # Check compatibility after download
+        if voice_info['requires'] == 'espeak-ng' and not self._test_model_compatibility(model_name):
             if self.debug_mode:
                 print(f"⚠️ Voice '{voice_id}' requires espeak-ng. Install it for premium quality.")
-            return False
+            # Don't fail - try to load anyway
+            # return False
         # Set the specific voice
-        model_name = voice_info['model']
         if self.debug_mode:
             print(f"🎭 Setting {language} voice to: {voice_id}")
             print(f"   Model: {model_name}")

{abstractvoice-0.4.1.dist-info → abstractvoice-0.5.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: abstractvoice
-Version: 0.4.1
+Version: 0.5.0
 Summary: A modular Python library for voice interactions with AI systems
 Author-email: Laurent-Philippe Albou <contact@abstractcore.ai>
 License-Expression: MIT
@@ -19,6 +19,14 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: numpy>=1.24.0
 Requires-Dist: requests>=2.31.0
+Requires-Dist: appdirs>=1.4.0
+Requires-Dist: coqui-tts<0.30.0,>=0.27.0
+Requires-Dist: torch<2.4.0,>=2.0.0
+Requires-Dist: torchvision<0.19.0,>=0.15.0
+Requires-Dist: torchaudio<2.4.0,>=2.0.0
+Requires-Dist: librosa>=0.10.0
+Requires-Dist: sounddevice>=0.4.6
+Requires-Dist: soundfile>=0.12.1
 Provides-Extra: voice
 Requires-Dist: sounddevice>=0.4.6; extra == "voice"
 Requires-Dist: webrtcvad>=2.0.10; extra == "voice"
@@ -164,38 +172,51 @@ AbstractVoice automatically detects espeak-ng and upgrades to premium quality vo
 ## Quick Start
-### ⚡ Instant TTS (v0.4.0+)
+### ⚡ Instant TTS (v0.5.0+)
 ```python
 from abstractvoice import VoiceManager
-# Initialize voice manager - automatically downloads essential model if needed
+# Initialize voice manager - works immediately with included dependencies
 vm = VoiceManager()
-# Text-to-speech works immediately!
+# Text-to-speech works right away!
 vm.speak("Hello! TTS works out of the box!")
+# Language switching with automatic model download
+vm.set_language('fr')
+vm.speak("Bonjour! Le français fonctionne aussi!")
 ```
-**That's it!** AbstractVoice v0.4.0+ automatically:
-- ✅ Downloads essential English model (107MB) on first use
-- ✅ Caches models permanently for offline use
-- ✅ Works immediately after first setup
+**That's it!** AbstractVoice v0.5.0+ automatically:
+- ✅ Includes essential TTS dependencies in base installation
+- ✅ Downloads models automatically when switching languages/voices
+- ✅ Works immediately after `pip install abstractvoice`
+- ✅ No silent failures - clear error messages if download fails
 - ✅ No complex configuration needed
-### 🌍 Multi-Language Support
+### 🌍 Multi-Language Support (Auto-Download in v0.5.0+)
 ```python
-# Download and use French voice
-vm.download_model('fr.css10_vits')  # Downloads automatically
+# Simply switch language - downloads model automatically if needed!
 vm.set_language('fr')
 vm.speak("Bonjour! Je parle français maintenant.")
-# Download and use German voice
-vm.download_model('de.thorsten_vits')
+# Switch to German - no manual download needed
 vm.set_language('de')
 vm.speak("Hallo! Ich spreche jetzt Deutsch.")
+# Spanish, Italian also supported
+vm.set_language('es')
+vm.speak("¡Hola! Hablo español ahora.")
+# If download fails, you'll get clear error messages with instructions
+# Example: "❌ Cannot switch to French: Model download failed"
+#          "   Try: abstractvoice download-models --language fr"
 ```
+**New in v0.5.0:** No more manual `download_model()` calls! Language switching handles downloads automatically.
 ### 🔧 Check System Status
 ```python
@@ -1363,20 +1384,22 @@ abstractvoice check-deps
 ### CLI Voice Commands
-In the CLI REPL, use these commands:
+In the CLI REPL, use these commands (v0.5.0+):
 ```bash
 # List all available voices with download status
 /setvoice
-# Download and set specific voice
-/setvoice fr.css10_vits      # French CSS10 VITS
-/setvoice de.thorsten_vits   # German Thorsten
-/setvoice it.mai_male_vits   # Italian Male
+# Automatically download and set specific voice (NEW in v0.5.0!)
+/setvoice fr.css10_vits      # Downloads French CSS10 if needed
+/setvoice de.thorsten_vits   # Downloads German Thorsten if needed
+/setvoice it.mai_male_vits   # Downloads Italian Male if needed
+/setvoice en.jenny           # Downloads Jenny voice if needed
-# Change language
-/language fr
-/language de
+# Change language (automatically downloads models if needed - NEW!)
+/language fr                 # Switches to French, downloads if needed
+/language de                 # Switches to German, downloads if needed
+/language es                 # Switches to Spanish, downloads if needed
 # Voice controls
 /pause                       # Pause current speech
@@ -1387,6 +1410,8 @@ In the CLI REPL, use these commands:
 /exit
 ```
+**New in v0.5.0:** Language and voice commands now automatically download missing models with progress indicators. No more silent failures!
 ## Perspectives
 This is a test project that I designed with examples to work with Ollama, but I will adapt the examples and abstractvoice to work with any LLM provider (anthropic, openai, etc).

{abstractvoice-0.4.1.dist-info → abstractvoice-0.5.0.dist-info}/RECORD RENAMED Viewed

@@ -1,23 +1,23 @@
-abstractvoice/__init__.py,sha256=HZYSCQ-xztoj7gWr5dVLBsGh4AYrViTYe8-ze4b-ynY,1011
+abstractvoice/__init__.py,sha256=EM_gfVmMvSOGeYPfMd8-AFThLrKwWh8TN4JFqdn0deU,1011
 abstractvoice/__main__.py,sha256=e6jhoONg3uwwPUCdnr68bSRTT1RrpWy2DrOJ6ozMJVc,4775
 abstractvoice/dependency_check.py,sha256=BUUADz4un4_FCZzNpgwk1qpJ6yqVi5Pvjfd3JLS8hAI,10045
-abstractvoice/model_manager.py,sha256=hnN3PTaY109mjTjgBuOB8yfAYVlMpqtMVBljLASRUi4,14275
+abstractvoice/instant_setup.py,sha256=_Q8T6tcMSor--1XPlgdOya3lvC-VtClHz4FSgDOXFNI,2667
 abstractvoice/recognition.py,sha256=4KtDUDFixEYuBUMDH2fWaD9csKlwA9tqXkMAkyQMSMo,11259
-abstractvoice/simple_model_manager.py,sha256=DTvEBEPtfu9zJA6V3S8SaWQ_pDYFlK_SoOMlnnRjBtk,13801
-abstractvoice/voice_manager.py,sha256=n7QHZPR1LWh3RjEBQ3LVrBKoOr5zccc3soKah5CBrac,32584
+abstractvoice/simple_model_manager.py,sha256=DfmrF3t3-V6rIWs4IvJmB_0Cck0LBY3YMvMzHGsHg4Q,19423
+abstractvoice/voice_manager.py,sha256=iw8Lu4VhsSMk0HQYlUU2NWAQR2C4dDiCAsiFjaeldiU,37664
 abstractvoice/examples/__init__.py,sha256=94vpKJDlfOrEBIUETg-57Q5Z7fYDidg6v4UzV7V_lZA,60
 abstractvoice/examples/cli_repl.py,sha256=kIgvgrGfyejX8-VFeFhvAVqrp3X-s-K3Ul861aM4Bh8,44220
-abstractvoice/examples/voice_cli.py,sha256=SYnFkz9KWWTISLgS2beJzb2tzLoz4dXpHQBWpKgS0sc,11585
+abstractvoice/examples/voice_cli.py,sha256=VdgDT01wly8HjWF53t_hDLkJoZc9FWQq2I-nxcSIAp8,11592
 abstractvoice/examples/web_api.py,sha256=0g5LKJpl7fZepPQJL25AcdaevV-xv34VqqyWGYYchPk,6376
 abstractvoice/stt/__init__.py,sha256=PFc6la3tTkxT4TJYwb0PnMIahM_hFtU4pNQdeKmbooo,120
 abstractvoice/stt/transcriber.py,sha256=GdaH1OsCHu4Vu9rUsQlzH6X9bfcnoiK5tGz1AW_uj6Q,5481
 abstractvoice/tts/__init__.py,sha256=WgJrxqdc_qaRyfFt1jbgMQD9S757jYuBpDzMRB02TFs,122
-abstractvoice/tts/tts_engine.py,sha256=HstJMwxTbZJx87Q-CY4mWeKHKbj17DhdvDdlch3xUNQ,49725
+abstractvoice/tts/tts_engine.py,sha256=iTa9eBH9vPH8VR2qoJX6nmNi6yERvA4Uz1jPu4OXrTA,55074
 abstractvoice/vad/__init__.py,sha256=RIIbFw25jNHgel06E4VvTWJnXjwjeFZ98m1Vx9hVjuo,119
 abstractvoice/vad/voice_detector.py,sha256=ghrhpDFlIR5TsMB2gpigXY6t5c_1yZ7vEX1imAMgWjc,3166
-abstractvoice-0.4.1.dist-info/licenses/LICENSE,sha256=TiDPM5WcFRQPoC5e46jGMeMppZ-eu0eFx_HytjE49bk,1105
-abstractvoice-0.4.1.dist-info/METADATA,sha256=AN_KjRcI2ZaetOIuAb9JOf4dvFWJI7AckQpV25wq2tI,40713
-abstractvoice-0.4.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-abstractvoice-0.4.1.dist-info/entry_points.txt,sha256=3bDX2dNOGvrsTx1wZ_o_hVgmM_a2zbcHc1ZkL154rN4,72
-abstractvoice-0.4.1.dist-info/top_level.txt,sha256=a1qyxqgF1O8cJtPKpcJuImGZ_uXqPNghbLZ9gp-UiOo,14
-abstractvoice-0.4.1.dist-info/RECORD,,
+abstractvoice-0.5.0.dist-info/licenses/LICENSE,sha256=TiDPM5WcFRQPoC5e46jGMeMppZ-eu0eFx_HytjE49bk,1105
+abstractvoice-0.5.0.dist-info/METADATA,sha256=cH2FfruHztr3vENO6zo93yuKHkf6KmqI0-C1GnHCYA0,42154
+abstractvoice-0.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+abstractvoice-0.5.0.dist-info/entry_points.txt,sha256=3bDX2dNOGvrsTx1wZ_o_hVgmM_a2zbcHc1ZkL154rN4,72
+abstractvoice-0.5.0.dist-info/top_level.txt,sha256=a1qyxqgF1O8cJtPKpcJuImGZ_uXqPNghbLZ9gp-UiOo,14
+abstractvoice-0.5.0.dist-info/RECORD,,

abstractvoice 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

abstractvoice 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl