PyPI - abstractvoice - Versions diffs - 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

abstractvoice 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

abstractvoice/__main__.py +20 -10
abstractvoice/examples/cli_repl.py +198 -13
abstractvoice/examples/voice_cli.py +20 -6
abstractvoice/recognition.py +50 -7
abstractvoice/stt/transcriber.py +17 -2
abstractvoice/tts/tts_engine.py +84 -32
abstractvoice/vad/voice_detector.py +16 -2
abstractvoice/voice_manager.py +558 -16
{abstractvoice-0.1.1.dist-info → abstractvoice-0.2.0.dist-info}/METADATA +228 -50
abstractvoice-0.2.0.dist-info/RECORD +20 -0
{abstractvoice-0.1.1.dist-info → abstractvoice-0.2.0.dist-info}/licenses/LICENSE +1 -1
abstractvoice-0.1.1.dist-info/RECORD +0 -20
{abstractvoice-0.1.1.dist-info → abstractvoice-0.2.0.dist-info}/WHEEL +0 -0
{abstractvoice-0.1.1.dist-info → abstractvoice-0.2.0.dist-info}/entry_points.txt +0 -0
{abstractvoice-0.1.1.dist-info → abstractvoice-0.2.0.dist-info}/top_level.txt +0 -0

abstractvoice/__main__.py CHANGED Viewed

@@ -15,20 +15,25 @@ def print_examples():
     print("  cli       - Command-line REPL example")
     print("  web       - Web API example")
     print("  simple    - Simple usage example")
-    print("\nUsage: python -m abstractvoice <example> [args...]")
+    print("\nUsage: python -m abstractvoice <example> [--language <lang>] [args...]")
+    print("\nSupported languages: en, fr, es, de, it, ru, multilingual")
+    print("\nExamples:")
+    print("  python -m abstractvoice cli --language fr    # French CLI")
+    print("  python -m abstractvoice simple --language ru # Russian simple example")
 def simple_example():
     """Run a simple example demonstrating basic usage."""
     from abstractvoice import VoiceManager
     import time
     print("Simple AbstractVoice Example")
     print("============================")
     print("This example demonstrates basic TTS and STT functionality.")
+    print("(Use --language argument to test different languages)")
     print()
-    # Initialize voice manager
+    # Initialize voice manager (can be overridden with --language)
     manager = VoiceManager(debug_mode=True)
     try:
@@ -91,17 +96,22 @@ def main():
     """Main entry point."""
     parser = argparse.ArgumentParser(description="AbstractVoice examples")
     parser.add_argument("example", nargs="?", help="Example to run (cli, web, simple)")
-    # Parse just the first argument
+    parser.add_argument("--language", "--lang", default="en",
+                      choices=["en", "fr", "es", "de", "it", "ru", "multilingual"],
+                      help="Voice language for examples")
+    # Parse just the first argument and language
     args, remaining = parser.parse_known_args()
     if not args.example:
         print_examples()
         return
-    # Set remaining args as sys.argv for the examples
+    # Set remaining args as sys.argv for the examples, including language
+    if args.language != "en":
+        remaining = ["--language", args.language] + remaining
     sys.argv = [sys.argv[0]] + remaining
     if args.example == "cli":
         from abstractvoice.examples.cli_repl import main
         main()

abstractvoice/examples/cli_repl.py CHANGED Viewed

@@ -37,21 +37,28 @@ class VoiceREPL(cmd.Cmd):
     ruler = ""  # No horizontal rule line
     use_rawinput = True
-    def __init__(self, api_url="http://localhost:11434/api/chat",
-                 model="granite3.3:2b", debug_mode=False):
+    def __init__(self, api_url="http://localhost:11434/api/chat",
+                 model="granite3.3:2b", debug_mode=False, language="en", tts_model=None):
         super().__init__()
         # Debug mode
         self.debug_mode = debug_mode
         # API settings
         self.api_url = api_url
         self.model = model
         self.temperature = 0.4
         self.max_tokens = 4096
-        # Initialize voice manager
-        self.voice_manager = VoiceManager(debug_mode=debug_mode)
+        # Language settings
+        self.current_language = language
+        # Initialize voice manager with language support
+        self.voice_manager = VoiceManager(
+            language=language,
+            tts_model=tts_model,
+            debug_mode=debug_mode
+        )
         # Settings
         self.use_tts = True
@@ -83,10 +90,12 @@ class VoiceREPL(cmd.Cmd):
     def _get_intro(self):
         """Generate intro message with help."""
         intro = f"\n{Colors.BOLD}Welcome to AbstractVoice CLI REPL{Colors.END}\n"
-        intro += f"API: {self.api_url} | Model: {self.model}\n"
+        lang_name = self.voice_manager.get_language_name()
+        intro += f"API: {self.api_url} | Model: {self.model} | Voice: {lang_name}\n"
         intro += f"\n{Colors.CYAN}Quick Start:{Colors.END}\n"
         intro += "  • Type messages to chat with the LLM\n"
         intro += "  • Use /voice <mode> to enable voice input\n"
+        intro += "  • Use /language <lang> to switch voice language\n"
         intro += "  • Type /help for full command list\n"
         intro += "  • Type /exit or /q to quit\n"
         return intro
@@ -278,7 +287,172 @@ class VoiceREPL(cmd.Cmd):
             text = re.sub(pattern, "", text, flags=re.DOTALL)
         return text.strip()
+    def do_language(self, args):
+        """Switch voice language.
+        Usage: /language <lang>
+        Available languages: en, fr, es, de, it
+        """
+        if not args:
+            current_name = self.voice_manager.get_language_name()
+            current_code = self.voice_manager.get_language()
+            print(f"Current language: {current_name} ({current_code})")
+            print("Available languages:")
+            for code in self.voice_manager.get_supported_languages():
+                name = self.voice_manager.get_language_name(code)
+                print(f"  {code} - {name}")
+            return
+        language = args.strip().lower()
+        # Stop any current voice activity
+        if self.voice_mode_active:
+            self._voice_stop_callback()
+            was_active = True
+        else:
+            was_active = False
+        # Switch language
+        old_lang = self.current_language
+        if self.voice_manager.set_language(language):
+            self.current_language = language
+            old_name = self.voice_manager.get_language_name(old_lang)
+            new_name = self.voice_manager.get_language_name(language)
+            print(f"🌍 Language changed: {old_name} → {new_name}")
+            # Test the new language with localized message
+            test_messages = {
+                'en': "Language switched to English.",
+                'fr': "Langue changée en français.",
+                'es': "Idioma cambiado a español.",
+                'de': "Sprache auf Deutsch umgestellt.",
+                'it': "Lingua cambiata in italiano."
+            }
+            test_msg = test_messages.get(language, "Language switched.")
+            self.voice_manager.speak(test_msg)
+            # Restart voice mode if it was active
+            if was_active:
+                self.do_voice(self.voice_mode)
+        else:
+            supported = ', '.join(self.voice_manager.get_supported_languages())
+            print(f"Failed to switch to language: {language}")
+            print(f"Supported languages: {supported}")
+            if self.debug_mode:
+                import traceback
+                traceback.print_exc()
+    def do_setvoice(self, args):
+        """Set a specific voice model.
+        Usage:
+          /setvoice                    # Show all available voices
+          /setvoice <voice_id>         # Set voice (format: language.voice_id)
+        Examples:
+          /setvoice                    # List all voices
+          /setvoice fr.css10_vits      # Set French CSS10 VITS voice
+          /setvoice it.mai_male_vits   # Set Italian male VITS voice
+        """
+        if not args:
+            # Show all available voices organized by language
+            print(f"\n{Colors.CYAN}Available Voice Models:{Colors.END}")
+            self.voice_manager.list_voices()
+            print(f"\n{Colors.YELLOW}Usage:{Colors.END}")
+            print("  /setvoice <language>.<voice_id>")
+            print("  Example: /setvoice fr.css10_vits")
+            return
+        voice_spec = args.strip()
+        # Parse language.voice_id format
+        if '.' not in voice_spec:
+            print(f"❌ Invalid format. Use: language.voice_id")
+            print(f"   Example: /setvoice fr.css10_vits")
+            print(f"   Run '/setvoice' to see available voices")
+            return
+        try:
+            language, voice_id = voice_spec.split('.', 1)
+        except ValueError:
+            print(f"❌ Invalid format. Use: language.voice_id")
+            return
+        # Stop any current voice activity
+        if self.voice_mode_active:
+            self._voice_stop_callback()
+            was_active = True
+        else:
+            was_active = False
+        # Set the specific voice
+        try:
+            success = self.voice_manager.set_voice(language, voice_id)
+            if success:
+                # Update current language to match the voice
+                self.current_language = language
+                # Get voice info for confirmation
+                voice_info = self.voice_manager.VOICE_CATALOG.get(language, {}).get(voice_id, {})
+                lang_name = self.voice_manager.get_language_name(language)
+                print(f"✅ Voice changed successfully!")
+                print(f"   Language: {lang_name} ({language})")
+                print(f"   Voice: {voice_id}")
+                if voice_info:
+                    quality_icon = "✨" if voice_info.get('quality') == 'premium' else "🔧"
+                    gender_icon = {"male": "👨", "female": "👩", "multiple": "👥"}.get(voice_info.get('gender'), "🗣️")
+                    print(f"   Details: {quality_icon} {gender_icon} {voice_info.get('accent', 'Unknown accent')}")
+                # Test the new voice
+                test_messages = {
+                    'en': "Voice changed to English.",
+                    'fr': "Voix changée en français.",
+                    'es': "Voz cambiada al español.",
+                    'de': "Stimme auf Deutsch geändert.",
+                    'it': "Voce cambiata in italiano."
+                }
+                test_msg = test_messages.get(language, "Voice changed successfully.")
+                self.voice_manager.speak(test_msg)
+                # Restart voice mode if it was active
+                if was_active:
+                    self.do_voice(self.voice_mode)
+            else:
+                print(f"❌ Failed to set voice: {voice_spec}")
+                print(f"   Run '/setvoice' to see available voices")
+        except Exception as e:
+            print(f"❌ Error setting voice: {e}")
+            if self.debug_mode:
+                import traceback
+                traceback.print_exc()
+    def do_lang_info(self, args):
+        """Show current language information."""
+        info = self.voice_manager.get_language_info()
+        print(f"\n{Colors.CYAN}Current Language Information:{Colors.END}")
+        print(f"  Language: {info['name']} ({info['code']})")
+        print(f"  Model: {info['model']}")
+        print(f"  Available models: {list(info['available_models'].keys())}")
+        # Check if XTTS supports multiple languages
+        if 'xtts' in (info['model'] or '').lower():
+            print(f"  ✅ Supports multilingual synthesis")
+        else:
+            print(f"  ℹ️ Monolingual model")
+    def do_list_languages(self, args):
+        """List all supported languages."""
+        print(f"\n{Colors.CYAN}Supported Languages:{Colors.END}")
+        for lang in self.voice_manager.get_supported_languages():
+            name = self.voice_manager.get_language_name(lang)
+            current = " (current)" if lang == self.current_language else ""
+            print(f"  {lang} - {name}{current}")
     def do_voice(self, arg):
         """Control voice input mode.
@@ -554,6 +728,10 @@ class VoiceREPL(cmd.Cmd):
         print("  /clear              Clear history")
         print("  /tts on|off         Toggle TTS")
         print("  /voice <mode>       Voice input: off|full|wait|stop|ptt")
+        print("  /language <lang>    Switch voice language (en, fr, es, de, it)")
+        print("  /setvoice [id]      List voices or set specific voice (lang.voice_id)")
+        print("  /lang_info          Show current language information")
+        print("  /list_languages     List all supported languages")
         print("  /speed <number>     Set TTS speed (0.5-2.0, default: 1.0, pitch preserved)")
         print("  /tts_model <model>  Switch TTS model: vits(best)|fast_pitch|glow-tts|tacotron2-DDC")
         print("  /whisper <model>    Switch Whisper model: tiny|base|small|medium|large")
@@ -831,10 +1009,15 @@ def parse_args():
     """Parse command line arguments."""
     parser = argparse.ArgumentParser(description="AbstractVoice CLI Example")
     parser.add_argument("--debug", action="store_true", help="Enable debug mode")
-    parser.add_argument("--api", default="http://localhost:11434/api/chat",
+    parser.add_argument("--api", default="http://localhost:11434/api/chat",
                       help="LLM API URL")
-    parser.add_argument("--model", default="granite3.3:2b",
+    parser.add_argument("--model", default="granite3.3:2b",
                       help="LLM model name")
+    parser.add_argument("--language", "--lang", default="en",
+                      choices=["en", "fr", "es", "de", "it", "ru", "multilingual"],
+                      help="Voice language (en=English, fr=French, es=Spanish, de=German, it=Italian, ru=Russian, multilingual=All)")
+    parser.add_argument("--tts-model",
+                      help="Specific TTS model to use (overrides language default)")
     return parser.parse_args()
@@ -844,11 +1027,13 @@ def main():
         # Parse command line arguments
         args = parse_args()
-        # Initialize and run REPL
+        # Initialize and run REPL with language support
         repl = VoiceREPL(
             api_url=args.api,
             model=args.model,
-            debug_mode=args.debug
+            debug_mode=args.debug,
+            language=args.language,
+            tts_model=args.tts_model
         )
         repl.cmdloop()
     except KeyboardInterrupt:

abstractvoice/examples/voice_cli.py CHANGED Viewed

@@ -13,20 +13,25 @@ def parse_args():
     """Parse command line arguments."""
     parser = argparse.ArgumentParser(description="AbstractVoice Voice Mode")
     parser.add_argument("--debug", action="store_true", help="Enable debug mode")
-    parser.add_argument("--api", default="http://localhost:11434/api/chat",
+    parser.add_argument("--api", default="http://localhost:11434/api/chat",
                       help="LLM API URL")
-    parser.add_argument("--model", default="granite3.3:2b",
+    parser.add_argument("--model", default="granite3.3:2b",
                       help="LLM model name")
     parser.add_argument("--whisper", default="tiny",
                       help="Whisper model to use (tiny, base, small, medium, large)")
     parser.add_argument("--no-listening", action="store_true",
                       help="Disable speech-to-text (listening), TTS still works")
-    parser.add_argument("--system",
+    parser.add_argument("--system",
                       help="Custom system prompt")
     parser.add_argument("--temperature", type=float, default=0.4,
                       help="Set temperature (0.0-2.0) for the LLM")
     parser.add_argument("--max-tokens", type=int, default=4096,
                       help="Set maximum tokens for the LLM response")
+    parser.add_argument("--language", "--lang", default="en",
+                      choices=["en", "fr", "es", "de", "it", "ru", "multilingual"],
+                      help="Voice language (en=English, fr=French, es=Spanish, de=German, it=Italian, ru=Russian, multilingual=All)")
+    parser.add_argument("--tts-model",
+                      help="Specific TTS model to use (overrides language default)")
     return parser.parse_args()
 def main():
@@ -35,13 +40,22 @@ def main():
         # Parse command line arguments
         args = parse_args()
-        print("Starting AbstractVoice voice interface...")
+        # Show language information
+        language_names = {
+            'en': 'English', 'fr': 'French', 'es': 'Spanish',
+            'de': 'German', 'it': 'Italian', 'ru': 'Russian',
+            'multilingual': 'Multilingual'
+        }
+        lang_name = language_names.get(args.language, args.language)
+        print(f"Starting AbstractVoice voice interface ({lang_name})...")
-        # Initialize REPL
+        # Initialize REPL with language support
         repl = VoiceREPL(
             api_url=args.api,
             model=args.model,
-            debug_mode=args.debug
+            debug_mode=args.debug,
+            language=args.language,
+            tts_model=args.tts_model
         )
         # Set custom system prompt if provided

abstractvoice/recognition.py CHANGED Viewed

@@ -2,9 +2,50 @@
 import threading
 import time
-import pyaudio
-from .vad import VoiceDetector
-from .stt import Transcriber
+# Lazy imports for heavy dependencies
+def _import_audio_deps():
+    """Import audio dependencies with helpful error message if missing."""
+    try:
+        import pyaudio
+        return pyaudio
+    except ImportError as e:
+        raise ImportError(
+            "Audio functionality requires optional dependencies. Install with:\n"
+            "  pip install abstractvoice[voice]  # For basic audio\n"
+            "  pip install abstractvoice[all]    # For all features\n"
+            f"Original error: {e}"
+        ) from e
+def _import_vad():
+    """Import VoiceDetector with helpful error message if dependencies missing."""
+    try:
+        from .vad import VoiceDetector
+        return VoiceDetector
+    except ImportError as e:
+        if "webrtcvad" in str(e):
+            raise ImportError(
+                "Voice activity detection requires optional dependencies. Install with:\n"
+                "  pip install abstractvoice[voice]  # For basic audio\n"
+                "  pip install abstractvoice[all]    # For all features\n"
+                f"Original error: {e}"
+            ) from e
+        raise
+def _import_transcriber():
+    """Import Transcriber with helpful error message if dependencies missing."""
+    try:
+        from .stt import Transcriber
+        return Transcriber
+    except ImportError as e:
+        if "whisper" in str(e) or "tiktoken" in str(e):
+            raise ImportError(
+                "Speech recognition functionality requires optional dependencies. Install with:\n"
+                "  pip install abstractvoice[stt]    # For speech recognition only\n"
+                "  pip install abstractvoice[all]    # For all features\n"
+                f"Original error: {e}"
+            ) from e
+        raise
 class VoiceRecognizer:
@@ -40,13 +81,15 @@ class VoiceRecognizer:
         self.min_speech_chunks = int(min_speech_duration / chunk_duration)
         self.silence_timeout_chunks = int(silence_timeout / chunk_duration)
-        # Initialize components
+        # Initialize components using lazy imports
+        VoiceDetector = _import_vad()
         self.voice_detector = VoiceDetector(
             aggressiveness=vad_aggressiveness,
             sample_rate=sample_rate,
             debug_mode=debug_mode
         )
+        Transcriber = _import_transcriber()
         self.transcriber = Transcriber(
             model_name=whisper_model,
             min_transcription_length=min_transcription_length,
@@ -109,8 +152,8 @@ class VoiceRecognizer:
     def _recognition_loop(self):
         """Main recognition loop."""
-        import pyaudio
+        pyaudio = _import_audio_deps()
         self.pyaudio = pyaudio.PyAudio()
         self.stream = self.pyaudio.open(
             format=pyaudio.paInt16,

abstractvoice/stt/transcriber.py CHANGED Viewed

@@ -1,11 +1,24 @@
 """Speech-to-text transcription using OpenAI's Whisper."""
-import whisper
 import numpy as np
 import os
 import sys
 import logging
+# Lazy import for heavy dependencies
+def _import_whisper():
+    """Import whisper with helpful error message if dependencies missing."""
+    try:
+        import whisper
+        return whisper
+    except ImportError as e:
+        raise ImportError(
+            "Speech recognition functionality requires optional dependencies. Install with:\n"
+            "  pip install abstractvoice[stt]    # For speech recognition only\n"
+            "  pip install abstractvoice[all]    # For all features\n"
+            f"Original error: {e}"
+        ) from e
 class Transcriber:
     """Transcribes audio using OpenAI's Whisper model."""
@@ -38,7 +51,8 @@ class Transcriber:
                 null_out = open(os.devnull, 'w')
                 sys.stdout = null_out
-            # Load the Whisper model
+            # Load the Whisper model using lazy import
+            whisper = _import_whisper()
             self.model = whisper.load_model(model_name)
         finally:
             # Restore stdout if we redirected it
@@ -120,6 +134,7 @@ class Transcriber:
                 sys.stdout = null_out
             try:
+                whisper = _import_whisper()
                 self.model = whisper.load_model(model_name)
                 self.model_name = model_name
             finally:

abstractvoice 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

abstractvoice 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl