abstractvoice-0.5.2-py3-none-any.whl → abstractvoice-0.6.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. abstractvoice/__init__.py +2 -5
  2. abstractvoice/__main__.py +82 -3
  3. abstractvoice/adapters/__init__.py +12 -0
  4. abstractvoice/adapters/base.py +207 -0
  5. abstractvoice/adapters/stt_faster_whisper.py +401 -0
  6. abstractvoice/adapters/tts_piper.py +480 -0
  7. abstractvoice/aec/__init__.py +10 -0
  8. abstractvoice/aec/webrtc_apm.py +56 -0
  9. abstractvoice/artifacts.py +173 -0
  10. abstractvoice/audio/__init__.py +7 -0
  11. abstractvoice/audio/recorder.py +46 -0
  12. abstractvoice/audio/resample.py +25 -0
  13. abstractvoice/cloning/__init__.py +7 -0
  14. abstractvoice/cloning/engine_chroma.py +738 -0
  15. abstractvoice/cloning/engine_f5.py +546 -0
  16. abstractvoice/cloning/manager.py +349 -0
  17. abstractvoice/cloning/store.py +362 -0
  18. abstractvoice/compute/__init__.py +6 -0
  19. abstractvoice/compute/device.py +73 -0
  20. abstractvoice/config/__init__.py +2 -0
  21. abstractvoice/config/voice_catalog.py +19 -0
  22. abstractvoice/dependency_check.py +0 -1
  23. abstractvoice/examples/cli_repl.py +2403 -243
  24. abstractvoice/examples/voice_cli.py +64 -63
  25. abstractvoice/integrations/__init__.py +2 -0
  26. abstractvoice/integrations/abstractcore.py +116 -0
  27. abstractvoice/integrations/abstractcore_plugin.py +253 -0
  28. abstractvoice/prefetch.py +82 -0
  29. abstractvoice/recognition.py +424 -42
  30. abstractvoice/stop_phrase.py +103 -0
  31. abstractvoice/tts/__init__.py +3 -3
  32. abstractvoice/tts/adapter_tts_engine.py +210 -0
  33. abstractvoice/tts/tts_engine.py +257 -1208
  34. abstractvoice/vm/__init__.py +2 -0
  35. abstractvoice/vm/common.py +21 -0
  36. abstractvoice/vm/core.py +139 -0
  37. abstractvoice/vm/manager.py +108 -0
  38. abstractvoice/vm/stt_mixin.py +158 -0
  39. abstractvoice/vm/tts_mixin.py +550 -0
  40. abstractvoice/voice_manager.py +6 -1061
  41. abstractvoice-0.6.1.dist-info/METADATA +213 -0
  42. abstractvoice-0.6.1.dist-info/RECORD +52 -0
  43. {abstractvoice-0.5.2.dist-info → abstractvoice-0.6.1.dist-info}/WHEEL +1 -1
  44. abstractvoice-0.6.1.dist-info/entry_points.txt +6 -0
  45. abstractvoice/instant_setup.py +0 -83
  46. abstractvoice/simple_model_manager.py +0 -539
  47. abstractvoice-0.5.2.dist-info/METADATA +0 -1458
  48. abstractvoice-0.5.2.dist-info/RECORD +0 -23
  49. abstractvoice-0.5.2.dist-info/entry_points.txt +0 -2
  50. {abstractvoice-0.5.2.dist-info → abstractvoice-0.6.1.dist-info}/licenses/LICENSE +0 -0
  51. {abstractvoice-0.5.2.dist-info → abstractvoice-0.6.1.dist-info}/top_level.txt +0 -0
abstractvoice/adapters/tts_piper.py
@@ -0,0 +1,480 @@
+ """Piper TTS Adapter - Zero-dependency TTS engine.
+
+ Piper is a fast, local neural text-to-speech system that:
+ - Requires NO system dependencies (no espeak-ng)
+ - Works on Windows, macOS, Linux out of the box
+ - Supports 40+ languages with 100+ voices
+ - Uses ONNX Runtime for cross-platform compatibility
+ - Has small model sizes (15-60MB vs 200-500MB VITS)
+ """
+
+ import gc
+ import os
+ import io
+ import logging
+ import numpy as np
+ from pathlib import Path
+ from typing import Optional, Dict, Any
+ import wave
+ import struct
+
+ from .base import TTSAdapter
+
+ logger = logging.getLogger(__name__)
+
+
+ class PiperTTSAdapter(TTSAdapter):
+     """Piper TTS adapter using piper-tts package.
+
+     This adapter provides cross-platform TTS without system dependencies,
+     making it ideal for easy installation on Windows, macOS, and Linux.
+     """
+
+     # Language-to-voice mapping (using quality 'medium' models for balance of size/quality)
+     # Format: language_code -> (hf_path, model_filename)
+     PIPER_MODELS = {
+         'en': ('en/en_US/amy/medium', 'en_US-amy-medium'),  # US English, female voice
+         'fr': ('fr/fr_FR/siwis/medium', 'fr_FR-siwis-medium'),  # France French
+         'de': ('de/de_DE/thorsten/medium', 'de_DE-thorsten-medium'),  # German
+         'es': ('es/es_ES/carlfm/medium', 'es_ES-carlfm-medium'),  # Spain Spanish
+         'ru': ('ru/ru_RU/dmitri/medium', 'ru_RU-dmitri-medium'),  # Russian
+         'zh': ('zh/zh_CN/huayan/medium', 'zh_CN-huayan-medium'),  # Mandarin Chinese
+     }
+
+     # Model download sizes (for user information)
+     MODEL_SIZES = {
+         'en': '50MB',
+         'fr': '45MB',
+         'de': '48MB',
+         'es': '47MB',
+         'ru': '52MB',
+         'zh': '55MB',
+     }
+
+     def __init__(
+         self,
+         language: str = "en",
+         model_dir: Optional[str] = None,
+         *,
+         allow_downloads: bool = True,
+         auto_load: bool = True,
+     ):
+         """Initialize Piper TTS adapter.
+
+         Args:
+             language: Initial language (default: 'en')
+             model_dir: Directory to store models (default: ~/.piper/models)
+         """
+         self._piper_available = False
+         self._voice = None
+         self._current_language = None
+         self._sample_rate = 22050  # Piper default
+         self._allow_downloads = bool(allow_downloads)
+
+         # Set model directory
+         if model_dir is None:
+             home = Path.home()
+             self._model_dir = home / '.piper' / 'models'
+         else:
+             self._model_dir = Path(model_dir)
+
+         self._model_dir.mkdir(parents=True, exist_ok=True)
+
+         # Try to import piper-tts
+         try:
+             from piper import PiperVoice
+             self._PiperVoice = PiperVoice
+             self._piper_available = True
+             logger.info("✅ Piper TTS initialized successfully")
+
+             # Load initial language model (best-effort). In offline-first contexts
+             # `allow_downloads=False` prevents implicit downloads and will fail fast
+             # if models are not already cached locally.
+             if bool(auto_load):
+                 self.set_language(language)
+
+         except ImportError as e:
+             logger.warning(f"⚠️ Piper TTS not available: {e}")
+             logger.info(
+                 "To install Piper TTS:\n"
+                 " pip install piper-tts>=1.2.0\n"
+                 "This will enable zero-dependency TTS on all platforms."
+             )
+
+     def _get_model_path(self, language: str) -> tuple[Path, Path]:
+         """Get paths for model and config files.
+
+         Args:
+             language: Language code
+
+         Returns:
+             Tuple of (model_path, config_path)
+         """
+         model_info = self.PIPER_MODELS.get(language)
+         if not model_info:
+             raise ValueError(f"Unsupported language: {language}")
+
+         _, model_filename = model_info
+         model_path = self._model_dir / f"{model_filename}.onnx"
+         config_path = self._model_dir / f"{model_filename}.onnx.json"
+
+         return model_path, config_path
+
+     def ensure_model_downloaded(self, language: str) -> bool:
+         """Explicitly download Piper model files for a language (no implicit calls).
+
+         This downloads the ONNX model + JSON config into the local cache directory.
+         """
+         try:
+             model_path, config_path = self._get_model_path(language)
+         except Exception:
+             return False
+
+         if model_path.exists() and config_path.exists():
+             return True
+
+         return bool(self._download_model(language))
+
+     def unload(self) -> None:
+         """Best-effort release of loaded voice/session to free memory."""
+         self._voice = None
+         try:
+             gc.collect()
+         except Exception:
+             pass
+
+     def _download_model(self, language: str) -> bool:
+         """Download Piper model for specified language using Hugging Face Hub.
+
+         Args:
+             language: Language code
+
+         Returns:
+             True if successful, False otherwise
+         """
+         if not self._piper_available:
+             return False
+
+         if not bool(getattr(self, "_allow_downloads", True)):
+             # Offline-first: never hit the network implicitly.
+             logger.info(f"ℹ️ Piper model for '{language}' not cached locally (offline mode).")
+             return False
+
+         model_info = self.PIPER_MODELS.get(language)
+         if not model_info:
+             logger.error(f"❌ No Piper model defined for language: {language}")
+             return False
+
+         hf_path, model_filename = model_info
+         model_path, config_path = self._get_model_path(language)
+
+         # Check if already downloaded
+         if model_path.exists() and config_path.exists():
+             logger.debug(f"✅ Model already exists: {model_filename}")
+             return True
+
+         # Download from Piper repository.
+         #
+         # IMPORTANT: we intentionally avoid importing `huggingface_hub` here.
+         # In some environments we've observed intermittent interpreter crashes
+         # during deep import chains (pure-Python packages should not segfault,
+         # which strongly suggests native extension interactions elsewhere).
+         #
+         # Using direct HTTPS downloads is simpler, more predictable, and keeps
+         # the adapter robust in "fresh install" scenarios.
+         logger.info(f"⬇️ Downloading Piper model for {language} ({self.MODEL_SIZES.get(language, 'unknown size')})...")
+
+         try:
+             repo_id = "rhasspy/piper-voices"
+             base_url = f"https://huggingface.co/{repo_id}/resolve/main"
+
+             def _download(url: str, dest: Path) -> None:
+                 import requests
+                 import tempfile
+
+                 dest.parent.mkdir(parents=True, exist_ok=True)
+
+                 with requests.get(url, stream=True, timeout=60) as r:
+                     r.raise_for_status()
+
+                     # Write atomically to avoid leaving corrupt partial files.
+                     with tempfile.NamedTemporaryFile(dir=str(dest.parent), delete=False) as tmp:
+                         for chunk in r.iter_content(chunk_size=1024 * 256):
+                             if chunk:
+                                 tmp.write(chunk)
+                         tmp_path = Path(tmp.name)
+
+                 tmp_path.replace(dest)
+
+             # Download model file
+             if not model_path.exists():
+                 logger.info(f" Downloading {model_path.name}...")
+                 _download(f"{base_url}/{hf_path}/{model_filename}.onnx", model_path)
+
+             # Download config file
+             if not config_path.exists():
+                 logger.info(f" Downloading {config_path.name}...")
+                 _download(f"{base_url}/{hf_path}/{model_filename}.onnx.json", config_path)
+
+             logger.info(f"✅ Successfully downloaded Piper model for {language}")
+             return True
+
+         except Exception as e:
+             logger.error(f"❌ Failed to download Piper model: {e}")
+             logger.info(f" If this persists, manually download from: https://huggingface.co/rhasspy/piper-voices")
+             # Clean up partial downloads
+             if model_path.exists():
+                 model_path.unlink()
+             if config_path.exists():
+                 config_path.unlink()
+             return False
+
+     def _load_voice(self, language: str) -> bool:
+         """Load Piper voice for specified language.
+
+         Args:
+             language: Language code
+
+         Returns:
+             True if successful, False otherwise
+         """
+         if not self._piper_available:
+             return False
+
+         # Download model if needed
+         model_path, config_path = self._get_model_path(language)
+         if not (model_path.exists() and config_path.exists()):
+             # Offline-first: do not attempt downloads unless explicitly allowed.
+             if not bool(getattr(self, "_allow_downloads", True)):
+                 return False
+             if not self._download_model(language):
+                 return False
+
+         # Load the voice
+         try:
+             logger.debug(f"Loading Piper voice: {model_path}")
+             self._voice = self._PiperVoice.load(str(model_path), str(config_path))
+             self._current_language = language
+
+             # Update sample rate from config
+             if hasattr(self._voice, 'config') and hasattr(self._voice.config, 'sample_rate'):
+                 self._sample_rate = self._voice.config.sample_rate
+
+             logger.info(f"✅ Loaded Piper voice for {language}")
+             return True
+
+         except Exception as e:
+             logger.error(f"❌ Failed to load Piper voice for {language}: {e}")
+             return False
+
+     def synthesize(self, text: str) -> np.ndarray:
+         """Convert text to audio array for immediate playback.
+
+         Args:
+             text: The text to synthesize
+
+         Returns:
+             Audio data as numpy array (float32, range -1.0 to 1.0)
+         """
+         if not self.is_available():
+             raise RuntimeError("Piper TTS is not available. Install with: pip install piper-tts>=1.2.0")
+
+         if not self._voice:
+             raise RuntimeError(f"No voice loaded. Call set_language() first.")
+
+         try:
+             # Piper synthesize returns an iterable of AudioChunk objects
+             audio_chunks = list(self._voice.synthesize(text))
+
+             if not audio_chunks:
+                 return np.array([], dtype=np.float32)
+
+             # Combine all audio chunks into single array
+             # Each chunk has audio_float_array attribute with normalized float32 audio
+             audio_arrays = [chunk.audio_float_array for chunk in audio_chunks]
+
+             # Concatenate all arrays
+             audio_array = np.concatenate(audio_arrays)
+
+             return audio_array
+
+         except Exception as e:
+             logger.error(f"❌ Piper synthesis failed: {e}")
+             raise RuntimeError(f"Piper synthesis failed: {e}") from e
+
+     def synthesize_to_bytes(self, text: str, format: str = 'wav') -> bytes:
+         """Convert text to audio bytes for network transmission.
+
+         Args:
+             text: The text to synthesize
+             format: Audio format ('wav' only supported currently)
+
+         Returns:
+             Audio data as bytes in WAV format
+         """
+         if format.lower() != 'wav':
+             raise ValueError(f"Piper adapter currently only supports WAV format, not {format}")
+
+         # Get audio array
+         audio_array = self.synthesize(text)
+
+         # Convert to bytes
+         return self._array_to_wav_bytes(audio_array)
+
+     def synthesize_to_file(self, text: str, output_path: str, format: Optional[str] = None) -> str:
+         """Convert text to audio file.
+
+         Args:
+             text: The text to synthesize
+             output_path: Path to save the audio file
+             format: Audio format (optional, inferred from extension)
+
+         Returns:
+             Path to the saved audio file
+         """
+         # Infer format from extension if not provided
+         if format is None:
+             format = Path(output_path).suffix.lstrip('.')
+
+         if format.lower() != 'wav':
+             raise ValueError(f"Piper adapter currently only supports WAV format, not {format}")
+
+         # Get audio bytes
+         audio_bytes = self.synthesize_to_bytes(text, format='wav')
+
+         # Write to file
+         output_path = Path(output_path)
+         output_path.parent.mkdir(parents=True, exist_ok=True)
+
+         with open(output_path, 'wb') as f:
+             f.write(audio_bytes)
+
+         logger.info(f"✅ Saved audio to: {output_path}")
+         return str(output_path)
+
+     def _array_to_wav_bytes(self, audio_array: np.ndarray) -> bytes:
+         """Convert numpy array to WAV bytes.
+
+         Args:
+             audio_array: Audio as float32 array [-1.0, 1.0]
+
+         Returns:
+             WAV file as bytes
+         """
+         # Convert to 16-bit PCM
+         audio_int16 = (audio_array * 32767).astype(np.int16)
+
+         # Create WAV file in memory
+         buffer = io.BytesIO()
+
+         with wave.open(buffer, 'wb') as wav_file:
+             wav_file.setnchannels(1)  # Mono
+             wav_file.setsampwidth(2)  # 16-bit
+             wav_file.setframerate(self._sample_rate)
+             wav_file.writeframes(audio_int16.tobytes())
+
+         return buffer.getvalue()
+
+     def set_language(self, language: str) -> bool:
+         """Switch the TTS language.
+
+         Args:
+             language: ISO 639-1 language code (e.g., 'en', 'fr', 'de')
+
+         Returns:
+             True if language switch successful, False otherwise
+         """
+         if language not in self.PIPER_MODELS:
+             logger.warning(f"⚠️ Language {language} not supported by Piper adapter")
+             return False
+
+         # Don't reload if already loaded
+         if self._current_language == language and self._voice is not None:
+             logger.debug(f"Language {language} already loaded")
+             return True
+
+         # Load new voice
+         return self._load_voice(language)
+
+     def get_supported_languages(self) -> list[str]:
+         """Get list of supported language codes.
+
+         Returns:
+             List of ISO 639-1 language codes
+         """
+         return list(self.PIPER_MODELS.keys())
+
+     def get_sample_rate(self) -> int:
+         """Get the sample rate of the synthesized audio.
+
+         Returns:
+             Sample rate in Hz (typically 22050)
+         """
+         return self._sample_rate
+
+     def is_available(self) -> bool:
+         """Check if Piper TTS is available and functional.
+
+         Returns:
+             True if Piper can be used, False otherwise
+         """
+         return self._piper_available and self._voice is not None
+
+     def get_info(self) -> Dict[str, Any]:
+         """Get metadata about Piper TTS engine.
+
+         Returns:
+             Dictionary with engine information
+         """
+         info = super().get_info()
+         info.update({
+             'engine': 'Piper TTS',
+             'version': '1.2.0+',
+             'current_language': self._current_language,
+             'model_dir': str(self._model_dir),
+             'requires_system_deps': False,
+             'cross_platform': True
+         })
+         return info
+
+     def list_available_models(self, language: Optional[str] = None) -> Dict[str, Any]:
+         """List available Piper voices with cache status.
+
+         This is a small, stable introspection surface used by the CLI to present
+         selectable voices. Piper model downloads happen on-demand in `set_language()`.
+         """
+         def _parse_size_mb(size: str) -> int:
+             try:
+                 return int(str(size).lower().replace("mb", "").strip())
+             except Exception:
+                 return 0
+
+         def _voice_id_from_hf_path(hf_path: str) -> str:
+             # e.g. "en/en_US/amy/medium" -> "amy"
+             parts = (hf_path or "").split("/")
+             return parts[2] if len(parts) >= 3 else hf_path
+
+         models: Dict[str, Any] = {}
+         languages = [language] if language else list(self.PIPER_MODELS.keys())
+
+         for lang in languages:
+             if lang not in self.PIPER_MODELS:
+                 continue
+
+             hf_path, model_filename = self.PIPER_MODELS[lang]
+             voice_id = _voice_id_from_hf_path(hf_path)
+             model_path, config_path = self._get_model_path(lang)
+             cached = model_path.exists() and config_path.exists()
+
+             models.setdefault(lang, {})
+             models[lang][voice_id] = {
+                 "name": f"Piper {voice_id}",
+                 "quality": "medium",
+                 "size_mb": _parse_size_mb(self.MODEL_SIZES.get(lang, "0MB")),
+                 "description": f"Default Piper voice for {lang}",
+                 "requires_espeak": False,
+                 "cached": cached,
+                 "model_filename": model_filename,
+             }
+
+         return models
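
A minimal usage sketch for the adapter above, using only methods defined in this hunk; it assumes `piper-tts` is installed and that the English model is either already cached under ~/.piper/models or may be downloaded:

    from abstractvoice.adapters.tts_piper import PiperTTSAdapter

    # Construct without loading anything, pre-fetch the model explicitly,
    # then load the voice and write a WAV file.
    tts = PiperTTSAdapter(language="en", auto_load=False)
    tts.ensure_model_downloaded("en")   # no-op if already cached
    if tts.set_language("en"):
        tts.synthesize_to_file("Hello from Piper.", "hello.wav")

Passing allow_downloads=False instead keeps the adapter strictly offline: set_language() then fails fast when the model files are not cached.
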
abstractvoice/aec/__init__.py
@@ -0,0 +1,10 @@
+ """Optional acoustic echo cancellation (AEC) support.
+
+ This package is intentionally behind an optional extra:
+     pip install "abstractvoice[aec]"
+ """
+
+ from .webrtc_apm import WebRtcAecProcessor
+
+ __all__ = ["WebRtcAecProcessor"]
+
abstractvoice/aec/webrtc_apm.py
@@ -0,0 +1,56 @@
+ from __future__ import annotations
+
+ from dataclasses import dataclass
+
+
+ @dataclass(frozen=True)
+ class AecConfig:
+     sample_rate: int = 16000
+     channels: int = 1
+     stream_delay_ms: int = 0
+     enable_ns: bool = True
+     enable_agc: bool = False
+
+
+ class WebRtcAecProcessor:
+     """Thin wrapper around `aec-audio-processing` (WebRTC APM).
+
+     Design goals:
+     - Optional dependency (import only when enabled)
+     - Byte-oriented processing (PCM16) to integrate with our VAD/STT pipeline
+     """
+
+     def __init__(self, cfg: AecConfig):
+         self.cfg = cfg
+
+         try:
+             from aec_audio_processing import AudioProcessor  # type: ignore
+         except Exception as e:
+             raise ImportError(
+                 "AEC is optional and requires extra dependencies.\n"
+                 "Install with: pip install \"abstractvoice[aec]\"\n"
+                 f"Original error: {e}"
+             ) from e
+
+         ap = AudioProcessor(
+             enable_aec=True,
+             enable_ns=bool(cfg.enable_ns),
+             enable_agc=bool(cfg.enable_agc),
+         )
+         ap.set_stream_format(int(cfg.sample_rate), int(cfg.channels))
+         ap.set_reverse_stream_format(int(cfg.sample_rate), int(cfg.channels))
+         try:
+             ap.set_stream_delay(int(cfg.stream_delay_ms))
+         except Exception:
+             # Best-effort: some builds may not expose delay control.
+             pass
+
+         self._ap = ap
+
+     def process(self, *, near_pcm16: bytes, far_pcm16: bytes) -> bytes:
+         """Process one chunk: feed far-end then clean near-end."""
+         # The WebRTC APM expects reverse stream first.
+         if far_pcm16:
+             self._ap.process_reverse_stream(far_pcm16)
+         return self._ap.process_stream(near_pcm16)
+
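
A minimal sketch of how the AEC wrapper above could be wired into a capture loop, assuming the optional `abstractvoice[aec]` extra is installed; the chunking and the helper function are illustrative choices, not something this diff mandates:

    from abstractvoice.aec import WebRtcAecProcessor
    from abstractvoice.aec.webrtc_apm import AecConfig

    # 16 kHz mono PCM16, matching the defaults in AecConfig.
    aec = WebRtcAecProcessor(AecConfig(sample_rate=16000, channels=1))

    def clean_chunk(mic_pcm16: bytes, speaker_pcm16: bytes) -> bytes:
        # Feed what was just played (far end), then clean what was just recorded (near end).
        return aec.process(near_pcm16=mic_pcm16, far_pcm16=speaker_pcm16)

Setting stream_delay_ms to the measured playback-to-capture latency should improve echo suppression on builds that expose delay control.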