abstractvoice-0.5.2-py3-none-any.whl → abstractvoice-0.6.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. abstractvoice/__init__.py +2 -5
  2. abstractvoice/__main__.py +82 -3
  3. abstractvoice/adapters/__init__.py +12 -0
  4. abstractvoice/adapters/base.py +207 -0
  5. abstractvoice/adapters/stt_faster_whisper.py +401 -0
  6. abstractvoice/adapters/tts_piper.py +480 -0
  7. abstractvoice/aec/__init__.py +10 -0
  8. abstractvoice/aec/webrtc_apm.py +56 -0
  9. abstractvoice/artifacts.py +173 -0
  10. abstractvoice/audio/__init__.py +7 -0
  11. abstractvoice/audio/recorder.py +46 -0
  12. abstractvoice/audio/resample.py +25 -0
  13. abstractvoice/cloning/__init__.py +7 -0
  14. abstractvoice/cloning/engine_chroma.py +738 -0
  15. abstractvoice/cloning/engine_f5.py +546 -0
  16. abstractvoice/cloning/manager.py +349 -0
  17. abstractvoice/cloning/store.py +362 -0
  18. abstractvoice/compute/__init__.py +6 -0
  19. abstractvoice/compute/device.py +73 -0
  20. abstractvoice/config/__init__.py +2 -0
  21. abstractvoice/config/voice_catalog.py +19 -0
  22. abstractvoice/dependency_check.py +0 -1
  23. abstractvoice/examples/cli_repl.py +2403 -243
  24. abstractvoice/examples/voice_cli.py +64 -63
  25. abstractvoice/integrations/__init__.py +2 -0
  26. abstractvoice/integrations/abstractcore.py +116 -0
  27. abstractvoice/integrations/abstractcore_plugin.py +253 -0
  28. abstractvoice/prefetch.py +82 -0
  29. abstractvoice/recognition.py +424 -42
  30. abstractvoice/stop_phrase.py +103 -0
  31. abstractvoice/tts/__init__.py +3 -3
  32. abstractvoice/tts/adapter_tts_engine.py +210 -0
  33. abstractvoice/tts/tts_engine.py +257 -1208
  34. abstractvoice/vm/__init__.py +2 -0
  35. abstractvoice/vm/common.py +21 -0
  36. abstractvoice/vm/core.py +139 -0
  37. abstractvoice/vm/manager.py +108 -0
  38. abstractvoice/vm/stt_mixin.py +158 -0
  39. abstractvoice/vm/tts_mixin.py +550 -0
  40. abstractvoice/voice_manager.py +6 -1061
  41. abstractvoice-0.6.1.dist-info/METADATA +213 -0
  42. abstractvoice-0.6.1.dist-info/RECORD +52 -0
  43. {abstractvoice-0.5.2.dist-info → abstractvoice-0.6.1.dist-info}/WHEEL +1 -1
  44. abstractvoice-0.6.1.dist-info/entry_points.txt +6 -0
  45. abstractvoice/instant_setup.py +0 -83
  46. abstractvoice/simple_model_manager.py +0 -539
  47. abstractvoice-0.5.2.dist-info/METADATA +0 -1458
  48. abstractvoice-0.5.2.dist-info/RECORD +0 -23
  49. abstractvoice-0.5.2.dist-info/entry_points.txt +0 -2
  50. {abstractvoice-0.5.2.dist-info → abstractvoice-0.6.1.dist-info}/licenses/LICENSE +0 -0
  51. {abstractvoice-0.5.2.dist-info → abstractvoice-0.6.1.dist-info}/top_level.txt +0 -0
abstractvoice/adapters/tts_piper.py
@@ -0,0 +1,480 @@
+ """Piper TTS Adapter - Zero-dependency TTS engine.
+
+ Piper is a fast, local neural text-to-speech system that:
+ - Requires NO system dependencies (no espeak-ng)
+ - Works on Windows, macOS, Linux out of the box
+ - Supports 40+ languages with 100+ voices
+ - Uses ONNX Runtime for cross-platform compatibility
+ - Has small model sizes (15-60MB vs 200-500MB VITS)
+ """
+
+ import gc
+ import os
+ import io
+ import logging
+ import numpy as np
+ from pathlib import Path
+ from typing import Optional, Dict, Any
+ import wave
+ import struct
+
+ from .base import TTSAdapter
+
+ logger = logging.getLogger(__name__)
+
+
+ class PiperTTSAdapter(TTSAdapter):
+     """Piper TTS adapter using piper-tts package.
+
+     This adapter provides cross-platform TTS without system dependencies,
+     making it ideal for easy installation on Windows, macOS, and Linux.
+     """
+
+     # Language-to-voice mapping (using quality 'medium' models for balance of size/quality)
+     # Format: language_code -> (hf_path, model_filename)
+     PIPER_MODELS = {
+         'en': ('en/en_US/amy/medium', 'en_US-amy-medium'),  # US English, female voice
+         'fr': ('fr/fr_FR/siwis/medium', 'fr_FR-siwis-medium'),  # France French
+         'de': ('de/de_DE/thorsten/medium', 'de_DE-thorsten-medium'),  # German
+         'es': ('es/es_ES/carlfm/medium', 'es_ES-carlfm-medium'),  # Spain Spanish
+         'ru': ('ru/ru_RU/dmitri/medium', 'ru_RU-dmitri-medium'),  # Russian
+         'zh': ('zh/zh_CN/huayan/medium', 'zh_CN-huayan-medium'),  # Mandarin Chinese
+     }
+
+     # Model download sizes (for user information)
+     MODEL_SIZES = {
+         'en': '50MB',
+         'fr': '45MB',
+         'de': '48MB',
+         'es': '47MB',
+         'ru': '52MB',
+         'zh': '55MB',
+     }
+
+     def __init__(
+         self,
+         language: str = "en",
+         model_dir: Optional[str] = None,
+         *,
+         allow_downloads: bool = True,
+         auto_load: bool = True,
+     ):
+         """Initialize Piper TTS adapter.
+
+         Args:
+             language: Initial language (default: 'en')
+             model_dir: Directory to store models (default: ~/.piper/models)
+         """
+         self._piper_available = False
+         self._voice = None
+         self._current_language = None
+         self._sample_rate = 22050  # Piper default
+         self._allow_downloads = bool(allow_downloads)
+
+         # Set model directory
+         if model_dir is None:
+             home = Path.home()
+             self._model_dir = home / '.piper' / 'models'
+         else:
+             self._model_dir = Path(model_dir)
+
+         self._model_dir.mkdir(parents=True, exist_ok=True)
+
+         # Try to import piper-tts
+         try:
+             from piper import PiperVoice
+             self._PiperVoice = PiperVoice
+             self._piper_available = True
+             logger.info("✅ Piper TTS initialized successfully")
+
+             # Load initial language model (best-effort). In offline-first contexts
+             # `allow_downloads=False` prevents implicit downloads and will fail fast
+             # if models are not already cached locally.
+             if bool(auto_load):
+                 self.set_language(language)
+
+         except ImportError as e:
+             logger.warning(f"⚠️ Piper TTS not available: {e}")
+             logger.info(
+                 "To install Piper TTS:\n"
+                 " pip install piper-tts>=1.2.0\n"
+                 "This will enable zero-dependency TTS on all platforms."
+             )
+
+     def _get_model_path(self, language: str) -> tuple[Path, Path]:
+         """Get paths for model and config files.
+
+         Args:
+             language: Language code
+
+         Returns:
+             Tuple of (model_path, config_path)
+         """
+         model_info = self.PIPER_MODELS.get(language)
+         if not model_info:
+             raise ValueError(f"Unsupported language: {language}")
+
+         _, model_filename = model_info
+         model_path = self._model_dir / f"{model_filename}.onnx"
+         config_path = self._model_dir / f"{model_filename}.onnx.json"
+
+         return model_path, config_path
+
+     def ensure_model_downloaded(self, language: str) -> bool:
+         """Explicitly download Piper model files for a language (no implicit calls).
+
+         This downloads the ONNX model + JSON config into the local cache directory.
+         """
+         try:
+             model_path, config_path = self._get_model_path(language)
+         except Exception:
+             return False
+
+         if model_path.exists() and config_path.exists():
+             return True
+
+         return bool(self._download_model(language))
+
+     def unload(self) -> None:
+         """Best-effort release of loaded voice/session to free memory."""
+         self._voice = None
+         try:
+             gc.collect()
+         except Exception:
+             pass
+
+     def _download_model(self, language: str) -> bool:
+         """Download Piper model for specified language using Hugging Face Hub.
+
+         Args:
+             language: Language code
+
+         Returns:
+             True if successful, False otherwise
+         """
+         if not self._piper_available:
+             return False
+
+         if not bool(getattr(self, "_allow_downloads", True)):
+             # Offline-first: never hit the network implicitly.
+             logger.info(f"ℹ️ Piper model for '{language}' not cached locally (offline mode).")
+             return False
+
+         model_info = self.PIPER_MODELS.get(language)
+         if not model_info:
+             logger.error(f"❌ No Piper model defined for language: {language}")
+             return False
+
+         hf_path, model_filename = model_info
+         model_path, config_path = self._get_model_path(language)
+
+         # Check if already downloaded
+         if model_path.exists() and config_path.exists():
+             logger.debug(f"✅ Model already exists: {model_filename}")
+             return True
+
+         # Download from Piper repository.
+         #
+         # IMPORTANT: we intentionally avoid importing `huggingface_hub` here.
+         # In some environments we've observed intermittent interpreter crashes
+         # during deep import chains (pure-Python packages should not segfault,
+         # which strongly suggests native extension interactions elsewhere).
+         #
+         # Using direct HTTPS downloads is simpler, more predictable, and keeps
+         # the adapter robust in "fresh install" scenarios.
+         logger.info(f"⬇️ Downloading Piper model for {language} ({self.MODEL_SIZES.get(language, 'unknown size')})...")
+
+         try:
+             repo_id = "rhasspy/piper-voices"
+             base_url = f"https://huggingface.co/{repo_id}/resolve/main"
+
+             def _download(url: str, dest: Path) -> None:
+                 import requests
+                 import tempfile
+
+                 dest.parent.mkdir(parents=True, exist_ok=True)
+
+                 with requests.get(url, stream=True, timeout=60) as r:
+                     r.raise_for_status()
+
+                     # Write atomically to avoid leaving corrupt partial files.
+                     with tempfile.NamedTemporaryFile(dir=str(dest.parent), delete=False) as tmp:
+                         for chunk in r.iter_content(chunk_size=1024 * 256):
+                             if chunk:
+                                 tmp.write(chunk)
+                         tmp_path = Path(tmp.name)
+
+                 tmp_path.replace(dest)
+
+             # Download model file
+             if not model_path.exists():
+                 logger.info(f" Downloading {model_path.name}...")
+                 _download(f"{base_url}/{hf_path}/{model_filename}.onnx", model_path)
+
+             # Download config file
+             if not config_path.exists():
+                 logger.info(f" Downloading {config_path.name}...")
+                 _download(f"{base_url}/{hf_path}/{model_filename}.onnx.json", config_path)
+
+             logger.info(f"✅ Successfully downloaded Piper model for {language}")
+             return True
+
+         except Exception as e:
+             logger.error(f"❌ Failed to download Piper model: {e}")
+             logger.info(f" If this persists, manually download from: https://huggingface.co/rhasspy/piper-voices")
+             # Clean up partial downloads
+             if model_path.exists():
+                 model_path.unlink()
+             if config_path.exists():
+                 config_path.unlink()
+             return False
+
+     def _load_voice(self, language: str) -> bool:
+         """Load Piper voice for specified language.
+
+         Args:
+             language: Language code
+
+         Returns:
+             True if successful, False otherwise
+         """
+         if not self._piper_available:
+             return False
+
+         # Download model if needed
+         model_path, config_path = self._get_model_path(language)
+         if not (model_path.exists() and config_path.exists()):
+             # Offline-first: do not attempt downloads unless explicitly allowed.
+             if not bool(getattr(self, "_allow_downloads", True)):
+                 return False
+             if not self._download_model(language):
+                 return False
+
+         # Load the voice
+         try:
+             logger.debug(f"Loading Piper voice: {model_path}")
+             self._voice = self._PiperVoice.load(str(model_path), str(config_path))
+             self._current_language = language
+
+             # Update sample rate from config
+             if hasattr(self._voice, 'config') and hasattr(self._voice.config, 'sample_rate'):
+                 self._sample_rate = self._voice.config.sample_rate
+
+             logger.info(f"✅ Loaded Piper voice for {language}")
+             return True
+
+         except Exception as e:
+             logger.error(f"❌ Failed to load Piper voice for {language}: {e}")
+             return False
+
+     def synthesize(self, text: str) -> np.ndarray:
+         """Convert text to audio array for immediate playback.
+
+         Args:
+             text: The text to synthesize
+
+         Returns:
+             Audio data as numpy array (float32, range -1.0 to 1.0)
+         """
+         if not self.is_available():
+             raise RuntimeError("Piper TTS is not available. Install with: pip install piper-tts>=1.2.0")
+
+         if not self._voice:
+             raise RuntimeError(f"No voice loaded. Call set_language() first.")
+
+         try:
+             # Piper synthesize returns an iterable of AudioChunk objects
+             audio_chunks = list(self._voice.synthesize(text))
+
+             if not audio_chunks:
+                 return np.array([], dtype=np.float32)
+
+             # Combine all audio chunks into single array
+             # Each chunk has audio_float_array attribute with normalized float32 audio
+             audio_arrays = [chunk.audio_float_array for chunk in audio_chunks]
+
+             # Concatenate all arrays
+             audio_array = np.concatenate(audio_arrays)
+
+             return audio_array
+
+         except Exception as e:
+             logger.error(f"❌ Piper synthesis failed: {e}")
+             raise RuntimeError(f"Piper synthesis failed: {e}") from e
+
+     def synthesize_to_bytes(self, text: str, format: str = 'wav') -> bytes:
+         """Convert text to audio bytes for network transmission.
+
+         Args:
+             text: The text to synthesize
+             format: Audio format ('wav' only supported currently)
+
+         Returns:
+             Audio data as bytes in WAV format
+         """
+         if format.lower() != 'wav':
+             raise ValueError(f"Piper adapter currently only supports WAV format, not {format}")
+
+         # Get audio array
+         audio_array = self.synthesize(text)
+
+         # Convert to bytes
+         return self._array_to_wav_bytes(audio_array)
+
+     def synthesize_to_file(self, text: str, output_path: str, format: Optional[str] = None) -> str:
+         """Convert text to audio file.
+
+         Args:
+             text: The text to synthesize
+             output_path: Path to save the audio file
+             format: Audio format (optional, inferred from extension)
+
+         Returns:
+             Path to the saved audio file
+         """
+         # Infer format from extension if not provided
+         if format is None:
+             format = Path(output_path).suffix.lstrip('.')
+
+         if format.lower() != 'wav':
+             raise ValueError(f"Piper adapter currently only supports WAV format, not {format}")
+
+         # Get audio bytes
+         audio_bytes = self.synthesize_to_bytes(text, format='wav')
+
+         # Write to file
+         output_path = Path(output_path)
+         output_path.parent.mkdir(parents=True, exist_ok=True)
+
+         with open(output_path, 'wb') as f:
+             f.write(audio_bytes)
+
+         logger.info(f"✅ Saved audio to: {output_path}")
+         return str(output_path)
+
+     def _array_to_wav_bytes(self, audio_array: np.ndarray) -> bytes:
+         """Convert numpy array to WAV bytes.
+
+         Args:
+             audio_array: Audio as float32 array [-1.0, 1.0]
+
+         Returns:
+             WAV file as bytes
+         """
+         # Convert to 16-bit PCM
+         audio_int16 = (audio_array * 32767).astype(np.int16)
+
+         # Create WAV file in memory
+         buffer = io.BytesIO()
+
+         with wave.open(buffer, 'wb') as wav_file:
+             wav_file.setnchannels(1)  # Mono
+             wav_file.setsampwidth(2)  # 16-bit
+             wav_file.setframerate(self._sample_rate)
+             wav_file.writeframes(audio_int16.tobytes())
+
+         return buffer.getvalue()
+
+     def set_language(self, language: str) -> bool:
+         """Switch the TTS language.
+
+         Args:
+             language: ISO 639-1 language code (e.g., 'en', 'fr', 'de')
+
+         Returns:
+             True if language switch successful, False otherwise
+         """
+         if language not in self.PIPER_MODELS:
+             logger.warning(f"⚠️ Language {language} not supported by Piper adapter")
+             return False
+
+         # Don't reload if already loaded
+         if self._current_language == language and self._voice is not None:
+             logger.debug(f"Language {language} already loaded")
+             return True
+
+         # Load new voice
+         return self._load_voice(language)
+
+     def get_supported_languages(self) -> list[str]:
+         """Get list of supported language codes.
+
+         Returns:
+             List of ISO 639-1 language codes
+         """
+         return list(self.PIPER_MODELS.keys())
+
+     def get_sample_rate(self) -> int:
+         """Get the sample rate of the synthesized audio.
+
+         Returns:
+             Sample rate in Hz (typically 22050)
+         """
+         return self._sample_rate
+
+     def is_available(self) -> bool:
+         """Check if Piper TTS is available and functional.
+
+         Returns:
+             True if Piper can be used, False otherwise
+         """
+         return self._piper_available and self._voice is not None
+
+     def get_info(self) -> Dict[str, Any]:
+         """Get metadata about Piper TTS engine.
+
+         Returns:
+             Dictionary with engine information
+         """
+         info = super().get_info()
+         info.update({
+             'engine': 'Piper TTS',
+             'version': '1.2.0+',
+             'current_language': self._current_language,
+             'model_dir': str(self._model_dir),
+             'requires_system_deps': False,
+             'cross_platform': True
+         })
+         return info
+
+     def list_available_models(self, language: Optional[str] = None) -> Dict[str, Any]:
+         """List available Piper voices with cache status.
+
+         This is a small, stable introspection surface used by the CLI to present
+         selectable voices. Piper model downloads happen on-demand in `set_language()`.
+         """
+         def _parse_size_mb(size: str) -> int:
+             try:
+                 return int(str(size).lower().replace("mb", "").strip())
+             except Exception:
+                 return 0
+
+         def _voice_id_from_hf_path(hf_path: str) -> str:
+             # e.g. "en/en_US/amy/medium" -> "amy"
+             parts = (hf_path or "").split("/")
+             return parts[2] if len(parts) >= 3 else hf_path
+
+         models: Dict[str, Any] = {}
+         languages = [language] if language else list(self.PIPER_MODELS.keys())
+
+         for lang in languages:
+             if lang not in self.PIPER_MODELS:
+                 continue
+
+             hf_path, model_filename = self.PIPER_MODELS[lang]
+             voice_id = _voice_id_from_hf_path(hf_path)
+             model_path, config_path = self._get_model_path(lang)
+             cached = model_path.exists() and config_path.exists()
+
+             models.setdefault(lang, {})
+             models[lang][voice_id] = {
+                 "name": f"Piper {voice_id}",
+                 "quality": "medium",
+                 "size_mb": _parse_size_mb(self.MODEL_SIZES.get(lang, "0MB")),
+                 "description": f"Default Piper voice for {lang}",
+                 "requires_espeak": False,
+                 "cached": cached,
+                 "model_filename": model_filename,
+             }
+
+         return models
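
A minimal usage sketch for the adapter above, using only methods defined in this hunk; it assumes `piper-tts` is installed and that the English model is either already cached under ~/.piper/models or may be downloaded:

    from abstractvoice.adapters.tts_piper import PiperTTSAdapter

    # Construct without loading anything, pre-fetch the model explicitly,
    # then load the voice and write a WAV file.
    tts = PiperTTSAdapter(language="en", auto_load=False)
    tts.ensure_model_downloaded("en")   # no-op if already cached
    if tts.set_language("en"):
        tts.synthesize_to_file("Hello from Piper.", "hello.wav")

Passing allow_downloads=False instead keeps the adapter strictly offline: set_language() then fails fast when the model files are not cached.
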
abstractvoice/aec/__init__.py
@@ -0,0 +1,10 @@
+ """Optional acoustic echo cancellation (AEC) support.
+
+ This package is intentionally behind an optional extra:
+     pip install "abstractvoice[aec]"
+ """
+
+ from .webrtc_apm import WebRtcAecProcessor
+
+ __all__ = ["WebRtcAecProcessor"]
+
abstractvoice/aec/webrtc_apm.py
@@ -0,0 +1,56 @@
+ from __future__ import annotations
+
+ from dataclasses import dataclass
+
+
+ @dataclass(frozen=True)
+ class AecConfig:
+     sample_rate: int = 16000
+     channels: int = 1
+     stream_delay_ms: int = 0
+     enable_ns: bool = True
+     enable_agc: bool = False
+
+
+ class WebRtcAecProcessor:
+     """Thin wrapper around `aec-audio-processing` (WebRTC APM).
+
+     Design goals:
+     - Optional dependency (import only when enabled)
+     - Byte-oriented processing (PCM16) to integrate with our VAD/STT pipeline
+     """
+
+     def __init__(self, cfg: AecConfig):
+         self.cfg = cfg
+
+         try:
+             from aec_audio_processing import AudioProcessor  # type: ignore
+         except Exception as e:
+             raise ImportError(
+                 "AEC is optional and requires extra dependencies.\n"
+                 "Install with: pip install \"abstractvoice[aec]\"\n"
+                 f"Original error: {e}"
+             ) from e
+
+         ap = AudioProcessor(
+             enable_aec=True,
+             enable_ns=bool(cfg.enable_ns),
+             enable_agc=bool(cfg.enable_agc),
+         )
+         ap.set_stream_format(int(cfg.sample_rate), int(cfg.channels))
+         ap.set_reverse_stream_format(int(cfg.sample_rate), int(cfg.channels))
+         try:
+             ap.set_stream_delay(int(cfg.stream_delay_ms))
+         except Exception:
+             # Best-effort: some builds may not expose delay control.
+             pass
+
+         self._ap = ap
+
+     def process(self, *, near_pcm16: bytes, far_pcm16: bytes) -> bytes:
+         """Process one chunk: feed far-end then clean near-end."""
+         # The WebRTC APM expects reverse stream first.
+         if far_pcm16:
+             self._ap.process_reverse_stream(far_pcm16)
+         return self._ap.process_stream(near_pcm16)
+
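
A minimal sketch of how the AEC wrapper above could be wired into a capture loop, assuming the optional `abstractvoice[aec]` extra is installed; the chunking and the helper function are illustrative choices, not something this diff mandates:

    from abstractvoice.aec import WebRtcAecProcessor
    from abstractvoice.aec.webrtc_apm import AecConfig

    # 16 kHz mono PCM16, matching the defaults in AecConfig.
    aec = WebRtcAecProcessor(AecConfig(sample_rate=16000, channels=1))

    def clean_chunk(mic_pcm16: bytes, speaker_pcm16: bytes) -> bytes:
        # Feed what was just played (far end), then clean what was just recorded (near end).
        return aec.process(near_pcm16=mic_pcm16, far_pcm16=speaker_pcm16)

Setting stream_delay_ms to the measured playback-to-capture latency should improve echo suppression on builds that expose delay control.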