abstractvoice 0.5.1__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. abstractvoice/__init__.py +2 -5
  2. abstractvoice/__main__.py +82 -3
  3. abstractvoice/adapters/__init__.py +12 -0
  4. abstractvoice/adapters/base.py +207 -0
  5. abstractvoice/adapters/stt_faster_whisper.py +401 -0
  6. abstractvoice/adapters/tts_piper.py +480 -0
  7. abstractvoice/aec/__init__.py +10 -0
  8. abstractvoice/aec/webrtc_apm.py +56 -0
  9. abstractvoice/artifacts.py +173 -0
  10. abstractvoice/audio/__init__.py +7 -0
  11. abstractvoice/audio/recorder.py +46 -0
  12. abstractvoice/audio/resample.py +25 -0
  13. abstractvoice/cloning/__init__.py +7 -0
  14. abstractvoice/cloning/engine_chroma.py +738 -0
  15. abstractvoice/cloning/engine_f5.py +546 -0
  16. abstractvoice/cloning/manager.py +349 -0
  17. abstractvoice/cloning/store.py +362 -0
  18. abstractvoice/compute/__init__.py +6 -0
  19. abstractvoice/compute/device.py +73 -0
  20. abstractvoice/config/__init__.py +2 -0
  21. abstractvoice/config/voice_catalog.py +19 -0
  22. abstractvoice/dependency_check.py +0 -1
  23. abstractvoice/examples/cli_repl.py +2403 -243
  24. abstractvoice/examples/voice_cli.py +64 -63
  25. abstractvoice/integrations/__init__.py +2 -0
  26. abstractvoice/integrations/abstractcore.py +116 -0
  27. abstractvoice/integrations/abstractcore_plugin.py +253 -0
  28. abstractvoice/prefetch.py +82 -0
  29. abstractvoice/recognition.py +424 -42
  30. abstractvoice/stop_phrase.py +103 -0
  31. abstractvoice/tts/__init__.py +3 -3
  32. abstractvoice/tts/adapter_tts_engine.py +210 -0
  33. abstractvoice/tts/tts_engine.py +257 -1208
  34. abstractvoice/vm/__init__.py +2 -0
  35. abstractvoice/vm/common.py +21 -0
  36. abstractvoice/vm/core.py +139 -0
  37. abstractvoice/vm/manager.py +108 -0
  38. abstractvoice/vm/stt_mixin.py +158 -0
  39. abstractvoice/vm/tts_mixin.py +550 -0
  40. abstractvoice/voice_manager.py +6 -1061
  41. abstractvoice-0.6.1.dist-info/METADATA +213 -0
  42. abstractvoice-0.6.1.dist-info/RECORD +52 -0
  43. {abstractvoice-0.5.1.dist-info → abstractvoice-0.6.1.dist-info}/WHEEL +1 -1
  44. abstractvoice-0.6.1.dist-info/entry_points.txt +6 -0
  45. abstractvoice/instant_setup.py +0 -83
  46. abstractvoice/simple_model_manager.py +0 -539
  47. abstractvoice-0.5.1.dist-info/METADATA +0 -1458
  48. abstractvoice-0.5.1.dist-info/RECORD +0 -23
  49. abstractvoice-0.5.1.dist-info/entry_points.txt +0 -2
  50. {abstractvoice-0.5.1.dist-info → abstractvoice-0.6.1.dist-info}/licenses/LICENSE +0 -0
  51. {abstractvoice-0.5.1.dist-info → abstractvoice-0.6.1.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,5 @@
1
- """TTS module for voice synthesis with interrupt handling."""
1
+ """TTS utilities (Piper-first)."""
2
2
 
3
- from .tts_engine import TTSEngine
3
+ from .tts_engine import NonBlockingAudioPlayer, apply_speed_without_pitch_change
4
4
 
5
- __all__ = ['TTSEngine']
5
+ __all__ = ["NonBlockingAudioPlayer", "apply_speed_without_pitch_change"]
@@ -0,0 +1,210 @@
1
+ """Adapter-backed TTSEngine facade.
2
+
3
+ Why this exists
4
+ ---------------
5
+ `VoiceManager` historically relied on a `TTSEngine` instance exposing:
6
+ - speak(text, speed, callback)
7
+ - stop/pause/resume/is_active/is_paused
8
+ - on_playback_start/on_playback_end callbacks
9
+ - an `audio_player` that supports immediate pause/resume
10
+
11
+ With the introduction of adapter-based engines (e.g. Piper), `VoiceManager`
12
+ must keep that internal contract stable to preserve backward compatibility
13
+ across the codebase (CLI, tests, integrations).
14
+
15
+ This module provides a small engine facade that wraps any `TTSAdapter` and
16
+ uses the existing `NonBlockingAudioPlayer` for playback control.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import threading
22
+ import time
23
+ from typing import Callable, Optional
24
+
25
+ import numpy as np
26
+
27
+ from .tts_engine import NonBlockingAudioPlayer, apply_speed_without_pitch_change
28
+ from ..adapters.base import TTSAdapter
29
+
30
+
31
class AdapterTTSEngine:
    """TTSEngine-compatible wrapper around a `TTSAdapter`.

    The wrapper synthesizes audio through ``adapter`` and hands it to a
    `NonBlockingAudioPlayer` for playback. It exposes the lifecycle hooks
    ``on_playback_start`` / ``on_playback_end`` plus an optional per-utterance
    callback, all of which are invoked on daemon threads.
    """

    # Fallback rate used when the adapter cannot report a usable sample rate.
    _DEFAULT_SAMPLE_RATE = 22050

    def __init__(self, adapter: TTSAdapter, debug_mode: bool = False):
        """Create the engine and its audio player.

        Args:
            adapter: TTS backend used by `speak()` for synthesis.
            debug_mode: Forwarded to the audio player.
        """
        self.adapter = adapter
        self.debug_mode = debug_mode

        # Lifecycle hooks assigned by the owner after construction.
        self.on_playback_start: Optional[Callable[[], None]] = None
        self.on_playback_end: Optional[Callable[[], None]] = None

        # One-shot callback for the current utterance/session.
        self._user_callback: Optional[Callable[[], None]] = None

        sample_rate = self._safe_sample_rate()
        self.audio_player = NonBlockingAudioPlayer(sample_rate=sample_rate, debug_mode=debug_mode)
        self.audio_player.playback_complete_callback = self._on_playback_complete

        # Best-effort last TTS metrics (used by verbose REPL output).
        self.last_tts_metrics: dict | None = None

    def _safe_sample_rate(self) -> int:
        """Return the adapter's sample rate, or the default if unusable.

        The default is returned when the adapter raises, or when it reports a
        non-positive rate (which would otherwise reach the player as-is).
        """
        try:
            rate = int(self.adapter.get_sample_rate())
        except Exception:
            return self._DEFAULT_SAMPLE_RATE
        return rate if rate > 0 else self._DEFAULT_SAMPLE_RATE

    def _sync_sample_rate(self) -> None:
        """Keep audio player sample rate aligned with adapter."""
        sr = self._safe_sample_rate()
        if getattr(self.audio_player, "sample_rate", None) == sr:
            return

        # If a stream is already open, keep it stable and rely on resampling at
        # enqueue time (see `NonBlockingAudioPlayer.play_audio(sample_rate=...)`).
        # This avoids frequent close/reopen cycles that can be flaky on some
        # PortAudio/CoreAudio device configurations.
        if getattr(self.audio_player, "stream", None) is not None:
            return

        self.audio_player.sample_rate = sr

    def _notify_playback_start(self) -> None:
        """Run `on_playback_start` (if set) on a daemon thread."""
        hook = self.on_playback_start
        if hook:
            threading.Thread(target=hook, daemon=True).start()

    def speak(self, text: str, speed: float = 1.0, callback=None) -> bool:
        """Synthesize and enqueue audio for playback (non-blocking).

        Args:
            text: Text passed to the adapter's ``synthesize``.
            speed: Playback speed factor; values other than 1.0 are applied
                via `apply_speed_without_pitch_change`.
            callback: Optional callable run once playback completes.

        Returns:
            True once the audio has been handed to the player.

        Raises:
            RuntimeError: If no adapter is set or it reports unavailable.
        """
        if not self.adapter or not self.adapter.is_available():
            raise RuntimeError("No TTS adapter available")

        self._sync_sample_rate()
        self._user_callback = callback
        # The start hook fires before synthesis begins.
        self._notify_playback_start()

        t0 = time.monotonic()
        audio: np.ndarray = self.adapter.synthesize(text)
        synth_s = float(time.monotonic() - t0)

        # Query the rate once so the speed change, the metrics and the
        # playback call all agree on the same value.
        sr = self._safe_sample_rate()

        # Best-effort speed handling. If librosa isn't installed, the helper
        # falls back to original audio (no crash).
        if speed and speed != 1.0:
            audio = apply_speed_without_pitch_change(audio, speed, sr=sr)

        try:
            audio_samples = int(len(audio)) if audio is not None else 0
        except Exception:
            audio_samples = 0
        audio_s = (float(audio_samples) / float(sr)) if sr and audio_samples else 0.0

        self.last_tts_metrics = {
            # NOTE(review): hard-coded although any TTSAdapter can be wrapped;
            # kept as-is because consumers may match on this value.
            "engine": "piper",
            "synth_s": synth_s,
            "audio_s": float(audio_s),
            # Real-time factor: synthesis time per second of produced audio.
            "rtf": (synth_s / float(audio_s)) if audio_s else None,
            "sample_rate": int(sr),
            "audio_samples": int(audio_samples),
            "speed": float(speed or 1.0),
            "ts": time.time(),
        }

        self.audio_player.play_audio(audio, sample_rate=sr)
        return True

    def begin_playback(self, callback=None, *, sample_rate: int | None = None) -> None:
        """Begin a playback session without synthesizing.

        Used for streaming/chunked playback where audio is enqueued progressively.

        Args:
            callback: Optional completion callback; when None, any previously
                stored callback is left untouched.
            sample_rate: Native rate of the audio that will be enqueued. When
                omitted, the player is aligned with the adapter instead.
        """
        if sample_rate is None:
            self._sync_sample_rate()
        elif getattr(self.audio_player, "stream", None) is None:
            # For externally-produced audio (e.g. cloning), prefer native sample
            # rate when we haven't opened an output stream yet. If a stream is
            # already open, keep it stable and resample on enqueue.
            self.audio_player.sample_rate = int(sample_rate)

        if callback is not None:
            self._user_callback = callback
        self._notify_playback_start()

    def enqueue_audio(self, audio: np.ndarray, *, sample_rate: int | None = None) -> None:
        """Enqueue audio into the underlying player (no extra callbacks)."""
        self.audio_player.play_audio(audio, sample_rate=sample_rate)

    def play_audio_array(
        self,
        audio: np.ndarray,
        callback=None,
        *,
        sample_rate: int | None = None,
    ) -> bool:
        """Play already-synthesized audio through the same playback pipeline.

        Used for optional features (e.g., voice cloning) that produce WAV bytes
        externally but still want to reuse the existing low-latency playback +
        lifecycle callbacks.

        Args:
            audio: Samples to play.
            callback: Optional callable run once playback completes; replaces
                any previously stored callback (including with None).
            sample_rate: Native rate of ``audio``. Defaults to the adapter's
                rate, which matches the previous behavior.

        Returns:
            True once the audio has been handed to the player.
        """
        self._user_callback = callback
        self._notify_playback_start()

        sr = int(sample_rate) if sample_rate is not None else self._safe_sample_rate()
        self.audio_player.play_audio(audio, sample_rate=sr)
        return True

    def _on_playback_complete(self) -> None:
        """Called by the audio player when playback fully drains."""
        end_hook = self.on_playback_end
        if end_hook:
            threading.Thread(target=end_hook, daemon=True).start()

        # Detach the one-shot callback BEFORE starting it: the callback may
        # register a new callback (e.g. by calling `speak` again), and clearing
        # afterwards could erase that new registration.
        pending, self._user_callback = self._user_callback, None
        if pending:
            threading.Thread(target=pending, daemon=True).start()

    def stop(self, *, close_stream: bool = True) -> bool:
        """Stop playback immediately and clear queued audio.

        By default we close the underlying output stream. Some interactive
        environments (macOS AUHAL in particular) can be flaky when repeatedly
        closing/reopening streams; callers can pass close_stream=False to keep
        the stream open and just flush playback state.

        Returns:
            False when there was neither an open stream nor active playback;
            True otherwise.
        """
        player = self.audio_player
        has_stream = getattr(player, "stream", None) is not None
        is_playing = bool(getattr(player, "is_playing", False))
        if not has_stream and not is_playing:
            return False

        if close_stream:
            player.stop_stream()
            return True

        # Keep stream open; just stop playback and clear buffers.
        try:
            player.clear_queue()
            player.is_playing = False
            player._audio_started = False  # noqa: SLF001 (internal flag; best-effort)
            player.current_audio = None
            player.current_position = 0
            try:
                with player._pause_lock:  # noqa: SLF001
                    player._paused = False  # noqa: SLF001
            except Exception:
                pass
        except Exception:
            # Best-effort; never crash caller during stop.
            pass
        return True

    def pause(self) -> bool:
        """Pause playback; returns the player's result."""
        return self.audio_player.pause()

    def resume(self) -> bool:
        """Resume playback; returns the player's result."""
        return self.audio_player.resume()

    def is_paused(self) -> bool:
        """Return the player's paused state."""
        return self.audio_player.is_paused_state()

    def is_active(self) -> bool:
        """Return True while the player reports `is_playing`."""
        return bool(getattr(self.audio_player, "is_playing", False))

    def cleanup(self) -> None:
        """Release player resources; errors are deliberately ignored."""
        try:
            self.audio_player.cleanup()
        except Exception:
            pass