abstractvoice 0.5.1__py3-none-any.whl → 0.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractvoice/__init__.py +2 -5
- abstractvoice/__main__.py +82 -3
- abstractvoice/adapters/__init__.py +12 -0
- abstractvoice/adapters/base.py +207 -0
- abstractvoice/adapters/stt_faster_whisper.py +401 -0
- abstractvoice/adapters/tts_piper.py +480 -0
- abstractvoice/aec/__init__.py +10 -0
- abstractvoice/aec/webrtc_apm.py +56 -0
- abstractvoice/artifacts.py +173 -0
- abstractvoice/audio/__init__.py +7 -0
- abstractvoice/audio/recorder.py +46 -0
- abstractvoice/audio/resample.py +25 -0
- abstractvoice/cloning/__init__.py +7 -0
- abstractvoice/cloning/engine_chroma.py +738 -0
- abstractvoice/cloning/engine_f5.py +546 -0
- abstractvoice/cloning/manager.py +349 -0
- abstractvoice/cloning/store.py +362 -0
- abstractvoice/compute/__init__.py +6 -0
- abstractvoice/compute/device.py +73 -0
- abstractvoice/config/__init__.py +2 -0
- abstractvoice/config/voice_catalog.py +19 -0
- abstractvoice/dependency_check.py +0 -1
- abstractvoice/examples/cli_repl.py +2403 -243
- abstractvoice/examples/voice_cli.py +64 -63
- abstractvoice/integrations/__init__.py +2 -0
- abstractvoice/integrations/abstractcore.py +116 -0
- abstractvoice/integrations/abstractcore_plugin.py +253 -0
- abstractvoice/prefetch.py +82 -0
- abstractvoice/recognition.py +424 -42
- abstractvoice/stop_phrase.py +103 -0
- abstractvoice/tts/__init__.py +3 -3
- abstractvoice/tts/adapter_tts_engine.py +210 -0
- abstractvoice/tts/tts_engine.py +257 -1208
- abstractvoice/vm/__init__.py +2 -0
- abstractvoice/vm/common.py +21 -0
- abstractvoice/vm/core.py +139 -0
- abstractvoice/vm/manager.py +108 -0
- abstractvoice/vm/stt_mixin.py +158 -0
- abstractvoice/vm/tts_mixin.py +550 -0
- abstractvoice/voice_manager.py +6 -1061
- abstractvoice-0.6.1.dist-info/METADATA +213 -0
- abstractvoice-0.6.1.dist-info/RECORD +52 -0
- {abstractvoice-0.5.1.dist-info → abstractvoice-0.6.1.dist-info}/WHEEL +1 -1
- abstractvoice-0.6.1.dist-info/entry_points.txt +6 -0
- abstractvoice/instant_setup.py +0 -83
- abstractvoice/simple_model_manager.py +0 -539
- abstractvoice-0.5.1.dist-info/METADATA +0 -1458
- abstractvoice-0.5.1.dist-info/RECORD +0 -23
- abstractvoice-0.5.1.dist-info/entry_points.txt +0 -2
- {abstractvoice-0.5.1.dist-info → abstractvoice-0.6.1.dist-info}/licenses/LICENSE +0 -0
- {abstractvoice-0.5.1.dist-info → abstractvoice-0.6.1.dist-info}/top_level.txt +0 -0
abstractvoice/tts/__init__.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
"""TTS
|
|
1
|
+
"""TTS utilities (Piper-first)."""
|
|
2
2
|
|
|
3
|
-
from .tts_engine import
|
|
3
|
+
from .tts_engine import NonBlockingAudioPlayer, apply_speed_without_pitch_change
|
|
4
4
|
|
|
5
|
-
__all__ = [
|
|
5
|
+
__all__ = ["NonBlockingAudioPlayer", "apply_speed_without_pitch_change"]
|
|
abstractvoice/tts/adapter_tts_engine.py
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
"""Adapter-backed TTSEngine facade.
|
|
2
|
+
|
|
3
|
+
Why this exists
|
|
4
|
+
---------------
|
|
5
|
+
`VoiceManager` historically relied on a `TTSEngine` instance exposing:
|
|
6
|
+
- speak(text, speed, callback)
|
|
7
|
+
- stop/pause/resume/is_active/is_paused
|
|
8
|
+
- on_playback_start/on_playback_end callbacks
|
|
9
|
+
- an `audio_player` that supports immediate pause/resume
|
|
10
|
+
|
|
11
|
+
With the introduction of adapter-based engines (e.g. Piper), `VoiceManager`
|
|
12
|
+
must keep that internal contract stable to preserve backward compatibility
|
|
13
|
+
across the codebase (CLI, tests, integrations).
|
|
14
|
+
|
|
15
|
+
This module provides a small engine facade that wraps any `TTSAdapter` and
|
|
16
|
+
uses the existing `NonBlockingAudioPlayer` for playback control.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import threading
|
|
22
|
+
import time
|
|
23
|
+
from typing import Callable, Optional
|
|
24
|
+
|
|
25
|
+
import numpy as np
|
|
26
|
+
|
|
27
|
+
from .tts_engine import NonBlockingAudioPlayer, apply_speed_without_pitch_change
|
|
28
|
+
from ..adapters.base import TTSAdapter
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class AdapterTTSEngine:
    """TTSEngine-compatible wrapper around a `TTSAdapter`.

    Synthesis is delegated to ``adapter``; queueing, pause/resume and stream
    handling are delegated to the `NonBlockingAudioPlayer` held in
    ``audio_player``.

    Attributes assigned by callers:
        on_playback_start: optional zero-argument hook, run on a daemon thread
            each time a playback session begins (`speak`, `begin_playback`,
            `play_audio_array`).
        on_playback_end: optional zero-argument hook, run on a daemon thread
            when the player reports completion, or when `speak` fails before
            audio was handed to the player.

    Attributes maintained by the engine:
        last_tts_metrics: figures describing the most recent successful
            synthesis in `speak`, or ``None`` before the first one.
    """

    def __init__(self, adapter: TTSAdapter, debug_mode: bool = False):
        """Wrap ``adapter`` and create the audio player used for playback.

        Args:
            adapter: Object providing ``is_available()``, ``synthesize(text)``
                and ``get_sample_rate()``.
            debug_mode: Stored on the engine and forwarded to the player.
        """
        self.adapter = adapter
        self.debug_mode = debug_mode

        self.on_playback_start: Optional[Callable[[], None]] = None
        self.on_playback_end: Optional[Callable[[], None]] = None

        # Per-session completion callback; consumed by `_on_playback_complete`.
        self._user_callback: Optional[Callable[[], None]] = None

        sample_rate = self._safe_sample_rate()
        self.audio_player = NonBlockingAudioPlayer(sample_rate=sample_rate, debug_mode=debug_mode)
        self.audio_player.playback_complete_callback = self._on_playback_complete

        # Best-effort last TTS metrics (used by verbose REPL output).
        self.last_tts_metrics: dict | None = None

    @staticmethod
    def _run_in_background(hook) -> None:
        """Run ``hook`` on a daemon thread; do nothing when it is unset."""
        if hook:
            threading.Thread(target=hook, daemon=True).start()

    def _safe_sample_rate(self) -> int:
        """Return the adapter's sample rate as an int, or 22050 if it cannot be read."""
        try:
            return int(self.adapter.get_sample_rate())
        except Exception:
            return 22050

    def _sync_sample_rate(self) -> None:
        """Keep audio player sample rate aligned with adapter."""
        sr = self._safe_sample_rate()
        if getattr(self.audio_player, "sample_rate", None) == sr:
            return

        # If a stream is already open, keep it stable and rely on resampling at
        # enqueue time (see `NonBlockingAudioPlayer.play_audio(sample_rate=...)`).
        # This avoids frequent close/reopen cycles that can be flaky on some
        # PortAudio/CoreAudio device configurations.
        if getattr(self.audio_player, "stream", None) is not None:
            return

        self.audio_player.sample_rate = sr

    def speak(self, text: str, speed: float = 1.0, callback=None) -> bool:
        """Synthesize and enqueue audio for playback (non-blocking).

        Args:
            text: Text passed to ``adapter.synthesize``.
            speed: Playback speed factor; values other than 1.0 are applied
                through `apply_speed_without_pitch_change`.
            callback: Optional zero-argument callable run once the player
                reports that playback has completed.

        Returns:
            True once the audio has been handed to the player.

        Raises:
            RuntimeError: If there is no adapter or it reports unavailable.
            Exception: Whatever synthesis, the speed change or the player
                raises is propagated unchanged.
        """
        if not self.adapter or not self.adapter.is_available():
            raise RuntimeError("No TTS adapter available")

        self._sync_sample_rate()

        self._user_callback = callback
        self._run_in_background(self.on_playback_start)

        try:
            t0 = time.monotonic()
            audio = self.adapter.synthesize(text)
            t1 = time.monotonic()

            # Query the rate once so the speed change, the metrics and the
            # playback request all agree on the same value.
            sr = self._safe_sample_rate()

            # Best-effort speed handling. If librosa isn't installed, the helper
            # falls back to original audio (no crash).
            if speed and speed != 1.0:
                audio = apply_speed_without_pitch_change(audio, speed, sr=sr)

            try:
                audio_samples = int(len(audio)) if audio is not None else 0
            except Exception:
                audio_samples = 0
            audio_s = (float(audio_samples) / float(sr)) if sr and audio_samples else 0.0
            synth_s = float(t1 - t0)
            self.last_tts_metrics = {
                # NOTE(review): hard-coded although any TTSAdapter can be
                # wrapped; kept because readers of this dict are not visible here.
                "engine": "piper",
                "synth_s": synth_s,
                "audio_s": float(audio_s),
                "rtf": (synth_s / float(audio_s)) if audio_s else None,
                "sample_rate": int(sr),
                "audio_samples": int(audio_samples),
                "speed": float(speed or 1.0),
                "ts": time.time(),
            }

            self.audio_player.play_audio(audio, sample_rate=sr)
        except BaseException:
            # The start hook has already fired but the player will not report
            # completion for this request. Drop the callback so it cannot fire
            # on a later, unrelated playback, and balance the start
            # notification before propagating the error.
            if self._user_callback is callback:
                self._user_callback = None
            self._run_in_background(self.on_playback_end)
            raise
        return True

    def begin_playback(self, callback=None, *, sample_rate: int | None = None) -> None:
        """Begin a playback session without synthesizing.

        Used for streaming/chunked playback where audio is enqueued progressively.

        Args:
            callback: Optional completion callback. When None, the currently
                registered callback (if any) is left in place.
            sample_rate: Sample rate of the audio that will be enqueued. When
                None, the player is aligned with the adapter's rate instead.
        """
        if sample_rate is not None:
            # For externally-produced audio (e.g. cloning), prefer native sample
            # rate when we haven't opened an output stream yet. If a stream is
            # already open, keep it stable and resample on enqueue.
            sr = int(sample_rate)
            if getattr(self.audio_player, "stream", None) is None:
                self.audio_player.sample_rate = sr
        else:
            self._sync_sample_rate()
        if callback is not None:
            self._user_callback = callback
        self._run_in_background(self.on_playback_start)

    def enqueue_audio(self, audio: np.ndarray, *, sample_rate: int | None = None) -> None:
        """Enqueue audio into the underlying player (no extra callbacks)."""
        self.audio_player.play_audio(audio, sample_rate=sample_rate)

    def play_audio_array(self, audio: np.ndarray, callback=None) -> bool:
        """Play already-synthesized audio through the same playback pipeline.

        Used for optional features (e.g., voice cloning) that produce WAV bytes
        externally but still want to reuse the existing low-latency playback +
        lifecycle callbacks.

        Args:
            audio: Samples handed to the player, tagged with the adapter's
                sample rate.
            callback: Optional completion callback; replaces any registered one.

        Returns:
            True once the audio has been handed to the player.
        """
        self._user_callback = callback
        self._run_in_background(self.on_playback_start)

        self.audio_player.play_audio(audio, sample_rate=self._safe_sample_rate())
        return True

    def _on_playback_complete(self) -> None:
        """Called by the audio player when playback fully drains."""
        self._run_in_background(self.on_playback_end)

        # Take and clear the callback in one step so a callback registered by
        # a new request while this runs is not cleared by mistake.
        pending, self._user_callback = self._user_callback, None
        self._run_in_background(pending)

    def stop(self, *, close_stream: bool = True) -> bool:
        """Stop playback immediately and clear queued audio.

        By default we close the underlying output stream. Some interactive
        environments (macOS AUHAL in particular) can be flaky when repeatedly
        closing/reopening streams; callers can pass close_stream=False to keep
        the stream open and just flush playback state.

        Returns:
            False when there was neither an open stream nor active playback,
            True otherwise.
        """
        player = self.audio_player
        stream_exists = getattr(player, "stream", None) is not None
        was_playing = bool(getattr(player, "is_playing", False))

        if not (stream_exists or was_playing):
            return False

        if close_stream:
            player.stop_stream()
            return True

        # Keep stream open; just stop playback and clear buffers.
        try:
            player.clear_queue()
            player.is_playing = False
            player._audio_started = False  # noqa: SLF001 (internal flag; best-effort)
            player.current_audio = None
            player.current_position = 0
            try:
                with player._pause_lock:  # noqa: SLF001
                    player._paused = False  # noqa: SLF001
            except Exception:
                pass
        except Exception:
            # Best-effort; never crash caller during stop.
            pass
        return True

    def pause(self) -> bool:
        """Pause playback; returns the player's result."""
        return self.audio_player.pause()

    def resume(self) -> bool:
        """Resume playback; returns the player's result."""
        return self.audio_player.resume()

    def is_paused(self) -> bool:
        """Return the player's paused state."""
        return self.audio_player.is_paused_state()

    def is_active(self) -> bool:
        """Return True while the player's ``is_playing`` flag is set."""
        return bool(getattr(self.audio_player, "is_playing", False))

    def cleanup(self) -> None:
        """Release player resources; errors are ignored (best-effort teardown)."""
        try:
            self.audio_player.cleanup()
        except Exception:
            pass
|