abstractvoice 0.5.1__py3-none-any.whl → 0.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractvoice/__init__.py +2 -5
- abstractvoice/__main__.py +82 -3
- abstractvoice/adapters/__init__.py +12 -0
- abstractvoice/adapters/base.py +207 -0
- abstractvoice/adapters/stt_faster_whisper.py +401 -0
- abstractvoice/adapters/tts_piper.py +480 -0
- abstractvoice/aec/__init__.py +10 -0
- abstractvoice/aec/webrtc_apm.py +56 -0
- abstractvoice/artifacts.py +173 -0
- abstractvoice/audio/__init__.py +7 -0
- abstractvoice/audio/recorder.py +46 -0
- abstractvoice/audio/resample.py +25 -0
- abstractvoice/cloning/__init__.py +7 -0
- abstractvoice/cloning/engine_chroma.py +738 -0
- abstractvoice/cloning/engine_f5.py +546 -0
- abstractvoice/cloning/manager.py +349 -0
- abstractvoice/cloning/store.py +362 -0
- abstractvoice/compute/__init__.py +6 -0
- abstractvoice/compute/device.py +73 -0
- abstractvoice/config/__init__.py +2 -0
- abstractvoice/config/voice_catalog.py +19 -0
- abstractvoice/dependency_check.py +0 -1
- abstractvoice/examples/cli_repl.py +2403 -243
- abstractvoice/examples/voice_cli.py +64 -63
- abstractvoice/integrations/__init__.py +2 -0
- abstractvoice/integrations/abstractcore.py +116 -0
- abstractvoice/integrations/abstractcore_plugin.py +253 -0
- abstractvoice/prefetch.py +82 -0
- abstractvoice/recognition.py +424 -42
- abstractvoice/stop_phrase.py +103 -0
- abstractvoice/tts/__init__.py +3 -3
- abstractvoice/tts/adapter_tts_engine.py +210 -0
- abstractvoice/tts/tts_engine.py +257 -1208
- abstractvoice/vm/__init__.py +2 -0
- abstractvoice/vm/common.py +21 -0
- abstractvoice/vm/core.py +139 -0
- abstractvoice/vm/manager.py +108 -0
- abstractvoice/vm/stt_mixin.py +158 -0
- abstractvoice/vm/tts_mixin.py +550 -0
- abstractvoice/voice_manager.py +6 -1061
- abstractvoice-0.6.1.dist-info/METADATA +213 -0
- abstractvoice-0.6.1.dist-info/RECORD +52 -0
- {abstractvoice-0.5.1.dist-info → abstractvoice-0.6.1.dist-info}/WHEEL +1 -1
- abstractvoice-0.6.1.dist-info/entry_points.txt +6 -0
- abstractvoice/instant_setup.py +0 -83
- abstractvoice/simple_model_manager.py +0 -539
- abstractvoice-0.5.1.dist-info/METADATA +0 -1458
- abstractvoice-0.5.1.dist-info/RECORD +0 -23
- abstractvoice-0.5.1.dist-info/entry_points.txt +0 -2
- {abstractvoice-0.5.1.dist-info → abstractvoice-0.6.1.dist-info}/licenses/LICENSE +0 -0
- {abstractvoice-0.5.1.dist-info → abstractvoice-0.6.1.dist-info}/top_level.txt +0 -0
abstractvoice/__init__.py
CHANGED
|
@@ -29,8 +29,5 @@ warnings.filterwarnings(
|
|
|
29
29
|
# Import the main class for public API
|
|
30
30
|
from .voice_manager import VoiceManager
|
|
31
31
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
__version__ = "0.5.1"
|
|
36
|
-
__all__ = ['VoiceManager', 'list_models', 'download_model', 'get_status', 'is_ready']
|
|
32
|
+
__version__ = "0.6.1"
|
|
33
|
+
__all__ = ["VoiceManager"]
|
abstractvoice/__main__.py
CHANGED
|
@@ -16,8 +16,9 @@ def print_examples():
|
|
|
16
16
|
print(" web - Web API example")
|
|
17
17
|
print(" simple - Simple usage example")
|
|
18
18
|
print(" check-deps - Check dependency compatibility")
|
|
19
|
+
print(" download - Explicitly prefetch model artifacts")
|
|
19
20
|
print("\nUsage: python -m abstractvoice <example> [--language <lang>] [args...]")
|
|
20
|
-
print("\nSupported languages: en, fr,
|
|
21
|
+
print("\nSupported languages: en, fr, de, es, ru, zh")
|
|
21
22
|
print("\nExamples:")
|
|
22
23
|
print(" python -m abstractvoice cli --language fr # French CLI")
|
|
23
24
|
print(" python -m abstractvoice simple --language ru # Russian simple example")
|
|
@@ -99,7 +100,7 @@ def main():
|
|
|
99
100
|
parser = argparse.ArgumentParser(description="AbstractVoice examples")
|
|
100
101
|
parser.add_argument("example", nargs="?", help="Example to run (cli, web, simple, check-deps)")
|
|
101
102
|
parser.add_argument("--language", "--lang", default="en",
|
|
102
|
-
choices=["en", "fr", "
|
|
103
|
+
choices=["en", "fr", "de", "es", "ru", "zh"],
|
|
103
104
|
help="Voice language for examples")
|
|
104
105
|
|
|
105
106
|
# Parse just the first argument and language
|
|
@@ -119,6 +120,84 @@ def main():
|
|
|
119
120
|
print("This might indicate a dependency issue.")
|
|
120
121
|
return
|
|
121
122
|
|
|
123
|
+
if args.example == "download":
|
|
124
|
+
dl = argparse.ArgumentParser(description="AbstractVoice explicit downloads")
|
|
125
|
+
dl.add_argument("--stt", dest="stt_model", default=None, help="Prefetch faster-whisper model (e.g. small)")
|
|
126
|
+
dl.add_argument(
|
|
127
|
+
"--openf5",
|
|
128
|
+
action="store_true",
|
|
129
|
+
help="Prefetch OpenF5 artifacts for cloning (~5.4GB, requires abstractvoice[cloning])",
|
|
130
|
+
)
|
|
131
|
+
dl.add_argument(
|
|
132
|
+
"--chroma",
|
|
133
|
+
action="store_true",
|
|
134
|
+
help="Prefetch Chroma-4B artifacts (~14GB+, requires HF access; install abstractvoice[chroma] to run inference)",
|
|
135
|
+
)
|
|
136
|
+
dl.add_argument(
|
|
137
|
+
"--piper",
|
|
138
|
+
dest="piper_language",
|
|
139
|
+
default=None,
|
|
140
|
+
help="Prefetch Piper voice model for a language (e.g. en/fr/de).",
|
|
141
|
+
)
|
|
142
|
+
dl_args = dl.parse_args(remaining)
|
|
143
|
+
|
|
144
|
+
if not dl_args.stt_model and not dl_args.openf5 and not dl_args.chroma and not dl_args.piper_language:
|
|
145
|
+
print("Nothing to download. Examples:")
|
|
146
|
+
print(" python -m abstractvoice download --stt small")
|
|
147
|
+
print(" python -m abstractvoice download --openf5")
|
|
148
|
+
print(" python -m abstractvoice download --chroma")
|
|
149
|
+
print(" python -m abstractvoice download --piper en")
|
|
150
|
+
return
|
|
151
|
+
|
|
152
|
+
if dl_args.stt_model:
|
|
153
|
+
try:
|
|
154
|
+
from abstractvoice.adapters.stt_faster_whisper import FasterWhisperAdapter
|
|
155
|
+
|
|
156
|
+
model = str(dl_args.stt_model).strip()
|
|
157
|
+
print(f"Downloading STT model (faster-whisper): {model}")
|
|
158
|
+
stt = FasterWhisperAdapter(model_size=model, device="cpu", compute_type="int8", allow_downloads=True)
|
|
159
|
+
if not stt.is_available():
|
|
160
|
+
raise RuntimeError("Model download/load failed.")
|
|
161
|
+
print("✅ STT model ready.")
|
|
162
|
+
except Exception as e:
|
|
163
|
+
print(f"❌ STT download failed: {e}")
|
|
164
|
+
|
|
165
|
+
if dl_args.openf5:
|
|
166
|
+
try:
|
|
167
|
+
from abstractvoice.cloning.engine_f5 import F5TTSVoiceCloningEngine
|
|
168
|
+
|
|
169
|
+
print("Downloading OpenF5 artifacts (cloning)…")
|
|
170
|
+
engine = F5TTSVoiceCloningEngine(debug=True)
|
|
171
|
+
engine.ensure_openf5_artifacts_downloaded()
|
|
172
|
+
print("✅ OpenF5 artifacts ready.")
|
|
173
|
+
except Exception as e:
|
|
174
|
+
print(f"❌ OpenF5 download failed: {e}")
|
|
175
|
+
|
|
176
|
+
if dl_args.chroma:
|
|
177
|
+
try:
|
|
178
|
+
from abstractvoice.cloning.engine_chroma import ChromaVoiceCloningEngine
|
|
179
|
+
|
|
180
|
+
print("Downloading Chroma artifacts (cloning)…")
|
|
181
|
+
engine = ChromaVoiceCloningEngine(debug=True)
|
|
182
|
+
engine.ensure_chroma_artifacts_downloaded()
|
|
183
|
+
print("✅ Chroma artifacts ready.")
|
|
184
|
+
except Exception as e:
|
|
185
|
+
print(f"❌ Chroma download failed: {e}")
|
|
186
|
+
|
|
187
|
+
if dl_args.piper_language:
|
|
188
|
+
try:
|
|
189
|
+
from abstractvoice.adapters.tts_piper import PiperTTSAdapter
|
|
190
|
+
|
|
191
|
+
lang = str(dl_args.piper_language).strip().lower()
|
|
192
|
+
print(f"Downloading Piper voice model: {lang}")
|
|
193
|
+
piper = PiperTTSAdapter(language=lang, allow_downloads=True, auto_load=False)
|
|
194
|
+
if not piper.ensure_model_downloaded(lang):
|
|
195
|
+
raise RuntimeError("Piper model download failed.")
|
|
196
|
+
print("✅ Piper model ready.")
|
|
197
|
+
except Exception as e:
|
|
198
|
+
print(f"❌ Piper download failed: {e}")
|
|
199
|
+
return
|
|
200
|
+
|
|
122
201
|
# Set remaining args as sys.argv for the examples, including language
|
|
123
202
|
if args.language != "en":
|
|
124
203
|
remaining = ["--language", args.language] + remaining
|
|
@@ -138,4 +217,4 @@ def main():
|
|
|
138
217
|
|
|
139
218
|
|
|
140
219
|
if __name__ == "__main__":
|
|
141
|
-
main()
|
|
220
|
+
main()
|
|
abstractvoice/adapters/__init__.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Adapter interfaces for TTS and STT engines.
|
|
2
|
+
|
|
3
|
+
This module defines base interfaces for pluggable TTS and STT engines,
|
|
4
|
+
enabling easy integration of new speech synthesis and recognition backends
|
|
5
|
+
while maintaining API compatibility.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from .base import TTSAdapter, STTAdapter
|
|
9
|
+
from .tts_piper import PiperTTSAdapter
|
|
10
|
+
from .stt_faster_whisper import FasterWhisperAdapter
|
|
11
|
+
|
|
12
|
+
__all__ = ['TTSAdapter', 'STTAdapter', 'PiperTTSAdapter', 'FasterWhisperAdapter']
|
|
abstractvoice/adapters/base.py
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
"""Base adapter interfaces for TTS and STT engines.
|
|
2
|
+
|
|
3
|
+
These abstract base classes define the contract that all TTS and STT adapters
|
|
4
|
+
must implement, ensuring consistent API across different backends.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from abc import ABC, abstractmethod
|
|
8
|
+
from typing import Optional, Dict, Any, Union
|
|
9
|
+
import numpy as np
|
|
10
|
+
import io
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class TTSAdapter(ABC):
|
|
14
|
+
"""Abstract base class for Text-to-Speech adapters.
|
|
15
|
+
|
|
16
|
+
All TTS engines must implement this interface to be compatible with
|
|
17
|
+
the VoiceManager. This ensures we can swap engines without breaking
|
|
18
|
+
existing code.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
@abstractmethod
|
|
22
|
+
def synthesize(self, text: str) -> np.ndarray:
|
|
23
|
+
"""Convert text to audio array for immediate playback.
|
|
24
|
+
|
|
25
|
+
Args:
|
|
26
|
+
text: The text to synthesize
|
|
27
|
+
|
|
28
|
+
Returns:
|
|
29
|
+
Audio data as numpy array (shape: [samples,], dtype: float32, range: -1.0 to 1.0)
|
|
30
|
+
"""
|
|
31
|
+
pass
|
|
32
|
+
|
|
33
|
+
@abstractmethod
|
|
34
|
+
def synthesize_to_bytes(self, text: str, format: str = 'wav') -> bytes:
|
|
35
|
+
"""Convert text to audio bytes for network transmission or file storage.
|
|
36
|
+
|
|
37
|
+
This method is essential for client-server architectures where the backend
|
|
38
|
+
generates speech and sends it to clients for playback.
|
|
39
|
+
|
|
40
|
+
Args:
|
|
41
|
+
text: The text to synthesize
|
|
42
|
+
format: Audio format ('wav', 'mp3', 'ogg'). Default: 'wav'
|
|
43
|
+
|
|
44
|
+
Returns:
|
|
45
|
+
Audio data as bytes in the specified format
|
|
46
|
+
"""
|
|
47
|
+
pass
|
|
48
|
+
|
|
49
|
+
@abstractmethod
|
|
50
|
+
def synthesize_to_file(self, text: str, output_path: str, format: Optional[str] = None) -> str:
|
|
51
|
+
"""Convert text to audio file.
|
|
52
|
+
|
|
53
|
+
Args:
|
|
54
|
+
text: The text to synthesize
|
|
55
|
+
output_path: Path to save the audio file
|
|
56
|
+
format: Audio format (optional, inferred from file extension if not provided)
|
|
57
|
+
|
|
58
|
+
Returns:
|
|
59
|
+
Path to the saved audio file
|
|
60
|
+
"""
|
|
61
|
+
pass
|
|
62
|
+
|
|
63
|
+
@abstractmethod
|
|
64
|
+
def set_language(self, language: str) -> bool:
|
|
65
|
+
"""Switch the TTS language.
|
|
66
|
+
|
|
67
|
+
Args:
|
|
68
|
+
language: ISO 639-1 language code (e.g., 'en', 'fr', 'de')
|
|
69
|
+
|
|
70
|
+
Returns:
|
|
71
|
+
True if language switch successful, False otherwise
|
|
72
|
+
"""
|
|
73
|
+
pass
|
|
74
|
+
|
|
75
|
+
@abstractmethod
|
|
76
|
+
def get_supported_languages(self) -> list[str]:
|
|
77
|
+
"""Get list of supported language codes.
|
|
78
|
+
|
|
79
|
+
Returns:
|
|
80
|
+
List of ISO 639-1 language codes
|
|
81
|
+
"""
|
|
82
|
+
pass
|
|
83
|
+
|
|
84
|
+
@abstractmethod
|
|
85
|
+
def get_sample_rate(self) -> int:
|
|
86
|
+
"""Get the sample rate of the synthesized audio.
|
|
87
|
+
|
|
88
|
+
Returns:
|
|
89
|
+
Sample rate in Hz (e.g., 22050, 16000)
|
|
90
|
+
"""
|
|
91
|
+
pass
|
|
92
|
+
|
|
93
|
+
@abstractmethod
|
|
94
|
+
def is_available(self) -> bool:
|
|
95
|
+
"""Check if this TTS engine is available and functional.
|
|
96
|
+
|
|
97
|
+
Returns:
|
|
98
|
+
True if the engine can be used, False if dependencies missing or initialization failed
|
|
99
|
+
"""
|
|
100
|
+
pass
|
|
101
|
+
|
|
102
|
+
def get_info(self) -> Dict[str, Any]:
|
|
103
|
+
"""Get metadata about this TTS engine.
|
|
104
|
+
|
|
105
|
+
Returns:
|
|
106
|
+
Dictionary with engine information (name, version, languages, etc.)
|
|
107
|
+
"""
|
|
108
|
+
return {
|
|
109
|
+
'name': self.__class__.__name__,
|
|
110
|
+
'languages': self.get_supported_languages(),
|
|
111
|
+
'sample_rate': self.get_sample_rate(),
|
|
112
|
+
'available': self.is_available()
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class STTAdapter(ABC):
|
|
117
|
+
"""Abstract base class for Speech-to-Text adapters.
|
|
118
|
+
|
|
119
|
+
All STT engines must implement this interface to be compatible with
|
|
120
|
+
the VoiceManager.
|
|
121
|
+
"""
|
|
122
|
+
|
|
123
|
+
@abstractmethod
|
|
124
|
+
def transcribe(self, audio_path: str, language: Optional[str] = None) -> str:
|
|
125
|
+
"""Transcribe audio file to text.
|
|
126
|
+
|
|
127
|
+
Args:
|
|
128
|
+
audio_path: Path to audio file
|
|
129
|
+
language: Target language (optional, auto-detect if not provided)
|
|
130
|
+
|
|
131
|
+
Returns:
|
|
132
|
+
Transcribed text
|
|
133
|
+
"""
|
|
134
|
+
pass
|
|
135
|
+
|
|
136
|
+
@abstractmethod
|
|
137
|
+
def transcribe_from_bytes(self, audio_bytes: bytes, language: Optional[str] = None) -> str:
|
|
138
|
+
"""Transcribe audio from bytes (network use case).
|
|
139
|
+
|
|
140
|
+
This method is essential for client-server architectures where clients
|
|
141
|
+
record audio and send it to the backend for transcription.
|
|
142
|
+
|
|
143
|
+
Args:
|
|
144
|
+
audio_bytes: Audio data as bytes
|
|
145
|
+
language: Target language (optional, auto-detect if not provided)
|
|
146
|
+
|
|
147
|
+
Returns:
|
|
148
|
+
Transcribed text
|
|
149
|
+
"""
|
|
150
|
+
pass
|
|
151
|
+
|
|
152
|
+
@abstractmethod
|
|
153
|
+
def transcribe_from_array(self, audio_array: np.ndarray, sample_rate: int,
|
|
154
|
+
language: Optional[str] = None) -> str:
|
|
155
|
+
"""Transcribe audio from numpy array.
|
|
156
|
+
|
|
157
|
+
Args:
|
|
158
|
+
audio_array: Audio data as numpy array
|
|
159
|
+
sample_rate: Sample rate of the audio in Hz
|
|
160
|
+
language: Target language (optional, auto-detect if not provided)
|
|
161
|
+
|
|
162
|
+
Returns:
|
|
163
|
+
Transcribed text
|
|
164
|
+
"""
|
|
165
|
+
pass
|
|
166
|
+
|
|
167
|
+
@abstractmethod
|
|
168
|
+
def set_language(self, language: str) -> bool:
|
|
169
|
+
"""Set the default language for transcription.
|
|
170
|
+
|
|
171
|
+
Args:
|
|
172
|
+
language: ISO 639-1 language code
|
|
173
|
+
|
|
174
|
+
Returns:
|
|
175
|
+
True if successful, False otherwise
|
|
176
|
+
"""
|
|
177
|
+
pass
|
|
178
|
+
|
|
179
|
+
@abstractmethod
|
|
180
|
+
def get_supported_languages(self) -> list[str]:
|
|
181
|
+
"""Get list of supported language codes.
|
|
182
|
+
|
|
183
|
+
Returns:
|
|
184
|
+
List of ISO 639-1 language codes
|
|
185
|
+
"""
|
|
186
|
+
pass
|
|
187
|
+
|
|
188
|
+
@abstractmethod
|
|
189
|
+
def is_available(self) -> bool:
|
|
190
|
+
"""Check if this STT engine is available and functional.
|
|
191
|
+
|
|
192
|
+
Returns:
|
|
193
|
+
True if the engine can be used, False otherwise
|
|
194
|
+
"""
|
|
195
|
+
pass
|
|
196
|
+
|
|
197
|
+
def get_info(self) -> Dict[str, Any]:
|
|
198
|
+
"""Get metadata about this STT engine.
|
|
199
|
+
|
|
200
|
+
Returns:
|
|
201
|
+
Dictionary with engine information
|
|
202
|
+
"""
|
|
203
|
+
return {
|
|
204
|
+
'name': self.__class__.__name__,
|
|
205
|
+
'languages': self.get_supported_languages(),
|
|
206
|
+
'available': self.is_available()
|
|
207
|
+
}
|