abstractvoice 0.5.1__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51):
  1. abstractvoice/__init__.py +2 -5
  2. abstractvoice/__main__.py +82 -3
  3. abstractvoice/adapters/__init__.py +12 -0
  4. abstractvoice/adapters/base.py +207 -0
  5. abstractvoice/adapters/stt_faster_whisper.py +401 -0
  6. abstractvoice/adapters/tts_piper.py +480 -0
  7. abstractvoice/aec/__init__.py +10 -0
  8. abstractvoice/aec/webrtc_apm.py +56 -0
  9. abstractvoice/artifacts.py +173 -0
  10. abstractvoice/audio/__init__.py +7 -0
  11. abstractvoice/audio/recorder.py +46 -0
  12. abstractvoice/audio/resample.py +25 -0
  13. abstractvoice/cloning/__init__.py +7 -0
  14. abstractvoice/cloning/engine_chroma.py +738 -0
  15. abstractvoice/cloning/engine_f5.py +546 -0
  16. abstractvoice/cloning/manager.py +349 -0
  17. abstractvoice/cloning/store.py +362 -0
  18. abstractvoice/compute/__init__.py +6 -0
  19. abstractvoice/compute/device.py +73 -0
  20. abstractvoice/config/__init__.py +2 -0
  21. abstractvoice/config/voice_catalog.py +19 -0
  22. abstractvoice/dependency_check.py +0 -1
  23. abstractvoice/examples/cli_repl.py +2403 -243
  24. abstractvoice/examples/voice_cli.py +64 -63
  25. abstractvoice/integrations/__init__.py +2 -0
  26. abstractvoice/integrations/abstractcore.py +116 -0
  27. abstractvoice/integrations/abstractcore_plugin.py +253 -0
  28. abstractvoice/prefetch.py +82 -0
  29. abstractvoice/recognition.py +424 -42
  30. abstractvoice/stop_phrase.py +103 -0
  31. abstractvoice/tts/__init__.py +3 -3
  32. abstractvoice/tts/adapter_tts_engine.py +210 -0
  33. abstractvoice/tts/tts_engine.py +257 -1208
  34. abstractvoice/vm/__init__.py +2 -0
  35. abstractvoice/vm/common.py +21 -0
  36. abstractvoice/vm/core.py +139 -0
  37. abstractvoice/vm/manager.py +108 -0
  38. abstractvoice/vm/stt_mixin.py +158 -0
  39. abstractvoice/vm/tts_mixin.py +550 -0
  40. abstractvoice/voice_manager.py +6 -1061
  41. abstractvoice-0.6.1.dist-info/METADATA +213 -0
  42. abstractvoice-0.6.1.dist-info/RECORD +52 -0
  43. {abstractvoice-0.5.1.dist-info → abstractvoice-0.6.1.dist-info}/WHEEL +1 -1
  44. abstractvoice-0.6.1.dist-info/entry_points.txt +6 -0
  45. abstractvoice/instant_setup.py +0 -83
  46. abstractvoice/simple_model_manager.py +0 -539
  47. abstractvoice-0.5.1.dist-info/METADATA +0 -1458
  48. abstractvoice-0.5.1.dist-info/RECORD +0 -23
  49. abstractvoice-0.5.1.dist-info/entry_points.txt +0 -2
  50. {abstractvoice-0.5.1.dist-info → abstractvoice-0.6.1.dist-info}/licenses/LICENSE +0 -0
  51. {abstractvoice-0.5.1.dist-info → abstractvoice-0.6.1.dist-info}/top_level.txt +0 -0
abstractvoice/__init__.py CHANGED
@@ -29,8 +29,5 @@ warnings.filterwarnings(
29
29
  # Import the main class for public API
30
30
  from .voice_manager import VoiceManager
31
31
 
32
- # Import simple APIs for third-party applications
33
- from .simple_model_manager import list_models, download_model, get_status, is_ready
34
-
35
- __version__ = "0.5.1"
36
- __all__ = ['VoiceManager', 'list_models', 'download_model', 'get_status', 'is_ready']
32
+ __version__ = "0.6.1"
33
+ __all__ = ["VoiceManager"]
abstractvoice/__main__.py CHANGED
@@ -16,8 +16,9 @@ def print_examples():
16
16
  print(" web - Web API example")
17
17
  print(" simple - Simple usage example")
18
18
  print(" check-deps - Check dependency compatibility")
19
+ print(" download - Explicitly prefetch model artifacts")
19
20
  print("\nUsage: python -m abstractvoice <example> [--language <lang>] [args...]")
20
- print("\nSupported languages: en, fr, es, de, it, ru, multilingual")
21
+ print("\nSupported languages: en, fr, de, es, ru, zh")
21
22
  print("\nExamples:")
22
23
  print(" python -m abstractvoice cli --language fr # French CLI")
23
24
  print(" python -m abstractvoice simple --language ru # Russian simple example")
@@ -99,7 +100,7 @@ def main():
99
100
  parser = argparse.ArgumentParser(description="AbstractVoice examples")
100
101
  parser.add_argument("example", nargs="?", help="Example to run (cli, web, simple, check-deps)")
101
102
  parser.add_argument("--language", "--lang", default="en",
102
- choices=["en", "fr", "es", "de", "it", "ru", "multilingual"],
103
+ choices=["en", "fr", "de", "es", "ru", "zh"],
103
104
  help="Voice language for examples")
104
105
 
105
106
  # Parse just the first argument and language
@@ -119,6 +120,84 @@ def main():
119
120
  print("This might indicate a dependency issue.")
120
121
  return
121
122
 
123
+ if args.example == "download":
124
+ dl = argparse.ArgumentParser(description="AbstractVoice explicit downloads")
125
+ dl.add_argument("--stt", dest="stt_model", default=None, help="Prefetch faster-whisper model (e.g. small)")
126
+ dl.add_argument(
127
+ "--openf5",
128
+ action="store_true",
129
+ help="Prefetch OpenF5 artifacts for cloning (~5.4GB, requires abstractvoice[cloning])",
130
+ )
131
+ dl.add_argument(
132
+ "--chroma",
133
+ action="store_true",
134
+ help="Prefetch Chroma-4B artifacts (~14GB+, requires HF access; install abstractvoice[chroma] to run inference)",
135
+ )
136
+ dl.add_argument(
137
+ "--piper",
138
+ dest="piper_language",
139
+ default=None,
140
+ help="Prefetch Piper voice model for a language (e.g. en/fr/de).",
141
+ )
142
+ dl_args = dl.parse_args(remaining)
143
+
144
+ if not dl_args.stt_model and not dl_args.openf5 and not dl_args.chroma and not dl_args.piper_language:
145
+ print("Nothing to download. Examples:")
146
+ print(" python -m abstractvoice download --stt small")
147
+ print(" python -m abstractvoice download --openf5")
148
+ print(" python -m abstractvoice download --chroma")
149
+ print(" python -m abstractvoice download --piper en")
150
+ return
151
+
152
+ if dl_args.stt_model:
153
+ try:
154
+ from abstractvoice.adapters.stt_faster_whisper import FasterWhisperAdapter
155
+
156
+ model = str(dl_args.stt_model).strip()
157
+ print(f"Downloading STT model (faster-whisper): {model}")
158
+ stt = FasterWhisperAdapter(model_size=model, device="cpu", compute_type="int8", allow_downloads=True)
159
+ if not stt.is_available():
160
+ raise RuntimeError("Model download/load failed.")
161
+ print("✅ STT model ready.")
162
+ except Exception as e:
163
+ print(f"❌ STT download failed: {e}")
164
+
165
+ if dl_args.openf5:
166
+ try:
167
+ from abstractvoice.cloning.engine_f5 import F5TTSVoiceCloningEngine
168
+
169
+ print("Downloading OpenF5 artifacts (cloning)…")
170
+ engine = F5TTSVoiceCloningEngine(debug=True)
171
+ engine.ensure_openf5_artifacts_downloaded()
172
+ print("✅ OpenF5 artifacts ready.")
173
+ except Exception as e:
174
+ print(f"❌ OpenF5 download failed: {e}")
175
+
176
+ if dl_args.chroma:
177
+ try:
178
+ from abstractvoice.cloning.engine_chroma import ChromaVoiceCloningEngine
179
+
180
+ print("Downloading Chroma artifacts (cloning)…")
181
+ engine = ChromaVoiceCloningEngine(debug=True)
182
+ engine.ensure_chroma_artifacts_downloaded()
183
+ print("✅ Chroma artifacts ready.")
184
+ except Exception as e:
185
+ print(f"❌ Chroma download failed: {e}")
186
+
187
+ if dl_args.piper_language:
188
+ try:
189
+ from abstractvoice.adapters.tts_piper import PiperTTSAdapter
190
+
191
+ lang = str(dl_args.piper_language).strip().lower()
192
+ print(f"Downloading Piper voice model: {lang}")
193
+ piper = PiperTTSAdapter(language=lang, allow_downloads=True, auto_load=False)
194
+ if not piper.ensure_model_downloaded(lang):
195
+ raise RuntimeError("Piper model download failed.")
196
+ print("✅ Piper model ready.")
197
+ except Exception as e:
198
+ print(f"❌ Piper download failed: {e}")
199
+ return
200
+
122
201
  # Set remaining args as sys.argv for the examples, including language
123
202
  if args.language != "en":
124
203
  remaining = ["--language", args.language] + remaining
@@ -138,4 +217,4 @@ def main():
138
217
 
139
218
 
140
219
  if __name__ == "__main__":
141
- main()
220
+ main()
@@ -0,0 +1,12 @@
1
+ """Adapter interfaces for TTS and STT engines.
2
+
3
+ This module defines base interfaces for pluggable TTS and STT engines,
4
+ enabling easy integration of new speech synthesis and recognition backends
5
+ while maintaining API compatibility.
6
+ """
7
+
8
+ from .base import TTSAdapter, STTAdapter
9
+ from .tts_piper import PiperTTSAdapter
10
+ from .stt_faster_whisper import FasterWhisperAdapter
11
+
12
+ __all__ = ['TTSAdapter', 'STTAdapter', 'PiperTTSAdapter', 'FasterWhisperAdapter']
@@ -0,0 +1,207 @@
1
+ """Base adapter interfaces for TTS and STT engines.
2
+
3
+ These abstract base classes define the contract that all TTS and STT adapters
4
+ must implement, ensuring consistent API across different backends.
5
+ """
6
+
7
+ from abc import ABC, abstractmethod
8
+ from typing import Optional, Dict, Any, Union
9
+ import numpy as np
10
+ import io
11
+
12
+
13
class TTSAdapter(ABC):
    """Contract that every Text-to-Speech backend has to fulfil.

    VoiceManager talks to TTS engines exclusively through this interface,
    which is what allows one engine to be replaced by another without
    touching calling code.
    """

    @abstractmethod
    def synthesize(self, text: str) -> np.ndarray:
        """Render ``text`` as an in-memory waveform ready for playback.

        Args:
            text: The text to synthesize

        Returns:
            Audio data as numpy array (shape: [samples,], dtype: float32,
            range: -1.0 to 1.0)
        """
        ...

    @abstractmethod
    def synthesize_to_bytes(self, text: str, format: str = 'wav') -> bytes:
        """Render ``text`` as encoded audio bytes.

        Intended for network transmission or file storage, e.g. a backend
        that generates speech and ships it to a client for playback.

        Args:
            text: The text to synthesize
            format: Audio format ('wav', 'mp3', 'ogg'). Default: 'wav'

        Returns:
            Audio data as bytes in the specified format
        """
        ...

    @abstractmethod
    def synthesize_to_file(self, text: str, output_path: str, format: Optional[str] = None) -> str:
        """Render ``text`` and write the result to an audio file.

        Args:
            text: The text to synthesize
            output_path: Path to save the audio file
            format: Audio format (optional, inferred from file extension
                if not provided)

        Returns:
            Path to the saved audio file
        """
        ...

    @abstractmethod
    def set_language(self, language: str) -> bool:
        """Change the language used for synthesis.

        Args:
            language: ISO 639-1 language code (e.g., 'en', 'fr', 'de')

        Returns:
            True if language switch successful, False otherwise
        """
        ...

    @abstractmethod
    def get_supported_languages(self) -> list[str]:
        """Report which languages this engine can speak.

        Returns:
            List of ISO 639-1 language codes
        """
        ...

    @abstractmethod
    def get_sample_rate(self) -> int:
        """Report the sample rate of the audio this engine produces.

        Returns:
            Sample rate in Hz (e.g., 22050, 16000)
        """
        ...

    @abstractmethod
    def is_available(self) -> bool:
        """Tell whether the engine can actually be used right now.

        Returns:
            True if the engine can be used, False if dependencies missing
            or initialization failed
        """
        ...

    def get_info(self) -> Dict[str, Any]:
        """Collect descriptive metadata about this TTS engine.

        Returns:
            Dictionary with the keys 'name' (the concrete class name),
            'languages', 'sample_rate' and 'available', each filled from
            the corresponding accessor on this adapter.
        """
        # Accessors are queried in the same order as the keys appear.
        info: Dict[str, Any] = {'name': self.__class__.__name__}
        info['languages'] = self.get_supported_languages()
        info['sample_rate'] = self.get_sample_rate()
        info['available'] = self.is_available()
        return info
114
+
115
+
116
class STTAdapter(ABC):
    """Contract that every Speech-to-Text backend has to fulfil.

    VoiceManager talks to STT engines exclusively through this interface.
    """

    @abstractmethod
    def transcribe(self, audio_path: str, language: Optional[str] = None) -> str:
        """Turn the contents of an audio file into text.

        Args:
            audio_path: Path to audio file
            language: Target language (optional, auto-detect if not provided)

        Returns:
            Transcribed text
        """
        ...

    @abstractmethod
    def transcribe_from_bytes(self, audio_bytes: bytes, language: Optional[str] = None) -> str:
        """Turn raw audio bytes into text (network use case).

        Intended for client-server setups in which a client records audio
        and sends it to the backend for transcription.

        Args:
            audio_bytes: Audio data as bytes
            language: Target language (optional, auto-detect if not provided)

        Returns:
            Transcribed text
        """
        ...

    @abstractmethod
    def transcribe_from_array(self, audio_array: np.ndarray, sample_rate: int,
                              language: Optional[str] = None) -> str:
        """Turn an in-memory waveform into text.

        Args:
            audio_array: Audio data as numpy array
            sample_rate: Sample rate of the audio in Hz
            language: Target language (optional, auto-detect if not provided)

        Returns:
            Transcribed text
        """
        ...

    @abstractmethod
    def set_language(self, language: str) -> bool:
        """Choose the default transcription language.

        Args:
            language: ISO 639-1 language code

        Returns:
            True if successful, False otherwise
        """
        ...

    @abstractmethod
    def get_supported_languages(self) -> list[str]:
        """Report which languages this engine can transcribe.

        Returns:
            List of ISO 639-1 language codes
        """
        ...

    @abstractmethod
    def is_available(self) -> bool:
        """Tell whether the engine can actually be used right now.

        Returns:
            True if the engine can be used, False otherwise
        """
        ...

    def get_info(self) -> Dict[str, Any]:
        """Collect descriptive metadata about this STT engine.

        Returns:
            Dictionary with the keys 'name' (the concrete class name),
            'languages' and 'available', each filled from the
            corresponding accessor on this adapter.
        """
        # Accessors are queried in the same order as the keys appear.
        info: Dict[str, Any] = {'name': self.__class__.__name__}
        info['languages'] = self.get_supported_languages()
        info['available'] = self.is_available()
        return info