abstractvoice 0.5.1__tar.gz → 0.6.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94):
  1. abstractvoice-0.6.1/PKG-INFO +213 -0
  2. abstractvoice-0.6.1/README.md +128 -0
  3. {abstractvoice-0.5.1 → abstractvoice-0.6.1}/abstractvoice/__init__.py +2 -5
  4. {abstractvoice-0.5.1 → abstractvoice-0.6.1}/abstractvoice/__main__.py +82 -3
  5. abstractvoice-0.6.1/abstractvoice/adapters/__init__.py +12 -0
  6. abstractvoice-0.6.1/abstractvoice/adapters/base.py +207 -0
  7. abstractvoice-0.6.1/abstractvoice/adapters/stt_faster_whisper.py +401 -0
  8. abstractvoice-0.6.1/abstractvoice/adapters/tts_piper.py +480 -0
  9. abstractvoice-0.6.1/abstractvoice/aec/__init__.py +10 -0
  10. abstractvoice-0.6.1/abstractvoice/aec/webrtc_apm.py +56 -0
  11. abstractvoice-0.6.1/abstractvoice/artifacts.py +173 -0
  12. abstractvoice-0.6.1/abstractvoice/audio/__init__.py +7 -0
  13. abstractvoice-0.6.1/abstractvoice/audio/recorder.py +46 -0
  14. abstractvoice-0.6.1/abstractvoice/audio/resample.py +25 -0
  15. abstractvoice-0.6.1/abstractvoice/cloning/__init__.py +7 -0
  16. abstractvoice-0.6.1/abstractvoice/cloning/engine_chroma.py +738 -0
  17. abstractvoice-0.6.1/abstractvoice/cloning/engine_f5.py +546 -0
  18. abstractvoice-0.6.1/abstractvoice/cloning/manager.py +349 -0
  19. abstractvoice-0.6.1/abstractvoice/cloning/store.py +362 -0
  20. abstractvoice-0.6.1/abstractvoice/compute/__init__.py +6 -0
  21. abstractvoice-0.6.1/abstractvoice/compute/device.py +73 -0
  22. abstractvoice-0.6.1/abstractvoice/config/__init__.py +2 -0
  23. abstractvoice-0.6.1/abstractvoice/config/voice_catalog.py +19 -0
  24. {abstractvoice-0.5.1 → abstractvoice-0.6.1}/abstractvoice/dependency_check.py +0 -1
  25. abstractvoice-0.6.1/abstractvoice/examples/cli_repl.py +3267 -0
  26. {abstractvoice-0.5.1 → abstractvoice-0.6.1}/abstractvoice/examples/voice_cli.py +64 -63
  27. abstractvoice-0.6.1/abstractvoice/integrations/__init__.py +2 -0
  28. abstractvoice-0.6.1/abstractvoice/integrations/abstractcore.py +116 -0
  29. abstractvoice-0.6.1/abstractvoice/integrations/abstractcore_plugin.py +253 -0
  30. abstractvoice-0.6.1/abstractvoice/prefetch.py +82 -0
  31. abstractvoice-0.6.1/abstractvoice/recognition.py +677 -0
  32. abstractvoice-0.6.1/abstractvoice/stop_phrase.py +103 -0
  33. abstractvoice-0.6.1/abstractvoice/tts/__init__.py +5 -0
  34. abstractvoice-0.6.1/abstractvoice/tts/adapter_tts_engine.py +210 -0
  35. abstractvoice-0.6.1/abstractvoice/tts/tts_engine.py +346 -0
  36. abstractvoice-0.6.1/abstractvoice/vm/__init__.py +2 -0
  37. abstractvoice-0.6.1/abstractvoice/vm/common.py +21 -0
  38. abstractvoice-0.6.1/abstractvoice/vm/core.py +139 -0
  39. abstractvoice-0.6.1/abstractvoice/vm/manager.py +108 -0
  40. abstractvoice-0.6.1/abstractvoice/vm/stt_mixin.py +158 -0
  41. abstractvoice-0.6.1/abstractvoice/vm/tts_mixin.py +550 -0
  42. abstractvoice-0.6.1/abstractvoice/voice_manager.py +10 -0
  43. abstractvoice-0.6.1/abstractvoice.egg-info/PKG-INFO +213 -0
  44. abstractvoice-0.6.1/abstractvoice.egg-info/SOURCES.txt +80 -0
  45. abstractvoice-0.6.1/abstractvoice.egg-info/entry_points.txt +6 -0
  46. {abstractvoice-0.5.1 → abstractvoice-0.6.1}/abstractvoice.egg-info/requires.txt +28 -31
  47. {abstractvoice-0.5.1 → abstractvoice-0.6.1}/pyproject.toml +51 -35
  48. abstractvoice-0.6.1/tests/test_abstractcore_plugin.py +144 -0
  49. abstractvoice-0.6.1/tests/test_adr0002_phase1.py +54 -0
  50. abstractvoice-0.6.1/tests/test_adr0002_phase2_optional_aec.py +44 -0
  51. abstractvoice-0.6.1/tests/test_artifact_tools.py +77 -0
  52. abstractvoice-0.6.1/tests/test_audio_player_resample.py +41 -0
  53. abstractvoice-0.6.1/tests/test_callbacks.py +149 -0
  54. abstractvoice-0.6.1/tests/test_chroma_cloning_integration.py +36 -0
  55. abstractvoice-0.6.1/tests/test_cloned_tts_cancellation.py +48 -0
  56. abstractvoice-0.6.1/tests/test_cloning_reference_text_autofallback.py +34 -0
  57. abstractvoice-0.6.1/tests/test_faster_whisper_adapter.py +303 -0
  58. abstractvoice-0.6.1/tests/test_fresh_install.py +156 -0
  59. abstractvoice-0.6.1/tests/test_full_mode_echo_gate.py +25 -0
  60. abstractvoice-0.6.1/tests/test_piper_adapter.py +232 -0
  61. abstractvoice-0.6.1/tests/test_reference_text_autofallback_offline_cached_model.py +37 -0
  62. abstractvoice-0.6.1/tests/test_repl_interrupt_cloned_voice_does_not_resume_old_audio.py +65 -0
  63. abstractvoice-0.6.1/tests/test_stop_phrase_continuous_detector.py +26 -0
  64. abstractvoice-0.6.1/tests/test_stop_phrase_matching_is_tolerant.py +12 -0
  65. abstractvoice-0.6.1/tests/test_stop_speaking_restores_recognizer_state.py +27 -0
  66. abstractvoice-0.6.1/tests/test_voice_clone_store_delete_rename.py +22 -0
  67. abstractvoice-0.6.1/tests/test_voice_cloner_engine_dispatch.py +56 -0
  68. abstractvoice-0.6.1/tests/test_voice_cloning_integration_hal9000.py +28 -0
  69. abstractvoice-0.6.1/tests/test_voice_cloning_store.py +32 -0
  70. abstractvoice-0.6.1/tests/test_voice_mode_wait_pauses_listening_during_tts.py +23 -0
  71. abstractvoice-0.6.1/tests/test_voice_recognizer_ptt_profile.py +9 -0
  72. abstractvoice-0.6.1/tests/test_voice_switching.py +87 -0
  73. abstractvoice-0.5.1/PKG-INFO +0 -1458
  74. abstractvoice-0.5.1/README.md +0 -1368
  75. abstractvoice-0.5.1/abstractvoice/examples/cli_repl.py +0 -1107
  76. abstractvoice-0.5.1/abstractvoice/instant_setup.py +0 -83
  77. abstractvoice-0.5.1/abstractvoice/recognition.py +0 -295
  78. abstractvoice-0.5.1/abstractvoice/simple_model_manager.py +0 -539
  79. abstractvoice-0.5.1/abstractvoice/tts/__init__.py +0 -5
  80. abstractvoice-0.5.1/abstractvoice/tts/tts_engine.py +0 -1297
  81. abstractvoice-0.5.1/abstractvoice/voice_manager.py +0 -1065
  82. abstractvoice-0.5.1/abstractvoice.egg-info/PKG-INFO +0 -1458
  83. abstractvoice-0.5.1/abstractvoice.egg-info/SOURCES.txt +0 -26
  84. abstractvoice-0.5.1/abstractvoice.egg-info/entry_points.txt +0 -2
  85. {abstractvoice-0.5.1 → abstractvoice-0.6.1}/LICENSE +0 -0
  86. {abstractvoice-0.5.1 → abstractvoice-0.6.1}/abstractvoice/examples/__init__.py +0 -0
  87. {abstractvoice-0.5.1 → abstractvoice-0.6.1}/abstractvoice/examples/web_api.py +0 -0
  88. {abstractvoice-0.5.1 → abstractvoice-0.6.1}/abstractvoice/stt/__init__.py +0 -0
  89. {abstractvoice-0.5.1 → abstractvoice-0.6.1}/abstractvoice/stt/transcriber.py +0 -0
  90. {abstractvoice-0.5.1 → abstractvoice-0.6.1}/abstractvoice/vad/__init__.py +0 -0
  91. {abstractvoice-0.5.1 → abstractvoice-0.6.1}/abstractvoice/vad/voice_detector.py +0 -0
  92. {abstractvoice-0.5.1 → abstractvoice-0.6.1}/abstractvoice.egg-info/dependency_links.txt +0 -0
  93. {abstractvoice-0.5.1 → abstractvoice-0.6.1}/abstractvoice.egg-info/top_level.txt +0 -0
  94. {abstractvoice-0.5.1 → abstractvoice-0.6.1}/setup.cfg +0 -0
@@ -0,0 +1,213 @@
1
+ Metadata-Version: 2.4
2
+ Name: abstractvoice
3
+ Version: 0.6.1
4
+ Summary: A modular Python library for voice interactions with AI systems
5
+ Author-email: Laurent-Philippe Albou <contact@abstractcore.ai>
6
+ License-Expression: MIT
7
+ Project-URL: Repository, https://github.com/lpalbou/abstractvoice
8
+ Project-URL: Documentation, https://github.com/lpalbou/abstractvoice#readme
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.8
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Requires-Python: >=3.8
18
+ Description-Content-Type: text/markdown
19
+ License-File: LICENSE
20
+ Requires-Dist: numpy>=1.24.0
21
+ Requires-Dist: requests>=2.31.0
22
+ Requires-Dist: appdirs>=1.4.0
23
+ Requires-Dist: piper-tts>=1.2.0
24
+ Requires-Dist: huggingface_hub>=0.20.0
25
+ Requires-Dist: faster-whisper>=0.10.0
26
+ Requires-Dist: sounddevice>=0.4.6
27
+ Requires-Dist: soundfile>=0.12.1
28
+ Requires-Dist: webrtcvad>=2.0.10
29
+ Provides-Extra: voice
30
+ Requires-Dist: sounddevice>=0.4.6; extra == "voice"
31
+ Requires-Dist: webrtcvad>=2.0.10; extra == "voice"
32
+ Requires-Dist: soundfile>=0.12.1; extra == "voice"
33
+ Provides-Extra: audio-fx
34
+ Requires-Dist: librosa>=0.10.0; extra == "audio-fx"
35
+ Provides-Extra: cloning
36
+ Requires-Dist: f5-tts>=1.1.0; extra == "cloning"
37
+ Provides-Extra: chroma
38
+ Requires-Dist: torch>=2.0.0; extra == "chroma"
39
+ Requires-Dist: torchaudio>=2.0.0; extra == "chroma"
40
+ Requires-Dist: torchvision>=0.15.0; extra == "chroma"
41
+ Requires-Dist: transformers>=5.0.0rc0; extra == "chroma"
42
+ Requires-Dist: accelerate>=1.0.0; extra == "chroma"
43
+ Requires-Dist: av>=14.0.0; extra == "chroma"
44
+ Requires-Dist: librosa>=0.11.0; extra == "chroma"
45
+ Requires-Dist: audioread>=3.0.0; extra == "chroma"
46
+ Requires-Dist: pillow>=11.0.0; extra == "chroma"
47
+ Requires-Dist: safetensors>=0.5.0; extra == "chroma"
48
+ Provides-Extra: aec
49
+ Requires-Dist: aec-audio-processing>=1.0.1; extra == "aec"
50
+ Provides-Extra: stt
51
+ Requires-Dist: openai-whisper>=20230314; extra == "stt"
52
+ Requires-Dist: tiktoken>=0.6.0; extra == "stt"
53
+ Provides-Extra: web
54
+ Requires-Dist: flask>=2.0.0; extra == "web"
55
+ Provides-Extra: all
56
+ Requires-Dist: piper-tts>=1.2.0; extra == "all"
57
+ Requires-Dist: sounddevice>=0.4.6; extra == "all"
58
+ Requires-Dist: webrtcvad>=2.0.10; extra == "all"
59
+ Requires-Dist: openai-whisper>=20230314; extra == "all"
60
+ Requires-Dist: librosa>=0.10.0; extra == "all"
61
+ Requires-Dist: soundfile>=0.12.1; extra == "all"
62
+ Requires-Dist: flask>=2.0.0; extra == "all"
63
+ Requires-Dist: tiktoken>=0.6.0; extra == "all"
64
+ Requires-Dist: f5-tts>=1.1.0; extra == "all"
65
+ Requires-Dist: aec-audio-processing>=1.0.1; extra == "all"
66
+ Provides-Extra: dev
67
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
68
+ Requires-Dist: black>=22.0.0; extra == "dev"
69
+ Requires-Dist: flake8>=5.0.0; extra == "dev"
70
+ Provides-Extra: voice-full
71
+ Requires-Dist: sounddevice>=0.4.6; extra == "voice-full"
72
+ Requires-Dist: webrtcvad>=2.0.10; extra == "voice-full"
73
+ Requires-Dist: openai-whisper>=20230314; extra == "voice-full"
74
+ Requires-Dist: librosa>=0.10.0; extra == "voice-full"
75
+ Requires-Dist: soundfile>=0.12.1; extra == "voice-full"
76
+ Requires-Dist: tiktoken>=0.6.0; extra == "voice-full"
77
+ Provides-Extra: core-stt
78
+ Requires-Dist: openai-whisper>=20230314; extra == "core-stt"
79
+ Requires-Dist: tiktoken>=0.6.0; extra == "core-stt"
80
+ Provides-Extra: audio-only
81
+ Requires-Dist: sounddevice>=0.4.6; extra == "audio-only"
82
+ Requires-Dist: webrtcvad>=2.0.10; extra == "audio-only"
83
+ Requires-Dist: soundfile>=0.12.1; extra == "audio-only"
84
+ Dynamic: license-file
85
+
86
+ # AbstractVoice
87
+
88
+ A modular Python library for **voice I/O** around AI applications.
89
+
90
+ - **TTS (default)**: Piper (cross-platform, no system deps)
91
+ - **STT (default)**: faster-whisper
92
+ - **Local assistant**: `listen()` + `speak()` with playback/listening control
93
+ - **Headless/server**: `speak_to_bytes()` / `speak_to_file()` and `transcribe_*`
94
+
95
+ Status: **alpha** (`0.6.1`). The supported integrator surface is documented in `docs/api.md`.
96
+
97
+ Next: `docs/getting-started.md` (recommended setup + first smoke tests).
98
+
99
+ > AbstractVoice will ultimately be integrated as the voice modality of AbstractFramework.
100
+ > An OpenAI-compatible voice endpoint is an optional demo/integration layer (see backlog).
101
+
102
+ ---
103
+
104
+ ## Install
105
+
106
+ ```bash
107
+ pip install abstractvoice
108
+ ```
109
+
110
+ Optional extras (feature flags):
111
+
112
+ ```bash
113
+ pip install "abstractvoice[all]"
114
+ ```
115
+
116
+ Notes:
117
+ - `abstractvoice[all]` enables most optional features (incl. cloning + AEC + audio-fx), but **does not** include the GPU-heavy Chroma runtime.
118
+ - For the full list of extras (and platform troubleshooting), see `docs/installation.md`.
119
+
120
+ ### Explicit model downloads (recommended; never implicit in the REPL)
121
+
122
+ Some features rely on large model weights/artifacts. AbstractVoice will **not**
123
+ download these implicitly inside the REPL (offline-first).
124
+
125
+ After installing, prefetch explicitly (cross-platform):
126
+
127
+ ```bash
128
+ abstractvoice-prefetch --stt small
129
+ abstractvoice-prefetch --piper en
130
+ abstractvoice-prefetch --openf5
131
+ abstractvoice-prefetch --chroma
132
+ ```
133
+
134
+ Or equivalently:
135
+
136
+ ```bash
137
+ python -m abstractvoice download --stt small
138
+ python -m abstractvoice download --piper en
139
+ python -m abstractvoice download --openf5
140
+ python -m abstractvoice download --chroma
141
+ ```
142
+
143
+ Notes:
144
+ - `--piper <lang>` downloads the Piper ONNX voice for that language into `~/.piper/models`.
145
+ - `--openf5` is ~5.4GB. `--chroma` is very large (GPU-heavy).
146
+
147
+ ---
148
+
149
+ ## Quick smoke tests
150
+
151
+ ### REPL (fastest end-to-end)
152
+
153
+ ```bash
154
+ abstractvoice --verbose
155
+ # or (from a source checkout):
156
+ python -m abstractvoice cli --verbose
157
+ ```
158
+
159
+ Notes:
160
+ - Mic voice input is **off by default** for fast startup. Enable with `--voice-mode stop` (or in-session: `/voice stop`).
161
+ - The REPL is **offline-first**: no implicit model downloads. Use the explicit download commands above.
162
+
163
+ See `docs/repl_guide.md`.
164
+
165
+ ### Minimal Python
166
+
167
+ ```python
168
+ from abstractvoice import VoiceManager
169
+
170
+ vm = VoiceManager()
171
+ vm.speak("Hello! This is AbstractVoice.")
172
+ ```
173
+
174
+ ---
175
+
176
+ ## Public API (stable surface)
177
+
178
+ See `docs/api.md` for the supported integrator contract.
179
+
180
+ At a glance:
181
+ - **TTS**: `speak()`, `stop_speaking()`, `pause_speaking()`, `resume_speaking()`, `speak_to_bytes()`, `speak_to_file()`
182
+ - **STT**: `transcribe_file()`, `transcribe_from_bytes()`
183
+ - **Mic**: `listen()`, `stop_listening()`, `pause_listening()`, `resume_listening()`
184
+
185
+ ---
186
+
187
+ ## Documentation (minimal set)
188
+
189
+ - **Docs index**: `docs/README.md`
190
+ - **Getting started**: `docs/getting-started.md`
191
+ - **FAQ**: `docs/faq.md`
192
+ - **Orientation**: `docs/overview.md`
193
+ - **Acronyms**: `docs/acronyms.md`
194
+ - **Public API**: `docs/api.md`
195
+ - **REPL guide**: `docs/repl_guide.md`
196
+ - **Install troubleshooting**: `docs/installation.md`
197
+ - **Multilingual support**: `docs/multilingual.md`
198
+ - **Architecture (internal)**: `docs/architecture.md` + `docs/adr/`
199
+ - **Model management (Piper-first)**: `docs/model-management.md`
200
+ - **Licensing notes**: `docs/voices-and-licenses.md`
201
+
202
+ ---
203
+
204
+ ## Project
205
+
206
+ - **Changelog**: `CHANGELOG.md`
207
+ - **Contributing**: `CONTRIBUTING.md`
208
+ - **Security**: `SECURITY.md`
209
+ - **Acknowledgments**: `ACKNOWLEDGMENTS.md`
210
+
211
+ ## License
212
+
213
+ MIT. See `LICENSE`.
@@ -0,0 +1,128 @@
1
+ # AbstractVoice
2
+
3
+ A modular Python library for **voice I/O** around AI applications.
4
+
5
+ - **TTS (default)**: Piper (cross-platform, no system deps)
6
+ - **STT (default)**: faster-whisper
7
+ - **Local assistant**: `listen()` + `speak()` with playback/listening control
8
+ - **Headless/server**: `speak_to_bytes()` / `speak_to_file()` and `transcribe_*`
9
+
10
+ Status: **alpha** (`0.6.1`). The supported integrator surface is documented in `docs/api.md`.
11
+
12
+ Next: `docs/getting-started.md` (recommended setup + first smoke tests).
13
+
14
+ > AbstractVoice will ultimately be integrated as the voice modality of AbstractFramework.
15
+ > An OpenAI-compatible voice endpoint is an optional demo/integration layer (see backlog).
16
+
17
+ ---
18
+
19
+ ## Install
20
+
21
+ ```bash
22
+ pip install abstractvoice
23
+ ```
24
+
25
+ Optional extras (feature flags):
26
+
27
+ ```bash
28
+ pip install "abstractvoice[all]"
29
+ ```
30
+
31
+ Notes:
32
+ - `abstractvoice[all]` enables most optional features (incl. cloning + AEC + audio-fx), but **does not** include the GPU-heavy Chroma runtime.
33
+ - For the full list of extras (and platform troubleshooting), see `docs/installation.md`.
34
+
35
+ ### Explicit model downloads (recommended; never implicit in the REPL)
36
+
37
+ Some features rely on large model weights/artifacts. AbstractVoice will **not**
38
+ download these implicitly inside the REPL (offline-first).
39
+
40
+ After installing, prefetch explicitly (cross-platform):
41
+
42
+ ```bash
43
+ abstractvoice-prefetch --stt small
44
+ abstractvoice-prefetch --piper en
45
+ abstractvoice-prefetch --openf5
46
+ abstractvoice-prefetch --chroma
47
+ ```
48
+
49
+ Or equivalently:
50
+
51
+ ```bash
52
+ python -m abstractvoice download --stt small
53
+ python -m abstractvoice download --piper en
54
+ python -m abstractvoice download --openf5
55
+ python -m abstractvoice download --chroma
56
+ ```
57
+
58
+ Notes:
59
+ - `--piper <lang>` downloads the Piper ONNX voice for that language into `~/.piper/models`.
60
+ - `--openf5` is ~5.4GB. `--chroma` is very large (GPU-heavy).
61
+
62
+ ---
63
+
64
+ ## Quick smoke tests
65
+
66
+ ### REPL (fastest end-to-end)
67
+
68
+ ```bash
69
+ abstractvoice --verbose
70
+ # or (from a source checkout):
71
+ python -m abstractvoice cli --verbose
72
+ ```
73
+
74
+ Notes:
75
+ - Mic voice input is **off by default** for fast startup. Enable with `--voice-mode stop` (or in-session: `/voice stop`).
76
+ - The REPL is **offline-first**: no implicit model downloads. Use the explicit download commands above.
77
+
78
+ See `docs/repl_guide.md`.
79
+
80
+ ### Minimal Python
81
+
82
+ ```python
83
+ from abstractvoice import VoiceManager
84
+
85
+ vm = VoiceManager()
86
+ vm.speak("Hello! This is AbstractVoice.")
87
+ ```
88
+
89
+ ---
90
+
91
+ ## Public API (stable surface)
92
+
93
+ See `docs/api.md` for the supported integrator contract.
94
+
95
+ At a glance:
96
+ - **TTS**: `speak()`, `stop_speaking()`, `pause_speaking()`, `resume_speaking()`, `speak_to_bytes()`, `speak_to_file()`
97
+ - **STT**: `transcribe_file()`, `transcribe_from_bytes()`
98
+ - **Mic**: `listen()`, `stop_listening()`, `pause_listening()`, `resume_listening()`
99
+
100
+ ---
101
+
102
+ ## Documentation (minimal set)
103
+
104
+ - **Docs index**: `docs/README.md`
105
+ - **Getting started**: `docs/getting-started.md`
106
+ - **FAQ**: `docs/faq.md`
107
+ - **Orientation**: `docs/overview.md`
108
+ - **Acronyms**: `docs/acronyms.md`
109
+ - **Public API**: `docs/api.md`
110
+ - **REPL guide**: `docs/repl_guide.md`
111
+ - **Install troubleshooting**: `docs/installation.md`
112
+ - **Multilingual support**: `docs/multilingual.md`
113
+ - **Architecture (internal)**: `docs/architecture.md` + `docs/adr/`
114
+ - **Model management (Piper-first)**: `docs/model-management.md`
115
+ - **Licensing notes**: `docs/voices-and-licenses.md`
116
+
117
+ ---
118
+
119
+ ## Project
120
+
121
+ - **Changelog**: `CHANGELOG.md`
122
+ - **Contributing**: `CONTRIBUTING.md`
123
+ - **Security**: `SECURITY.md`
124
+ - **Acknowledgments**: `ACKNOWLEDGMENTS.md`
125
+
126
+ ## License
127
+
128
+ MIT. See `LICENSE`.
@@ -29,8 +29,5 @@ warnings.filterwarnings(
29
29
  # Import the main class for public API
30
30
  from .voice_manager import VoiceManager
31
31
 
32
- # Import simple APIs for third-party applications
33
- from .simple_model_manager import list_models, download_model, get_status, is_ready
34
-
35
- __version__ = "0.5.1"
36
- __all__ = ['VoiceManager', 'list_models', 'download_model', 'get_status', 'is_ready']
32
+ __version__ = "0.6.1"
33
+ __all__ = ["VoiceManager"]
@@ -16,8 +16,9 @@ def print_examples():
16
16
  print(" web - Web API example")
17
17
  print(" simple - Simple usage example")
18
18
  print(" check-deps - Check dependency compatibility")
19
+ print(" download - Explicitly prefetch model artifacts")
19
20
  print("\nUsage: python -m abstractvoice <example> [--language <lang>] [args...]")
20
- print("\nSupported languages: en, fr, es, de, it, ru, multilingual")
21
+ print("\nSupported languages: en, fr, de, es, ru, zh")
21
22
  print("\nExamples:")
22
23
  print(" python -m abstractvoice cli --language fr # French CLI")
23
24
  print(" python -m abstractvoice simple --language ru # Russian simple example")
@@ -99,7 +100,7 @@ def main():
99
100
  parser = argparse.ArgumentParser(description="AbstractVoice examples")
100
101
  parser.add_argument("example", nargs="?", help="Example to run (cli, web, simple, check-deps)")
101
102
  parser.add_argument("--language", "--lang", default="en",
102
- choices=["en", "fr", "es", "de", "it", "ru", "multilingual"],
103
+ choices=["en", "fr", "de", "es", "ru", "zh"],
103
104
  help="Voice language for examples")
104
105
 
105
106
  # Parse just the first argument and language
@@ -119,6 +120,84 @@ def main():
119
120
  print("This might indicate a dependency issue.")
120
121
  return
121
122
 
123
+ if args.example == "download":
124
+ dl = argparse.ArgumentParser(description="AbstractVoice explicit downloads")
125
+ dl.add_argument("--stt", dest="stt_model", default=None, help="Prefetch faster-whisper model (e.g. small)")
126
+ dl.add_argument(
127
+ "--openf5",
128
+ action="store_true",
129
+ help="Prefetch OpenF5 artifacts for cloning (~5.4GB, requires abstractvoice[cloning])",
130
+ )
131
+ dl.add_argument(
132
+ "--chroma",
133
+ action="store_true",
134
+ help="Prefetch Chroma-4B artifacts (~14GB+, requires HF access; install abstractvoice[chroma] to run inference)",
135
+ )
136
+ dl.add_argument(
137
+ "--piper",
138
+ dest="piper_language",
139
+ default=None,
140
+ help="Prefetch Piper voice model for a language (e.g. en/fr/de).",
141
+ )
142
+ dl_args = dl.parse_args(remaining)
143
+
144
+ if not dl_args.stt_model and not dl_args.openf5 and not dl_args.chroma and not dl_args.piper_language:
145
+ print("Nothing to download. Examples:")
146
+ print(" python -m abstractvoice download --stt small")
147
+ print(" python -m abstractvoice download --openf5")
148
+ print(" python -m abstractvoice download --chroma")
149
+ print(" python -m abstractvoice download --piper en")
150
+ return
151
+
152
+ if dl_args.stt_model:
153
+ try:
154
+ from abstractvoice.adapters.stt_faster_whisper import FasterWhisperAdapter
155
+
156
+ model = str(dl_args.stt_model).strip()
157
+ print(f"Downloading STT model (faster-whisper): {model}")
158
+ stt = FasterWhisperAdapter(model_size=model, device="cpu", compute_type="int8", allow_downloads=True)
159
+ if not stt.is_available():
160
+ raise RuntimeError("Model download/load failed.")
161
+ print("✅ STT model ready.")
162
+ except Exception as e:
163
+ print(f"❌ STT download failed: {e}")
164
+
165
+ if dl_args.openf5:
166
+ try:
167
+ from abstractvoice.cloning.engine_f5 import F5TTSVoiceCloningEngine
168
+
169
+ print("Downloading OpenF5 artifacts (cloning)…")
170
+ engine = F5TTSVoiceCloningEngine(debug=True)
171
+ engine.ensure_openf5_artifacts_downloaded()
172
+ print("✅ OpenF5 artifacts ready.")
173
+ except Exception as e:
174
+ print(f"❌ OpenF5 download failed: {e}")
175
+
176
+ if dl_args.chroma:
177
+ try:
178
+ from abstractvoice.cloning.engine_chroma import ChromaVoiceCloningEngine
179
+
180
+ print("Downloading Chroma artifacts (cloning)…")
181
+ engine = ChromaVoiceCloningEngine(debug=True)
182
+ engine.ensure_chroma_artifacts_downloaded()
183
+ print("✅ Chroma artifacts ready.")
184
+ except Exception as e:
185
+ print(f"❌ Chroma download failed: {e}")
186
+
187
+ if dl_args.piper_language:
188
+ try:
189
+ from abstractvoice.adapters.tts_piper import PiperTTSAdapter
190
+
191
+ lang = str(dl_args.piper_language).strip().lower()
192
+ print(f"Downloading Piper voice model: {lang}")
193
+ piper = PiperTTSAdapter(language=lang, allow_downloads=True, auto_load=False)
194
+ if not piper.ensure_model_downloaded(lang):
195
+ raise RuntimeError("Piper model download failed.")
196
+ print("✅ Piper model ready.")
197
+ except Exception as e:
198
+ print(f"❌ Piper download failed: {e}")
199
+ return
200
+
122
201
  # Set remaining args as sys.argv for the examples, including language
123
202
  if args.language != "en":
124
203
  remaining = ["--language", args.language] + remaining
@@ -138,4 +217,4 @@ def main():
138
217
 
139
218
 
140
219
  if __name__ == "__main__":
141
- main()
220
+ main()
@@ -0,0 +1,12 @@
1
+ """Adapter interfaces for TTS and STT engines.
2
+
3
+ This module defines base interfaces for pluggable TTS and STT engines,
4
+ enabling easy integration of new speech synthesis and recognition backends
5
+ while maintaining API compatibility.
6
+ """
7
+
8
+ from .base import TTSAdapter, STTAdapter
9
+ from .tts_piper import PiperTTSAdapter
10
+ from .stt_faster_whisper import FasterWhisperAdapter
11
+
12
+ __all__ = ['TTSAdapter', 'STTAdapter', 'PiperTTSAdapter', 'FasterWhisperAdapter']
@@ -0,0 +1,207 @@
1
+ """Base adapter interfaces for TTS and STT engines.
2
+
3
+ These abstract base classes define the contract that all TTS and STT adapters
4
+ must implement, ensuring consistent API across different backends.
5
+ """
6
+
7
from abc import ABC, abstractmethod
import io
from typing import Any, Dict, List, Optional, Union

import numpy as np
11
+
12
+
13
class TTSAdapter(ABC):
    """Abstract base class for Text-to-Speech adapters.

    All TTS engines must implement this interface to be compatible with
    the VoiceManager. This ensures we can swap engines without breaking
    existing code.

    Subclasses must override every ``@abstractmethod`` below; ``get_info``
    is a concrete convenience built on top of them.
    """

    @abstractmethod
    def synthesize(self, text: str) -> np.ndarray:
        """Convert text to audio array for immediate playback.

        Args:
            text: The text to synthesize

        Returns:
            Audio data as numpy array (shape: [samples,], dtype: float32, range: -1.0 to 1.0)
        """

    @abstractmethod
    def synthesize_to_bytes(self, text: str, format: str = 'wav') -> bytes:
        """Convert text to audio bytes for network transmission or file storage.

        This method is essential for client-server architectures where the backend
        generates speech and sends it to clients for playback.

        Args:
            text: The text to synthesize
            format: Audio format ('wav', 'mp3', 'ogg'). Default: 'wav'

        Returns:
            Audio data as bytes in the specified format
        """

    @abstractmethod
    def synthesize_to_file(self, text: str, output_path: str, format: Optional[str] = None) -> str:
        """Convert text to audio file.

        Args:
            text: The text to synthesize
            output_path: Path to save the audio file
            format: Audio format (optional, inferred from file extension if not provided)

        Returns:
            Path to the saved audio file
        """

    @abstractmethod
    def set_language(self, language: str) -> bool:
        """Switch the TTS language.

        Args:
            language: ISO 639-1 language code (e.g., 'en', 'fr', 'de')

        Returns:
            True if language switch successful, False otherwise
        """

    # NOTE: ``List[str]`` (typing) rather than the builtin ``list[str]``:
    # annotations are evaluated at definition time, and subscripting the
    # builtin ``list`` raises TypeError on Python 3.8, which the package
    # metadata still declares as supported.
    @abstractmethod
    def get_supported_languages(self) -> List[str]:
        """Get list of supported language codes.

        Returns:
            List of ISO 639-1 language codes
        """

    @abstractmethod
    def get_sample_rate(self) -> int:
        """Get the sample rate of the synthesized audio.

        Returns:
            Sample rate in Hz (e.g., 22050, 16000)
        """

    @abstractmethod
    def is_available(self) -> bool:
        """Check if this TTS engine is available and functional.

        Returns:
            True if the engine can be used, False if dependencies missing or initialization failed
        """

    def get_info(self) -> Dict[str, Any]:
        """Get metadata about this TTS engine.

        Returns:
            Dictionary with the keys ``'name'`` (concrete class name),
            ``'languages'``, ``'sample_rate'`` and ``'available'``, each
            obtained from the corresponding adapter method.
        """
        return {
            'name': self.__class__.__name__,
            'languages': self.get_supported_languages(),
            'sample_rate': self.get_sample_rate(),
            'available': self.is_available(),
        }
114
+
115
+
116
class STTAdapter(ABC):
    """Abstract base class for Speech-to-Text adapters.

    All STT engines must implement this interface to be compatible with
    the VoiceManager.

    Subclasses must override every ``@abstractmethod`` below; ``get_info``
    is a concrete convenience built on top of them.
    """

    @abstractmethod
    def transcribe(self, audio_path: str, language: Optional[str] = None) -> str:
        """Transcribe audio file to text.

        Args:
            audio_path: Path to audio file
            language: Target language (optional, auto-detect if not provided)

        Returns:
            Transcribed text
        """

    @abstractmethod
    def transcribe_from_bytes(self, audio_bytes: bytes, language: Optional[str] = None) -> str:
        """Transcribe audio from bytes (network use case).

        This method is essential for client-server architectures where clients
        record audio and send it to the backend for transcription.

        Args:
            audio_bytes: Audio data as bytes
            language: Target language (optional, auto-detect if not provided)

        Returns:
            Transcribed text
        """

    @abstractmethod
    def transcribe_from_array(self, audio_array: np.ndarray, sample_rate: int,
                              language: Optional[str] = None) -> str:
        """Transcribe audio from numpy array.

        Args:
            audio_array: Audio data as numpy array
            sample_rate: Sample rate of the audio in Hz
            language: Target language (optional, auto-detect if not provided)

        Returns:
            Transcribed text
        """

    @abstractmethod
    def set_language(self, language: str) -> bool:
        """Set the default language for transcription.

        Args:
            language: ISO 639-1 language code

        Returns:
            True if successful, False otherwise
        """

    # NOTE: ``List[str]`` (typing) rather than the builtin ``list[str]``:
    # annotations are evaluated at definition time, and subscripting the
    # builtin ``list`` raises TypeError on Python 3.8, which the package
    # metadata still declares as supported.
    @abstractmethod
    def get_supported_languages(self) -> List[str]:
        """Get list of supported language codes.

        Returns:
            List of ISO 639-1 language codes
        """

    @abstractmethod
    def is_available(self) -> bool:
        """Check if this STT engine is available and functional.

        Returns:
            True if the engine can be used, False otherwise
        """

    def get_info(self) -> Dict[str, Any]:
        """Get metadata about this STT engine.

        Returns:
            Dictionary with the keys ``'name'`` (concrete class name),
            ``'languages'`` and ``'available'``, each obtained from the
            corresponding adapter method.
        """
        return {
            'name': self.__class__.__name__,
            'languages': self.get_supported_languages(),
            'available': self.is_available(),
        }