abstractvoice 0.1.0__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {abstractvoice-0.1.0 → abstractvoice-0.2.0}/LICENSE +1 -1
- {abstractvoice-0.1.0/abstractvoice.egg-info → abstractvoice-0.2.0}/PKG-INFO +228 -50
- abstractvoice-0.1.0/PKG-INFO → abstractvoice-0.2.0/README.md +141 -77
- {abstractvoice-0.1.0 → abstractvoice-0.2.0}/abstractvoice/__main__.py +20 -10
- {abstractvoice-0.1.0 → abstractvoice-0.2.0}/abstractvoice/examples/cli_repl.py +198 -13
- {abstractvoice-0.1.0 → abstractvoice-0.2.0}/abstractvoice/examples/voice_cli.py +20 -6
- {abstractvoice-0.1.0 → abstractvoice-0.2.0}/abstractvoice/recognition.py +50 -7
- {abstractvoice-0.1.0 → abstractvoice-0.2.0}/abstractvoice/stt/transcriber.py +17 -2
- {abstractvoice-0.1.0 → abstractvoice-0.2.0}/abstractvoice/tts/tts_engine.py +84 -32
- {abstractvoice-0.1.0 → abstractvoice-0.2.0}/abstractvoice/vad/voice_detector.py +16 -2
- abstractvoice-0.2.0/abstractvoice/voice_manager.py +836 -0
- abstractvoice-0.1.0/README.md → abstractvoice-0.2.0/abstractvoice.egg-info/PKG-INFO +255 -39
- abstractvoice-0.2.0/abstractvoice.egg-info/requires.txt +104 -0
- abstractvoice-0.2.0/pyproject.toml +157 -0
- abstractvoice-0.1.0/abstractvoice/voice_manager.py +0 -294
- abstractvoice-0.1.0/abstractvoice.egg-info/requires.txt +0 -18
- abstractvoice-0.1.0/pyproject.toml +0 -54
- {abstractvoice-0.1.0 → abstractvoice-0.2.0}/abstractvoice/__init__.py +0 -0
- {abstractvoice-0.1.0 → abstractvoice-0.2.0}/abstractvoice/examples/__init__.py +0 -0
- {abstractvoice-0.1.0 → abstractvoice-0.2.0}/abstractvoice/examples/web_api.py +0 -0
- {abstractvoice-0.1.0 → abstractvoice-0.2.0}/abstractvoice/stt/__init__.py +0 -0
- {abstractvoice-0.1.0 → abstractvoice-0.2.0}/abstractvoice/tts/__init__.py +0 -0
- {abstractvoice-0.1.0 → abstractvoice-0.2.0}/abstractvoice/vad/__init__.py +0 -0
- {abstractvoice-0.1.0 → abstractvoice-0.2.0}/abstractvoice.egg-info/SOURCES.txt +0 -0
- {abstractvoice-0.1.0 → abstractvoice-0.2.0}/abstractvoice.egg-info/dependency_links.txt +0 -0
- {abstractvoice-0.1.0 → abstractvoice-0.2.0}/abstractvoice.egg-info/entry_points.txt +0 -0
- {abstractvoice-0.1.0 → abstractvoice-0.2.0}/abstractvoice.egg-info/top_level.txt +0 -0
- {abstractvoice-0.1.0 → abstractvoice-0.2.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
MIT License
|
|
2
2
|
|
|
3
|
-
Copyright (c) 2025 Laurent-Philippe Albou (
|
|
3
|
+
Copyright (c) 2025 Laurent-Philippe Albou (contact@abstractcore.ai)
|
|
4
4
|
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
6
|
of this software and associated documentation files (the "Software"), to deal
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: abstractvoice
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: A modular Python library for voice interactions with AI systems
|
|
5
5
|
Author-email: Laurent-Philippe Albou <contact@abstractcore.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -18,31 +18,108 @@ Requires-Python: >=3.8
|
|
|
18
18
|
Description-Content-Type: text/markdown
|
|
19
19
|
License-File: LICENSE
|
|
20
20
|
Requires-Dist: numpy>=1.24.0
|
|
21
|
-
Requires-Dist: sounddevice>=0.4.6
|
|
22
|
-
Requires-Dist: webrtcvad>=2.0.10
|
|
23
|
-
Requires-Dist: PyAudio>=0.2.13
|
|
24
|
-
Requires-Dist: openai-whisper>=20230314
|
|
25
|
-
Requires-Dist: coqui-tts>=0.27.0
|
|
26
|
-
Requires-Dist: torch>=2.0.0
|
|
27
|
-
Requires-Dist: torchaudio>=2.0.0
|
|
28
|
-
Requires-Dist: librosa>=0.10.0
|
|
29
|
-
Requires-Dist: soundfile>=0.12.1
|
|
30
21
|
Requires-Dist: requests>=2.31.0
|
|
31
|
-
|
|
32
|
-
Requires-Dist: tiktoken>=0.6.0
|
|
22
|
+
Provides-Extra: voice
|
|
23
|
+
Requires-Dist: sounddevice>=0.4.6; extra == "voice"
|
|
24
|
+
Requires-Dist: webrtcvad>=2.0.10; extra == "voice"
|
|
25
|
+
Requires-Dist: PyAudio>=0.2.13; extra == "voice"
|
|
26
|
+
Requires-Dist: soundfile>=0.12.1; extra == "voice"
|
|
27
|
+
Provides-Extra: tts
|
|
28
|
+
Requires-Dist: coqui-tts>=0.27.0; extra == "tts"
|
|
29
|
+
Requires-Dist: torch>=2.0.0; extra == "tts"
|
|
30
|
+
Requires-Dist: torchaudio>=2.0.0; extra == "tts"
|
|
31
|
+
Requires-Dist: librosa>=0.10.0; extra == "tts"
|
|
32
|
+
Provides-Extra: stt
|
|
33
|
+
Requires-Dist: openai-whisper>=20230314; extra == "stt"
|
|
34
|
+
Requires-Dist: tiktoken>=0.6.0; extra == "stt"
|
|
35
|
+
Provides-Extra: web
|
|
36
|
+
Requires-Dist: flask>=2.0.0; extra == "web"
|
|
37
|
+
Provides-Extra: all
|
|
38
|
+
Requires-Dist: sounddevice>=0.4.6; extra == "all"
|
|
39
|
+
Requires-Dist: webrtcvad>=2.0.10; extra == "all"
|
|
40
|
+
Requires-Dist: PyAudio>=0.2.13; extra == "all"
|
|
41
|
+
Requires-Dist: openai-whisper>=20230314; extra == "all"
|
|
42
|
+
Requires-Dist: coqui-tts>=0.27.0; extra == "all"
|
|
43
|
+
Requires-Dist: torch>=2.0.0; extra == "all"
|
|
44
|
+
Requires-Dist: torchaudio>=2.0.0; extra == "all"
|
|
45
|
+
Requires-Dist: librosa>=0.10.0; extra == "all"
|
|
46
|
+
Requires-Dist: soundfile>=0.12.1; extra == "all"
|
|
47
|
+
Requires-Dist: flask>=2.0.0; extra == "all"
|
|
48
|
+
Requires-Dist: tiktoken>=0.6.0; extra == "all"
|
|
33
49
|
Provides-Extra: dev
|
|
34
50
|
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
35
51
|
Requires-Dist: black>=22.0.0; extra == "dev"
|
|
36
52
|
Requires-Dist: flake8>=5.0.0; extra == "dev"
|
|
53
|
+
Provides-Extra: languages
|
|
54
|
+
Requires-Dist: sounddevice>=0.4.6; extra == "languages"
|
|
55
|
+
Requires-Dist: webrtcvad>=2.0.10; extra == "languages"
|
|
56
|
+
Requires-Dist: PyAudio>=0.2.13; extra == "languages"
|
|
57
|
+
Requires-Dist: openai-whisper>=20230314; extra == "languages"
|
|
58
|
+
Requires-Dist: coqui-tts>=0.27.0; extra == "languages"
|
|
59
|
+
Requires-Dist: torch>=2.0.0; extra == "languages"
|
|
60
|
+
Requires-Dist: torchaudio>=2.0.0; extra == "languages"
|
|
61
|
+
Requires-Dist: librosa>=0.10.0; extra == "languages"
|
|
62
|
+
Requires-Dist: soundfile>=0.12.1; extra == "languages"
|
|
63
|
+
Requires-Dist: flask>=2.0.0; extra == "languages"
|
|
64
|
+
Requires-Dist: tiktoken>=0.6.0; extra == "languages"
|
|
65
|
+
Provides-Extra: fr
|
|
66
|
+
Requires-Dist: sounddevice>=0.4.6; extra == "fr"
|
|
67
|
+
Requires-Dist: webrtcvad>=2.0.10; extra == "fr"
|
|
68
|
+
Requires-Dist: PyAudio>=0.2.13; extra == "fr"
|
|
69
|
+
Requires-Dist: openai-whisper>=20230314; extra == "fr"
|
|
70
|
+
Requires-Dist: coqui-tts>=0.27.0; extra == "fr"
|
|
71
|
+
Requires-Dist: torch>=2.0.0; extra == "fr"
|
|
72
|
+
Requires-Dist: torchaudio>=2.0.0; extra == "fr"
|
|
73
|
+
Requires-Dist: librosa>=0.10.0; extra == "fr"
|
|
74
|
+
Requires-Dist: soundfile>=0.12.1; extra == "fr"
|
|
75
|
+
Requires-Dist: flask>=2.0.0; extra == "fr"
|
|
76
|
+
Requires-Dist: tiktoken>=0.6.0; extra == "fr"
|
|
77
|
+
Provides-Extra: es
|
|
78
|
+
Requires-Dist: sounddevice>=0.4.6; extra == "es"
|
|
79
|
+
Requires-Dist: webrtcvad>=2.0.10; extra == "es"
|
|
80
|
+
Requires-Dist: PyAudio>=0.2.13; extra == "es"
|
|
81
|
+
Requires-Dist: openai-whisper>=20230314; extra == "es"
|
|
82
|
+
Requires-Dist: coqui-tts>=0.27.0; extra == "es"
|
|
83
|
+
Requires-Dist: torch>=2.0.0; extra == "es"
|
|
84
|
+
Requires-Dist: torchaudio>=2.0.0; extra == "es"
|
|
85
|
+
Requires-Dist: librosa>=0.10.0; extra == "es"
|
|
86
|
+
Requires-Dist: soundfile>=0.12.1; extra == "es"
|
|
87
|
+
Requires-Dist: flask>=2.0.0; extra == "es"
|
|
88
|
+
Requires-Dist: tiktoken>=0.6.0; extra == "es"
|
|
89
|
+
Provides-Extra: de
|
|
90
|
+
Requires-Dist: sounddevice>=0.4.6; extra == "de"
|
|
91
|
+
Requires-Dist: webrtcvad>=2.0.10; extra == "de"
|
|
92
|
+
Requires-Dist: PyAudio>=0.2.13; extra == "de"
|
|
93
|
+
Requires-Dist: openai-whisper>=20230314; extra == "de"
|
|
94
|
+
Requires-Dist: coqui-tts>=0.27.0; extra == "de"
|
|
95
|
+
Requires-Dist: torch>=2.0.0; extra == "de"
|
|
96
|
+
Requires-Dist: torchaudio>=2.0.0; extra == "de"
|
|
97
|
+
Requires-Dist: librosa>=0.10.0; extra == "de"
|
|
98
|
+
Requires-Dist: soundfile>=0.12.1; extra == "de"
|
|
99
|
+
Requires-Dist: flask>=2.0.0; extra == "de"
|
|
100
|
+
Requires-Dist: tiktoken>=0.6.0; extra == "de"
|
|
101
|
+
Provides-Extra: it
|
|
102
|
+
Requires-Dist: sounddevice>=0.4.6; extra == "it"
|
|
103
|
+
Requires-Dist: webrtcvad>=2.0.10; extra == "it"
|
|
104
|
+
Requires-Dist: PyAudio>=0.2.13; extra == "it"
|
|
105
|
+
Requires-Dist: openai-whisper>=20230314; extra == "it"
|
|
106
|
+
Requires-Dist: coqui-tts>=0.27.0; extra == "it"
|
|
107
|
+
Requires-Dist: torch>=2.0.0; extra == "it"
|
|
108
|
+
Requires-Dist: torchaudio>=2.0.0; extra == "it"
|
|
109
|
+
Requires-Dist: librosa>=0.10.0; extra == "it"
|
|
110
|
+
Requires-Dist: soundfile>=0.12.1; extra == "it"
|
|
111
|
+
Requires-Dist: flask>=2.0.0; extra == "it"
|
|
112
|
+
Requires-Dist: tiktoken>=0.6.0; extra == "it"
|
|
37
113
|
Dynamic: license-file
|
|
38
114
|
|
|
39
115
|
# AbstractVoice
|
|
40
116
|
|
|
41
117
|
[](https://pypi.org/project/abstractvoice/)
|
|
42
118
|
[](https://pypi.org/project/abstractvoice/)
|
|
43
|
-
[](https://github.com/lpalbou/abstractvoice/blob/main/LICENSE)
|
|
44
120
|
[](https://github.com/lpalbou/abstractvoice/stargazers)
|
|
45
121
|
|
|
122
|
+
|
|
46
123
|
A modular Python library for voice interactions with AI systems, providing text-to-speech (TTS) and speech-to-text (STT) capabilities with interrupt handling.
|
|
47
124
|
|
|
48
125
|
While we provide CLI and WEB examples, AbstractVoice is designed to be integrated in other projects.
|
|
@@ -62,73 +139,174 @@ While we provide CLI and WEB examples, AbstractVoice is designed to be integrate
|
|
|
62
139
|
- **Interrupt Handling**: Stop TTS by speaking or using stop commands
|
|
63
140
|
- **Modular Design**: Easily integrate with any text generation system
|
|
64
141
|
|
|
142
|
+
Note: *the LLM access is rudimentary; AbstractVoice is provided mainly as an example and demonstrator. For a better integration, use this library's functionality directly in combination with [AbstractCore](https://github.com/lpalbou/AbstractCore).*
|
|
143
|
+
|
|
65
144
|
## Installation
|
|
66
145
|
|
|
67
|
-
|
|
146
|
+
AbstractVoice is designed to **work everywhere, out of the box** with automatic quality upgrades.
|
|
147
|
+
|
|
148
|
+
### 🚀 Quick Start (Recommended)
|
|
149
|
+
|
|
150
|
+
```bash
|
|
151
|
+
# One command installation - works on all systems
|
|
152
|
+
pip install abstractvoice[all]
|
|
68
153
|
|
|
69
|
-
|
|
70
|
-
-
|
|
71
|
-
|
|
154
|
+
# Verify it works
|
|
155
|
+
python -c "from abstractvoice import VoiceManager; print('✅ Ready to go!')"
|
|
156
|
+
```
|
|
72
157
|
|
|
73
|
-
|
|
158
|
+
**That's it!** AbstractVoice automatically:
|
|
159
|
+
- ✅ **Works everywhere** - Uses reliable models that run on any system
|
|
160
|
+
- ✅ **Auto-upgrades quality** - Detects when better models are available
|
|
161
|
+
- ✅ **No system dependencies required** - Pure Python installation
|
|
162
|
+
- ✅ **Optional quality boost** - Install `espeak-ng` for premium voices
|
|
74
163
|
|
|
75
|
-
|
|
164
|
+
### Installation Options
|
|
76
165
|
|
|
77
|
-
**macOS:**
|
|
78
166
|
```bash
|
|
79
|
-
|
|
167
|
+
# Minimal (just 2 dependencies)
|
|
168
|
+
pip install abstractvoice
|
|
169
|
+
|
|
170
|
+
# Add features as needed
|
|
171
|
+
pip install abstractvoice[tts] # Text-to-speech
|
|
172
|
+
pip install abstractvoice[stt] # Speech-to-text
|
|
173
|
+
pip install abstractvoice[all] # Everything (recommended)
|
|
174
|
+
|
|
175
|
+
# Language-specific
|
|
176
|
+
pip install abstractvoice[fr] # French with all features
|
|
177
|
+
pip install abstractvoice[de] # German with all features
|
|
80
178
|
```
|
|
81
179
|
|
|
82
|
-
|
|
180
|
+
### Optional Quality Upgrade
|
|
181
|
+
|
|
182
|
+
For the **absolute best voice quality**, install espeak-ng:
|
|
183
|
+
|
|
83
184
|
```bash
|
|
185
|
+
# macOS
|
|
186
|
+
brew install espeak-ng
|
|
187
|
+
|
|
188
|
+
# Linux
|
|
84
189
|
sudo apt-get install espeak-ng
|
|
190
|
+
|
|
191
|
+
# Windows
|
|
192
|
+
conda install espeak-ng
|
|
85
193
|
```
|
|
86
194
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
195
|
+
AbstractVoice automatically detects espeak-ng and upgrades to premium quality voices when available.
|
|
196
|
+
|
|
197
|
+
## Quick Start
|
|
198
|
+
|
|
199
|
+
### Basic Usage (Minimal Installation)
|
|
200
|
+
|
|
201
|
+
```python
|
|
202
|
+
# First install with minimal dependencies
|
|
203
|
+
# pip install abstractvoice
|
|
204
|
+
|
|
205
|
+
from abstractvoice import VoiceManager
|
|
206
|
+
|
|
207
|
+
# This will show a helpful error message with installation instructions
|
|
208
|
+
try:
|
|
209
|
+
vm = VoiceManager()
|
|
210
|
+
except ImportError as e:
|
|
211
|
+
print(e) # Shows: "TTS functionality requires optional dependencies..."
|
|
212
|
+
# Follow the instructions to install: pip install abstractvoice[all]
|
|
90
213
|
```
|
|
91
214
|
|
|
92
|
-
|
|
93
|
-
```bash
|
|
94
|
-
# Option 1: Using Conda
|
|
95
|
-
conda install -c conda-forge espeak-ng
|
|
215
|
+
### Full Usage Example
|
|
96
216
|
|
|
97
|
-
|
|
98
|
-
|
|
217
|
+
```python
|
|
218
|
+
# After installing with: pip install abstractvoice[all]
|
|
99
219
|
|
|
100
|
-
|
|
220
|
+
from abstractvoice import VoiceManager
|
|
221
|
+
|
|
222
|
+
# Initialize voice manager
|
|
223
|
+
vm = VoiceManager(language='en', debug_mode=True)
|
|
224
|
+
|
|
225
|
+
# Text-to-speech
|
|
226
|
+
vm.speak("Hello! I can speak text and listen for responses.")
|
|
227
|
+
|
|
228
|
+
# Speech-to-text with callbacks
|
|
229
|
+
def on_transcription(text):
|
|
230
|
+
print(f"You said: {text}")
|
|
231
|
+
# Process the transcription
|
|
232
|
+
vm.speak(f"I heard you say: {text}")
|
|
233
|
+
|
|
234
|
+
def on_stop():
|
|
235
|
+
print("Stopping voice interaction")
|
|
236
|
+
|
|
237
|
+
# Start listening
|
|
238
|
+
vm.listen(on_transcription, on_stop)
|
|
239
|
+
|
|
240
|
+
# The voice manager will automatically pause listening when speaking
|
|
241
|
+
# and resume when done to prevent feedback loops
|
|
101
242
|
```
|
|
102
243
|
|
|
103
|
-
|
|
244
|
+
## Additional Examples
|
|
104
245
|
|
|
105
|
-
###
|
|
246
|
+
### Language-Specific Usage
|
|
106
247
|
|
|
107
|
-
```
|
|
108
|
-
#
|
|
109
|
-
|
|
248
|
+
```python
|
|
249
|
+
# French voice
|
|
250
|
+
vm_fr = VoiceManager(language='fr')
|
|
251
|
+
vm_fr.speak("Bonjour! Je peux parler français.")
|
|
110
252
|
|
|
111
|
-
#
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
253
|
+
# Spanish voice
|
|
254
|
+
vm_es = VoiceManager(language='es')
|
|
255
|
+
vm_es.speak("¡Hola! Puedo hablar español.")
|
|
256
|
+
|
|
257
|
+
# Dynamic language switching
|
|
258
|
+
vm.set_language('fr') # Switch to French
|
|
259
|
+
vm.set_language('en') # Switch back to English
|
|
115
260
|
```
|
|
116
261
|
|
|
117
|
-
###
|
|
262
|
+
### Advanced Configuration
|
|
118
263
|
|
|
119
|
-
```
|
|
120
|
-
|
|
121
|
-
|
|
264
|
+
```python
|
|
265
|
+
from abstractvoice import VoiceManager
|
|
266
|
+
|
|
267
|
+
# Custom TTS model selection
|
|
268
|
+
vm = VoiceManager(
|
|
269
|
+
language='en',
|
|
270
|
+
tts_model='tts_models/en/ljspeech/fast_pitch', # Specific model
|
|
271
|
+
whisper_model='base', # Larger Whisper model for better accuracy
|
|
272
|
+
debug_mode=True
|
|
273
|
+
)
|
|
274
|
+
|
|
275
|
+
# Speed control
|
|
276
|
+
vm.set_speed(1.5) # 1.5x speed
|
|
277
|
+
vm.speak("This text will be spoken faster.")
|
|
278
|
+
|
|
279
|
+
# Model switching at runtime
|
|
280
|
+
vm.set_tts_model('tts_models/en/ljspeech/vits') # Switch to VITS
|
|
281
|
+
vm.set_whisper('small') # Switch to larger Whisper model
|
|
122
282
|
```
|
|
123
283
|
|
|
124
|
-
###
|
|
284
|
+
### Error Handling and Graceful Degradation
|
|
125
285
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
286
|
+
AbstractVoice is designed to provide helpful error messages and fall back gracefully:
|
|
287
|
+
|
|
288
|
+
```python
|
|
289
|
+
# If you install just the basic package
|
|
290
|
+
# pip install abstractvoice
|
|
291
|
+
|
|
292
|
+
from abstractvoice import VoiceManager # This works fine
|
|
293
|
+
|
|
294
|
+
try:
|
|
295
|
+
vm = VoiceManager() # This will fail with helpful message
|
|
296
|
+
except ImportError as e:
|
|
297
|
+
print(e)
|
|
298
|
+
# Output: "TTS functionality requires optional dependencies. Install with:
|
|
299
|
+
# pip install abstractvoice[tts] # For TTS only
|
|
300
|
+
# pip install abstractvoice[all] # For all features"
|
|
301
|
+
|
|
302
|
+
# Missing espeak-ng automatically falls back to compatible models
|
|
303
|
+
# Missing dependencies show clear installation instructions
|
|
304
|
+
# All errors are graceful with helpful guidance
|
|
129
305
|
```
|
|
130
306
|
|
|
131
|
-
##
|
|
307
|
+
## CLI and Web Examples
|
|
308
|
+
|
|
309
|
+
AbstractVoice includes example applications to demonstrate its capabilities:
|
|
132
310
|
|
|
133
311
|
### Using AbstractVoice from the Command Line
|
|
134
312
|
|
|
@@ -1,48 +1,11 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: abstractvoice
|
|
3
|
-
Version: 0.1.0
|
|
4
|
-
Summary: A modular Python library for voice interactions with AI systems
|
|
5
|
-
Author-email: Laurent-Philippe Albou <contact@abstractcore.ai>
|
|
6
|
-
License-Expression: MIT
|
|
7
|
-
Project-URL: Repository, https://github.com/lpalbou/abstractvoice
|
|
8
|
-
Project-URL: Documentation, https://github.com/lpalbou/abstractvoice#readme
|
|
9
|
-
Classifier: Development Status :: 3 - Alpha
|
|
10
|
-
Classifier: Intended Audience :: Developers
|
|
11
|
-
Classifier: Programming Language :: Python :: 3
|
|
12
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
-
Requires-Python: >=3.8
|
|
18
|
-
Description-Content-Type: text/markdown
|
|
19
|
-
License-File: LICENSE
|
|
20
|
-
Requires-Dist: numpy>=1.24.0
|
|
21
|
-
Requires-Dist: sounddevice>=0.4.6
|
|
22
|
-
Requires-Dist: webrtcvad>=2.0.10
|
|
23
|
-
Requires-Dist: PyAudio>=0.2.13
|
|
24
|
-
Requires-Dist: openai-whisper>=20230314
|
|
25
|
-
Requires-Dist: coqui-tts>=0.27.0
|
|
26
|
-
Requires-Dist: torch>=2.0.0
|
|
27
|
-
Requires-Dist: torchaudio>=2.0.0
|
|
28
|
-
Requires-Dist: librosa>=0.10.0
|
|
29
|
-
Requires-Dist: soundfile>=0.12.1
|
|
30
|
-
Requires-Dist: requests>=2.31.0
|
|
31
|
-
Requires-Dist: flask>=2.0.0
|
|
32
|
-
Requires-Dist: tiktoken>=0.6.0
|
|
33
|
-
Provides-Extra: dev
|
|
34
|
-
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
35
|
-
Requires-Dist: black>=22.0.0; extra == "dev"
|
|
36
|
-
Requires-Dist: flake8>=5.0.0; extra == "dev"
|
|
37
|
-
Dynamic: license-file
|
|
38
|
-
|
|
39
1
|
# AbstractVoice
|
|
40
2
|
|
|
41
3
|
[](https://pypi.org/project/abstractvoice/)
|
|
42
4
|
[](https://pypi.org/project/abstractvoice/)
|
|
43
|
-
[](https://github.com/lpalbou/abstractvoice/blob/main/LICENSE)
|
|
44
6
|
[](https://github.com/lpalbou/abstractvoice/stargazers)
|
|
45
7
|
|
|
8
|
+
|
|
46
9
|
A modular Python library for voice interactions with AI systems, providing text-to-speech (TTS) and speech-to-text (STT) capabilities with interrupt handling.
|
|
47
10
|
|
|
48
11
|
While we provide CLI and WEB examples, AbstractVoice is designed to be integrated in other projects.
|
|
@@ -62,73 +25,174 @@ While we provide CLI and WEB examples, AbstractVoice is designed to be integrate
|
|
|
62
25
|
- **Interrupt Handling**: Stop TTS by speaking or using stop commands
|
|
63
26
|
- **Modular Design**: Easily integrate with any text generation system
|
|
64
27
|
|
|
28
|
+
Note: *the LLM access is rudimentary; AbstractVoice is provided mainly as an example and demonstrator. For a better integration, use this library's functionality directly in combination with [AbstractCore](https://github.com/lpalbou/AbstractCore).*
|
|
29
|
+
|
|
65
30
|
## Installation
|
|
66
31
|
|
|
67
|
-
|
|
32
|
+
AbstractVoice is designed to **work everywhere, out of the box** with automatic quality upgrades.
|
|
33
|
+
|
|
34
|
+
### 🚀 Quick Start (Recommended)
|
|
68
35
|
|
|
69
|
-
|
|
70
|
-
-
|
|
71
|
-
|
|
36
|
+
```bash
|
|
37
|
+
# One command installation - works on all systems
|
|
38
|
+
pip install abstractvoice[all]
|
|
39
|
+
|
|
40
|
+
# Verify it works
|
|
41
|
+
python -c "from abstractvoice import VoiceManager; print('✅ Ready to go!')"
|
|
42
|
+
```
|
|
72
43
|
|
|
73
|
-
|
|
44
|
+
**That's it!** AbstractVoice automatically:
|
|
45
|
+
- ✅ **Works everywhere** - Uses reliable models that run on any system
|
|
46
|
+
- ✅ **Auto-upgrades quality** - Detects when better models are available
|
|
47
|
+
- ✅ **No system dependencies required** - Pure Python installation
|
|
48
|
+
- ✅ **Optional quality boost** - Install `espeak-ng` for premium voices
|
|
74
49
|
|
|
75
|
-
|
|
50
|
+
### Installation Options
|
|
76
51
|
|
|
77
|
-
**macOS:**
|
|
78
52
|
```bash
|
|
79
|
-
|
|
53
|
+
# Minimal (just 2 dependencies)
|
|
54
|
+
pip install abstractvoice
|
|
55
|
+
|
|
56
|
+
# Add features as needed
|
|
57
|
+
pip install abstractvoice[tts] # Text-to-speech
|
|
58
|
+
pip install abstractvoice[stt] # Speech-to-text
|
|
59
|
+
pip install abstractvoice[all] # Everything (recommended)
|
|
60
|
+
|
|
61
|
+
# Language-specific
|
|
62
|
+
pip install abstractvoice[fr] # French with all features
|
|
63
|
+
pip install abstractvoice[de] # German with all features
|
|
80
64
|
```
|
|
81
65
|
|
|
82
|
-
|
|
66
|
+
### Optional Quality Upgrade
|
|
67
|
+
|
|
68
|
+
For the **absolute best voice quality**, install espeak-ng:
|
|
69
|
+
|
|
83
70
|
```bash
|
|
71
|
+
# macOS
|
|
72
|
+
brew install espeak-ng
|
|
73
|
+
|
|
74
|
+
# Linux
|
|
84
75
|
sudo apt-get install espeak-ng
|
|
76
|
+
|
|
77
|
+
# Windows
|
|
78
|
+
conda install espeak-ng
|
|
85
79
|
```
|
|
86
80
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
81
|
+
AbstractVoice automatically detects espeak-ng and upgrades to premium quality voices when available.
|
|
82
|
+
|
|
83
|
+
## Quick Start
|
|
84
|
+
|
|
85
|
+
### Basic Usage (Minimal Installation)
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
# First install with minimal dependencies
|
|
89
|
+
# pip install abstractvoice
|
|
90
|
+
|
|
91
|
+
from abstractvoice import VoiceManager
|
|
92
|
+
|
|
93
|
+
# This will show a helpful error message with installation instructions
|
|
94
|
+
try:
|
|
95
|
+
vm = VoiceManager()
|
|
96
|
+
except ImportError as e:
|
|
97
|
+
print(e) # Shows: "TTS functionality requires optional dependencies..."
|
|
98
|
+
# Follow the instructions to install: pip install abstractvoice[all]
|
|
90
99
|
```
|
|
91
100
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
101
|
+
### Full Usage Example
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
# After installing with: pip install abstractvoice[all]
|
|
105
|
+
|
|
106
|
+
from abstractvoice import VoiceManager
|
|
107
|
+
|
|
108
|
+
# Initialize voice manager
|
|
109
|
+
vm = VoiceManager(language='en', debug_mode=True)
|
|
110
|
+
|
|
111
|
+
# Text-to-speech
|
|
112
|
+
vm.speak("Hello! I can speak text and listen for responses.")
|
|
113
|
+
|
|
114
|
+
# Speech-to-text with callbacks
|
|
115
|
+
def on_transcription(text):
|
|
116
|
+
print(f"You said: {text}")
|
|
117
|
+
# Process the transcription
|
|
118
|
+
vm.speak(f"I heard you say: {text}")
|
|
119
|
+
|
|
120
|
+
def on_stop():
|
|
121
|
+
print("Stopping voice interaction")
|
|
96
122
|
|
|
97
|
-
#
|
|
98
|
-
|
|
123
|
+
# Start listening
|
|
124
|
+
vm.listen(on_transcription, on_stop)
|
|
99
125
|
|
|
100
|
-
#
|
|
126
|
+
# The voice manager will automatically pause listening when speaking
|
|
127
|
+
# and resume when done to prevent feedback loops
|
|
101
128
|
```
|
|
102
129
|
|
|
103
|
-
|
|
130
|
+
## Additional Examples
|
|
104
131
|
|
|
105
|
-
###
|
|
132
|
+
### Language-Specific Usage
|
|
106
133
|
|
|
107
|
-
```
|
|
108
|
-
#
|
|
109
|
-
|
|
134
|
+
```python
|
|
135
|
+
# French voice
|
|
136
|
+
vm_fr = VoiceManager(language='fr')
|
|
137
|
+
vm_fr.speak("Bonjour! Je peux parler français.")
|
|
138
|
+
|
|
139
|
+
# Spanish voice
|
|
140
|
+
vm_es = VoiceManager(language='es')
|
|
141
|
+
vm_es.speak("¡Hola! Puedo hablar español.")
|
|
110
142
|
|
|
111
|
-
#
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
pip install -e .
|
|
143
|
+
# Dynamic language switching
|
|
144
|
+
vm.set_language('fr') # Switch to French
|
|
145
|
+
vm.set_language('en') # Switch back to English
|
|
115
146
|
```
|
|
116
147
|
|
|
117
|
-
###
|
|
148
|
+
### Advanced Configuration
|
|
118
149
|
|
|
119
|
-
```
|
|
120
|
-
|
|
121
|
-
|
|
150
|
+
```python
|
|
151
|
+
from abstractvoice import VoiceManager
|
|
152
|
+
|
|
153
|
+
# Custom TTS model selection
|
|
154
|
+
vm = VoiceManager(
|
|
155
|
+
language='en',
|
|
156
|
+
tts_model='tts_models/en/ljspeech/fast_pitch', # Specific model
|
|
157
|
+
whisper_model='base', # Larger Whisper model for better accuracy
|
|
158
|
+
debug_mode=True
|
|
159
|
+
)
|
|
160
|
+
|
|
161
|
+
# Speed control
|
|
162
|
+
vm.set_speed(1.5) # 1.5x speed
|
|
163
|
+
vm.speak("This text will be spoken faster.")
|
|
164
|
+
|
|
165
|
+
# Model switching at runtime
|
|
166
|
+
vm.set_tts_model('tts_models/en/ljspeech/vits') # Switch to VITS
|
|
167
|
+
vm.set_whisper('small') # Switch to larger Whisper model
|
|
122
168
|
```
|
|
123
169
|
|
|
124
|
-
###
|
|
170
|
+
### Error Handling and Graceful Degradation
|
|
125
171
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
172
|
+
AbstractVoice is designed to provide helpful error messages and fall back gracefully:
|
|
173
|
+
|
|
174
|
+
```python
|
|
175
|
+
# If you install just the basic package
|
|
176
|
+
# pip install abstractvoice
|
|
177
|
+
|
|
178
|
+
from abstractvoice import VoiceManager # This works fine
|
|
179
|
+
|
|
180
|
+
try:
|
|
181
|
+
vm = VoiceManager() # This will fail with helpful message
|
|
182
|
+
except ImportError as e:
|
|
183
|
+
print(e)
|
|
184
|
+
# Output: "TTS functionality requires optional dependencies. Install with:
|
|
185
|
+
# pip install abstractvoice[tts] # For TTS only
|
|
186
|
+
# pip install abstractvoice[all] # For all features"
|
|
187
|
+
|
|
188
|
+
# Missing espeak-ng automatically falls back to compatible models
|
|
189
|
+
# Missing dependencies show clear installation instructions
|
|
190
|
+
# All errors are graceful with helpful guidance
|
|
129
191
|
```
|
|
130
192
|
|
|
131
|
-
##
|
|
193
|
+
## CLI and Web Examples
|
|
194
|
+
|
|
195
|
+
AbstractVoice includes example applications to demonstrate its capabilities:
|
|
132
196
|
|
|
133
197
|
### Using AbstractVoice from the Command Line
|
|
134
198
|
|
|
@@ -1129,4 +1193,4 @@ AbstractVoice is licensed under the [MIT License](LICENSE).
|
|
|
1129
1193
|
|
|
1130
1194
|
This project depends on several open-source libraries and models, each with their own licenses. Please see [ACKNOWLEDGMENTS.md](ACKNOWLEDGMENTS.md) for a detailed list of dependencies and their respective licenses.
|
|
1131
1195
|
|
|
1132
|
-
Some dependencies, particularly certain TTS models, may have non-commercial use restrictions. If you plan to use AbstractVoice in a commercial application, please ensure you are using models that permit commercial use or obtain appropriate licenses.
|
|
1196
|
+
Some dependencies, particularly certain TTS models, may have non-commercial use restrictions. If you plan to use AbstractVoice in a commercial application, please ensure you are using models that permit commercial use or obtain appropriate licenses.
|
|
@@ -15,20 +15,25 @@ def print_examples():
|
|
|
15
15
|
print(" cli - Command-line REPL example")
|
|
16
16
|
print(" web - Web API example")
|
|
17
17
|
print(" simple - Simple usage example")
|
|
18
|
-
print("\nUsage: python -m abstractvoice <example> [args...]")
|
|
18
|
+
print("\nUsage: python -m abstractvoice <example> [--language <lang>] [args...]")
|
|
19
|
+
print("\nSupported languages: en, fr, es, de, it, ru, multilingual")
|
|
20
|
+
print("\nExamples:")
|
|
21
|
+
print(" python -m abstractvoice cli --language fr # French CLI")
|
|
22
|
+
print(" python -m abstractvoice simple --language ru # Russian simple example")
|
|
19
23
|
|
|
20
24
|
|
|
21
25
|
def simple_example():
|
|
22
26
|
"""Run a simple example demonstrating basic usage."""
|
|
23
27
|
from abstractvoice import VoiceManager
|
|
24
28
|
import time
|
|
25
|
-
|
|
29
|
+
|
|
26
30
|
print("Simple AbstractVoice Example")
|
|
27
31
|
print("============================")
|
|
28
32
|
print("This example demonstrates basic TTS and STT functionality.")
|
|
33
|
+
print("(Use --language argument to test different languages)")
|
|
29
34
|
print()
|
|
30
|
-
|
|
31
|
-
# Initialize voice manager
|
|
35
|
+
|
|
36
|
+
# Initialize voice manager (can be overridden with --language)
|
|
32
37
|
manager = VoiceManager(debug_mode=True)
|
|
33
38
|
|
|
34
39
|
try:
|
|
@@ -91,17 +96,22 @@ def main():
|
|
|
91
96
|
"""Main entry point."""
|
|
92
97
|
parser = argparse.ArgumentParser(description="AbstractVoice examples")
|
|
93
98
|
parser.add_argument("example", nargs="?", help="Example to run (cli, web, simple)")
|
|
94
|
-
|
|
95
|
-
|
|
99
|
+
parser.add_argument("--language", "--lang", default="en",
|
|
100
|
+
choices=["en", "fr", "es", "de", "it", "ru", "multilingual"],
|
|
101
|
+
help="Voice language for examples")
|
|
102
|
+
|
|
103
|
+
# Parse just the first argument and language
|
|
96
104
|
args, remaining = parser.parse_known_args()
|
|
97
|
-
|
|
105
|
+
|
|
98
106
|
if not args.example:
|
|
99
107
|
print_examples()
|
|
100
108
|
return
|
|
101
|
-
|
|
102
|
-
# Set remaining args as sys.argv for the examples
|
|
109
|
+
|
|
110
|
+
# Set remaining args as sys.argv for the examples, including language
|
|
111
|
+
if args.language != "en":
|
|
112
|
+
remaining = ["--language", args.language] + remaining
|
|
103
113
|
sys.argv = [sys.argv[0]] + remaining
|
|
104
|
-
|
|
114
|
+
|
|
105
115
|
if args.example == "cli":
|
|
106
116
|
from abstractvoice.examples.cli_repl import main
|
|
107
117
|
main()
|