abstractvoice 0.1.1__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {abstractvoice-0.1.1 → abstractvoice-0.2.0}/LICENSE +1 -1
- {abstractvoice-0.1.1/abstractvoice.egg-info → abstractvoice-0.2.0}/PKG-INFO +228 -50
- abstractvoice-0.1.1/PKG-INFO → abstractvoice-0.2.0/README.md +141 -77
- {abstractvoice-0.1.1 → abstractvoice-0.2.0}/abstractvoice/__main__.py +20 -10
- {abstractvoice-0.1.1 → abstractvoice-0.2.0}/abstractvoice/examples/cli_repl.py +198 -13
- {abstractvoice-0.1.1 → abstractvoice-0.2.0}/abstractvoice/examples/voice_cli.py +20 -6
- {abstractvoice-0.1.1 → abstractvoice-0.2.0}/abstractvoice/recognition.py +50 -7
- {abstractvoice-0.1.1 → abstractvoice-0.2.0}/abstractvoice/stt/transcriber.py +17 -2
- {abstractvoice-0.1.1 → abstractvoice-0.2.0}/abstractvoice/tts/tts_engine.py +84 -32
- {abstractvoice-0.1.1 → abstractvoice-0.2.0}/abstractvoice/vad/voice_detector.py +16 -2
- abstractvoice-0.2.0/abstractvoice/voice_manager.py +836 -0
- abstractvoice-0.1.1/README.md → abstractvoice-0.2.0/abstractvoice.egg-info/PKG-INFO +255 -39
- abstractvoice-0.2.0/abstractvoice.egg-info/requires.txt +104 -0
- abstractvoice-0.2.0/pyproject.toml +157 -0
- abstractvoice-0.1.1/abstractvoice/voice_manager.py +0 -294
- abstractvoice-0.1.1/abstractvoice.egg-info/requires.txt +0 -18
- abstractvoice-0.1.1/pyproject.toml +0 -55
- {abstractvoice-0.1.1 → abstractvoice-0.2.0}/abstractvoice/__init__.py +0 -0
- {abstractvoice-0.1.1 → abstractvoice-0.2.0}/abstractvoice/examples/__init__.py +0 -0
- {abstractvoice-0.1.1 → abstractvoice-0.2.0}/abstractvoice/examples/web_api.py +0 -0
- {abstractvoice-0.1.1 → abstractvoice-0.2.0}/abstractvoice/stt/__init__.py +0 -0
- {abstractvoice-0.1.1 → abstractvoice-0.2.0}/abstractvoice/tts/__init__.py +0 -0
- {abstractvoice-0.1.1 → abstractvoice-0.2.0}/abstractvoice/vad/__init__.py +0 -0
- {abstractvoice-0.1.1 → abstractvoice-0.2.0}/abstractvoice.egg-info/SOURCES.txt +0 -0
- {abstractvoice-0.1.1 → abstractvoice-0.2.0}/abstractvoice.egg-info/dependency_links.txt +0 -0
- {abstractvoice-0.1.1 → abstractvoice-0.2.0}/abstractvoice.egg-info/entry_points.txt +0 -0
- {abstractvoice-0.1.1 → abstractvoice-0.2.0}/abstractvoice.egg-info/top_level.txt +0 -0
- {abstractvoice-0.1.1 → abstractvoice-0.2.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
MIT License
|
|
2
2
|
|
|
3
|
-
Copyright (c) 2025 Laurent-Philippe Albou (
|
|
3
|
+
Copyright (c) 2025 Laurent-Philippe Albou (contact@abstractcore.ai)
|
|
4
4
|
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
6
|
of this software and associated documentation files (the "Software"), to deal
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: abstractvoice
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: A modular Python library for voice interactions with AI systems
|
|
5
5
|
Author-email: Laurent-Philippe Albou <contact@abstractcore.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -18,31 +18,108 @@ Requires-Python: >=3.8
|
|
|
18
18
|
Description-Content-Type: text/markdown
|
|
19
19
|
License-File: LICENSE
|
|
20
20
|
Requires-Dist: numpy>=1.24.0
|
|
21
|
-
Requires-Dist: sounddevice>=0.4.6
|
|
22
|
-
Requires-Dist: webrtcvad>=2.0.10
|
|
23
|
-
Requires-Dist: PyAudio>=0.2.13
|
|
24
|
-
Requires-Dist: openai-whisper>=20230314
|
|
25
|
-
Requires-Dist: coqui-tts>=0.27.0
|
|
26
|
-
Requires-Dist: torch>=2.0.0
|
|
27
|
-
Requires-Dist: torchaudio>=2.0.0
|
|
28
|
-
Requires-Dist: librosa>=0.10.0
|
|
29
|
-
Requires-Dist: soundfile>=0.12.1
|
|
30
21
|
Requires-Dist: requests>=2.31.0
|
|
31
|
-
|
|
32
|
-
Requires-Dist: tiktoken>=0.6.0
|
|
22
|
+
Provides-Extra: voice
|
|
23
|
+
Requires-Dist: sounddevice>=0.4.6; extra == "voice"
|
|
24
|
+
Requires-Dist: webrtcvad>=2.0.10; extra == "voice"
|
|
25
|
+
Requires-Dist: PyAudio>=0.2.13; extra == "voice"
|
|
26
|
+
Requires-Dist: soundfile>=0.12.1; extra == "voice"
|
|
27
|
+
Provides-Extra: tts
|
|
28
|
+
Requires-Dist: coqui-tts>=0.27.0; extra == "tts"
|
|
29
|
+
Requires-Dist: torch>=2.0.0; extra == "tts"
|
|
30
|
+
Requires-Dist: torchaudio>=2.0.0; extra == "tts"
|
|
31
|
+
Requires-Dist: librosa>=0.10.0; extra == "tts"
|
|
32
|
+
Provides-Extra: stt
|
|
33
|
+
Requires-Dist: openai-whisper>=20230314; extra == "stt"
|
|
34
|
+
Requires-Dist: tiktoken>=0.6.0; extra == "stt"
|
|
35
|
+
Provides-Extra: web
|
|
36
|
+
Requires-Dist: flask>=2.0.0; extra == "web"
|
|
37
|
+
Provides-Extra: all
|
|
38
|
+
Requires-Dist: sounddevice>=0.4.6; extra == "all"
|
|
39
|
+
Requires-Dist: webrtcvad>=2.0.10; extra == "all"
|
|
40
|
+
Requires-Dist: PyAudio>=0.2.13; extra == "all"
|
|
41
|
+
Requires-Dist: openai-whisper>=20230314; extra == "all"
|
|
42
|
+
Requires-Dist: coqui-tts>=0.27.0; extra == "all"
|
|
43
|
+
Requires-Dist: torch>=2.0.0; extra == "all"
|
|
44
|
+
Requires-Dist: torchaudio>=2.0.0; extra == "all"
|
|
45
|
+
Requires-Dist: librosa>=0.10.0; extra == "all"
|
|
46
|
+
Requires-Dist: soundfile>=0.12.1; extra == "all"
|
|
47
|
+
Requires-Dist: flask>=2.0.0; extra == "all"
|
|
48
|
+
Requires-Dist: tiktoken>=0.6.0; extra == "all"
|
|
33
49
|
Provides-Extra: dev
|
|
34
50
|
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
35
51
|
Requires-Dist: black>=22.0.0; extra == "dev"
|
|
36
52
|
Requires-Dist: flake8>=5.0.0; extra == "dev"
|
|
53
|
+
Provides-Extra: languages
|
|
54
|
+
Requires-Dist: sounddevice>=0.4.6; extra == "languages"
|
|
55
|
+
Requires-Dist: webrtcvad>=2.0.10; extra == "languages"
|
|
56
|
+
Requires-Dist: PyAudio>=0.2.13; extra == "languages"
|
|
57
|
+
Requires-Dist: openai-whisper>=20230314; extra == "languages"
|
|
58
|
+
Requires-Dist: coqui-tts>=0.27.0; extra == "languages"
|
|
59
|
+
Requires-Dist: torch>=2.0.0; extra == "languages"
|
|
60
|
+
Requires-Dist: torchaudio>=2.0.0; extra == "languages"
|
|
61
|
+
Requires-Dist: librosa>=0.10.0; extra == "languages"
|
|
62
|
+
Requires-Dist: soundfile>=0.12.1; extra == "languages"
|
|
63
|
+
Requires-Dist: flask>=2.0.0; extra == "languages"
|
|
64
|
+
Requires-Dist: tiktoken>=0.6.0; extra == "languages"
|
|
65
|
+
Provides-Extra: fr
|
|
66
|
+
Requires-Dist: sounddevice>=0.4.6; extra == "fr"
|
|
67
|
+
Requires-Dist: webrtcvad>=2.0.10; extra == "fr"
|
|
68
|
+
Requires-Dist: PyAudio>=0.2.13; extra == "fr"
|
|
69
|
+
Requires-Dist: openai-whisper>=20230314; extra == "fr"
|
|
70
|
+
Requires-Dist: coqui-tts>=0.27.0; extra == "fr"
|
|
71
|
+
Requires-Dist: torch>=2.0.0; extra == "fr"
|
|
72
|
+
Requires-Dist: torchaudio>=2.0.0; extra == "fr"
|
|
73
|
+
Requires-Dist: librosa>=0.10.0; extra == "fr"
|
|
74
|
+
Requires-Dist: soundfile>=0.12.1; extra == "fr"
|
|
75
|
+
Requires-Dist: flask>=2.0.0; extra == "fr"
|
|
76
|
+
Requires-Dist: tiktoken>=0.6.0; extra == "fr"
|
|
77
|
+
Provides-Extra: es
|
|
78
|
+
Requires-Dist: sounddevice>=0.4.6; extra == "es"
|
|
79
|
+
Requires-Dist: webrtcvad>=2.0.10; extra == "es"
|
|
80
|
+
Requires-Dist: PyAudio>=0.2.13; extra == "es"
|
|
81
|
+
Requires-Dist: openai-whisper>=20230314; extra == "es"
|
|
82
|
+
Requires-Dist: coqui-tts>=0.27.0; extra == "es"
|
|
83
|
+
Requires-Dist: torch>=2.0.0; extra == "es"
|
|
84
|
+
Requires-Dist: torchaudio>=2.0.0; extra == "es"
|
|
85
|
+
Requires-Dist: librosa>=0.10.0; extra == "es"
|
|
86
|
+
Requires-Dist: soundfile>=0.12.1; extra == "es"
|
|
87
|
+
Requires-Dist: flask>=2.0.0; extra == "es"
|
|
88
|
+
Requires-Dist: tiktoken>=0.6.0; extra == "es"
|
|
89
|
+
Provides-Extra: de
|
|
90
|
+
Requires-Dist: sounddevice>=0.4.6; extra == "de"
|
|
91
|
+
Requires-Dist: webrtcvad>=2.0.10; extra == "de"
|
|
92
|
+
Requires-Dist: PyAudio>=0.2.13; extra == "de"
|
|
93
|
+
Requires-Dist: openai-whisper>=20230314; extra == "de"
|
|
94
|
+
Requires-Dist: coqui-tts>=0.27.0; extra == "de"
|
|
95
|
+
Requires-Dist: torch>=2.0.0; extra == "de"
|
|
96
|
+
Requires-Dist: torchaudio>=2.0.0; extra == "de"
|
|
97
|
+
Requires-Dist: librosa>=0.10.0; extra == "de"
|
|
98
|
+
Requires-Dist: soundfile>=0.12.1; extra == "de"
|
|
99
|
+
Requires-Dist: flask>=2.0.0; extra == "de"
|
|
100
|
+
Requires-Dist: tiktoken>=0.6.0; extra == "de"
|
|
101
|
+
Provides-Extra: it
|
|
102
|
+
Requires-Dist: sounddevice>=0.4.6; extra == "it"
|
|
103
|
+
Requires-Dist: webrtcvad>=2.0.10; extra == "it"
|
|
104
|
+
Requires-Dist: PyAudio>=0.2.13; extra == "it"
|
|
105
|
+
Requires-Dist: openai-whisper>=20230314; extra == "it"
|
|
106
|
+
Requires-Dist: coqui-tts>=0.27.0; extra == "it"
|
|
107
|
+
Requires-Dist: torch>=2.0.0; extra == "it"
|
|
108
|
+
Requires-Dist: torchaudio>=2.0.0; extra == "it"
|
|
109
|
+
Requires-Dist: librosa>=0.10.0; extra == "it"
|
|
110
|
+
Requires-Dist: soundfile>=0.12.1; extra == "it"
|
|
111
|
+
Requires-Dist: flask>=2.0.0; extra == "it"
|
|
112
|
+
Requires-Dist: tiktoken>=0.6.0; extra == "it"
|
|
37
113
|
Dynamic: license-file
|
|
38
114
|
|
|
39
115
|
# AbstractVoice
|
|
40
116
|
|
|
41
117
|
[](https://pypi.org/project/abstractvoice/)
|
|
42
118
|
[](https://pypi.org/project/abstractvoice/)
|
|
43
|
-
[](https://github.com/lpalbou/abstractvoice/blob/main/LICENSE)
|
|
44
120
|
[](https://github.com/lpalbou/abstractvoice/stargazers)
|
|
45
121
|
|
|
122
|
+
|
|
46
123
|
A modular Python library for voice interactions with AI systems, providing text-to-speech (TTS) and speech-to-text (STT) capabilities with interrupt handling.
|
|
47
124
|
|
|
48
125
|
While we provide CLI and WEB examples, AbstractVoice is designed to be integrated in other projects.
|
|
@@ -62,73 +139,174 @@ While we provide CLI and WEB examples, AbstractVoice is designed to be integrate
|
|
|
62
139
|
- **Interrupt Handling**: Stop TTS by speaking or using stop commands
|
|
63
140
|
- **Modular Design**: Easily integrate with any text generation system
|
|
64
141
|
|
|
142
|
+
Note: *LLM access is rudimentary, and AbstractVoice is provided mainly as an example and demonstrator. For a better integration, use this library's functionality directly in combination with [AbstractCore](https://github.com/lpalbou/AbstractCore).*
|
|
143
|
+
|
|
65
144
|
## Installation
|
|
66
145
|
|
|
67
|
-
|
|
146
|
+
AbstractVoice is designed to **work everywhere, out of the box** with automatic quality upgrades.
|
|
147
|
+
|
|
148
|
+
### 🚀 Quick Start (Recommended)
|
|
149
|
+
|
|
150
|
+
```bash
|
|
151
|
+
# One command installation - works on all systems
|
|
152
|
+
pip install abstractvoice[all]
|
|
153
|
+
|
|
154
|
+
# Verify it works
|
|
155
|
+
python -c "from abstractvoice import VoiceManager; print('✅ Ready to go!')"
|
|
156
|
+
```
|
|
68
157
|
|
|
69
|
-
|
|
70
|
-
-
|
|
71
|
-
- **
|
|
158
|
+
**That's it!** AbstractVoice automatically:
|
|
159
|
+
- ✅ **Works everywhere** - Uses reliable models that run on any system
|
|
160
|
+
- ✅ **Auto-upgrades quality** - Detects when better models are available
|
|
161
|
+
- ✅ **No system dependencies required** - Pure Python installation
|
|
162
|
+
- ✅ **Optional quality boost** - Install `espeak-ng` for premium voices
|
|
72
163
|
|
|
73
|
-
###
|
|
164
|
+
### Installation Options
|
|
74
165
|
|
|
75
166
|
```bash
|
|
76
|
-
#
|
|
167
|
+
# Minimal (just 2 dependencies)
|
|
77
168
|
pip install abstractvoice
|
|
78
169
|
|
|
79
|
-
#
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
pip install
|
|
170
|
+
# Add features as needed
|
|
171
|
+
pip install abstractvoice[tts] # Text-to-speech
|
|
172
|
+
pip install abstractvoice[stt] # Speech-to-text
|
|
173
|
+
pip install abstractvoice[all] # Everything (recommended)
|
|
174
|
+
|
|
175
|
+
# Language-specific
|
|
176
|
+
pip install abstractvoice[fr] # French with all features
|
|
177
|
+
pip install abstractvoice[de] # German with all features
|
|
83
178
|
```
|
|
84
179
|
|
|
85
|
-
###
|
|
180
|
+
### Optional Quality Upgrade
|
|
181
|
+
|
|
182
|
+
For the **absolute best voice quality**, install espeak-ng:
|
|
86
183
|
|
|
87
184
|
```bash
|
|
88
|
-
#
|
|
89
|
-
|
|
185
|
+
# macOS
|
|
186
|
+
brew install espeak-ng
|
|
187
|
+
|
|
188
|
+
# Linux
|
|
189
|
+
sudo apt-get install espeak-ng
|
|
190
|
+
|
|
191
|
+
# Windows
|
|
192
|
+
conda install espeak-ng
|
|
90
193
|
```
|
|
91
194
|
|
|
92
|
-
|
|
195
|
+
AbstractVoice automatically detects espeak-ng and upgrades to premium quality voices when available.
|
|
93
196
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
197
|
+
## Quick Start
|
|
198
|
+
|
|
199
|
+
### Basic Usage (Minimal Installation)
|
|
200
|
+
|
|
201
|
+
```python
|
|
202
|
+
# First install with minimal dependencies
|
|
203
|
+
# pip install abstractvoice
|
|
204
|
+
|
|
205
|
+
from abstractvoice import VoiceManager
|
|
206
|
+
|
|
207
|
+
# This will show a helpful error message with installation instructions
|
|
208
|
+
try:
|
|
209
|
+
vm = VoiceManager()
|
|
210
|
+
except ImportError as e:
|
|
211
|
+
print(e) # Shows: "TTS functionality requires optional dependencies..."
|
|
212
|
+
# Follow the instructions to install: pip install abstractvoice[all]
|
|
97
213
|
```
|
|
98
214
|
|
|
99
|
-
###
|
|
215
|
+
### Full Usage Example
|
|
100
216
|
|
|
101
|
-
|
|
217
|
+
```python
|
|
218
|
+
# After installing with: pip install abstractvoice[all]
|
|
102
219
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
220
|
+
from abstractvoice import VoiceManager
|
|
221
|
+
|
|
222
|
+
# Initialize voice manager
|
|
223
|
+
vm = VoiceManager(language='en', debug_mode=True)
|
|
224
|
+
|
|
225
|
+
# Text-to-speech
|
|
226
|
+
vm.speak("Hello! I can speak text and listen for responses.")
|
|
227
|
+
|
|
228
|
+
# Speech-to-text with callbacks
|
|
229
|
+
def on_transcription(text):
|
|
230
|
+
print(f"You said: {text}")
|
|
231
|
+
# Process the transcription
|
|
232
|
+
vm.speak(f"I heard you say: {text}")
|
|
233
|
+
|
|
234
|
+
def on_stop():
|
|
235
|
+
print("Stopping voice interaction")
|
|
236
|
+
|
|
237
|
+
# Start listening
|
|
238
|
+
vm.listen(on_transcription, on_stop)
|
|
239
|
+
|
|
240
|
+
# The voice manager will automatically pause listening when speaking
|
|
241
|
+
# and resume when done to prevent feedback loops
|
|
106
242
|
```
|
|
107
243
|
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
244
|
+
## Additional Examples
|
|
245
|
+
|
|
246
|
+
### Language-Specific Usage
|
|
247
|
+
|
|
248
|
+
```python
|
|
249
|
+
# French voice
|
|
250
|
+
vm_fr = VoiceManager(language='fr')
|
|
251
|
+
vm_fr.speak("Bonjour! Je peux parler français.")
|
|
252
|
+
|
|
253
|
+
# Spanish voice
|
|
254
|
+
vm_es = VoiceManager(language='es')
|
|
255
|
+
vm_es.speak("¡Hola! Puedo hablar español.")
|
|
256
|
+
|
|
257
|
+
# Dynamic language switching
|
|
258
|
+
vm.set_language('fr') # Switch to French
|
|
259
|
+
vm.set_language('en') # Switch back to English
|
|
111
260
|
```
|
|
112
261
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
262
|
+
### Advanced Configuration
|
|
263
|
+
|
|
264
|
+
```python
|
|
265
|
+
from abstractvoice import VoiceManager
|
|
266
|
+
|
|
267
|
+
# Custom TTS model selection
|
|
268
|
+
vm = VoiceManager(
|
|
269
|
+
language='en',
|
|
270
|
+
tts_model='tts_models/en/ljspeech/fast_pitch', # Specific model
|
|
271
|
+
whisper_model='base', # Larger Whisper model for better accuracy
|
|
272
|
+
debug_mode=True
|
|
273
|
+
)
|
|
274
|
+
|
|
275
|
+
# Speed control
|
|
276
|
+
vm.set_speed(1.5) # 1.5x speed
|
|
277
|
+
vm.speak("This text will be spoken faster.")
|
|
278
|
+
|
|
279
|
+
# Model switching at runtime
|
|
280
|
+
vm.set_tts_model('tts_models/en/ljspeech/vits') # Switch to VITS
|
|
281
|
+
vm.set_whisper('small') # Switch to larger Whisper model
|
|
116
282
|
```
|
|
117
283
|
|
|
118
|
-
|
|
119
|
-
```bash
|
|
120
|
-
# Option 1: Using Conda
|
|
121
|
-
conda install -c conda-forge espeak-ng
|
|
284
|
+
### Error Handling and Graceful Degradation
|
|
122
285
|
|
|
123
|
-
|
|
124
|
-
|
|
286
|
+
AbstractVoice is designed to provide helpful error messages and fall back gracefully:
|
|
287
|
+
|
|
288
|
+
```python
|
|
289
|
+
# If you install just the basic package
|
|
290
|
+
# pip install abstractvoice
|
|
125
291
|
|
|
126
|
-
|
|
292
|
+
from abstractvoice import VoiceManager # This works fine
|
|
293
|
+
|
|
294
|
+
try:
|
|
295
|
+
vm = VoiceManager() # This will fail with helpful message
|
|
296
|
+
except ImportError as e:
|
|
297
|
+
print(e)
|
|
298
|
+
# Output: "TTS functionality requires optional dependencies. Install with:
|
|
299
|
+
# pip install abstractvoice[tts] # For TTS only
|
|
300
|
+
# pip install abstractvoice[all] # For all features"
|
|
301
|
+
|
|
302
|
+
# Missing espeak-ng automatically falls back to compatible models
|
|
303
|
+
# Missing dependencies show clear installation instructions
|
|
304
|
+
# All errors are graceful with helpful guidance
|
|
127
305
|
```
|
|
128
306
|
|
|
129
|
-
|
|
307
|
+
## CLI and Web Examples
|
|
130
308
|
|
|
131
|
-
|
|
309
|
+
AbstractVoice includes example applications to demonstrate its capabilities:
|
|
132
310
|
|
|
133
311
|
### Using AbstractVoice from the Command Line
|
|
134
312
|
|
|
@@ -1,48 +1,11 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: abstractvoice
|
|
3
|
-
Version: 0.1.1
|
|
4
|
-
Summary: A modular Python library for voice interactions with AI systems
|
|
5
|
-
Author-email: Laurent-Philippe Albou <contact@abstractcore.ai>
|
|
6
|
-
License-Expression: MIT
|
|
7
|
-
Project-URL: Repository, https://github.com/lpalbou/abstractvoice
|
|
8
|
-
Project-URL: Documentation, https://github.com/lpalbou/abstractvoice#readme
|
|
9
|
-
Classifier: Development Status :: 3 - Alpha
|
|
10
|
-
Classifier: Intended Audience :: Developers
|
|
11
|
-
Classifier: Programming Language :: Python :: 3
|
|
12
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
-
Requires-Python: >=3.8
|
|
18
|
-
Description-Content-Type: text/markdown
|
|
19
|
-
License-File: LICENSE
|
|
20
|
-
Requires-Dist: numpy>=1.24.0
|
|
21
|
-
Requires-Dist: sounddevice>=0.4.6
|
|
22
|
-
Requires-Dist: webrtcvad>=2.0.10
|
|
23
|
-
Requires-Dist: PyAudio>=0.2.13
|
|
24
|
-
Requires-Dist: openai-whisper>=20230314
|
|
25
|
-
Requires-Dist: coqui-tts>=0.27.0
|
|
26
|
-
Requires-Dist: torch>=2.0.0
|
|
27
|
-
Requires-Dist: torchaudio>=2.0.0
|
|
28
|
-
Requires-Dist: librosa>=0.10.0
|
|
29
|
-
Requires-Dist: soundfile>=0.12.1
|
|
30
|
-
Requires-Dist: requests>=2.31.0
|
|
31
|
-
Requires-Dist: flask>=2.0.0
|
|
32
|
-
Requires-Dist: tiktoken>=0.6.0
|
|
33
|
-
Provides-Extra: dev
|
|
34
|
-
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
35
|
-
Requires-Dist: black>=22.0.0; extra == "dev"
|
|
36
|
-
Requires-Dist: flake8>=5.0.0; extra == "dev"
|
|
37
|
-
Dynamic: license-file
|
|
38
|
-
|
|
39
1
|
# AbstractVoice
|
|
40
2
|
|
|
41
3
|
[](https://pypi.org/project/abstractvoice/)
|
|
42
4
|
[](https://pypi.org/project/abstractvoice/)
|
|
43
|
-
[](https://github.com/lpalbou/abstractvoice/blob/main/LICENSE)
|
|
44
6
|
[](https://github.com/lpalbou/abstractvoice/stargazers)
|
|
45
7
|
|
|
8
|
+
|
|
46
9
|
A modular Python library for voice interactions with AI systems, providing text-to-speech (TTS) and speech-to-text (STT) capabilities with interrupt handling.
|
|
47
10
|
|
|
48
11
|
While we provide CLI and WEB examples, AbstractVoice is designed to be integrated in other projects.
|
|
@@ -62,73 +25,174 @@ While we provide CLI and WEB examples, AbstractVoice is designed to be integrate
|
|
|
62
25
|
- **Interrupt Handling**: Stop TTS by speaking or using stop commands
|
|
63
26
|
- **Modular Design**: Easily integrate with any text generation system
|
|
64
27
|
|
|
28
|
+
Note: *LLM access is rudimentary, and AbstractVoice is provided mainly as an example and demonstrator. For a better integration, use this library's functionality directly in combination with [AbstractCore](https://github.com/lpalbou/AbstractCore).*
|
|
29
|
+
|
|
65
30
|
## Installation
|
|
66
31
|
|
|
67
|
-
|
|
32
|
+
AbstractVoice is designed to **work everywhere, out of the box** with automatic quality upgrades.
|
|
33
|
+
|
|
34
|
+
### 🚀 Quick Start (Recommended)
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
# One command installation - works on all systems
|
|
38
|
+
pip install abstractvoice[all]
|
|
39
|
+
|
|
40
|
+
# Verify it works
|
|
41
|
+
python -c "from abstractvoice import VoiceManager; print('✅ Ready to go!')"
|
|
42
|
+
```
|
|
68
43
|
|
|
69
|
-
|
|
70
|
-
-
|
|
71
|
-
- **
|
|
44
|
+
**That's it!** AbstractVoice automatically:
|
|
45
|
+
- ✅ **Works everywhere** - Uses reliable models that run on any system
|
|
46
|
+
- ✅ **Auto-upgrades quality** - Detects when better models are available
|
|
47
|
+
- ✅ **No system dependencies required** - Pure Python installation
|
|
48
|
+
- ✅ **Optional quality boost** - Install `espeak-ng` for premium voices
|
|
72
49
|
|
|
73
|
-
###
|
|
50
|
+
### Installation Options
|
|
74
51
|
|
|
75
52
|
```bash
|
|
76
|
-
#
|
|
53
|
+
# Minimal (just 2 dependencies)
|
|
77
54
|
pip install abstractvoice
|
|
78
55
|
|
|
79
|
-
#
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
pip install
|
|
56
|
+
# Add features as needed
|
|
57
|
+
pip install abstractvoice[tts] # Text-to-speech
|
|
58
|
+
pip install abstractvoice[stt] # Speech-to-text
|
|
59
|
+
pip install abstractvoice[all] # Everything (recommended)
|
|
60
|
+
|
|
61
|
+
# Language-specific
|
|
62
|
+
pip install abstractvoice[fr] # French with all features
|
|
63
|
+
pip install abstractvoice[de] # German with all features
|
|
83
64
|
```
|
|
84
65
|
|
|
85
|
-
###
|
|
66
|
+
### Optional Quality Upgrade
|
|
67
|
+
|
|
68
|
+
For the **absolute best voice quality**, install espeak-ng:
|
|
86
69
|
|
|
87
70
|
```bash
|
|
88
|
-
#
|
|
89
|
-
|
|
71
|
+
# macOS
|
|
72
|
+
brew install espeak-ng
|
|
73
|
+
|
|
74
|
+
# Linux
|
|
75
|
+
sudo apt-get install espeak-ng
|
|
76
|
+
|
|
77
|
+
# Windows
|
|
78
|
+
conda install espeak-ng
|
|
90
79
|
```
|
|
91
80
|
|
|
92
|
-
|
|
81
|
+
AbstractVoice automatically detects espeak-ng and upgrades to premium quality voices when available.
|
|
93
82
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
83
|
+
## Quick Start
|
|
84
|
+
|
|
85
|
+
### Basic Usage (Minimal Installation)
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
# First install with minimal dependencies
|
|
89
|
+
# pip install abstractvoice
|
|
90
|
+
|
|
91
|
+
from abstractvoice import VoiceManager
|
|
92
|
+
|
|
93
|
+
# This will show a helpful error message with installation instructions
|
|
94
|
+
try:
|
|
95
|
+
vm = VoiceManager()
|
|
96
|
+
except ImportError as e:
|
|
97
|
+
print(e) # Shows: "TTS functionality requires optional dependencies..."
|
|
98
|
+
# Follow the instructions to install: pip install abstractvoice[all]
|
|
97
99
|
```
|
|
98
100
|
|
|
99
|
-
###
|
|
101
|
+
### Full Usage Example
|
|
100
102
|
|
|
101
|
-
|
|
103
|
+
```python
|
|
104
|
+
# After installing with: pip install abstractvoice[all]
|
|
102
105
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
+
from abstractvoice import VoiceManager
|
|
107
|
+
|
|
108
|
+
# Initialize voice manager
|
|
109
|
+
vm = VoiceManager(language='en', debug_mode=True)
|
|
110
|
+
|
|
111
|
+
# Text-to-speech
|
|
112
|
+
vm.speak("Hello! I can speak text and listen for responses.")
|
|
113
|
+
|
|
114
|
+
# Speech-to-text with callbacks
|
|
115
|
+
def on_transcription(text):
|
|
116
|
+
print(f"You said: {text}")
|
|
117
|
+
# Process the transcription
|
|
118
|
+
vm.speak(f"I heard you say: {text}")
|
|
119
|
+
|
|
120
|
+
def on_stop():
|
|
121
|
+
print("Stopping voice interaction")
|
|
122
|
+
|
|
123
|
+
# Start listening
|
|
124
|
+
vm.listen(on_transcription, on_stop)
|
|
125
|
+
|
|
126
|
+
# The voice manager will automatically pause listening when speaking
|
|
127
|
+
# and resume when done to prevent feedback loops
|
|
106
128
|
```
|
|
107
129
|
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
130
|
+
## Additional Examples
|
|
131
|
+
|
|
132
|
+
### Language-Specific Usage
|
|
133
|
+
|
|
134
|
+
```python
|
|
135
|
+
# French voice
|
|
136
|
+
vm_fr = VoiceManager(language='fr')
|
|
137
|
+
vm_fr.speak("Bonjour! Je peux parler français.")
|
|
138
|
+
|
|
139
|
+
# Spanish voice
|
|
140
|
+
vm_es = VoiceManager(language='es')
|
|
141
|
+
vm_es.speak("¡Hola! Puedo hablar español.")
|
|
142
|
+
|
|
143
|
+
# Dynamic language switching
|
|
144
|
+
vm.set_language('fr') # Switch to French
|
|
145
|
+
vm.set_language('en') # Switch back to English
|
|
111
146
|
```
|
|
112
147
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
148
|
+
### Advanced Configuration
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
from abstractvoice import VoiceManager
|
|
152
|
+
|
|
153
|
+
# Custom TTS model selection
|
|
154
|
+
vm = VoiceManager(
|
|
155
|
+
language='en',
|
|
156
|
+
tts_model='tts_models/en/ljspeech/fast_pitch', # Specific model
|
|
157
|
+
whisper_model='base', # Larger Whisper model for better accuracy
|
|
158
|
+
debug_mode=True
|
|
159
|
+
)
|
|
160
|
+
|
|
161
|
+
# Speed control
|
|
162
|
+
vm.set_speed(1.5) # 1.5x speed
|
|
163
|
+
vm.speak("This text will be spoken faster.")
|
|
164
|
+
|
|
165
|
+
# Model switching at runtime
|
|
166
|
+
vm.set_tts_model('tts_models/en/ljspeech/vits') # Switch to VITS
|
|
167
|
+
vm.set_whisper('small') # Switch to larger Whisper model
|
|
116
168
|
```
|
|
117
169
|
|
|
118
|
-
|
|
119
|
-
```bash
|
|
120
|
-
# Option 1: Using Conda
|
|
121
|
-
conda install -c conda-forge espeak-ng
|
|
170
|
+
### Error Handling and Graceful Degradation
|
|
122
171
|
|
|
123
|
-
|
|
124
|
-
|
|
172
|
+
AbstractVoice is designed to provide helpful error messages and fall back gracefully:
|
|
173
|
+
|
|
174
|
+
```python
|
|
175
|
+
# If you install just the basic package
|
|
176
|
+
# pip install abstractvoice
|
|
125
177
|
|
|
126
|
-
|
|
178
|
+
from abstractvoice import VoiceManager # This works fine
|
|
179
|
+
|
|
180
|
+
try:
|
|
181
|
+
vm = VoiceManager() # This will fail with helpful message
|
|
182
|
+
except ImportError as e:
|
|
183
|
+
print(e)
|
|
184
|
+
# Output: "TTS functionality requires optional dependencies. Install with:
|
|
185
|
+
# pip install abstractvoice[tts] # For TTS only
|
|
186
|
+
# pip install abstractvoice[all] # For all features"
|
|
187
|
+
|
|
188
|
+
# Missing espeak-ng automatically falls back to compatible models
|
|
189
|
+
# Missing dependencies show clear installation instructions
|
|
190
|
+
# All errors are graceful with helpful guidance
|
|
127
191
|
```
|
|
128
192
|
|
|
129
|
-
|
|
193
|
+
## CLI and Web Examples
|
|
130
194
|
|
|
131
|
-
|
|
195
|
+
AbstractVoice includes example applications to demonstrate its capabilities:
|
|
132
196
|
|
|
133
197
|
### Using AbstractVoice from the Command Line
|
|
134
198
|
|
|
@@ -1129,4 +1193,4 @@ AbstractVoice is licensed under the [MIT License](LICENSE).
|
|
|
1129
1193
|
|
|
1130
1194
|
This project depends on several open-source libraries and models, each with their own licenses. Please see [ACKNOWLEDGMENTS.md](ACKNOWLEDGMENTS.md) for a detailed list of dependencies and their respective licenses.
|
|
1131
1195
|
|
|
1132
|
-
Some dependencies, particularly certain TTS models, may have non-commercial use restrictions. If you plan to use AbstractVoice in a commercial application, please ensure you are using models that permit commercial use or obtain appropriate licenses.
|
|
1196
|
+
Some dependencies, particularly certain TTS models, may have non-commercial use restrictions. If you plan to use AbstractVoice in a commercial application, please ensure you are using models that permit commercial use or obtain appropriate licenses.
|
|
@@ -15,20 +15,25 @@ def print_examples():
|
|
|
15
15
|
print(" cli - Command-line REPL example")
|
|
16
16
|
print(" web - Web API example")
|
|
17
17
|
print(" simple - Simple usage example")
|
|
18
|
-
print("\nUsage: python -m abstractvoice <example> [args...]")
|
|
18
|
+
print("\nUsage: python -m abstractvoice <example> [--language <lang>] [args...]")
|
|
19
|
+
print("\nSupported languages: en, fr, es, de, it, ru, multilingual")
|
|
20
|
+
print("\nExamples:")
|
|
21
|
+
print(" python -m abstractvoice cli --language fr # French CLI")
|
|
22
|
+
print(" python -m abstractvoice simple --language ru # Russian simple example")
|
|
19
23
|
|
|
20
24
|
|
|
21
25
|
def simple_example():
|
|
22
26
|
"""Run a simple example demonstrating basic usage."""
|
|
23
27
|
from abstractvoice import VoiceManager
|
|
24
28
|
import time
|
|
25
|
-
|
|
29
|
+
|
|
26
30
|
print("Simple AbstractVoice Example")
|
|
27
31
|
print("============================")
|
|
28
32
|
print("This example demonstrates basic TTS and STT functionality.")
|
|
33
|
+
print("(Use --language argument to test different languages)")
|
|
29
34
|
print()
|
|
30
|
-
|
|
31
|
-
# Initialize voice manager
|
|
35
|
+
|
|
36
|
+
# Initialize voice manager (can be overridden with --language)
|
|
32
37
|
manager = VoiceManager(debug_mode=True)
|
|
33
38
|
|
|
34
39
|
try:
|
|
@@ -91,17 +96,22 @@ def main():
|
|
|
91
96
|
"""Main entry point."""
|
|
92
97
|
parser = argparse.ArgumentParser(description="AbstractVoice examples")
|
|
93
98
|
parser.add_argument("example", nargs="?", help="Example to run (cli, web, simple)")
|
|
94
|
-
|
|
95
|
-
|
|
99
|
+
parser.add_argument("--language", "--lang", default="en",
|
|
100
|
+
choices=["en", "fr", "es", "de", "it", "ru", "multilingual"],
|
|
101
|
+
help="Voice language for examples")
|
|
102
|
+
|
|
103
|
+
# Parse just the first argument and language
|
|
96
104
|
args, remaining = parser.parse_known_args()
|
|
97
|
-
|
|
105
|
+
|
|
98
106
|
if not args.example:
|
|
99
107
|
print_examples()
|
|
100
108
|
return
|
|
101
|
-
|
|
102
|
-
# Set remaining args as sys.argv for the examples
|
|
109
|
+
|
|
110
|
+
# Set remaining args as sys.argv for the examples, including language
|
|
111
|
+
if args.language != "en":
|
|
112
|
+
remaining = ["--language", args.language] + remaining
|
|
103
113
|
sys.argv = [sys.argv[0]] + remaining
|
|
104
|
-
|
|
114
|
+
|
|
105
115
|
if args.example == "cli":
|
|
106
116
|
from abstractvoice.examples.cli_repl import main
|
|
107
117
|
main()
|