abstractvoice 0.4.6__tar.gz → 0.5.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {abstractvoice-0.4.6 → abstractvoice-0.5.1}/PKG-INFO +69 -21
- {abstractvoice-0.4.6 → abstractvoice-0.5.1}/README.md +68 -20
- {abstractvoice-0.4.6 → abstractvoice-0.5.1}/abstractvoice/__init__.py +1 -1
- {abstractvoice-0.4.6 → abstractvoice-0.5.1}/abstractvoice/simple_model_manager.py +49 -10
- {abstractvoice-0.4.6 → abstractvoice-0.5.1}/abstractvoice/tts/tts_engine.py +31 -1
- {abstractvoice-0.4.6 → abstractvoice-0.5.1}/abstractvoice/voice_manager.py +84 -10
- {abstractvoice-0.4.6 → abstractvoice-0.5.1}/abstractvoice.egg-info/PKG-INFO +69 -21
- {abstractvoice-0.4.6 → abstractvoice-0.5.1}/LICENSE +0 -0
- {abstractvoice-0.4.6 → abstractvoice-0.5.1}/abstractvoice/__main__.py +0 -0
- {abstractvoice-0.4.6 → abstractvoice-0.5.1}/abstractvoice/dependency_check.py +0 -0
- {abstractvoice-0.4.6 → abstractvoice-0.5.1}/abstractvoice/examples/__init__.py +0 -0
- {abstractvoice-0.4.6 → abstractvoice-0.5.1}/abstractvoice/examples/cli_repl.py +0 -0
- {abstractvoice-0.4.6 → abstractvoice-0.5.1}/abstractvoice/examples/voice_cli.py +0 -0
- {abstractvoice-0.4.6 → abstractvoice-0.5.1}/abstractvoice/examples/web_api.py +0 -0
- {abstractvoice-0.4.6 → abstractvoice-0.5.1}/abstractvoice/instant_setup.py +0 -0
- {abstractvoice-0.4.6 → abstractvoice-0.5.1}/abstractvoice/recognition.py +0 -0
- {abstractvoice-0.4.6 → abstractvoice-0.5.1}/abstractvoice/stt/__init__.py +0 -0
- {abstractvoice-0.4.6 → abstractvoice-0.5.1}/abstractvoice/stt/transcriber.py +0 -0
- {abstractvoice-0.4.6 → abstractvoice-0.5.1}/abstractvoice/tts/__init__.py +0 -0
- {abstractvoice-0.4.6 → abstractvoice-0.5.1}/abstractvoice/vad/__init__.py +0 -0
- {abstractvoice-0.4.6 → abstractvoice-0.5.1}/abstractvoice/vad/voice_detector.py +0 -0
- {abstractvoice-0.4.6 → abstractvoice-0.5.1}/abstractvoice.egg-info/SOURCES.txt +0 -0
- {abstractvoice-0.4.6 → abstractvoice-0.5.1}/abstractvoice.egg-info/dependency_links.txt +0 -0
- {abstractvoice-0.4.6 → abstractvoice-0.5.1}/abstractvoice.egg-info/entry_points.txt +0 -0
- {abstractvoice-0.4.6 → abstractvoice-0.5.1}/abstractvoice.egg-info/requires.txt +0 -0
- {abstractvoice-0.4.6 → abstractvoice-0.5.1}/abstractvoice.egg-info/top_level.txt +0 -0
- {abstractvoice-0.4.6 → abstractvoice-0.5.1}/pyproject.toml +0 -0
- {abstractvoice-0.4.6 → abstractvoice-0.5.1}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: abstractvoice
|
|
3
|
-
Version: 0.4.6
|
|
3
|
+
Version: 0.5.1
|
|
4
4
|
Summary: A modular Python library for voice interactions with AI systems
|
|
5
5
|
Author-email: Laurent-Philippe Albou <contact@abstractcore.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -172,38 +172,51 @@ AbstractVoice automatically detects espeak-ng and upgrades to premium quality vo
|
|
|
172
172
|
|
|
173
173
|
## Quick Start
|
|
174
174
|
|
|
175
|
-
### ⚡ Instant TTS (v0.
|
|
175
|
+
### ⚡ Instant TTS (v0.5.0+)
|
|
176
176
|
|
|
177
177
|
```python
|
|
178
178
|
from abstractvoice import VoiceManager
|
|
179
179
|
|
|
180
|
-
# Initialize voice manager -
|
|
180
|
+
# Initialize voice manager - works immediately with included dependencies
|
|
181
181
|
vm = VoiceManager()
|
|
182
182
|
|
|
183
|
-
# Text-to-speech works
|
|
183
|
+
# Text-to-speech works right away!
|
|
184
184
|
vm.speak("Hello! TTS works out of the box!")
|
|
185
|
+
|
|
186
|
+
# Language switching with automatic model download
|
|
187
|
+
vm.set_language('fr')
|
|
188
|
+
vm.speak("Bonjour! Le français fonctionne aussi!")
|
|
185
189
|
```
|
|
186
190
|
|
|
187
|
-
**That's it!** AbstractVoice v0.
|
|
188
|
-
- ✅
|
|
189
|
-
- ✅
|
|
190
|
-
- ✅ Works immediately after
|
|
191
|
+
**That's it!** AbstractVoice v0.5.0+ automatically:
|
|
192
|
+
- ✅ Includes essential TTS dependencies in base installation
|
|
193
|
+
- ✅ Downloads models automatically when switching languages/voices
|
|
194
|
+
- ✅ Works immediately after `pip install abstractvoice`
|
|
195
|
+
- ✅ No silent failures - clear error messages if download fails
|
|
191
196
|
- ✅ No complex configuration needed
|
|
192
197
|
|
|
193
|
-
### 🌍 Multi-Language Support
|
|
198
|
+
### 🌍 Multi-Language Support (Auto-Download in v0.5.0+)
|
|
194
199
|
|
|
195
200
|
```python
|
|
196
|
-
#
|
|
197
|
-
vm.download_model('fr.css10_vits') # Downloads automatically
|
|
201
|
+
# Simply switch language - downloads model automatically if needed!
|
|
198
202
|
vm.set_language('fr')
|
|
199
203
|
vm.speak("Bonjour! Je parle français maintenant.")
|
|
200
204
|
|
|
201
|
-
#
|
|
202
|
-
vm.download_model('de.thorsten_vits')
|
|
205
|
+
# Switch to German - no manual download needed
|
|
203
206
|
vm.set_language('de')
|
|
204
207
|
vm.speak("Hallo! Ich spreche jetzt Deutsch.")
|
|
208
|
+
|
|
209
|
+
# Spanish, Italian also supported
|
|
210
|
+
vm.set_language('es')
|
|
211
|
+
vm.speak("¡Hola! Hablo español ahora.")
|
|
212
|
+
|
|
213
|
+
# If download fails, you'll get clear error messages with instructions
|
|
214
|
+
# Example: "❌ Cannot switch to French: Model download failed"
|
|
215
|
+
# " Try: abstractvoice download-models --language fr"
|
|
205
216
|
```
|
|
206
217
|
|
|
218
|
+
**New in v0.5.0:** No more manual `download_model()` calls! Language switching handles downloads automatically.
|
|
219
|
+
|
|
207
220
|
### 🔧 Check System Status
|
|
208
221
|
|
|
209
222
|
```python
|
|
@@ -602,6 +615,37 @@ manager.set_tts_model("tts_models/en/ljspeech/glow-tts")
|
|
|
602
615
|
# - "tts_models/en/ljspeech/glow-tts" (alternative fallback)
|
|
603
616
|
# - "tts_models/en/ljspeech/tacotron2-DDC" (legacy)
|
|
604
617
|
|
|
618
|
+
# === Audio Lifecycle Callbacks (v0.5.1+) ===
|
|
619
|
+
|
|
620
|
+
# NEW: Precise audio timing callbacks for visual status indicators
|
|
621
|
+
def on_synthesis_start():
|
|
622
|
+
print("🔴 Synthesis started - show thinking animation")
|
|
623
|
+
|
|
624
|
+
def on_audio_start():
|
|
625
|
+
print("🔵 Audio started - show speaking animation")
|
|
626
|
+
|
|
627
|
+
def on_audio_pause():
|
|
628
|
+
print("⏸️ Audio paused - show paused animation")
|
|
629
|
+
|
|
630
|
+
def on_audio_resume():
|
|
631
|
+
print("▶️ Audio resumed - continue speaking animation")
|
|
632
|
+
|
|
633
|
+
def on_audio_end():
|
|
634
|
+
print("🟢 Audio ended - show ready animation")
|
|
635
|
+
|
|
636
|
+
def on_synthesis_end():
|
|
637
|
+
print("✅ Synthesis complete")
|
|
638
|
+
|
|
639
|
+
# Wire up callbacks
|
|
640
|
+
manager.tts_engine.on_playback_start = on_synthesis_start # Existing (synthesis phase)
|
|
641
|
+
manager.tts_engine.on_playback_end = on_synthesis_end # Existing (synthesis phase)
|
|
642
|
+
manager.on_audio_start = on_audio_start # NEW (actual audio playback)
|
|
643
|
+
manager.on_audio_end = on_audio_end # NEW (actual audio playback)
|
|
644
|
+
manager.on_audio_pause = on_audio_pause # NEW (pause events)
|
|
645
|
+
manager.on_audio_resume = on_audio_resume # NEW (resume events)
|
|
646
|
+
|
|
647
|
+
# Perfect for system tray icons, UI animations, or coordinating multiple audio streams
|
|
648
|
+
|
|
605
649
|
# === STT (Speech-to-Text) ===
|
|
606
650
|
|
|
607
651
|
def on_transcription(text):
|
|
@@ -1371,20 +1415,22 @@ abstractvoice check-deps
|
|
|
1371
1415
|
|
|
1372
1416
|
### CLI Voice Commands
|
|
1373
1417
|
|
|
1374
|
-
In the CLI REPL, use these commands:
|
|
1418
|
+
In the CLI REPL, use these commands (v0.5.0+):
|
|
1375
1419
|
|
|
1376
1420
|
```bash
|
|
1377
1421
|
# List all available voices with download status
|
|
1378
1422
|
/setvoice
|
|
1379
1423
|
|
|
1380
|
-
#
|
|
1381
|
-
/setvoice fr.css10_vits # French CSS10
|
|
1382
|
-
/setvoice de.thorsten_vits # German Thorsten
|
|
1383
|
-
/setvoice it.mai_male_vits # Italian Male
|
|
1424
|
+
# Automatically download and set specific voice (NEW in v0.5.0!)
|
|
1425
|
+
/setvoice fr.css10_vits # Downloads French CSS10 if needed
|
|
1426
|
+
/setvoice de.thorsten_vits # Downloads German Thorsten if needed
|
|
1427
|
+
/setvoice it.mai_male_vits # Downloads Italian Male if needed
|
|
1428
|
+
/setvoice en.jenny # Downloads Jenny voice if needed
|
|
1384
1429
|
|
|
1385
|
-
# Change language
|
|
1386
|
-
/language fr
|
|
1387
|
-
/language de
|
|
1430
|
+
# Change language (automatically downloads models if needed - NEW!)
|
|
1431
|
+
/language fr # Switches to French, downloads if needed
|
|
1432
|
+
/language de # Switches to German, downloads if needed
|
|
1433
|
+
/language es # Switches to Spanish, downloads if needed
|
|
1388
1434
|
|
|
1389
1435
|
# Voice controls
|
|
1390
1436
|
/pause # Pause current speech
|
|
@@ -1395,6 +1441,8 @@ In the CLI REPL, use these commands:
|
|
|
1395
1441
|
/exit
|
|
1396
1442
|
```
|
|
1397
1443
|
|
|
1444
|
+
**New in v0.5.0:** Language and voice commands now automatically download missing models with progress indicators. No more silent failures!
|
|
1445
|
+
|
|
1398
1446
|
## Perspectives
|
|
1399
1447
|
|
|
1400
1448
|
This is a test project that I designed with examples to work with Ollama, but I will adapt the examples and abstractvoice to work with any LLM provider (anthropic, openai, etc).
|
|
@@ -82,38 +82,51 @@ AbstractVoice automatically detects espeak-ng and upgrades to premium quality vo
|
|
|
82
82
|
|
|
83
83
|
## Quick Start
|
|
84
84
|
|
|
85
|
-
### ⚡ Instant TTS (v0.
|
|
85
|
+
### ⚡ Instant TTS (v0.5.0+)
|
|
86
86
|
|
|
87
87
|
```python
|
|
88
88
|
from abstractvoice import VoiceManager
|
|
89
89
|
|
|
90
|
-
# Initialize voice manager -
|
|
90
|
+
# Initialize voice manager - works immediately with included dependencies
|
|
91
91
|
vm = VoiceManager()
|
|
92
92
|
|
|
93
|
-
# Text-to-speech works
|
|
93
|
+
# Text-to-speech works right away!
|
|
94
94
|
vm.speak("Hello! TTS works out of the box!")
|
|
95
|
+
|
|
96
|
+
# Language switching with automatic model download
|
|
97
|
+
vm.set_language('fr')
|
|
98
|
+
vm.speak("Bonjour! Le français fonctionne aussi!")
|
|
95
99
|
```
|
|
96
100
|
|
|
97
|
-
**That's it!** AbstractVoice v0.
|
|
98
|
-
- ✅
|
|
99
|
-
- ✅
|
|
100
|
-
- ✅ Works immediately after
|
|
101
|
+
**That's it!** AbstractVoice v0.5.0+ automatically:
|
|
102
|
+
- ✅ Includes essential TTS dependencies in base installation
|
|
103
|
+
- ✅ Downloads models automatically when switching languages/voices
|
|
104
|
+
- ✅ Works immediately after `pip install abstractvoice`
|
|
105
|
+
- ✅ No silent failures - clear error messages if download fails
|
|
101
106
|
- ✅ No complex configuration needed
|
|
102
107
|
|
|
103
|
-
### 🌍 Multi-Language Support
|
|
108
|
+
### 🌍 Multi-Language Support (Auto-Download in v0.5.0+)
|
|
104
109
|
|
|
105
110
|
```python
|
|
106
|
-
#
|
|
107
|
-
vm.download_model('fr.css10_vits') # Downloads automatically
|
|
111
|
+
# Simply switch language - downloads model automatically if needed!
|
|
108
112
|
vm.set_language('fr')
|
|
109
113
|
vm.speak("Bonjour! Je parle français maintenant.")
|
|
110
114
|
|
|
111
|
-
#
|
|
112
|
-
vm.download_model('de.thorsten_vits')
|
|
115
|
+
# Switch to German - no manual download needed
|
|
113
116
|
vm.set_language('de')
|
|
114
117
|
vm.speak("Hallo! Ich spreche jetzt Deutsch.")
|
|
118
|
+
|
|
119
|
+
# Spanish, Italian also supported
|
|
120
|
+
vm.set_language('es')
|
|
121
|
+
vm.speak("¡Hola! Hablo español ahora.")
|
|
122
|
+
|
|
123
|
+
# If download fails, you'll get clear error messages with instructions
|
|
124
|
+
# Example: "❌ Cannot switch to French: Model download failed"
|
|
125
|
+
# " Try: abstractvoice download-models --language fr"
|
|
115
126
|
```
|
|
116
127
|
|
|
128
|
+
**New in v0.5.0:** No more manual `download_model()` calls! Language switching handles downloads automatically.
|
|
129
|
+
|
|
117
130
|
### 🔧 Check System Status
|
|
118
131
|
|
|
119
132
|
```python
|
|
@@ -512,6 +525,37 @@ manager.set_tts_model("tts_models/en/ljspeech/glow-tts")
|
|
|
512
525
|
# - "tts_models/en/ljspeech/glow-tts" (alternative fallback)
|
|
513
526
|
# - "tts_models/en/ljspeech/tacotron2-DDC" (legacy)
|
|
514
527
|
|
|
528
|
+
# === Audio Lifecycle Callbacks (v0.5.1+) ===
|
|
529
|
+
|
|
530
|
+
# NEW: Precise audio timing callbacks for visual status indicators
|
|
531
|
+
def on_synthesis_start():
|
|
532
|
+
print("🔴 Synthesis started - show thinking animation")
|
|
533
|
+
|
|
534
|
+
def on_audio_start():
|
|
535
|
+
print("🔵 Audio started - show speaking animation")
|
|
536
|
+
|
|
537
|
+
def on_audio_pause():
|
|
538
|
+
print("⏸️ Audio paused - show paused animation")
|
|
539
|
+
|
|
540
|
+
def on_audio_resume():
|
|
541
|
+
print("▶️ Audio resumed - continue speaking animation")
|
|
542
|
+
|
|
543
|
+
def on_audio_end():
|
|
544
|
+
print("🟢 Audio ended - show ready animation")
|
|
545
|
+
|
|
546
|
+
def on_synthesis_end():
|
|
547
|
+
print("✅ Synthesis complete")
|
|
548
|
+
|
|
549
|
+
# Wire up callbacks
|
|
550
|
+
manager.tts_engine.on_playback_start = on_synthesis_start # Existing (synthesis phase)
|
|
551
|
+
manager.tts_engine.on_playback_end = on_synthesis_end # Existing (synthesis phase)
|
|
552
|
+
manager.on_audio_start = on_audio_start # NEW (actual audio playback)
|
|
553
|
+
manager.on_audio_end = on_audio_end # NEW (actual audio playback)
|
|
554
|
+
manager.on_audio_pause = on_audio_pause # NEW (pause events)
|
|
555
|
+
manager.on_audio_resume = on_audio_resume # NEW (resume events)
|
|
556
|
+
|
|
557
|
+
# Perfect for system tray icons, UI animations, or coordinating multiple audio streams
|
|
558
|
+
|
|
515
559
|
# === STT (Speech-to-Text) ===
|
|
516
560
|
|
|
517
561
|
def on_transcription(text):
|
|
@@ -1281,20 +1325,22 @@ abstractvoice check-deps
|
|
|
1281
1325
|
|
|
1282
1326
|
### CLI Voice Commands
|
|
1283
1327
|
|
|
1284
|
-
In the CLI REPL, use these commands:
|
|
1328
|
+
In the CLI REPL, use these commands (v0.5.0+):
|
|
1285
1329
|
|
|
1286
1330
|
```bash
|
|
1287
1331
|
# List all available voices with download status
|
|
1288
1332
|
/setvoice
|
|
1289
1333
|
|
|
1290
|
-
#
|
|
1291
|
-
/setvoice fr.css10_vits # French CSS10
|
|
1292
|
-
/setvoice de.thorsten_vits # German Thorsten
|
|
1293
|
-
/setvoice it.mai_male_vits # Italian Male
|
|
1334
|
+
# Automatically download and set specific voice (NEW in v0.5.0!)
|
|
1335
|
+
/setvoice fr.css10_vits # Downloads French CSS10 if needed
|
|
1336
|
+
/setvoice de.thorsten_vits # Downloads German Thorsten if needed
|
|
1337
|
+
/setvoice it.mai_male_vits # Downloads Italian Male if needed
|
|
1338
|
+
/setvoice en.jenny # Downloads Jenny voice if needed
|
|
1294
1339
|
|
|
1295
|
-
# Change language
|
|
1296
|
-
/language fr
|
|
1297
|
-
/language de
|
|
1340
|
+
# Change language (automatically downloads models if needed - NEW!)
|
|
1341
|
+
/language fr # Switches to French, downloads if needed
|
|
1342
|
+
/language de # Switches to German, downloads if needed
|
|
1343
|
+
/language es # Switches to Spanish, downloads if needed
|
|
1298
1344
|
|
|
1299
1345
|
# Voice controls
|
|
1300
1346
|
/pause # Pause current speech
|
|
@@ -1305,6 +1351,8 @@ In the CLI REPL, use these commands:
|
|
|
1305
1351
|
/exit
|
|
1306
1352
|
```
|
|
1307
1353
|
|
|
1354
|
+
**New in v0.5.0:** Language and voice commands now automatically download missing models with progress indicators. No more silent failures!
|
|
1355
|
+
|
|
1308
1356
|
## Perspectives
|
|
1309
1357
|
|
|
1310
1358
|
This is a test project that I designed with examples to work with Ollama, but I will adapt the examples and abstractvoice to work with any LLM provider (anthropic, openai, etc).
|
|
@@ -32,5 +32,5 @@ from .voice_manager import VoiceManager
|
|
|
32
32
|
# Import simple APIs for third-party applications
|
|
33
33
|
from .simple_model_manager import list_models, download_model, get_status, is_ready
|
|
34
34
|
|
|
35
|
-
__version__ = "0.4.6"
|
|
35
|
+
__version__ = "0.5.1"
|
|
36
36
|
__all__ = ['VoiceManager', 'list_models', 'download_model', 'get_status', 'is_ready']
|
|
@@ -212,7 +212,7 @@ class SimpleModelManager:
|
|
|
212
212
|
return False
|
|
213
213
|
|
|
214
214
|
def download_model(self, model_name: str, progress_callback: Optional[Callable[[str, bool], None]] = None) -> bool:
|
|
215
|
-
"""Download a specific model.
|
|
215
|
+
"""Download a specific model with improved error handling.
|
|
216
216
|
|
|
217
217
|
Args:
|
|
218
218
|
model_name: TTS model name (e.g., 'tts_models/en/ljspeech/fast_pitch')
|
|
@@ -231,25 +231,56 @@ class SimpleModelManager:
|
|
|
231
231
|
try:
|
|
232
232
|
TTS, _ = _import_tts()
|
|
233
233
|
|
|
234
|
-
|
|
235
|
-
|
|
234
|
+
print(f"📥 Downloading {model_name}...")
|
|
235
|
+
print(f" This may take a few minutes depending on your connection...")
|
|
236
236
|
|
|
237
237
|
start_time = time.time()
|
|
238
238
|
|
|
239
239
|
# Initialize TTS to trigger download
|
|
240
|
-
|
|
240
|
+
# Set gpu=False to avoid CUDA errors on systems without GPU
|
|
241
|
+
try:
|
|
242
|
+
tts = TTS(model_name=model_name, progress_bar=True, gpu=False)
|
|
243
|
+
|
|
244
|
+
# Verify the model actually downloaded
|
|
245
|
+
if not self.is_model_cached(model_name):
|
|
246
|
+
print(f"⚠️ Model download completed but not found in cache")
|
|
247
|
+
return False
|
|
248
|
+
|
|
249
|
+
except Exception as init_error:
|
|
250
|
+
# Try alternative download method
|
|
251
|
+
error_msg = str(init_error).lower()
|
|
252
|
+
if "connection" in error_msg or "timeout" in error_msg:
|
|
253
|
+
print(f"❌ Network error: Check your internet connection")
|
|
254
|
+
elif "not found" in error_msg:
|
|
255
|
+
print(f"❌ Model '{model_name}' not found in registry")
|
|
256
|
+
else:
|
|
257
|
+
print(f"❌ Download error: {init_error}")
|
|
258
|
+
raise
|
|
241
259
|
|
|
242
260
|
download_time = time.time() - start_time
|
|
243
|
-
|
|
244
|
-
print(f"✅ Downloaded {model_name} in {download_time:.1f}s")
|
|
261
|
+
print(f"✅ Downloaded {model_name} in {download_time:.1f}s")
|
|
245
262
|
|
|
246
263
|
if progress_callback:
|
|
247
264
|
progress_callback(model_name, True)
|
|
248
265
|
return True
|
|
249
266
|
|
|
250
267
|
except Exception as e:
|
|
251
|
-
|
|
252
|
-
|
|
268
|
+
error_msg = str(e).lower()
|
|
269
|
+
|
|
270
|
+
# Provide helpful error messages
|
|
271
|
+
if "connection" in error_msg or "timeout" in error_msg:
|
|
272
|
+
print(f"❌ Failed to download {model_name}: Network issue")
|
|
273
|
+
print(f" Check your internet connection and try again")
|
|
274
|
+
elif "permission" in error_msg:
|
|
275
|
+
print(f"❌ Failed to download {model_name}: Permission denied")
|
|
276
|
+
print(f" Check write permissions for cache directory")
|
|
277
|
+
elif "space" in error_msg:
|
|
278
|
+
print(f"❌ Failed to download {model_name}: Insufficient disk space")
|
|
279
|
+
else:
|
|
280
|
+
print(f"❌ Failed to download {model_name}")
|
|
281
|
+
if self.debug_mode:
|
|
282
|
+
print(f" Error: {e}")
|
|
283
|
+
|
|
253
284
|
if progress_callback:
|
|
254
285
|
progress_callback(model_name, False)
|
|
255
286
|
return False
|
|
@@ -454,8 +485,16 @@ def download_models_cli():
|
|
|
454
485
|
return
|
|
455
486
|
|
|
456
487
|
if args.clear:
|
|
457
|
-
|
|
458
|
-
|
|
488
|
+
# Ask for confirmation
|
|
489
|
+
response = input("⚠️ This will delete all downloaded TTS models. Continue? (y/N): ")
|
|
490
|
+
if response.lower() == 'y':
|
|
491
|
+
success = manager.clear_cache(confirm=True)
|
|
492
|
+
if success:
|
|
493
|
+
print("✅ Model cache cleared")
|
|
494
|
+
else:
|
|
495
|
+
print("❌ Failed to clear cache")
|
|
496
|
+
else:
|
|
497
|
+
print("Cancelled")
|
|
459
498
|
return
|
|
460
499
|
|
|
461
500
|
if args.model:
|
|
@@ -212,6 +212,13 @@ class NonBlockingAudioPlayer:
|
|
|
212
212
|
self.current_position = 0
|
|
213
213
|
self.playback_complete_callback = None
|
|
214
214
|
|
|
215
|
+
# NEW: Enhanced audio lifecycle callbacks
|
|
216
|
+
self.on_audio_start = None # Called when first audio sample plays
|
|
217
|
+
self.on_audio_end = None # Called when last audio sample finishes
|
|
218
|
+
self.on_audio_pause = None # Called when audio is paused
|
|
219
|
+
self.on_audio_resume = None # Called when audio is resumed
|
|
220
|
+
self._audio_started = False # Track if we've fired start callback
|
|
221
|
+
|
|
215
222
|
def _audio_callback(self, outdata, frames, time, status):
|
|
216
223
|
"""Callback function for OutputStream - provides immediate pause/resume."""
|
|
217
224
|
if status and self.debug_mode:
|
|
@@ -237,6 +244,12 @@ class NonBlockingAudioPlayer:
|
|
|
237
244
|
outdata.fill(0)
|
|
238
245
|
if self.is_playing:
|
|
239
246
|
self.is_playing = False
|
|
247
|
+
self._audio_started = False # Reset for next playback
|
|
248
|
+
|
|
249
|
+
# Fire audio end callback
|
|
250
|
+
if self.on_audio_end:
|
|
251
|
+
threading.Thread(target=self.on_audio_end, daemon=True).start()
|
|
252
|
+
|
|
240
253
|
if self.playback_complete_callback:
|
|
241
254
|
# Call completion callback in a separate thread to avoid blocking
|
|
242
255
|
threading.Thread(target=self.playback_complete_callback, daemon=True).start()
|
|
@@ -246,6 +259,12 @@ class NonBlockingAudioPlayer:
|
|
|
246
259
|
remaining = len(self.current_audio) - self.current_position
|
|
247
260
|
frames_to_output = min(frames, remaining)
|
|
248
261
|
|
|
262
|
+
# Fire audio start callback on first real audio output
|
|
263
|
+
if frames_to_output > 0 and not self._audio_started:
|
|
264
|
+
self._audio_started = True
|
|
265
|
+
if self.on_audio_start:
|
|
266
|
+
threading.Thread(target=self.on_audio_start, daemon=True).start()
|
|
267
|
+
|
|
249
268
|
# Output the audio data
|
|
250
269
|
if frames_to_output > 0:
|
|
251
270
|
# Handle both mono and stereo output
|
|
@@ -344,6 +363,11 @@ class NonBlockingAudioPlayer:
|
|
|
344
363
|
self.is_paused = True
|
|
345
364
|
if self.debug_mode:
|
|
346
365
|
print(" > Audio paused immediately")
|
|
366
|
+
|
|
367
|
+
# Fire audio pause callback
|
|
368
|
+
if self.on_audio_pause:
|
|
369
|
+
threading.Thread(target=self.on_audio_pause, daemon=True).start()
|
|
370
|
+
|
|
347
371
|
return True
|
|
348
372
|
return False
|
|
349
373
|
|
|
@@ -354,6 +378,11 @@ class NonBlockingAudioPlayer:
|
|
|
354
378
|
self.is_paused = False
|
|
355
379
|
if self.debug_mode:
|
|
356
380
|
print(" > Audio resumed immediately")
|
|
381
|
+
|
|
382
|
+
# Fire audio resume callback
|
|
383
|
+
if self.on_audio_resume:
|
|
384
|
+
threading.Thread(target=self.on_audio_resume, daemon=True).start()
|
|
385
|
+
|
|
357
386
|
return True
|
|
358
387
|
return False
|
|
359
388
|
|
|
@@ -1264,4 +1293,5 @@ class TTSEngine:
|
|
|
1264
1293
|
Returns:
|
|
1265
1294
|
True if TTS is active, False otherwise
|
|
1266
1295
|
"""
|
|
1267
|
-
return self.is_playing
|
|
1296
|
+
return self.is_playing
|
|
1297
|
+
|
|
@@ -241,6 +241,18 @@ class VoiceManager:
|
|
|
241
241
|
self.tts_engine.on_playback_start = self._on_tts_start
|
|
242
242
|
self.tts_engine.on_playback_end = self._on_tts_end
|
|
243
243
|
|
|
244
|
+
# NEW: Enhanced audio lifecycle callbacks (v0.5.1)
|
|
245
|
+
self.on_audio_start = None # Called when first audio sample plays
|
|
246
|
+
self.on_audio_end = None # Called when last audio sample finishes
|
|
247
|
+
self.on_audio_pause = None # Called when audio is paused
|
|
248
|
+
self.on_audio_resume = None # Called when audio is resumed
|
|
249
|
+
|
|
250
|
+
# Wire callbacks directly to audio player (skip TTSEngine layer)
|
|
251
|
+
self.tts_engine.audio_player.on_audio_start = self._on_audio_start
|
|
252
|
+
self.tts_engine.audio_player.on_audio_end = self._on_audio_end
|
|
253
|
+
self.tts_engine.audio_player.on_audio_pause = self._on_audio_pause
|
|
254
|
+
self.tts_engine.audio_player.on_audio_resume = self._on_audio_resume
|
|
255
|
+
|
|
244
256
|
# Voice recognizer is initialized on demand
|
|
245
257
|
self.voice_recognizer = None
|
|
246
258
|
self.whisper_model = whisper_model
|
|
@@ -548,13 +560,37 @@ class VoiceManager:
|
|
|
548
560
|
|
|
549
561
|
# Select best model for this language
|
|
550
562
|
selected_model = self._select_best_model(language)
|
|
551
|
-
|
|
563
|
+
|
|
564
|
+
# CRITICAL FIX: Check if model is available, download if not
|
|
565
|
+
from .instant_setup import is_model_cached
|
|
566
|
+
from .simple_model_manager import download_model
|
|
567
|
+
|
|
568
|
+
if not is_model_cached(selected_model):
|
|
569
|
+
if self.debug_mode:
|
|
570
|
+
print(f"📥 Model {selected_model} not cached, downloading...")
|
|
571
|
+
|
|
572
|
+
# Try to download the model
|
|
573
|
+
success = download_model(selected_model)
|
|
574
|
+
if not success:
|
|
575
|
+
if self.debug_mode:
|
|
576
|
+
print(f"❌ Failed to download {selected_model}")
|
|
577
|
+
# If download fails and it's not English, we have a problem
|
|
578
|
+
if language != 'en':
|
|
579
|
+
print(f"❌ Cannot switch to {self.LANGUAGES[language]['name']}: Model download failed")
|
|
580
|
+
print(f" Try: abstractvoice download-models --language {language}")
|
|
581
|
+
return False
|
|
582
|
+
|
|
583
|
+
models_to_try = [selected_model]
|
|
584
|
+
|
|
585
|
+
# Only add fallback if it's different from selected
|
|
586
|
+
if selected_model != self.SAFE_FALLBACK:
|
|
587
|
+
models_to_try.append(self.SAFE_FALLBACK)
|
|
552
588
|
|
|
553
589
|
for model_name in models_to_try:
|
|
554
590
|
try:
|
|
555
591
|
if self.debug_mode:
|
|
556
592
|
lang_name = self.LANGUAGES[language]['name']
|
|
557
|
-
print(f"🌍
|
|
593
|
+
print(f"🌍 Loading {lang_name} voice: {model_name}")
|
|
558
594
|
|
|
559
595
|
# Reinitialize TTS engine
|
|
560
596
|
TTSEngine = _import_tts_engine()
|
|
@@ -579,12 +615,16 @@ class VoiceManager:
|
|
|
579
615
|
|
|
580
616
|
except Exception as e:
|
|
581
617
|
if self.debug_mode:
|
|
582
|
-
print(f"⚠️ Model {model_name} failed: {e}")
|
|
618
|
+
print(f"⚠️ Model {model_name} failed to load: {e}")
|
|
619
|
+
# Don't silently continue - report the failure
|
|
620
|
+
if model_name == selected_model and language != 'en':
|
|
621
|
+
print(f"❌ Failed to load {lang_name} voice model")
|
|
622
|
+
print(f" The model might be corrupted. Try:")
|
|
623
|
+
print(f" abstractvoice download-models --language {language}")
|
|
583
624
|
continue
|
|
584
625
|
|
|
585
626
|
# All models failed
|
|
586
|
-
|
|
587
|
-
print(f"❌ All models failed for language '{language}'")
|
|
627
|
+
print(f"❌ Cannot switch to {self.LANGUAGES[language]['name']}: No working models")
|
|
588
628
|
return False
|
|
589
629
|
|
|
590
630
|
def get_language(self):
|
|
@@ -856,15 +896,29 @@ class VoiceManager:
|
|
|
856
896
|
return False
|
|
857
897
|
|
|
858
898
|
voice_info = self.VOICE_CATALOG[language][voice_id]
|
|
899
|
+
model_name = voice_info['model']
|
|
859
900
|
|
|
860
|
-
#
|
|
861
|
-
|
|
901
|
+
# CRITICAL FIX: Download model if not cached
|
|
902
|
+
from .instant_setup import is_model_cached
|
|
903
|
+
from .simple_model_manager import download_model
|
|
904
|
+
|
|
905
|
+
if not is_model_cached(model_name):
|
|
906
|
+
print(f"📥 Voice model '{voice_id}' not cached, downloading...")
|
|
907
|
+
success = download_model(model_name)
|
|
908
|
+
if not success:
|
|
909
|
+
print(f"❌ Failed to download voice '{voice_id}'")
|
|
910
|
+
print(f" Check your internet connection and try again")
|
|
911
|
+
return False
|
|
912
|
+
print(f"✅ Voice model '{voice_id}' downloaded successfully")
|
|
913
|
+
|
|
914
|
+
# Check compatibility after download
|
|
915
|
+
if voice_info['requires'] == 'espeak-ng' and not self._test_model_compatibility(model_name):
|
|
862
916
|
if self.debug_mode:
|
|
863
917
|
print(f"⚠️ Voice '{voice_id}' requires espeak-ng. Install it for premium quality.")
|
|
864
|
-
|
|
918
|
+
# Don't fail - try to load anyway
|
|
919
|
+
# return False
|
|
865
920
|
|
|
866
921
|
# Set the specific voice
|
|
867
|
-
model_name = voice_info['model']
|
|
868
922
|
if self.debug_mode:
|
|
869
923
|
print(f"🎭 Setting {language} voice to: {voice_id}")
|
|
870
924
|
print(f" Model: {model_name}")
|
|
@@ -988,4 +1042,24 @@ class VoiceManager:
|
|
|
988
1042
|
self.voice_recognizer.stop()
|
|
989
1043
|
|
|
990
1044
|
self.stop_speaking()
|
|
991
|
-
return True
|
|
1045
|
+
return True
|
|
1046
|
+
|
|
1047
|
+
def _on_audio_start(self):
|
|
1048
|
+
"""Called when audio actually starts playing."""
|
|
1049
|
+
if self.on_audio_start:
|
|
1050
|
+
self.on_audio_start()
|
|
1051
|
+
|
|
1052
|
+
def _on_audio_end(self):
|
|
1053
|
+
"""Called when audio actually finishes playing."""
|
|
1054
|
+
if self.on_audio_end:
|
|
1055
|
+
self.on_audio_end()
|
|
1056
|
+
|
|
1057
|
+
def _on_audio_pause(self):
|
|
1058
|
+
"""Called when audio is paused."""
|
|
1059
|
+
if self.on_audio_pause:
|
|
1060
|
+
self.on_audio_pause()
|
|
1061
|
+
|
|
1062
|
+
def _on_audio_resume(self):
|
|
1063
|
+
"""Called when audio is resumed."""
|
|
1064
|
+
if self.on_audio_resume:
|
|
1065
|
+
self.on_audio_resume()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: abstractvoice
|
|
3
|
-
Version: 0.4.6
|
|
3
|
+
Version: 0.5.1
|
|
4
4
|
Summary: A modular Python library for voice interactions with AI systems
|
|
5
5
|
Author-email: Laurent-Philippe Albou <contact@abstractcore.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -172,38 +172,51 @@ AbstractVoice automatically detects espeak-ng and upgrades to premium quality vo
|
|
|
172
172
|
|
|
173
173
|
## Quick Start
|
|
174
174
|
|
|
175
|
-
### ⚡ Instant TTS (v0.
|
|
175
|
+
### ⚡ Instant TTS (v0.5.0+)
|
|
176
176
|
|
|
177
177
|
```python
|
|
178
178
|
from abstractvoice import VoiceManager
|
|
179
179
|
|
|
180
|
-
# Initialize voice manager -
|
|
180
|
+
# Initialize voice manager - works immediately with included dependencies
|
|
181
181
|
vm = VoiceManager()
|
|
182
182
|
|
|
183
|
-
# Text-to-speech works
|
|
183
|
+
# Text-to-speech works right away!
|
|
184
184
|
vm.speak("Hello! TTS works out of the box!")
|
|
185
|
+
|
|
186
|
+
# Language switching with automatic model download
|
|
187
|
+
vm.set_language('fr')
|
|
188
|
+
vm.speak("Bonjour! Le français fonctionne aussi!")
|
|
185
189
|
```
|
|
186
190
|
|
|
187
|
-
**That's it!** AbstractVoice v0.
|
|
188
|
-
- ✅
|
|
189
|
-
- ✅
|
|
190
|
-
- ✅ Works immediately after
|
|
191
|
+
**That's it!** AbstractVoice v0.5.0+ automatically:
|
|
192
|
+
- ✅ Includes essential TTS dependencies in base installation
|
|
193
|
+
- ✅ Downloads models automatically when switching languages/voices
|
|
194
|
+
- ✅ Works immediately after `pip install abstractvoice`
|
|
195
|
+
- ✅ No silent failures - clear error messages if download fails
|
|
191
196
|
- ✅ No complex configuration needed
|
|
192
197
|
|
|
193
|
-
### 🌍 Multi-Language Support
|
|
198
|
+
### 🌍 Multi-Language Support (Auto-Download in v0.5.0+)
|
|
194
199
|
|
|
195
200
|
```python
|
|
196
|
-
#
|
|
197
|
-
vm.download_model('fr.css10_vits') # Downloads automatically
|
|
201
|
+
# Simply switch language - downloads model automatically if needed!
|
|
198
202
|
vm.set_language('fr')
|
|
199
203
|
vm.speak("Bonjour! Je parle français maintenant.")
|
|
200
204
|
|
|
201
|
-
#
|
|
202
|
-
vm.download_model('de.thorsten_vits')
|
|
205
|
+
# Switch to German - no manual download needed
|
|
203
206
|
vm.set_language('de')
|
|
204
207
|
vm.speak("Hallo! Ich spreche jetzt Deutsch.")
|
|
208
|
+
|
|
209
|
+
# Spanish, Italian also supported
|
|
210
|
+
vm.set_language('es')
|
|
211
|
+
vm.speak("¡Hola! Hablo español ahora.")
|
|
212
|
+
|
|
213
|
+
# If download fails, you'll get clear error messages with instructions
|
|
214
|
+
# Example: "❌ Cannot switch to French: Model download failed"
|
|
215
|
+
# " Try: abstractvoice download-models --language fr"
|
|
205
216
|
```
|
|
206
217
|
|
|
218
|
+
**New in v0.5.0:** No more manual `download_model()` calls! Language switching handles downloads automatically.
|
|
219
|
+
|
|
207
220
|
### 🔧 Check System Status
|
|
208
221
|
|
|
209
222
|
```python
|
|
@@ -602,6 +615,37 @@ manager.set_tts_model("tts_models/en/ljspeech/glow-tts")
|
|
|
602
615
|
# - "tts_models/en/ljspeech/glow-tts" (alternative fallback)
|
|
603
616
|
# - "tts_models/en/ljspeech/tacotron2-DDC" (legacy)
|
|
604
617
|
|
|
618
|
+
# === Audio Lifecycle Callbacks (v0.5.1+) ===
|
|
619
|
+
|
|
620
|
+
# NEW: Precise audio timing callbacks for visual status indicators
|
|
621
|
+
def on_synthesis_start():
|
|
622
|
+
print("🔴 Synthesis started - show thinking animation")
|
|
623
|
+
|
|
624
|
+
def on_audio_start():
|
|
625
|
+
print("🔵 Audio started - show speaking animation")
|
|
626
|
+
|
|
627
|
+
def on_audio_pause():
|
|
628
|
+
print("⏸️ Audio paused - show paused animation")
|
|
629
|
+
|
|
630
|
+
def on_audio_resume():
|
|
631
|
+
print("▶️ Audio resumed - continue speaking animation")
|
|
632
|
+
|
|
633
|
+
def on_audio_end():
|
|
634
|
+
print("🟢 Audio ended - show ready animation")
|
|
635
|
+
|
|
636
|
+
def on_synthesis_end():
|
|
637
|
+
print("✅ Synthesis complete")
|
|
638
|
+
|
|
639
|
+
# Wire up callbacks
|
|
640
|
+
manager.tts_engine.on_playback_start = on_synthesis_start # Existing (synthesis phase)
|
|
641
|
+
manager.tts_engine.on_playback_end = on_synthesis_end # Existing (synthesis phase)
|
|
642
|
+
manager.on_audio_start = on_audio_start # NEW (actual audio playback)
|
|
643
|
+
manager.on_audio_end = on_audio_end # NEW (actual audio playback)
|
|
644
|
+
manager.on_audio_pause = on_audio_pause # NEW (pause events)
|
|
645
|
+
manager.on_audio_resume = on_audio_resume # NEW (resume events)
|
|
646
|
+
|
|
647
|
+
# Perfect for system tray icons, UI animations, or coordinating multiple audio streams
|
|
648
|
+
|
|
605
649
|
# === STT (Speech-to-Text) ===
|
|
606
650
|
|
|
607
651
|
def on_transcription(text):
|
|
@@ -1371,20 +1415,22 @@ abstractvoice check-deps
|
|
|
1371
1415
|
|
|
1372
1416
|
### CLI Voice Commands
|
|
1373
1417
|
|
|
1374
|
-
In the CLI REPL, use these commands:
|
|
1418
|
+
In the CLI REPL, use these commands (v0.5.0+):
|
|
1375
1419
|
|
|
1376
1420
|
```bash
|
|
1377
1421
|
# List all available voices with download status
|
|
1378
1422
|
/setvoice
|
|
1379
1423
|
|
|
1380
|
-
#
|
|
1381
|
-
/setvoice fr.css10_vits # French CSS10
|
|
1382
|
-
/setvoice de.thorsten_vits # German Thorsten
|
|
1383
|
-
/setvoice it.mai_male_vits # Italian Male
|
|
1424
|
+
# Automatically download and set a specific voice (NEW in v0.5.0!)
|
|
1425
|
+
/setvoice fr.css10_vits # Downloads French CSS10 if needed
|
|
1426
|
+
/setvoice de.thorsten_vits # Downloads German Thorsten if needed
|
|
1427
|
+
/setvoice it.mai_male_vits # Downloads Italian Male if needed
|
|
1428
|
+
/setvoice en.jenny # Downloads Jenny voice if needed
|
|
1384
1429
|
|
|
1385
|
-
# Change language
|
|
1386
|
-
/language fr
|
|
1387
|
-
/language de
|
|
1430
|
+
# Change language (automatically downloads models if needed - NEW!)
|
|
1431
|
+
/language fr # Switches to French, downloads if needed
|
|
1432
|
+
/language de # Switches to German, downloads if needed
|
|
1433
|
+
/language es # Switches to Spanish, downloads if needed
|
|
1388
1434
|
|
|
1389
1435
|
# Voice controls
|
|
1390
1436
|
/pause # Pause current speech
|
|
@@ -1395,6 +1441,8 @@ In the CLI REPL, use these commands:
|
|
|
1395
1441
|
/exit
|
|
1396
1442
|
```
|
|
1397
1443
|
|
|
1444
|
+
**New in v0.5.0:** Language and voice commands now automatically download missing models with progress indicators. No more silent failures!
|
|
1445
|
+
|
|
1398
1446
|
## Perspectives
|
|
1399
1447
|
|
|
1400
1448
|
This is a test project that I designed with examples to work with Ollama, but I will adapt the examples and abstractvoice to work with any LLM provider (Anthropic, OpenAI, etc.).
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|