abstractvoice 0.4.1__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {abstractvoice-0.4.1 → abstractvoice-0.5.0}/PKG-INFO +46 -21
- {abstractvoice-0.4.1 → abstractvoice-0.5.0}/README.md +37 -20
- {abstractvoice-0.4.1 → abstractvoice-0.5.0}/abstractvoice/__init__.py +1 -1
- {abstractvoice-0.4.1 → abstractvoice-0.5.0}/abstractvoice/examples/voice_cli.py +1 -1
- abstractvoice-0.5.0/abstractvoice/instant_setup.py +83 -0
- {abstractvoice-0.4.1 → abstractvoice-0.5.0}/abstractvoice/simple_model_manager.py +165 -24
- {abstractvoice-0.4.1 → abstractvoice-0.5.0}/abstractvoice/tts/tts_engine.py +151 -38
- {abstractvoice-0.4.1 → abstractvoice-0.5.0}/abstractvoice/voice_manager.py +144 -28
- {abstractvoice-0.4.1 → abstractvoice-0.5.0}/abstractvoice.egg-info/PKG-INFO +46 -21
- {abstractvoice-0.4.1 → abstractvoice-0.5.0}/abstractvoice.egg-info/SOURCES.txt +1 -1
- {abstractvoice-0.4.1 → abstractvoice-0.5.0}/abstractvoice.egg-info/requires.txt +8 -0
- {abstractvoice-0.4.1 → abstractvoice-0.5.0}/pyproject.toml +9 -0
- abstractvoice-0.4.1/abstractvoice/model_manager.py +0 -384
- {abstractvoice-0.4.1 → abstractvoice-0.5.0}/LICENSE +0 -0
- {abstractvoice-0.4.1 → abstractvoice-0.5.0}/abstractvoice/__main__.py +0 -0
- {abstractvoice-0.4.1 → abstractvoice-0.5.0}/abstractvoice/dependency_check.py +0 -0
- {abstractvoice-0.4.1 → abstractvoice-0.5.0}/abstractvoice/examples/__init__.py +0 -0
- {abstractvoice-0.4.1 → abstractvoice-0.5.0}/abstractvoice/examples/cli_repl.py +0 -0
- {abstractvoice-0.4.1 → abstractvoice-0.5.0}/abstractvoice/examples/web_api.py +0 -0
- {abstractvoice-0.4.1 → abstractvoice-0.5.0}/abstractvoice/recognition.py +0 -0
- {abstractvoice-0.4.1 → abstractvoice-0.5.0}/abstractvoice/stt/__init__.py +0 -0
- {abstractvoice-0.4.1 → abstractvoice-0.5.0}/abstractvoice/stt/transcriber.py +0 -0
- {abstractvoice-0.4.1 → abstractvoice-0.5.0}/abstractvoice/tts/__init__.py +0 -0
- {abstractvoice-0.4.1 → abstractvoice-0.5.0}/abstractvoice/vad/__init__.py +0 -0
- {abstractvoice-0.4.1 → abstractvoice-0.5.0}/abstractvoice/vad/voice_detector.py +0 -0
- {abstractvoice-0.4.1 → abstractvoice-0.5.0}/abstractvoice.egg-info/dependency_links.txt +0 -0
- {abstractvoice-0.4.1 → abstractvoice-0.5.0}/abstractvoice.egg-info/entry_points.txt +0 -0
- {abstractvoice-0.4.1 → abstractvoice-0.5.0}/abstractvoice.egg-info/top_level.txt +0 -0
- {abstractvoice-0.4.1 → abstractvoice-0.5.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: abstractvoice
|
|
3
|
-
Version: 0.4.1
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: A modular Python library for voice interactions with AI systems
|
|
5
5
|
Author-email: Laurent-Philippe Albou <contact@abstractcore.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -19,6 +19,14 @@ Description-Content-Type: text/markdown
|
|
|
19
19
|
License-File: LICENSE
|
|
20
20
|
Requires-Dist: numpy>=1.24.0
|
|
21
21
|
Requires-Dist: requests>=2.31.0
|
|
22
|
+
Requires-Dist: appdirs>=1.4.0
|
|
23
|
+
Requires-Dist: coqui-tts<0.30.0,>=0.27.0
|
|
24
|
+
Requires-Dist: torch<2.4.0,>=2.0.0
|
|
25
|
+
Requires-Dist: torchvision<0.19.0,>=0.15.0
|
|
26
|
+
Requires-Dist: torchaudio<2.4.0,>=2.0.0
|
|
27
|
+
Requires-Dist: librosa>=0.10.0
|
|
28
|
+
Requires-Dist: sounddevice>=0.4.6
|
|
29
|
+
Requires-Dist: soundfile>=0.12.1
|
|
22
30
|
Provides-Extra: voice
|
|
23
31
|
Requires-Dist: sounddevice>=0.4.6; extra == "voice"
|
|
24
32
|
Requires-Dist: webrtcvad>=2.0.10; extra == "voice"
|
|
@@ -164,38 +172,51 @@ AbstractVoice automatically detects espeak-ng and upgrades to premium quality vo
|
|
|
164
172
|
|
|
165
173
|
## Quick Start
|
|
166
174
|
|
|
167
|
-
### ⚡ Instant TTS (v0.
|
|
175
|
+
### ⚡ Instant TTS (v0.5.0+)
|
|
168
176
|
|
|
169
177
|
```python
|
|
170
178
|
from abstractvoice import VoiceManager
|
|
171
179
|
|
|
172
|
-
# Initialize voice manager -
|
|
180
|
+
# Initialize voice manager - works immediately with included dependencies
|
|
173
181
|
vm = VoiceManager()
|
|
174
182
|
|
|
175
|
-
# Text-to-speech works
|
|
183
|
+
# Text-to-speech works right away!
|
|
176
184
|
vm.speak("Hello! TTS works out of the box!")
|
|
185
|
+
|
|
186
|
+
# Language switching with automatic model download
|
|
187
|
+
vm.set_language('fr')
|
|
188
|
+
vm.speak("Bonjour! Le français fonctionne aussi!")
|
|
177
189
|
```
|
|
178
190
|
|
|
179
|
-
**That's it!** AbstractVoice v0.
|
|
180
|
-
- ✅
|
|
181
|
-
- ✅
|
|
182
|
-
- ✅ Works immediately after
|
|
191
|
+
**That's it!** AbstractVoice v0.5.0+ automatically:
|
|
192
|
+
- ✅ Includes essential TTS dependencies in base installation
|
|
193
|
+
- ✅ Downloads models automatically when switching languages/voices
|
|
194
|
+
- ✅ Works immediately after `pip install abstractvoice`
|
|
195
|
+
- ✅ No silent failures - clear error messages if download fails
|
|
183
196
|
- ✅ No complex configuration needed
|
|
184
197
|
|
|
185
|
-
### 🌍 Multi-Language Support
|
|
198
|
+
### 🌍 Multi-Language Support (Auto-Download in v0.5.0+)
|
|
186
199
|
|
|
187
200
|
```python
|
|
188
|
-
#
|
|
189
|
-
vm.download_model('fr.css10_vits') # Downloads automatically
|
|
201
|
+
# Simply switch language - downloads model automatically if needed!
|
|
190
202
|
vm.set_language('fr')
|
|
191
203
|
vm.speak("Bonjour! Je parle français maintenant.")
|
|
192
204
|
|
|
193
|
-
#
|
|
194
|
-
vm.download_model('de.thorsten_vits')
|
|
205
|
+
# Switch to German - no manual download needed
|
|
195
206
|
vm.set_language('de')
|
|
196
207
|
vm.speak("Hallo! Ich spreche jetzt Deutsch.")
|
|
208
|
+
|
|
209
|
+
# Spanish, Italian also supported
|
|
210
|
+
vm.set_language('es')
|
|
211
|
+
vm.speak("¡Hola! Hablo español ahora.")
|
|
212
|
+
|
|
213
|
+
# If download fails, you'll get clear error messages with instructions
|
|
214
|
+
# Example: "❌ Cannot switch to French: Model download failed"
|
|
215
|
+
# " Try: abstractvoice download-models --language fr"
|
|
197
216
|
```
|
|
198
217
|
|
|
218
|
+
**New in v0.5.0:** No more manual `download_model()` calls! Language switching handles downloads automatically.
|
|
219
|
+
|
|
199
220
|
### 🔧 Check System Status
|
|
200
221
|
|
|
201
222
|
```python
|
|
@@ -1363,20 +1384,22 @@ abstractvoice check-deps
|
|
|
1363
1384
|
|
|
1364
1385
|
### CLI Voice Commands
|
|
1365
1386
|
|
|
1366
|
-
In the CLI REPL, use these commands:
|
|
1387
|
+
In the CLI REPL, use these commands (v0.5.0+):
|
|
1367
1388
|
|
|
1368
1389
|
```bash
|
|
1369
1390
|
# List all available voices with download status
|
|
1370
1391
|
/setvoice
|
|
1371
1392
|
|
|
1372
|
-
#
|
|
1373
|
-
/setvoice fr.css10_vits # French CSS10
|
|
1374
|
-
/setvoice de.thorsten_vits # German Thorsten
|
|
1375
|
-
/setvoice it.mai_male_vits # Italian Male
|
|
1393
|
+
# Automatically download and set specific voice (NEW in v0.5.0!)
|
|
1394
|
+
/setvoice fr.css10_vits # Downloads French CSS10 if needed
|
|
1395
|
+
/setvoice de.thorsten_vits # Downloads German Thorsten if needed
|
|
1396
|
+
/setvoice it.mai_male_vits # Downloads Italian Male if needed
|
|
1397
|
+
/setvoice en.jenny # Downloads Jenny voice if needed
|
|
1376
1398
|
|
|
1377
|
-
# Change language
|
|
1378
|
-
/language fr
|
|
1379
|
-
/language de
|
|
1399
|
+
# Change language (automatically downloads models if needed - NEW!)
|
|
1400
|
+
/language fr # Switches to French, downloads if needed
|
|
1401
|
+
/language de # Switches to German, downloads if needed
|
|
1402
|
+
/language es # Switches to Spanish, downloads if needed
|
|
1380
1403
|
|
|
1381
1404
|
# Voice controls
|
|
1382
1405
|
/pause # Pause current speech
|
|
@@ -1387,6 +1410,8 @@ In the CLI REPL, use these commands:
|
|
|
1387
1410
|
/exit
|
|
1388
1411
|
```
|
|
1389
1412
|
|
|
1413
|
+
**New in v0.5.0:** Language and voice commands now automatically download missing models with progress indicators. No more silent failures!
|
|
1414
|
+
|
|
1390
1415
|
## Perspectives
|
|
1391
1416
|
|
|
1392
1417
|
This is a test project that I designed with examples to work with Ollama, but I will adapt the examples and abstractvoice to work with any LLM provider (anthropic, openai, etc).
|
|
@@ -82,38 +82,51 @@ AbstractVoice automatically detects espeak-ng and upgrades to premium quality vo
|
|
|
82
82
|
|
|
83
83
|
## Quick Start
|
|
84
84
|
|
|
85
|
-
### ⚡ Instant TTS (v0.
|
|
85
|
+
### ⚡ Instant TTS (v0.5.0+)
|
|
86
86
|
|
|
87
87
|
```python
|
|
88
88
|
from abstractvoice import VoiceManager
|
|
89
89
|
|
|
90
|
-
# Initialize voice manager -
|
|
90
|
+
# Initialize voice manager - works immediately with included dependencies
|
|
91
91
|
vm = VoiceManager()
|
|
92
92
|
|
|
93
|
-
# Text-to-speech works
|
|
93
|
+
# Text-to-speech works right away!
|
|
94
94
|
vm.speak("Hello! TTS works out of the box!")
|
|
95
|
+
|
|
96
|
+
# Language switching with automatic model download
|
|
97
|
+
vm.set_language('fr')
|
|
98
|
+
vm.speak("Bonjour! Le français fonctionne aussi!")
|
|
95
99
|
```
|
|
96
100
|
|
|
97
|
-
**That's it!** AbstractVoice v0.
|
|
98
|
-
- ✅
|
|
99
|
-
- ✅
|
|
100
|
-
- ✅ Works immediately after
|
|
101
|
+
**That's it!** AbstractVoice v0.5.0+ automatically:
|
|
102
|
+
- ✅ Includes essential TTS dependencies in base installation
|
|
103
|
+
- ✅ Downloads models automatically when switching languages/voices
|
|
104
|
+
- ✅ Works immediately after `pip install abstractvoice`
|
|
105
|
+
- ✅ No silent failures - clear error messages if download fails
|
|
101
106
|
- ✅ No complex configuration needed
|
|
102
107
|
|
|
103
|
-
### 🌍 Multi-Language Support
|
|
108
|
+
### 🌍 Multi-Language Support (Auto-Download in v0.5.0+)
|
|
104
109
|
|
|
105
110
|
```python
|
|
106
|
-
#
|
|
107
|
-
vm.download_model('fr.css10_vits') # Downloads automatically
|
|
111
|
+
# Simply switch language - downloads model automatically if needed!
|
|
108
112
|
vm.set_language('fr')
|
|
109
113
|
vm.speak("Bonjour! Je parle français maintenant.")
|
|
110
114
|
|
|
111
|
-
#
|
|
112
|
-
vm.download_model('de.thorsten_vits')
|
|
115
|
+
# Switch to German - no manual download needed
|
|
113
116
|
vm.set_language('de')
|
|
114
117
|
vm.speak("Hallo! Ich spreche jetzt Deutsch.")
|
|
118
|
+
|
|
119
|
+
# Spanish, Italian also supported
|
|
120
|
+
vm.set_language('es')
|
|
121
|
+
vm.speak("¡Hola! Hablo español ahora.")
|
|
122
|
+
|
|
123
|
+
# If download fails, you'll get clear error messages with instructions
|
|
124
|
+
# Example: "❌ Cannot switch to French: Model download failed"
|
|
125
|
+
# " Try: abstractvoice download-models --language fr"
|
|
115
126
|
```
|
|
116
127
|
|
|
128
|
+
**New in v0.5.0:** No more manual `download_model()` calls! Language switching handles downloads automatically.
|
|
129
|
+
|
|
117
130
|
### 🔧 Check System Status
|
|
118
131
|
|
|
119
132
|
```python
|
|
@@ -1281,20 +1294,22 @@ abstractvoice check-deps
|
|
|
1281
1294
|
|
|
1282
1295
|
### CLI Voice Commands
|
|
1283
1296
|
|
|
1284
|
-
In the CLI REPL, use these commands:
|
|
1297
|
+
In the CLI REPL, use these commands (v0.5.0+):
|
|
1285
1298
|
|
|
1286
1299
|
```bash
|
|
1287
1300
|
# List all available voices with download status
|
|
1288
1301
|
/setvoice
|
|
1289
1302
|
|
|
1290
|
-
#
|
|
1291
|
-
/setvoice fr.css10_vits # French CSS10
|
|
1292
|
-
/setvoice de.thorsten_vits # German Thorsten
|
|
1293
|
-
/setvoice it.mai_male_vits # Italian Male
|
|
1303
|
+
# Automatically download and set specific voice (NEW in v0.5.0!)
|
|
1304
|
+
/setvoice fr.css10_vits # Downloads French CSS10 if needed
|
|
1305
|
+
/setvoice de.thorsten_vits # Downloads German Thorsten if needed
|
|
1306
|
+
/setvoice it.mai_male_vits # Downloads Italian Male if needed
|
|
1307
|
+
/setvoice en.jenny # Downloads Jenny voice if needed
|
|
1294
1308
|
|
|
1295
|
-
# Change language
|
|
1296
|
-
/language fr
|
|
1297
|
-
/language de
|
|
1309
|
+
# Change language (automatically downloads models if needed - NEW!)
|
|
1310
|
+
/language fr # Switches to French, downloads if needed
|
|
1311
|
+
/language de # Switches to German, downloads if needed
|
|
1312
|
+
/language es # Switches to Spanish, downloads if needed
|
|
1298
1313
|
|
|
1299
1314
|
# Voice controls
|
|
1300
1315
|
/pause # Pause current speech
|
|
@@ -1305,6 +1320,8 @@ In the CLI REPL, use these commands:
|
|
|
1305
1320
|
/exit
|
|
1306
1321
|
```
|
|
1307
1322
|
|
|
1323
|
+
**New in v0.5.0:** Language and voice commands now automatically download missing models with progress indicators. No more silent failures!
|
|
1324
|
+
|
|
1308
1325
|
## Perspectives
|
|
1309
1326
|
|
|
1310
1327
|
This is a test project that I designed with examples to work with Ollama, but I will adapt the examples and abstractvoice to work with any LLM provider (anthropic, openai, etc).
|
|
@@ -32,5 +32,5 @@ from .voice_manager import VoiceManager
|
|
|
32
32
|
# Import simple APIs for third-party applications
|
|
33
33
|
from .simple_model_manager import list_models, download_model, get_status, is_ready
|
|
34
34
|
|
|
35
|
-
__version__ = "0.4.1"
|
|
35
|
+
__version__ = "0.5.0"
|
|
36
36
|
__all__ = ['VoiceManager', 'list_models', 'download_model', 'get_status', 'is_ready']
|
|
@@ -158,7 +158,7 @@ def main():
|
|
|
158
158
|
traceback.print_exc()
|
|
159
159
|
return
|
|
160
160
|
elif args.command == "download-models":
|
|
161
|
-
from abstractvoice.model_manager import download_models_cli
|
|
161
|
+
from abstractvoice.simple_model_manager import download_models_cli
|
|
162
162
|
# Pass remaining arguments to download_models_cli
|
|
163
163
|
import sys
|
|
164
164
|
original_argv = sys.argv
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Instant Setup Module for AbstractVoice
|
|
3
|
+
Provides immediate TTS functionality with seamless model download.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import os
|
|
7
|
+
import sys
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
# Essential model for instant functionality.
# Kept in sync with SimpleModelManager.ESSENTIAL_MODEL, which moved from
# fast_pitch to tacotron2-DDC because fast_pitch downloads were failing.
ESSENTIAL_MODEL = "tts_models/en/ljspeech/tacotron2-DDC"
# Human-readable download size shown to the user during first-time setup
# (the model manager's metadata lists this model at 362 MB).
ESSENTIAL_MODEL_SIZE = "~362MB"
|
|
13
|
+
|
|
14
|
+
def ensure_instant_tts():
    """
    Ensure TTS is ready for immediate use.

    Returns immediately when ``is_model_cached`` reports the essential model
    as present; otherwise instantiates ``TTS`` with a progress bar, which
    triggers the model download.

    Returns:
        bool: True if TTS is ready, False if the TTS package cannot be
        imported or the model setup raised an error.
    """
    # Guard only the import: a missing TTS install is reported with an
    # installation hint instead of propagating ImportError to the caller.
    try:
        from TTS.api import TTS
    except ImportError as e:
        print(f"❌ Missing dependencies: {e}")
        print("💡 Install with: pip install abstractvoice[all]")
        return False

    # Check if essential model is already cached
    if is_model_cached(ESSENTIAL_MODEL):
        return True

    # Download essential model with user-friendly progress
    print(f"🚀 AbstractVoice: Setting up TTS ({ESSENTIAL_MODEL_SIZE})...")
    print(" This happens once and takes ~30 seconds")

    try:
        # Constructing TTS is what downloads the model; the instance itself
        # is not needed afterwards.
        TTS(model_name=ESSENTIAL_MODEL, progress_bar=True)
    except Exception as e:
        print(f"❌ Setup failed: {e}")
        print("💡 Try: pip install abstractvoice[all]")
        return False

    print("✅ TTS ready! AbstractVoice is now fully functional.")
    return True
|
|
51
|
+
|
|
52
|
+
def is_model_cached(model_name):
    """Check if a model is already cached.

    Looks for a ``.models.json`` file under the TTS ``ModelManager`` output
    prefix and, when present, reports whether ``model_name`` is a key in it.
    Without that file, falls back to checking for a non-empty directory named
    after the model (``/`` replaced by ``--``) under the same prefix.

    Args:
        model_name: Full TTS model name, e.g.
            ``"tts_models/en/ljspeech/fast_pitch"``.

    Returns:
        bool: True if the model appears cached; False otherwise, including
        when TTS is not installed or any lookup step fails.
    """
    try:
        from TTS.utils.manage import ModelManager
    except ImportError:
        # Without the TTS package nothing can be cached.
        return False

    try:
        manager = ModelManager()

        # Get cached models list
        models_file = os.path.join(manager.output_prefix, ".models.json")
        if os.path.exists(models_file):
            import json
            with open(models_file, "r", encoding="utf-8") as f:
                cached_models = json.load(f)
            return model_name in cached_models

        # Fallback: check if model directory exists and has content
        model_dir = model_name.replace("/", "--")
        model_path = os.path.join(manager.output_prefix, model_dir)
        return os.path.isdir(model_path) and bool(os.listdir(model_path))

    except Exception:
        # Best-effort check: any failure means "assume not cached".
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit through.
        return False
|
|
74
|
+
|
|
75
|
+
def get_instant_model():
    """Return the name of the essential model used by instant setup."""
    model_name = ESSENTIAL_MODEL
    return model_name
|
|
78
|
+
|
|
79
|
+
if __name__ == "__main__":
    # Manual smoke test: run the setup once and report the outcome.
    print("🧪 Testing instant setup...")
    success = ensure_instant_tts()
    if success:
        outcome = '✅ Ready'
    else:
        outcome = '❌ Failed'
    print(f"Result: {outcome}")
|
|
@@ -31,37 +31,65 @@ class SimpleModelManager:
|
|
|
31
31
|
"""Simple, clean model manager for AbstractVoice."""
|
|
32
32
|
|
|
33
33
|
# Essential model - guaranteed to work everywhere, reasonable size
|
|
34
|
-
|
|
34
|
+
# Changed from fast_pitch to tacotron2-DDC because fast_pitch downloads are failing
|
|
35
|
+
ESSENTIAL_MODEL = "tts_models/en/ljspeech/tacotron2-DDC"
|
|
35
36
|
|
|
36
37
|
# Available models organized by language with metadata
|
|
37
38
|
AVAILABLE_MODELS = {
|
|
38
39
|
"en": {
|
|
40
|
+
"tacotron2": {
|
|
41
|
+
"model": "tts_models/en/ljspeech/tacotron2-DDC",
|
|
42
|
+
"name": "Linda (LJSpeech)",
|
|
43
|
+
"quality": "good",
|
|
44
|
+
"size_mb": 362,
|
|
45
|
+
"description": "Standard female voice (LJSpeech speaker)",
|
|
46
|
+
"requires_espeak": False,
|
|
47
|
+
"default": True
|
|
48
|
+
},
|
|
49
|
+
"jenny": {
|
|
50
|
+
"model": "tts_models/en/jenny/jenny",
|
|
51
|
+
"name": "Jenny",
|
|
52
|
+
"quality": "excellent",
|
|
53
|
+
"size_mb": 368,
|
|
54
|
+
"description": "Different female voice, clear and natural",
|
|
55
|
+
"requires_espeak": False,
|
|
56
|
+
"default": False
|
|
57
|
+
},
|
|
58
|
+
"ek1": {
|
|
59
|
+
"model": "tts_models/en/ek1/tacotron2",
|
|
60
|
+
"name": "Edward (EK1)",
|
|
61
|
+
"quality": "excellent",
|
|
62
|
+
"size_mb": 310,
|
|
63
|
+
"description": "Male voice with British accent",
|
|
64
|
+
"requires_espeak": False,
|
|
65
|
+
"default": False
|
|
66
|
+
},
|
|
67
|
+
"sam": {
|
|
68
|
+
"model": "tts_models/en/sam/tacotron-DDC",
|
|
69
|
+
"name": "Sam",
|
|
70
|
+
"quality": "good",
|
|
71
|
+
"size_mb": 370,
|
|
72
|
+
"description": "Different male voice, deeper tone",
|
|
73
|
+
"requires_espeak": False,
|
|
74
|
+
"default": False
|
|
75
|
+
},
|
|
39
76
|
"fast_pitch": {
|
|
40
77
|
"model": "tts_models/en/ljspeech/fast_pitch",
|
|
41
|
-
"name": "Fast
|
|
78
|
+
"name": "Linda Fast (LJSpeech)",
|
|
42
79
|
"quality": "good",
|
|
43
80
|
"size_mb": 107,
|
|
44
|
-
"description": "
|
|
81
|
+
"description": "Same speaker as Linda but faster engine",
|
|
45
82
|
"requires_espeak": False,
|
|
46
|
-
"default":
|
|
83
|
+
"default": False
|
|
47
84
|
},
|
|
48
85
|
"vits": {
|
|
49
86
|
"model": "tts_models/en/ljspeech/vits",
|
|
50
|
-
"name": "
|
|
87
|
+
"name": "Linda Premium (LJSpeech)",
|
|
51
88
|
"quality": "excellent",
|
|
52
89
|
"size_mb": 328,
|
|
53
|
-
"description": "
|
|
90
|
+
"description": "Same speaker as Linda but premium quality",
|
|
54
91
|
"requires_espeak": True,
|
|
55
92
|
"default": False
|
|
56
|
-
},
|
|
57
|
-
"tacotron2": {
|
|
58
|
-
"model": "tts_models/en/ljspeech/tacotron2-DDC",
|
|
59
|
-
"name": "Tacotron2 (English)",
|
|
60
|
-
"quality": "good",
|
|
61
|
-
"size_mb": 362,
|
|
62
|
-
"description": "Classic English voice, reliable",
|
|
63
|
-
"requires_espeak": False,
|
|
64
|
-
"default": False
|
|
65
93
|
}
|
|
66
94
|
},
|
|
67
95
|
"fr": {
|
|
@@ -184,7 +212,7 @@ class SimpleModelManager:
|
|
|
184
212
|
return False
|
|
185
213
|
|
|
186
214
|
def download_model(self, model_name: str, progress_callback: Optional[Callable[[str, bool], None]] = None) -> bool:
|
|
187
|
-
"""Download a specific model.
|
|
215
|
+
"""Download a specific model with improved error handling.
|
|
188
216
|
|
|
189
217
|
Args:
|
|
190
218
|
model_name: TTS model name (e.g., 'tts_models/en/ljspeech/fast_pitch')
|
|
@@ -203,25 +231,56 @@ class SimpleModelManager:
|
|
|
203
231
|
try:
|
|
204
232
|
TTS, _ = _import_tts()
|
|
205
233
|
|
|
206
|
-
|
|
207
|
-
|
|
234
|
+
print(f"📥 Downloading {model_name}...")
|
|
235
|
+
print(f" This may take a few minutes depending on your connection...")
|
|
208
236
|
|
|
209
237
|
start_time = time.time()
|
|
210
238
|
|
|
211
239
|
# Initialize TTS to trigger download
|
|
212
|
-
|
|
240
|
+
# Set gpu=False to avoid CUDA errors on systems without GPU
|
|
241
|
+
try:
|
|
242
|
+
tts = TTS(model_name=model_name, progress_bar=True, gpu=False)
|
|
243
|
+
|
|
244
|
+
# Verify the model actually downloaded
|
|
245
|
+
if not self.is_model_cached(model_name):
|
|
246
|
+
print(f"⚠️ Model download completed but not found in cache")
|
|
247
|
+
return False
|
|
248
|
+
|
|
249
|
+
except Exception as init_error:
|
|
250
|
+
# Try alternative download method
|
|
251
|
+
error_msg = str(init_error).lower()
|
|
252
|
+
if "connection" in error_msg or "timeout" in error_msg:
|
|
253
|
+
print(f"❌ Network error: Check your internet connection")
|
|
254
|
+
elif "not found" in error_msg:
|
|
255
|
+
print(f"❌ Model '{model_name}' not found in registry")
|
|
256
|
+
else:
|
|
257
|
+
print(f"❌ Download error: {init_error}")
|
|
258
|
+
raise
|
|
213
259
|
|
|
214
260
|
download_time = time.time() - start_time
|
|
215
|
-
|
|
216
|
-
print(f"✅ Downloaded {model_name} in {download_time:.1f}s")
|
|
261
|
+
print(f"✅ Downloaded {model_name} in {download_time:.1f}s")
|
|
217
262
|
|
|
218
263
|
if progress_callback:
|
|
219
264
|
progress_callback(model_name, True)
|
|
220
265
|
return True
|
|
221
266
|
|
|
222
267
|
except Exception as e:
|
|
223
|
-
|
|
224
|
-
|
|
268
|
+
error_msg = str(e).lower()
|
|
269
|
+
|
|
270
|
+
# Provide helpful error messages
|
|
271
|
+
if "connection" in error_msg or "timeout" in error_msg:
|
|
272
|
+
print(f"❌ Failed to download {model_name}: Network issue")
|
|
273
|
+
print(f" Check your internet connection and try again")
|
|
274
|
+
elif "permission" in error_msg:
|
|
275
|
+
print(f"❌ Failed to download {model_name}: Permission denied")
|
|
276
|
+
print(f" Check write permissions for cache directory")
|
|
277
|
+
elif "space" in error_msg:
|
|
278
|
+
print(f"❌ Failed to download {model_name}: Insufficient disk space")
|
|
279
|
+
else:
|
|
280
|
+
print(f"❌ Failed to download {model_name}")
|
|
281
|
+
if self.debug_mode:
|
|
282
|
+
print(f" Error: {e}")
|
|
283
|
+
|
|
225
284
|
if progress_callback:
|
|
226
285
|
progress_callback(model_name, False)
|
|
227
286
|
return False
|
|
@@ -395,4 +454,86 @@ def get_status() -> str:
|
|
|
395
454
|
def is_ready() -> bool:
    """Report whether the essential model is already cached and usable."""
    model_manager = get_model_manager()
    essential = model_manager.ESSENTIAL_MODEL
    return model_manager.is_model_cached(essential)
|
|
458
|
+
|
|
459
|
+
|
|
460
|
+
def download_models_cli():
    """Simple CLI entry point for downloading models.

    Parses ``sys.argv`` and performs the first matching action, checked in
    this order: ``--status``, ``--clear``, ``--model``, ``--language``,
    ``--all``. With no action flag (or only ``--essential``) the essential
    model is downloaded.

    ``--status`` and ``--clear`` return normally. Every download action ends
    the process via ``sys.exit`` with status 0 on success and 1 on failure.
    """
    import argparse
    import sys

    parser = argparse.ArgumentParser(description="Download TTS models for offline use")
    parser.add_argument("--essential", action="store_true",
                        help="Download essential model (default)")
    parser.add_argument("--all", action="store_true",
                        help="Download all available models")
    parser.add_argument("--model", type=str,
                        help="Download specific model by name")
    parser.add_argument("--language", type=str,
                        help="Download models for specific language (en, fr, es, de, it)")
    parser.add_argument("--status", action="store_true",
                        help="Show current cache status")
    parser.add_argument("--clear", action="store_true",
                        help="Clear model cache")

    args = parser.parse_args()

    manager = get_model_manager(debug_mode=True)

    if args.status:
        print(get_status())
        return

    if args.clear:
        # Ask for confirmation; anything other than "y" cancels.
        response = input("⚠️ This will delete all downloaded TTS models. Continue? (y/N): ")
        if response.strip().lower() == 'y':
            if manager.clear_cache(confirm=True):
                print("✅ Model cache cleared")
            else:
                print("❌ Failed to clear cache")
        else:
            print("Cancelled")
        return

    if args.model:
        success = download_model(args.model)
        if success:
            print(f"✅ Downloaded {args.model}")
        else:
            print(f"❌ Failed to download {args.model}")
        sys.exit(0 if success else 1)

    if args.language:
        # Language-specific downloads using our simple API
        lang_models = {
            'en': ['en.tacotron2', 'en.jenny', 'en.ek1'],
            'fr': ['fr.css10_vits', 'fr.mai_tacotron2'],
            'es': ['es.mai_tacotron2'],
            'de': ['de.thorsten_vits'],
            'it': ['it.mai_male_vits', 'it.mai_female_vits'],
        }

        if args.language not in lang_models:
            print(f"❌ Language '{args.language}' not supported")
            print(f" Available: {list(lang_models.keys())}")
            sys.exit(1)

        # Candidates are tried in order and the loop stops at the first
        # successful download; later entries only run if earlier ones fail.
        success = False
        for model_id in lang_models[args.language]:
            if download_model(model_id):
                print(f"✅ Downloaded {model_id}")
                success = True
                break

        sys.exit(0 if success else 1)

    if args.all:
        # --all used to be parsed but ignored, silently falling through to the
        # essential-model download. Walk every "<language>.<voice>" entry the
        # manager advertises and report failure if any of them fails.
        failed = []
        for lang, voices in manager.AVAILABLE_MODELS.items():
            for voice_id in voices:
                model_id = f"{lang}.{voice_id}"
                if download_model(model_id):
                    print(f"✅ Downloaded {model_id}")
                else:
                    print(f"❌ Failed to download {model_id}")
                    failed.append(model_id)
        sys.exit(1 if failed else 0)

    # Default: download essential model
    print("📦 Downloading essential TTS model...")
    success = download_model(manager.ESSENTIAL_MODEL)
    if success:
        print("✅ Essential model ready!")
    else:
        print("❌ Failed to download essential model")
    sys.exit(0 if success else 1)
|