abstractvoice 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractvoice/__init__.py +1 -1
- abstractvoice/examples/voice_cli.py +1 -1
- abstractvoice/instant_setup.py +83 -0
- abstractvoice/simple_model_manager.py +165 -24
- abstractvoice/tts/tts_engine.py +151 -38
- abstractvoice/voice_manager.py +144 -28
- {abstractvoice-0.4.1.dist-info → abstractvoice-0.5.0.dist-info}/METADATA +46 -21
- {abstractvoice-0.4.1.dist-info → abstractvoice-0.5.0.dist-info}/RECORD +12 -12
- abstractvoice/model_manager.py +0 -384
- {abstractvoice-0.4.1.dist-info → abstractvoice-0.5.0.dist-info}/WHEEL +0 -0
- {abstractvoice-0.4.1.dist-info → abstractvoice-0.5.0.dist-info}/entry_points.txt +0 -0
- {abstractvoice-0.4.1.dist-info → abstractvoice-0.5.0.dist-info}/licenses/LICENSE +0 -0
- {abstractvoice-0.4.1.dist-info → abstractvoice-0.5.0.dist-info}/top_level.txt +0 -0
abstractvoice/__init__.py
CHANGED
|
@@ -32,5 +32,5 @@ from .voice_manager import VoiceManager
|
|
|
32
32
|
# Import simple APIs for third-party applications
|
|
33
33
|
from .simple_model_manager import list_models, download_model, get_status, is_ready
|
|
34
34
|
|
|
35
|
-
__version__ = "0.4.1"
|
|
35
|
+
__version__ = "0.5.0"
|
|
36
36
|
__all__ = ['VoiceManager', 'list_models', 'download_model', 'get_status', 'is_ready']
|
|
abstractvoice/examples/voice_cli.py
CHANGED
|
@@ -158,7 +158,7 @@ def main():
|
|
|
158
158
|
traceback.print_exc()
|
|
159
159
|
return
|
|
160
160
|
elif args.command == "download-models":
|
|
161
|
-
from abstractvoice.model_manager import download_models_cli
|
|
161
|
+
from abstractvoice.simple_model_manager import download_models_cli
|
|
162
162
|
# Pass remaining arguments to download_models_cli
|
|
163
163
|
import sys
|
|
164
164
|
original_argv = sys.argv
|
|
abstractvoice/instant_setup.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Instant Setup Module for AbstractVoice
|
|
3
|
+
Provides immediate TTS functionality with seamless model download.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import os
|
|
7
|
+
import sys
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
# Essential model for instant functionality (lightweight, reliable)
|
|
11
|
+
ESSENTIAL_MODEL = "tts_models/en/ljspeech/fast_pitch"
|
|
12
|
+
ESSENTIAL_MODEL_SIZE = "~100MB"
|
|
13
|
+
|
|
14
|
+
def ensure_instant_tts():
|
|
15
|
+
"""
|
|
16
|
+
Ensure TTS is ready for immediate use.
|
|
17
|
+
Downloads essential model if needed with progress indicator.
|
|
18
|
+
|
|
19
|
+
Returns:
|
|
20
|
+
bool: True if TTS is ready, False if failed
|
|
21
|
+
"""
|
|
22
|
+
try:
|
|
23
|
+
from TTS.api import TTS
|
|
24
|
+
from TTS.utils.manage import ModelManager
|
|
25
|
+
|
|
26
|
+
manager = ModelManager()
|
|
27
|
+
|
|
28
|
+
# Check if essential model is already cached
|
|
29
|
+
if is_model_cached(ESSENTIAL_MODEL):
|
|
30
|
+
return True
|
|
31
|
+
|
|
32
|
+
# Download essential model with user-friendly progress
|
|
33
|
+
print(f"🚀 AbstractVoice: Setting up TTS ({ESSENTIAL_MODEL_SIZE})...")
|
|
34
|
+
print(f" This happens once and takes ~30 seconds")
|
|
35
|
+
|
|
36
|
+
try:
|
|
37
|
+
# Download with progress bar
|
|
38
|
+
tts = TTS(model_name=ESSENTIAL_MODEL, progress_bar=True)
|
|
39
|
+
print(f"✅ TTS ready! AbstractVoice is now fully functional.")
|
|
40
|
+
return True
|
|
41
|
+
|
|
42
|
+
except Exception as e:
|
|
43
|
+
print(f"❌ Setup failed: {e}")
|
|
44
|
+
print(f"💡 Try: pip install abstractvoice[all]")
|
|
45
|
+
return False
|
|
46
|
+
|
|
47
|
+
except ImportError as e:
|
|
48
|
+
print(f"❌ Missing dependencies: {e}")
|
|
49
|
+
print(f"💡 Install with: pip install abstractvoice[all]")
|
|
50
|
+
return False
|
|
51
|
+
|
|
52
|
+
def is_model_cached(model_name):
|
|
53
|
+
"""Check if a model is already cached."""
|
|
54
|
+
try:
|
|
55
|
+
from TTS.utils.manage import ModelManager
|
|
56
|
+
manager = ModelManager()
|
|
57
|
+
|
|
58
|
+
# Get cached models list
|
|
59
|
+
models_file = os.path.join(manager.output_prefix, ".models.json")
|
|
60
|
+
if os.path.exists(models_file):
|
|
61
|
+
import json
|
|
62
|
+
with open(models_file, 'r') as f:
|
|
63
|
+
cached_models = json.load(f)
|
|
64
|
+
return model_name in cached_models
|
|
65
|
+
|
|
66
|
+
# Fallback: check if model directory exists and has content
|
|
67
|
+
model_dir = model_name.replace("/", "--")
|
|
68
|
+
model_path = os.path.join(manager.output_prefix, model_dir)
|
|
69
|
+
return os.path.exists(model_path) and bool(os.listdir(model_path))
|
|
70
|
+
|
|
71
|
+
except:
|
|
72
|
+
# If anything fails, assume not cached
|
|
73
|
+
return False
|
|
74
|
+
|
|
75
|
+
def get_instant_model():
|
|
76
|
+
"""Get the essential model name for instant setup."""
|
|
77
|
+
return ESSENTIAL_MODEL
|
|
78
|
+
|
|
79
|
+
if __name__ == "__main__":
|
|
80
|
+
# CLI test
|
|
81
|
+
print("🧪 Testing instant setup...")
|
|
82
|
+
success = ensure_instant_tts()
|
|
83
|
+
print(f"Result: {'✅ Ready' if success else '❌ Failed'}")
|
|
abstractvoice/simple_model_manager.py
CHANGED
|
@@ -31,37 +31,65 @@ class SimpleModelManager:
|
|
|
31
31
|
"""Simple, clean model manager for AbstractVoice."""
|
|
32
32
|
|
|
33
33
|
# Essential model - guaranteed to work everywhere, reasonable size
|
|
34
|
-
|
|
34
|
+
# Changed from fast_pitch to tacotron2-DDC because fast_pitch downloads are failing
|
|
35
|
+
ESSENTIAL_MODEL = "tts_models/en/ljspeech/tacotron2-DDC"
|
|
35
36
|
|
|
36
37
|
# Available models organized by language with metadata
|
|
37
38
|
AVAILABLE_MODELS = {
|
|
38
39
|
"en": {
|
|
40
|
+
"tacotron2": {
|
|
41
|
+
"model": "tts_models/en/ljspeech/tacotron2-DDC",
|
|
42
|
+
"name": "Linda (LJSpeech)",
|
|
43
|
+
"quality": "good",
|
|
44
|
+
"size_mb": 362,
|
|
45
|
+
"description": "Standard female voice (LJSpeech speaker)",
|
|
46
|
+
"requires_espeak": False,
|
|
47
|
+
"default": True
|
|
48
|
+
},
|
|
49
|
+
"jenny": {
|
|
50
|
+
"model": "tts_models/en/jenny/jenny",
|
|
51
|
+
"name": "Jenny",
|
|
52
|
+
"quality": "excellent",
|
|
53
|
+
"size_mb": 368,
|
|
54
|
+
"description": "Different female voice, clear and natural",
|
|
55
|
+
"requires_espeak": False,
|
|
56
|
+
"default": False
|
|
57
|
+
},
|
|
58
|
+
"ek1": {
|
|
59
|
+
"model": "tts_models/en/ek1/tacotron2",
|
|
60
|
+
"name": "Edward (EK1)",
|
|
61
|
+
"quality": "excellent",
|
|
62
|
+
"size_mb": 310,
|
|
63
|
+
"description": "Male voice with British accent",
|
|
64
|
+
"requires_espeak": False,
|
|
65
|
+
"default": False
|
|
66
|
+
},
|
|
67
|
+
"sam": {
|
|
68
|
+
"model": "tts_models/en/sam/tacotron-DDC",
|
|
69
|
+
"name": "Sam",
|
|
70
|
+
"quality": "good",
|
|
71
|
+
"size_mb": 370,
|
|
72
|
+
"description": "Different male voice, deeper tone",
|
|
73
|
+
"requires_espeak": False,
|
|
74
|
+
"default": False
|
|
75
|
+
},
|
|
39
76
|
"fast_pitch": {
|
|
40
77
|
"model": "tts_models/en/ljspeech/fast_pitch",
|
|
41
|
-
"name": "Fast
|
|
78
|
+
"name": "Linda Fast (LJSpeech)",
|
|
42
79
|
"quality": "good",
|
|
43
80
|
"size_mb": 107,
|
|
44
|
-
"description": "
|
|
81
|
+
"description": "Same speaker as Linda but faster engine",
|
|
45
82
|
"requires_espeak": False,
|
|
46
|
-
"default": True
|
|
83
|
+
"default": False
|
|
47
84
|
},
|
|
48
85
|
"vits": {
|
|
49
86
|
"model": "tts_models/en/ljspeech/vits",
|
|
50
|
-
"name": "
|
|
87
|
+
"name": "Linda Premium (LJSpeech)",
|
|
51
88
|
"quality": "excellent",
|
|
52
89
|
"size_mb": 328,
|
|
53
|
-
"description": "
|
|
90
|
+
"description": "Same speaker as Linda but premium quality",
|
|
54
91
|
"requires_espeak": True,
|
|
55
92
|
"default": False
|
|
56
|
-
},
|
|
57
|
-
"tacotron2": {
|
|
58
|
-
"model": "tts_models/en/ljspeech/tacotron2-DDC",
|
|
59
|
-
"name": "Tacotron2 (English)",
|
|
60
|
-
"quality": "good",
|
|
61
|
-
"size_mb": 362,
|
|
62
|
-
"description": "Classic English voice, reliable",
|
|
63
|
-
"requires_espeak": False,
|
|
64
|
-
"default": False
|
|
65
93
|
}
|
|
66
94
|
},
|
|
67
95
|
"fr": {
|
|
@@ -184,7 +212,7 @@ class SimpleModelManager:
|
|
|
184
212
|
return False
|
|
185
213
|
|
|
186
214
|
def download_model(self, model_name: str, progress_callback: Optional[Callable[[str, bool], None]] = None) -> bool:
|
|
187
|
-
"""Download a specific model.
|
|
215
|
+
"""Download a specific model with improved error handling.
|
|
188
216
|
|
|
189
217
|
Args:
|
|
190
218
|
model_name: TTS model name (e.g., 'tts_models/en/ljspeech/fast_pitch')
|
|
@@ -203,25 +231,56 @@ class SimpleModelManager:
|
|
|
203
231
|
try:
|
|
204
232
|
TTS, _ = _import_tts()
|
|
205
233
|
|
|
206
|
-
|
|
207
|
-
|
|
234
|
+
print(f"📥 Downloading {model_name}...")
|
|
235
|
+
print(f" This may take a few minutes depending on your connection...")
|
|
208
236
|
|
|
209
237
|
start_time = time.time()
|
|
210
238
|
|
|
211
239
|
# Initialize TTS to trigger download
|
|
212
|
-
|
|
240
|
+
# Set gpu=False to avoid CUDA errors on systems without GPU
|
|
241
|
+
try:
|
|
242
|
+
tts = TTS(model_name=model_name, progress_bar=True, gpu=False)
|
|
243
|
+
|
|
244
|
+
# Verify the model actually downloaded
|
|
245
|
+
if not self.is_model_cached(model_name):
|
|
246
|
+
print(f"⚠️ Model download completed but not found in cache")
|
|
247
|
+
return False
|
|
248
|
+
|
|
249
|
+
except Exception as init_error:
|
|
250
|
+
# Try alternative download method
|
|
251
|
+
error_msg = str(init_error).lower()
|
|
252
|
+
if "connection" in error_msg or "timeout" in error_msg:
|
|
253
|
+
print(f"❌ Network error: Check your internet connection")
|
|
254
|
+
elif "not found" in error_msg:
|
|
255
|
+
print(f"❌ Model '{model_name}' not found in registry")
|
|
256
|
+
else:
|
|
257
|
+
print(f"❌ Download error: {init_error}")
|
|
258
|
+
raise
|
|
213
259
|
|
|
214
260
|
download_time = time.time() - start_time
|
|
215
|
-
|
|
216
|
-
print(f"✅ Downloaded {model_name} in {download_time:.1f}s")
|
|
261
|
+
print(f"✅ Downloaded {model_name} in {download_time:.1f}s")
|
|
217
262
|
|
|
218
263
|
if progress_callback:
|
|
219
264
|
progress_callback(model_name, True)
|
|
220
265
|
return True
|
|
221
266
|
|
|
222
267
|
except Exception as e:
|
|
223
|
-
|
|
224
|
-
|
|
268
|
+
error_msg = str(e).lower()
|
|
269
|
+
|
|
270
|
+
# Provide helpful error messages
|
|
271
|
+
if "connection" in error_msg or "timeout" in error_msg:
|
|
272
|
+
print(f"❌ Failed to download {model_name}: Network issue")
|
|
273
|
+
print(f" Check your internet connection and try again")
|
|
274
|
+
elif "permission" in error_msg:
|
|
275
|
+
print(f"❌ Failed to download {model_name}: Permission denied")
|
|
276
|
+
print(f" Check write permissions for cache directory")
|
|
277
|
+
elif "space" in error_msg:
|
|
278
|
+
print(f"❌ Failed to download {model_name}: Insufficient disk space")
|
|
279
|
+
else:
|
|
280
|
+
print(f"❌ Failed to download {model_name}")
|
|
281
|
+
if self.debug_mode:
|
|
282
|
+
print(f" Error: {e}")
|
|
283
|
+
|
|
225
284
|
if progress_callback:
|
|
226
285
|
progress_callback(model_name, False)
|
|
227
286
|
return False
|
|
@@ -395,4 +454,86 @@ def get_status() -> str:
|
|
|
395
454
|
def is_ready() -> bool:
|
|
396
455
|
"""Check if essential model is ready for immediate use."""
|
|
397
456
|
manager = get_model_manager()
|
|
398
|
-
return manager.is_model_cached(manager.ESSENTIAL_MODEL)
|
|
457
|
+
return manager.is_model_cached(manager.ESSENTIAL_MODEL)
|
|
458
|
+
|
|
459
|
+
|
|
460
|
+
def download_models_cli():
|
|
461
|
+
"""Simple CLI entry point for downloading models."""
|
|
462
|
+
import argparse
|
|
463
|
+
import sys
|
|
464
|
+
|
|
465
|
+
parser = argparse.ArgumentParser(description="Download TTS models for offline use")
|
|
466
|
+
parser.add_argument("--essential", action="store_true",
|
|
467
|
+
help="Download essential model (default)")
|
|
468
|
+
parser.add_argument("--all", action="store_true",
|
|
469
|
+
help="Download all available models")
|
|
470
|
+
parser.add_argument("--model", type=str,
|
|
471
|
+
help="Download specific model by name")
|
|
472
|
+
parser.add_argument("--language", type=str,
|
|
473
|
+
help="Download models for specific language (en, fr, es, de, it)")
|
|
474
|
+
parser.add_argument("--status", action="store_true",
|
|
475
|
+
help="Show current cache status")
|
|
476
|
+
parser.add_argument("--clear", action="store_true",
|
|
477
|
+
help="Clear model cache")
|
|
478
|
+
|
|
479
|
+
args = parser.parse_args()
|
|
480
|
+
|
|
481
|
+
manager = get_model_manager(debug_mode=True)
|
|
482
|
+
|
|
483
|
+
if args.status:
|
|
484
|
+
print(get_status())
|
|
485
|
+
return
|
|
486
|
+
|
|
487
|
+
if args.clear:
|
|
488
|
+
# Ask for confirmation
|
|
489
|
+
response = input("⚠️ This will delete all downloaded TTS models. Continue? (y/N): ")
|
|
490
|
+
if response.lower() == 'y':
|
|
491
|
+
success = manager.clear_cache(confirm=True)
|
|
492
|
+
if success:
|
|
493
|
+
print("✅ Model cache cleared")
|
|
494
|
+
else:
|
|
495
|
+
print("❌ Failed to clear cache")
|
|
496
|
+
else:
|
|
497
|
+
print("Cancelled")
|
|
498
|
+
return
|
|
499
|
+
|
|
500
|
+
if args.model:
|
|
501
|
+
success = download_model(args.model)
|
|
502
|
+
if success:
|
|
503
|
+
print(f"✅ Downloaded {args.model}")
|
|
504
|
+
else:
|
|
505
|
+
print(f"❌ Failed to download {args.model}")
|
|
506
|
+
sys.exit(0 if success else 1)
|
|
507
|
+
|
|
508
|
+
if args.language:
|
|
509
|
+
# Language-specific downloads using our simple API
|
|
510
|
+
lang_models = {
|
|
511
|
+
'en': ['en.tacotron2', 'en.jenny', 'en.ek1'],
|
|
512
|
+
'fr': ['fr.css10_vits', 'fr.mai_tacotron2'],
|
|
513
|
+
'es': ['es.mai_tacotron2'],
|
|
514
|
+
'de': ['de.thorsten_vits'],
|
|
515
|
+
'it': ['it.mai_male_vits', 'it.mai_female_vits']
|
|
516
|
+
}
|
|
517
|
+
|
|
518
|
+
if args.language not in lang_models:
|
|
519
|
+
print(f"❌ Language '{args.language}' not supported")
|
|
520
|
+
print(f" Available: {list(lang_models.keys())}")
|
|
521
|
+
sys.exit(1)
|
|
522
|
+
|
|
523
|
+
success = False
|
|
524
|
+
for model_id in lang_models[args.language]:
|
|
525
|
+
if download_model(model_id):
|
|
526
|
+
print(f"✅ Downloaded {model_id}")
|
|
527
|
+
success = True
|
|
528
|
+
break
|
|
529
|
+
|
|
530
|
+
sys.exit(0 if success else 1)
|
|
531
|
+
|
|
532
|
+
# Default: download essential model
|
|
533
|
+
print("📦 Downloading essential TTS model...")
|
|
534
|
+
success = download_model(manager.ESSENTIAL_MODEL)
|
|
535
|
+
if success:
|
|
536
|
+
print("✅ Essential model ready!")
|
|
537
|
+
else:
|
|
538
|
+
print("❌ Failed to download essential model")
|
|
539
|
+
sys.exit(0 if success else 1)
|
abstractvoice/tts/tts_engine.py
CHANGED
|
@@ -300,11 +300,24 @@ class NonBlockingAudioPlayer:
|
|
|
300
300
|
print(f"Error stopping audio stream: {e}")
|
|
301
301
|
finally:
|
|
302
302
|
self.stream = None
|
|
303
|
-
|
|
303
|
+
|
|
304
304
|
self.is_playing = False
|
|
305
305
|
with self.pause_lock:
|
|
306
306
|
self.is_paused = False
|
|
307
307
|
self.clear_queue()
|
|
308
|
+
|
|
309
|
+
def cleanup(self):
|
|
310
|
+
"""Cleanup resources to prevent memory conflicts."""
|
|
311
|
+
try:
|
|
312
|
+
self.stop_stream()
|
|
313
|
+
# Clear any remaining references
|
|
314
|
+
self.current_audio = None
|
|
315
|
+
self.playback_complete_callback = None
|
|
316
|
+
if self.debug_mode:
|
|
317
|
+
print(" > Audio player cleaned up")
|
|
318
|
+
except Exception as e:
|
|
319
|
+
if self.debug_mode:
|
|
320
|
+
print(f"Audio cleanup warning: {e}")
|
|
308
321
|
|
|
309
322
|
def play_audio(self, audio_array):
|
|
310
323
|
"""Add audio to the playback queue."""
|
|
@@ -509,58 +522,155 @@ class TTSEngine:
|
|
|
509
522
|
self.is_paused_state = False # Explicit paused state tracking
|
|
510
523
|
|
|
511
524
|
def _load_with_simple_fallback(self, TTS, preferred_model: str, debug_mode: bool) -> tuple[bool, str]:
|
|
512
|
-
"""Load TTS model with
|
|
525
|
+
"""Load TTS model with bulletproof compatibility-first strategy."""
|
|
513
526
|
from ..simple_model_manager import get_model_manager
|
|
514
527
|
|
|
515
528
|
model_manager = get_model_manager(debug_mode=debug_mode)
|
|
516
529
|
|
|
517
|
-
#
|
|
518
|
-
|
|
530
|
+
# Step 1: Check espeak availability for smart model filtering
|
|
531
|
+
espeak_available = self._check_espeak_available()
|
|
532
|
+
if debug_mode and not espeak_available:
|
|
533
|
+
print(" > espeak-ng not found, will skip VITS models")
|
|
534
|
+
|
|
535
|
+
# Step 2: Try the REQUESTED model first if it's cached
|
|
536
|
+
cached_models = model_manager.get_cached_models()
|
|
537
|
+
if cached_models and debug_mode:
|
|
538
|
+
print(f" > Found {len(cached_models)} cached models")
|
|
539
|
+
|
|
540
|
+
# FORCE USER'S CHOICE: Try the specifically requested model first
|
|
541
|
+
if preferred_model in cached_models:
|
|
519
542
|
try:
|
|
520
543
|
if debug_mode:
|
|
521
|
-
print(f" >
|
|
544
|
+
print(f" > LOADING REQUESTED MODEL: {preferred_model}")
|
|
545
|
+
|
|
546
|
+
# Safety check for Italian VITS models that might crash
|
|
547
|
+
if "it/" in preferred_model and "vits" in preferred_model:
|
|
548
|
+
if debug_mode:
|
|
549
|
+
print(f" > Italian VITS model detected - using safe loading...")
|
|
550
|
+
|
|
522
551
|
self.tts = TTS(model_name=preferred_model, progress_bar=self.debug_mode)
|
|
552
|
+
|
|
553
|
+
if debug_mode:
|
|
554
|
+
print(f" > ✅ SUCCESS: Loaded requested model: {preferred_model}")
|
|
523
555
|
return True, preferred_model
|
|
556
|
+
|
|
524
557
|
except Exception as e:
|
|
558
|
+
error_msg = str(e).lower()
|
|
525
559
|
if debug_mode:
|
|
526
|
-
print(f" >
|
|
560
|
+
print(f" > ❌ Requested model failed: {e}")
|
|
561
|
+
|
|
562
|
+
# Special handling for Italian model crashes
|
|
563
|
+
if "it/" in preferred_model and ("segmentation" in error_msg or "crash" in error_msg):
|
|
564
|
+
if debug_mode:
|
|
565
|
+
print(f" > Italian model caused crash - marking as incompatible")
|
|
566
|
+
# Force fallback for crashed Italian models
|
|
567
|
+
pass
|
|
568
|
+
|
|
569
|
+
# Only fall back if the model actually failed to load, not due to dependencies
|
|
570
|
+
|
|
571
|
+
# Step 3: Only fall back to compatibility order if requested model failed
|
|
572
|
+
if debug_mode:
|
|
573
|
+
print(" > Requested model unavailable, trying fallback models...")
|
|
574
|
+
|
|
575
|
+
# Compatibility-first fallback order
|
|
576
|
+
fallback_models = [
|
|
577
|
+
"tts_models/en/ljspeech/tacotron2-DDC", # Most reliable (Linda)
|
|
578
|
+
"tts_models/en/jenny/jenny", # Different female speaker (Jenny)
|
|
579
|
+
"tts_models/en/ek1/tacotron2", # Male British accent (Edward)
|
|
580
|
+
"tts_models/en/sam/tacotron-DDC", # Different male voice (Sam)
|
|
581
|
+
"tts_models/en/ljspeech/fast_pitch", # Lightweight alternative
|
|
582
|
+
"tts_models/en/ljspeech/glow-tts", # Another alternative
|
|
583
|
+
"tts_models/en/vctk/vits", # Multi-speaker (requires espeak)
|
|
584
|
+
"tts_models/en/ljspeech/vits", # Premium (requires espeak)
|
|
585
|
+
]
|
|
586
|
+
|
|
587
|
+
# Remove the preferred model from fallbacks to avoid duplicate attempts
|
|
588
|
+
fallback_models = [m for m in fallback_models if m != preferred_model]
|
|
527
589
|
|
|
528
|
-
#
|
|
529
|
-
|
|
530
|
-
|
|
590
|
+
# Try fallback models
|
|
591
|
+
for model in fallback_models:
|
|
592
|
+
if model in cached_models:
|
|
593
|
+
# Skip VITS models if no espeak
|
|
594
|
+
if "vits" in model and not espeak_available:
|
|
595
|
+
if debug_mode:
|
|
596
|
+
print(f" > Skipping {model} (requires espeak-ng)")
|
|
597
|
+
continue
|
|
598
|
+
|
|
599
|
+
try:
|
|
600
|
+
if debug_mode:
|
|
601
|
+
print(f" > Trying fallback model: {model}")
|
|
602
|
+
self.tts = TTS(model_name=model, progress_bar=self.debug_mode)
|
|
603
|
+
if debug_mode:
|
|
604
|
+
print(f" > ✅ Successfully loaded fallback: {model}")
|
|
605
|
+
return True, model
|
|
606
|
+
except Exception as e:
|
|
607
|
+
if debug_mode:
|
|
608
|
+
print(f" > ❌ Fallback {model} failed: {e}")
|
|
609
|
+
|
|
610
|
+
# Step 4: If no cached models work, try downloading requested model first
|
|
611
|
+
if debug_mode:
|
|
612
|
+
print(" > No cached models worked, attempting downloads...")
|
|
613
|
+
|
|
614
|
+
# Try downloading the requested model first
|
|
615
|
+
if "vits" not in preferred_model or espeak_available:
|
|
531
616
|
try:
|
|
532
617
|
if debug_mode:
|
|
533
|
-
print(f" >
|
|
534
|
-
|
|
535
|
-
|
|
618
|
+
print(f" > Downloading requested model: {preferred_model}...")
|
|
619
|
+
success = model_manager.download_model(preferred_model)
|
|
620
|
+
if success:
|
|
621
|
+
self.tts = TTS(model_name=preferred_model, progress_bar=self.debug_mode)
|
|
622
|
+
if debug_mode:
|
|
623
|
+
print(f" > ✅ Downloaded and loaded requested: {preferred_model}")
|
|
624
|
+
return True, preferred_model
|
|
625
|
+
elif debug_mode:
|
|
626
|
+
print(f" > ❌ Download failed for requested model: {preferred_model}")
|
|
536
627
|
except Exception as e:
|
|
537
628
|
if debug_mode:
|
|
538
|
-
print(f" >
|
|
629
|
+
print(f" > ❌ Failed to download/load requested model: {e}")
|
|
539
630
|
|
|
540
|
-
#
|
|
541
|
-
|
|
542
|
-
if
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
if success:
|
|
546
|
-
self.tts = TTS(model_name=essential_model, progress_bar=self.debug_mode)
|
|
547
|
-
return True, essential_model
|
|
548
|
-
except Exception as e:
|
|
549
|
-
if debug_mode:
|
|
550
|
-
print(f" > Essential model download failed: {e}")
|
|
631
|
+
# Step 5: If requested model download failed, try fallback downloads
|
|
632
|
+
for model in fallback_models:
|
|
633
|
+
# Skip VITS models if no espeak
|
|
634
|
+
if "vits" in model and not espeak_available:
|
|
635
|
+
continue
|
|
551
636
|
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
637
|
+
try:
|
|
638
|
+
if debug_mode:
|
|
639
|
+
print(f" > Downloading fallback: {model}...")
|
|
640
|
+
|
|
641
|
+
# First try to download
|
|
642
|
+
success = model_manager.download_model(model)
|
|
643
|
+
if success:
|
|
644
|
+
# Then try to load
|
|
645
|
+
self.tts = TTS(model_name=model, progress_bar=self.debug_mode)
|
|
646
|
+
if debug_mode:
|
|
647
|
+
print(f" > ✅ Downloaded and loaded fallback: {model}")
|
|
648
|
+
return True, model
|
|
649
|
+
elif debug_mode:
|
|
650
|
+
print(f" > ❌ Download failed for {model}")
|
|
651
|
+
|
|
652
|
+
except Exception as e:
|
|
653
|
+
if debug_mode:
|
|
654
|
+
print(f" > ❌ Failed to load {model}: {e}")
|
|
561
655
|
|
|
562
656
|
return False, None
|
|
563
657
|
|
|
658
|
+
def _check_espeak_available(self) -> bool:
|
|
659
|
+
"""Check if espeak-ng is available on the system."""
|
|
660
|
+
import subprocess
|
|
661
|
+
try:
|
|
662
|
+
subprocess.run(['espeak-ng', '--version'],
|
|
663
|
+
capture_output=True, check=True, timeout=5)
|
|
664
|
+
return True
|
|
665
|
+
except (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired):
|
|
666
|
+
# Try alternative espeak command
|
|
667
|
+
try:
|
|
668
|
+
subprocess.run(['espeak', '--version'],
|
|
669
|
+
capture_output=True, check=True, timeout=5)
|
|
670
|
+
return True
|
|
671
|
+
except:
|
|
672
|
+
return False
|
|
673
|
+
|
|
564
674
|
def _handle_espeak_fallback(self, debug_mode: bool):
|
|
565
675
|
"""Handle espeak-related errors with fallback to non-phoneme models."""
|
|
566
676
|
# Restore stdout to show user-friendly message
|
|
@@ -574,7 +684,7 @@ class TTSEngine:
|
|
|
574
684
|
print(" • macOS: brew install espeak-ng")
|
|
575
685
|
print(" • Linux: sudo apt-get install espeak-ng")
|
|
576
686
|
print(" • Windows: conda install espeak-ng (or see README)")
|
|
577
|
-
print("\nFalling back to
|
|
687
|
+
print("\nFalling back to compatible models (no espeak dependency)")
|
|
578
688
|
print("="*70 + "\n")
|
|
579
689
|
|
|
580
690
|
if not debug_mode:
|
|
@@ -582,12 +692,15 @@ class TTSEngine:
|
|
|
582
692
|
null_out = open(os.devnull, 'w')
|
|
583
693
|
sys.stdout = null_out
|
|
584
694
|
|
|
585
|
-
# Try non-phoneme models that don't require espeak
|
|
695
|
+
# Try non-phoneme models that don't require espeak (compatibility-first order)
|
|
586
696
|
from TTS.api import TTS
|
|
587
697
|
fallback_models = [
|
|
588
|
-
"tts_models/en/ljspeech/
|
|
589
|
-
"tts_models/en/
|
|
590
|
-
"tts_models/en/
|
|
698
|
+
"tts_models/en/ljspeech/tacotron2-DDC", # Most reliable (Linda)
|
|
699
|
+
"tts_models/en/jenny/jenny", # Different female speaker (Jenny)
|
|
700
|
+
"tts_models/en/ek1/tacotron2", # Male British accent (Edward)
|
|
701
|
+
"tts_models/en/sam/tacotron-DDC", # Different male voice (Sam)
|
|
702
|
+
"tts_models/en/ljspeech/fast_pitch", # Lightweight alternative
|
|
703
|
+
"tts_models/en/ljspeech/glow-tts" # Another alternative
|
|
591
704
|
]
|
|
592
705
|
|
|
593
706
|
tts_loaded = False
|