abstractvoice 0.4.1__tar.gz → 0.4.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {abstractvoice-0.4.1 → abstractvoice-0.4.6}/PKG-INFO +9 -1
- {abstractvoice-0.4.1 → abstractvoice-0.4.6}/abstractvoice/__init__.py +1 -1
- {abstractvoice-0.4.1 → abstractvoice-0.4.6}/abstractvoice/examples/voice_cli.py +1 -1
- abstractvoice-0.4.6/abstractvoice/instant_setup.py +83 -0
- {abstractvoice-0.4.1 → abstractvoice-0.4.6}/abstractvoice/simple_model_manager.py +118 -16
- {abstractvoice-0.4.1 → abstractvoice-0.4.6}/abstractvoice/tts/tts_engine.py +151 -38
- {abstractvoice-0.4.1 → abstractvoice-0.4.6}/abstractvoice/voice_manager.py +93 -19
- {abstractvoice-0.4.1 → abstractvoice-0.4.6}/abstractvoice.egg-info/PKG-INFO +9 -1
- {abstractvoice-0.4.1 → abstractvoice-0.4.6}/abstractvoice.egg-info/SOURCES.txt +1 -1
- {abstractvoice-0.4.1 → abstractvoice-0.4.6}/abstractvoice.egg-info/requires.txt +8 -0
- {abstractvoice-0.4.1 → abstractvoice-0.4.6}/pyproject.toml +9 -0
- abstractvoice-0.4.1/abstractvoice/model_manager.py +0 -384
- {abstractvoice-0.4.1 → abstractvoice-0.4.6}/LICENSE +0 -0
- {abstractvoice-0.4.1 → abstractvoice-0.4.6}/README.md +0 -0
- {abstractvoice-0.4.1 → abstractvoice-0.4.6}/abstractvoice/__main__.py +0 -0
- {abstractvoice-0.4.1 → abstractvoice-0.4.6}/abstractvoice/dependency_check.py +0 -0
- {abstractvoice-0.4.1 → abstractvoice-0.4.6}/abstractvoice/examples/__init__.py +0 -0
- {abstractvoice-0.4.1 → abstractvoice-0.4.6}/abstractvoice/examples/cli_repl.py +0 -0
- {abstractvoice-0.4.1 → abstractvoice-0.4.6}/abstractvoice/examples/web_api.py +0 -0
- {abstractvoice-0.4.1 → abstractvoice-0.4.6}/abstractvoice/recognition.py +0 -0
- {abstractvoice-0.4.1 → abstractvoice-0.4.6}/abstractvoice/stt/__init__.py +0 -0
- {abstractvoice-0.4.1 → abstractvoice-0.4.6}/abstractvoice/stt/transcriber.py +0 -0
- {abstractvoice-0.4.1 → abstractvoice-0.4.6}/abstractvoice/tts/__init__.py +0 -0
- {abstractvoice-0.4.1 → abstractvoice-0.4.6}/abstractvoice/vad/__init__.py +0 -0
- {abstractvoice-0.4.1 → abstractvoice-0.4.6}/abstractvoice/vad/voice_detector.py +0 -0
- {abstractvoice-0.4.1 → abstractvoice-0.4.6}/abstractvoice.egg-info/dependency_links.txt +0 -0
- {abstractvoice-0.4.1 → abstractvoice-0.4.6}/abstractvoice.egg-info/entry_points.txt +0 -0
- {abstractvoice-0.4.1 → abstractvoice-0.4.6}/abstractvoice.egg-info/top_level.txt +0 -0
- {abstractvoice-0.4.1 → abstractvoice-0.4.6}/setup.cfg +0 -0

--- abstractvoice-0.4.1/PKG-INFO
+++ abstractvoice-0.4.6/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: abstractvoice
-Version: 0.4.1
+Version: 0.4.6
 Summary: A modular Python library for voice interactions with AI systems
 Author-email: Laurent-Philippe Albou <contact@abstractcore.ai>
 License-Expression: MIT
@@ -19,6 +19,14 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: numpy>=1.24.0
 Requires-Dist: requests>=2.31.0
+Requires-Dist: appdirs>=1.4.0
+Requires-Dist: coqui-tts<0.30.0,>=0.27.0
+Requires-Dist: torch<2.4.0,>=2.0.0
+Requires-Dist: torchvision<0.19.0,>=0.15.0
+Requires-Dist: torchaudio<2.4.0,>=2.0.0
+Requires-Dist: librosa>=0.10.0
+Requires-Dist: sounddevice>=0.4.6
+Requires-Dist: soundfile>=0.12.1
 Provides-Extra: voice
 Requires-Dist: sounddevice>=0.4.6; extra == "voice"
 Requires-Dist: webrtcvad>=2.0.10; extra == "voice"

--- abstractvoice-0.4.1/abstractvoice/__init__.py
+++ abstractvoice-0.4.6/abstractvoice/__init__.py
@@ -32,5 +32,5 @@ from .voice_manager import VoiceManager
 # Import simple APIs for third-party applications
 from .simple_model_manager import list_models, download_model, get_status, is_ready
 
-__version__ = "0.4.1"
+__version__ = "0.4.6"
 __all__ = ['VoiceManager', 'list_models', 'download_model', 'get_status', 'is_ready']

--- abstractvoice-0.4.1/abstractvoice/examples/voice_cli.py
+++ abstractvoice-0.4.6/abstractvoice/examples/voice_cli.py
@@ -158,7 +158,7 @@ def main():
             traceback.print_exc()
             return
     elif args.command == "download-models":
-        from abstractvoice.model_manager import download_models_cli
+        from abstractvoice.simple_model_manager import download_models_cli
         # Pass remaining arguments to download_models_cli
         import sys
         original_argv = sys.argv

--- /dev/null
+++ abstractvoice-0.4.6/abstractvoice/instant_setup.py
@@ -0,0 +1,83 @@
+"""
+Instant Setup Module for AbstractVoice
+Provides immediate TTS functionality with seamless model download.
+"""
+
+import os
+import sys
+from pathlib import Path
+
+# Essential model for instant functionality (lightweight, reliable)
+ESSENTIAL_MODEL = "tts_models/en/ljspeech/fast_pitch"
+ESSENTIAL_MODEL_SIZE = "~100MB"
+
+def ensure_instant_tts():
+    """
+    Ensure TTS is ready for immediate use.
+    Downloads essential model if needed with progress indicator.
+
+    Returns:
+        bool: True if TTS is ready, False if failed
+    """
+    try:
+        from TTS.api import TTS
+        from TTS.utils.manage import ModelManager
+
+        manager = ModelManager()
+
+        # Check if essential model is already cached
+        if is_model_cached(ESSENTIAL_MODEL):
+            return True
+
+        # Download essential model with user-friendly progress
+        print(f"🚀 AbstractVoice: Setting up TTS ({ESSENTIAL_MODEL_SIZE})...")
+        print(f"   This happens once and takes ~30 seconds")
+
+        try:
+            # Download with progress bar
+            tts = TTS(model_name=ESSENTIAL_MODEL, progress_bar=True)
+            print(f"✅ TTS ready! AbstractVoice is now fully functional.")
+            return True
+
+        except Exception as e:
+            print(f"❌ Setup failed: {e}")
+            print(f"💡 Try: pip install abstractvoice[all]")
+            return False
+
+    except ImportError as e:
+        print(f"❌ Missing dependencies: {e}")
+        print(f"💡 Install with: pip install abstractvoice[all]")
+        return False
+
+def is_model_cached(model_name):
+    """Check if a model is already cached."""
+    try:
+        from TTS.utils.manage import ModelManager
+        manager = ModelManager()
+
+        # Get cached models list
+        models_file = os.path.join(manager.output_prefix, ".models.json")
+        if os.path.exists(models_file):
+            import json
+            with open(models_file, 'r') as f:
+                cached_models = json.load(f)
+            return model_name in cached_models
+
+        # Fallback: check if model directory exists and has content
+        model_dir = model_name.replace("/", "--")
+        model_path = os.path.join(manager.output_prefix, model_dir)
+        return os.path.exists(model_path) and bool(os.listdir(model_path))
+
+    except:
+        # If anything fails, assume not cached
+        return False

+def get_instant_model():
+    """Get the essential model name for instant setup."""
+    return ESSENTIAL_MODEL
+
+if __name__ == "__main__":
+    # CLI test
+    print("🧪 Testing instant setup...")
+    success = ensure_instant_tts()
+    print(f"Result: {'✅ Ready' if success else '❌ Failed'}")
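
The new instant_setup module is importable on its own. A minimal sketch of how a host application might use it before synthesizing speech; ensure_instant_tts() and get_instant_model() come from the file above, while the tts_to_file() call assumes the standard Coqui TTS API that the module itself already relies on:

```python
# Hedged sketch: ensure_instant_tts()/get_instant_model() are defined above;
# TTS.tts_to_file() is the Coqui TTS call this module already depends on.
from abstractvoice.instant_setup import ensure_instant_tts, get_instant_model

if ensure_instant_tts():                     # downloads ~100 MB once, then cached
    from TTS.api import TTS
    tts = TTS(model_name=get_instant_model(), progress_bar=False)
    tts.tts_to_file(text="AbstractVoice is ready.", file_path="ready.wav")
```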

--- abstractvoice-0.4.1/abstractvoice/simple_model_manager.py
+++ abstractvoice-0.4.6/abstractvoice/simple_model_manager.py
@@ -31,37 +31,65 @@ class SimpleModelManager:
     """Simple, clean model manager for AbstractVoice."""
 
     # Essential model - guaranteed to work everywhere, reasonable size
-    ESSENTIAL_MODEL = "tts_models/en/ljspeech/fast_pitch"
+    # Changed from fast_pitch to tacotron2-DDC because fast_pitch downloads are failing
+    ESSENTIAL_MODEL = "tts_models/en/ljspeech/tacotron2-DDC"
 
     # Available models organized by language with metadata
     AVAILABLE_MODELS = {
         "en": {
+            "tacotron2": {
+                "model": "tts_models/en/ljspeech/tacotron2-DDC",
+                "name": "Linda (LJSpeech)",
+                "quality": "good",
+                "size_mb": 362,
+                "description": "Standard female voice (LJSpeech speaker)",
+                "requires_espeak": False,
+                "default": True
+            },
+            "jenny": {
+                "model": "tts_models/en/jenny/jenny",
+                "name": "Jenny",
+                "quality": "excellent",
+                "size_mb": 368,
+                "description": "Different female voice, clear and natural",
+                "requires_espeak": False,
+                "default": False
+            },
+            "ek1": {
+                "model": "tts_models/en/ek1/tacotron2",
+                "name": "Edward (EK1)",
+                "quality": "excellent",
+                "size_mb": 310,
+                "description": "Male voice with British accent",
+                "requires_espeak": False,
+                "default": False
+            },
+            "sam": {
+                "model": "tts_models/en/sam/tacotron-DDC",
+                "name": "Sam",
+                "quality": "good",
+                "size_mb": 370,
+                "description": "Different male voice, deeper tone",
+                "requires_espeak": False,
+                "default": False
+            },
             "fast_pitch": {
                 "model": "tts_models/en/ljspeech/fast_pitch",
-                "name": "Fast
+                "name": "Linda Fast (LJSpeech)",
                 "quality": "good",
                 "size_mb": 107,
-                "description": "
+                "description": "Same speaker as Linda but faster engine",
                 "requires_espeak": False,
-                "default": True
+                "default": False
             },
             "vits": {
                 "model": "tts_models/en/ljspeech/vits",
-                "name": "
+                "name": "Linda Premium (LJSpeech)",
                 "quality": "excellent",
                 "size_mb": 328,
-                "description": "
+                "description": "Same speaker as Linda but premium quality",
                 "requires_espeak": True,
                 "default": False
-            },
-            "tacotron2": {
-                "model": "tts_models/en/ljspeech/tacotron2-DDC",
-                "name": "Tacotron2 (English)",
-                "quality": "good",
-                "size_mb": 362,
-                "description": "Classic English voice, reliable",
-                "requires_espeak": False,
-                "default": False
             }
         },
         "fr": {
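
For applications that only need this catalog, the module-level helpers re-exported from abstractvoice/__init__.py (list_models, download_model, get_status, is_ready) cover the common cases. A minimal sketch; the exact return shape of list_models() is not shown in this diff, so only the calls themselves are illustrated, and "en.tacotron2" is the language.voice id form used by the CLI in the next hunk:

```python
# Hedged sketch using only names exported in abstractvoice/__init__.py.
import abstractvoice

print(abstractvoice.get_status())        # human-readable cache summary (str)
if not abstractvoice.is_ready():         # essential model not cached yet
    abstractvoice.download_model("en.tacotron2")   # id form used by the CLI
print(abstractvoice.list_models())       # available voices per language
```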
@@ -395,4 +423,78 @@ def get_status() -> str:
 def is_ready() -> bool:
     """Check if essential model is ready for immediate use."""
     manager = get_model_manager()
-    return manager.is_model_cached(manager.ESSENTIAL_MODEL)
+    return manager.is_model_cached(manager.ESSENTIAL_MODEL)
+
+
+def download_models_cli():
+    """Simple CLI entry point for downloading models."""
+    import argparse
+    import sys
+
+    parser = argparse.ArgumentParser(description="Download TTS models for offline use")
+    parser.add_argument("--essential", action="store_true",
+                        help="Download essential model (default)")
+    parser.add_argument("--all", action="store_true",
+                        help="Download all available models")
+    parser.add_argument("--model", type=str,
+                        help="Download specific model by name")
+    parser.add_argument("--language", type=str,
+                        help="Download models for specific language (en, fr, es, de, it)")
+    parser.add_argument("--status", action="store_true",
+                        help="Show current cache status")
+    parser.add_argument("--clear", action="store_true",
+                        help="Clear model cache")
+
+    args = parser.parse_args()
+
+    manager = get_model_manager(debug_mode=True)
+
+    if args.status:
+        print(get_status())
+        return
+
+    if args.clear:
+        manager.clear_cache()
+        print("✅ Cache cleared")
+        return
+
+    if args.model:
+        success = download_model(args.model)
+        if success:
+            print(f"✅ Downloaded {args.model}")
+        else:
+            print(f"❌ Failed to download {args.model}")
+        sys.exit(0 if success else 1)
+
+    if args.language:
+        # Language-specific downloads using our simple API
+        lang_models = {
+            'en': ['en.tacotron2', 'en.jenny', 'en.ek1'],
+            'fr': ['fr.css10_vits', 'fr.mai_tacotron2'],
+            'es': ['es.mai_tacotron2'],
+            'de': ['de.thorsten_vits'],
+            'it': ['it.mai_male_vits', 'it.mai_female_vits']
+        }
+
+        if args.language not in lang_models:
+            print(f"❌ Language '{args.language}' not supported")
+            print(f"   Available: {list(lang_models.keys())}")
+            sys.exit(1)
+
+        success = False
+        for model_id in lang_models[args.language]:
+            if download_model(model_id):
+                print(f"✅ Downloaded {model_id}")
+                success = True
+                break
+
+        sys.exit(0 if success else 1)
+
+    # Default: download essential model
+    print("📦 Downloading essential TTS model...")
+    success = download_model(manager.ESSENTIAL_MODEL)
+    if success:
+        print("✅ Essential model ready!")
+    else:
+        print("❌ Failed to download essential model")
+    sys.exit(0 if success else 1)
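
voice_cli.py drives this entry point by swapping sys.argv (see its hunk above). A minimal sketch of calling it the same way from a script; the flags are the ones defined by the parser above, and SystemExit is caught because several branches end in sys.exit():

```python
# Hedged sketch: invoke the new download_models_cli() programmatically,
# mirroring what voice_cli.py does with sys.argv.
import sys
from abstractvoice.simple_model_manager import download_models_cli

original_argv = sys.argv
try:
    sys.argv = ["abstractvoice-download-models", "--status"]
    download_models_cli()          # --status prints get_status() and returns
except SystemExit:
    pass                           # --model/--language/default paths call sys.exit()
finally:
    sys.argv = original_argv
```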

--- abstractvoice-0.4.1/abstractvoice/tts/tts_engine.py
+++ abstractvoice-0.4.6/abstractvoice/tts/tts_engine.py
@@ -300,11 +300,24 @@ class NonBlockingAudioPlayer:
             print(f"Error stopping audio stream: {e}")
         finally:
             self.stream = None
-
+
         self.is_playing = False
         with self.pause_lock:
             self.is_paused = False
         self.clear_queue()
+
+    def cleanup(self):
+        """Cleanup resources to prevent memory conflicts."""
+        try:
+            self.stop_stream()
+            # Clear any remaining references
+            self.current_audio = None
+            self.playback_complete_callback = None
+            if self.debug_mode:
+                print(" > Audio player cleaned up")
+        except Exception as e:
+            if self.debug_mode:
+                print(f"Audio cleanup warning: {e}")
 
     def play_audio(self, audio_array):
         """Add audio to the playback queue."""
@@ -509,58 +522,155 @@ class TTSEngine:
         self.is_paused_state = False  # Explicit paused state tracking
 
     def _load_with_simple_fallback(self, TTS, preferred_model: str, debug_mode: bool) -> tuple[bool, str]:
-        """Load TTS model with
+        """Load TTS model with bulletproof compatibility-first strategy."""
         from ..simple_model_manager import get_model_manager
 
         model_manager = get_model_manager(debug_mode=debug_mode)
 
-        #
-
+        # Step 1: Check espeak availability for smart model filtering
+        espeak_available = self._check_espeak_available()
+        if debug_mode and not espeak_available:
+            print(" > espeak-ng not found, will skip VITS models")
+
+        # Step 2: Try the REQUESTED model first if it's cached
+        cached_models = model_manager.get_cached_models()
+        if cached_models and debug_mode:
+            print(f" > Found {len(cached_models)} cached models")
+
+        # FORCE USER'S CHOICE: Try the specifically requested model first
+        if preferred_model in cached_models:
             try:
                 if debug_mode:
-                    print(f" >
+                    print(f" > LOADING REQUESTED MODEL: {preferred_model}")
+
+                # Safety check for Italian VITS models that might crash
+                if "it/" in preferred_model and "vits" in preferred_model:
+                    if debug_mode:
+                        print(f" > Italian VITS model detected - using safe loading...")
+
                 self.tts = TTS(model_name=preferred_model, progress_bar=self.debug_mode)
+
+                if debug_mode:
+                    print(f" > ✅ SUCCESS: Loaded requested model: {preferred_model}")
                 return True, preferred_model
+
             except Exception as e:
+                error_msg = str(e).lower()
                 if debug_mode:
-                    print(f" >
+                    print(f" > ❌ Requested model failed: {e}")
+
+                # Special handling for Italian model crashes
+                if "it/" in preferred_model and ("segmentation" in error_msg or "crash" in error_msg):
+                    if debug_mode:
+                        print(f" > Italian model caused crash - marking as incompatible")
+                    # Force fallback for crashed Italian models
+                    pass
+
+                # Only fall back if the model actually failed to load, not due to dependencies
+
+        # Step 3: Only fall back to compatibility order if requested model failed
+        if debug_mode:
+            print(" > Requested model unavailable, trying fallback models...")
+
+        # Compatibility-first fallback order
+        fallback_models = [
+            "tts_models/en/ljspeech/tacotron2-DDC",  # Most reliable (Linda)
+            "tts_models/en/jenny/jenny",             # Different female speaker (Jenny)
+            "tts_models/en/ek1/tacotron2",           # Male British accent (Edward)
+            "tts_models/en/sam/tacotron-DDC",        # Different male voice (Sam)
+            "tts_models/en/ljspeech/fast_pitch",     # Lightweight alternative
+            "tts_models/en/ljspeech/glow-tts",       # Another alternative
+            "tts_models/en/vctk/vits",               # Multi-speaker (requires espeak)
+            "tts_models/en/ljspeech/vits",           # Premium (requires espeak)
+        ]
+
+        # Remove the preferred model from fallbacks to avoid duplicate attempts
+        fallback_models = [m for m in fallback_models if m != preferred_model]
 
-        #
-
-
+        # Try fallback models
+        for model in fallback_models:
+            if model in cached_models:
+                # Skip VITS models if no espeak
+                if "vits" in model and not espeak_available:
+                    if debug_mode:
+                        print(f" > Skipping {model} (requires espeak-ng)")
+                    continue
+
+                try:
+                    if debug_mode:
+                        print(f" > Trying fallback model: {model}")
+                    self.tts = TTS(model_name=model, progress_bar=self.debug_mode)
+                    if debug_mode:
+                        print(f" > ✅ Successfully loaded fallback: {model}")
+                    return True, model
+                except Exception as e:
+                    if debug_mode:
+                        print(f" > ❌ Fallback {model} failed: {e}")
+
+        # Step 4: If no cached models work, try downloading requested model first
+        if debug_mode:
+            print(" > No cached models worked, attempting downloads...")
+
+        # Try downloading the requested model first
+        if "vits" not in preferred_model or espeak_available:
             try:
                 if debug_mode:
-                    print(f" >
-
-
+                    print(f" > Downloading requested model: {preferred_model}...")
+                success = model_manager.download_model(preferred_model)
+                if success:
+                    self.tts = TTS(model_name=preferred_model, progress_bar=self.debug_mode)
+                    if debug_mode:
+                        print(f" > ✅ Downloaded and loaded requested: {preferred_model}")
+                    return True, preferred_model
+                elif debug_mode:
+                    print(f" > ❌ Download failed for requested model: {preferred_model}")
             except Exception as e:
                 if debug_mode:
-                    print(f" >
+                    print(f" > ❌ Failed to download/load requested model: {e}")
 
-        #
-
-        if
-
-
-            if success:
-                self.tts = TTS(model_name=essential_model, progress_bar=self.debug_mode)
-                return True, essential_model
-        except Exception as e:
-            if debug_mode:
-                print(f" > Essential model download failed: {e}")
+        # Step 5: If requested model download failed, try fallback downloads
+        for model in fallback_models:
+            # Skip VITS models if no espeak
+            if "vits" in model and not espeak_available:
+                continue
 
-
-
-
-
-
-
-
-
-
+            try:
+                if debug_mode:
+                    print(f" > Downloading fallback: {model}...")
+
+                # First try to download
+                success = model_manager.download_model(model)
+                if success:
+                    # Then try to load
+                    self.tts = TTS(model_name=model, progress_bar=self.debug_mode)
+                    if debug_mode:
+                        print(f" > ✅ Downloaded and loaded fallback: {model}")
+                    return True, model
+                elif debug_mode:
+                    print(f" > ❌ Download failed for {model}")
+
+            except Exception as e:
+                if debug_mode:
+                    print(f" > ❌ Failed to load {model}: {e}")
 
         return False, None
 
+    def _check_espeak_available(self) -> bool:
+        """Check if espeak-ng is available on the system."""
+        import subprocess
+        try:
+            subprocess.run(['espeak-ng', '--version'],
+                           capture_output=True, check=True, timeout=5)
+            return True
+        except (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired):
+            # Try alternative espeak command
+            try:
+                subprocess.run(['espeak', '--version'],
+                               capture_output=True, check=True, timeout=5)
+                return True
+            except:
+                return False
+
     def _handle_espeak_fallback(self, debug_mode: bool):
         """Handle espeak-related errors with fallback to non-phoneme models."""
         # Restore stdout to show user-friendly message
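
To make the selection order above easier to follow, here is a standalone illustration (not part of the package) of the cached-model branch: the requested model is always tried first when it is cached, and the fallbacks are filtered by espeak availability before any downloads are attempted.

```python
# Standalone sketch mirroring the cached-model branch above (an assumption for
# illustration, not the package code itself).
FALLBACK_ORDER = [
    "tts_models/en/ljspeech/tacotron2-DDC",
    "tts_models/en/jenny/jenny",
    "tts_models/en/ek1/tacotron2",
    "tts_models/en/sam/tacotron-DDC",
    "tts_models/en/ljspeech/fast_pitch",
    "tts_models/en/ljspeech/glow-tts",
    "tts_models/en/vctk/vits",
    "tts_models/en/ljspeech/vits",
]

def pick_cached_model(preferred, cached, espeak_available):
    # The requested model is tried first whenever it is cached, even if it
    # needs espeak; a load failure then falls through to the fallback list.
    if preferred in cached:
        return preferred
    for model in FALLBACK_ORDER:
        if model == preferred or model not in cached:
            continue
        if "vits" in model and not espeak_available:
            continue  # VITS models require espeak-ng phonemization
        return model
    return None  # nothing cached is usable; the engine moves on to downloads

print(pick_cached_model("tts_models/en/ljspeech/vits",
                        {"tts_models/en/ljspeech/tacotron2-DDC"},
                        espeak_available=False))
# -> tts_models/en/ljspeech/tacotron2-DDC
```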
@@ -574,7 +684,7 @@ class TTSEngine:
         print("   • macOS: brew install espeak-ng")
         print("   • Linux: sudo apt-get install espeak-ng")
         print("   • Windows: conda install espeak-ng (or see README)")
-        print("\nFalling back to
+        print("\nFalling back to compatible models (no espeak dependency)")
         print("="*70 + "\n")
 
         if not debug_mode:
@@ -582,12 +692,15 @@ class TTSEngine:
             null_out = open(os.devnull, 'w')
             sys.stdout = null_out
 
-        # Try non-phoneme models that don't require espeak
+        # Try non-phoneme models that don't require espeak (compatibility-first order)
        from TTS.api import TTS
        fallback_models = [
-            "tts_models/en/ljspeech/
-            "tts_models/en/
-            "tts_models/en/
+            "tts_models/en/ljspeech/tacotron2-DDC",  # Most reliable (Linda)
+            "tts_models/en/jenny/jenny",             # Different female speaker (Jenny)
+            "tts_models/en/ek1/tacotron2",           # Male British accent (Edward)
+            "tts_models/en/sam/tacotron-DDC",        # Different male voice (Sam)
+            "tts_models/en/ljspeech/fast_pitch",     # Lightweight alternative
+            "tts_models/en/ljspeech/glow-tts"        # Another alternative
        ]
 
        tts_loaded = False

--- abstractvoice-0.4.1/abstractvoice/voice_manager.py
+++ abstractvoice-0.4.6/abstractvoice/voice_manager.py
@@ -38,8 +38,8 @@ class VoiceManager:
     # Smart language configuration - high quality stable defaults
     LANGUAGES = {
         'en': {
-            'default': 'tts_models/en/ljspeech/
-            'premium': 'tts_models/en/ljspeech/vits',  #
+            'default': 'tts_models/en/ljspeech/tacotron2-DDC',  # Reliable, compatible voice
+            'premium': 'tts_models/en/ljspeech/vits',  # High quality (requires espeak)
             'name': 'English'
         },
         'fr': {
@@ -70,15 +70,39 @@ class VoiceManager:
     # Complete voice catalog with metadata
     VOICE_CATALOG = {
         'en': {
-            '
-                'model': 'tts_models/en/ljspeech/
-                'quality': '
+            'tacotron2': {
+                'model': 'tts_models/en/ljspeech/tacotron2-DDC',
+                'quality': 'good',
                 'gender': 'female',
                 'accent': 'US English',
                 'license': 'Open source (LJSpeech)',
-                'requires': '
+                'requires': 'none'
+            },
+            'jenny': {
+                'model': 'tts_models/en/jenny/jenny',
+                'quality': 'excellent',
+                'gender': 'female',
+                'accent': 'US English',
+                'license': 'Open source (Jenny)',
+                'requires': 'none'
+            },
+            'ek1': {
+                'model': 'tts_models/en/ek1/tacotron2',
+                'quality': 'excellent',
+                'gender': 'male',
+                'accent': 'British English',
+                'license': 'Open source (EK1)',
+                'requires': 'none'
             },
-            '
+            'sam': {
+                'model': 'tts_models/en/sam/tacotron-DDC',
+                'quality': 'good',
+                'gender': 'male',
+                'accent': 'US English',
+                'license': 'Open source (Sam)',
+                'requires': 'none'
+            },
+            'fast_pitch': {
                 'model': 'tts_models/en/ljspeech/fast_pitch',
                 'quality': 'good',
                 'gender': 'female',
@@ -86,12 +110,12 @@ class VoiceManager:
                 'license': 'Open source (LJSpeech)',
                 'requires': 'none'
             },
-            '
-                'model': 'tts_models/en/
+            'vits': {
+                'model': 'tts_models/en/ljspeech/vits',
                 'quality': 'premium',
-                'gender': '
-                'accent': '
-                'license': 'Open source (
+                'gender': 'female',
+                'accent': 'US English',
+                'license': 'Open source (LJSpeech)',
                 'requires': 'espeak-ng'
             }
         },
@@ -191,6 +215,20 @@ class VoiceManager:
             lang_name = self.LANGUAGES[self.language]['name']
             print(f"🌍 Using {lang_name} voice: {tts_model}")
 
+        # Initialize TTS engine with instant setup for new users
+        from .instant_setup import ensure_instant_tts, get_instant_model, is_model_cached
+
+        # If using default VITS model but it's not cached, use instant setup
+        if tts_model == "tts_models/en/ljspeech/vits" and not is_model_cached(tts_model):
+            if debug_mode:
+                print("🚀 First-time setup: ensuring instant TTS availability...")
+
+            # Try instant setup with lightweight model
+            if ensure_instant_tts():
+                tts_model = get_instant_model()  # Use fast_pitch instead
+                if debug_mode:
+                    print(f"✅ Using essential model: {tts_model}")
+
         # Initialize TTS engine using lazy import
         TTSEngine = _import_tts_engine()
         self.tts_engine = TTSEngine(
@@ -391,32 +429,68 @@ class VoiceManager:
         return self.speed
 
     def set_tts_model(self, model_name):
-        """Change the TTS model.
-
+        """Change the TTS model safely without memory conflicts.
+
         Available models (all pure Python, cross-platform):
         - "tts_models/en/ljspeech/fast_pitch" (default, recommended)
         - "tts_models/en/ljspeech/glow-tts" (alternative)
         - "tts_models/en/ljspeech/tacotron2-DDC" (legacy)
-
+
         Args:
             model_name: TTS model name to use
-
+
         Returns:
             True if successful
-
+
         Example:
             vm.set_tts_model("tts_models/en/ljspeech/glow-tts")
         """
         # Stop any current speech
         self.stop_speaking()
-
+
+        # CRITICAL: Crash-safe cleanup of old TTS engine
+        if hasattr(self, 'tts_engine') and self.tts_engine:
+            try:
+                # Stop all audio and cleanup player
+                if hasattr(self.tts_engine, 'audio_player') and self.tts_engine.audio_player:
+                    # Try stop method if available
+                    if hasattr(self.tts_engine.audio_player, 'stop'):
+                        self.tts_engine.audio_player.stop()
+                    self.tts_engine.audio_player.cleanup()
+
+                # Force cleanup of TTS object and release GPU memory
+                if hasattr(self.tts_engine, 'tts') and self.tts_engine.tts:
+                    # Clear CUDA cache if using GPU
+                    try:
+                        import torch
+                        if torch.cuda.is_available():
+                            torch.cuda.empty_cache()
+                    except:
+                        pass
+
+                    del self.tts_engine.tts
+
+                # Clear the engine itself
+                del self.tts_engine
+                self.tts_engine = None
+
+                # Force garbage collection to prevent memory leaks
+                import gc
+                gc.collect()
+
+            except Exception as e:
+                if self.debug_mode:
+                    print(f"Warning: TTS cleanup issue: {e}")
+                # Force clear even if cleanup failed
+                self.tts_engine = None
+
         # Reinitialize TTS engine with new model using lazy import
         TTSEngine = _import_tts_engine()
         self.tts_engine = TTSEngine(
             model_name=model_name,
             debug_mode=self.debug_mode
         )
-
+
         # Restore callbacks
         self.tts_engine.on_playback_start = self._on_tts_start
         self.tts_engine.on_playback_end = self._on_tts_end
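
Taken together, the voice_manager.py changes mean a caller can switch between catalog voices at runtime without restarting. A minimal sketch; VoiceManager(debug_mode=...) and set_tts_model(...) appear in this diff, and nothing beyond those calls is assumed:

```python
# Hedged sketch of runtime voice switching using only calls visible in the diff.
from abstractvoice import VoiceManager

vm = VoiceManager(debug_mode=True)                 # may trigger the instant-setup path
vm.set_tts_model("tts_models/en/ek1/tacotron2")    # Edward: male, British accent
vm.set_tts_model("tts_models/en/jenny/jenny")      # Jenny: female, US English
```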

--- abstractvoice-0.4.1/abstractvoice.egg-info/PKG-INFO
+++ abstractvoice-0.4.6/abstractvoice.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: abstractvoice
-Version: 0.4.1
+Version: 0.4.6
 Summary: A modular Python library for voice interactions with AI systems
 Author-email: Laurent-Philippe Albou <contact@abstractcore.ai>
 License-Expression: MIT
@@ -19,6 +19,14 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: numpy>=1.24.0
 Requires-Dist: requests>=2.31.0
+Requires-Dist: appdirs>=1.4.0
+Requires-Dist: coqui-tts<0.30.0,>=0.27.0
+Requires-Dist: torch<2.4.0,>=2.0.0
+Requires-Dist: torchvision<0.19.0,>=0.15.0
+Requires-Dist: torchaudio<2.4.0,>=2.0.0
+Requires-Dist: librosa>=0.10.0
+Requires-Dist: sounddevice>=0.4.6
+Requires-Dist: soundfile>=0.12.1
 Provides-Extra: voice
 Requires-Dist: sounddevice>=0.4.6; extra == "voice"
 Requires-Dist: webrtcvad>=2.0.10; extra == "voice"

--- abstractvoice-0.4.1/abstractvoice.egg-info/SOURCES.txt
+++ abstractvoice-0.4.6/abstractvoice.egg-info/SOURCES.txt
@@ -4,7 +4,7 @@ pyproject.toml
 abstractvoice/__init__.py
 abstractvoice/__main__.py
 abstractvoice/dependency_check.py
-abstractvoice/model_manager.py
+abstractvoice/instant_setup.py
 abstractvoice/recognition.py
 abstractvoice/simple_model_manager.py
 abstractvoice/voice_manager.py

--- abstractvoice-0.4.1/pyproject.toml
+++ abstractvoice-0.4.6/pyproject.toml
@@ -26,6 +26,15 @@ classifiers = [
 dependencies = [
     "numpy>=1.24.0",
     "requests>=2.31.0",
+    "appdirs>=1.4.0",
+    # Essential TTS dependencies for immediate functionality
+    "coqui-tts>=0.27.0,<0.30.0",
+    "torch>=2.0.0,<2.4.0",
+    "torchvision>=0.15.0,<0.19.0",
+    "torchaudio>=2.0.0,<2.4.0",
+    "librosa>=0.10.0",
+    "sounddevice>=0.4.6",
+    "soundfile>=0.12.1",
 ]
 
 [project.optional-dependencies]
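
Because coqui-tts, torch, torchvision and torchaudio are now hard runtime dependencies with upper bounds, it can be worth confirming an existing environment matches the new pins before upgrading. A small environment check (not part of the package); the distribution names are the ones used in the dependency list above:

```python
# Hedged sketch: report installed versions of the newly pinned dependencies.
from importlib.metadata import version, PackageNotFoundError

for pkg in ("coqui-tts", "torch", "torchvision", "torchaudio",
            "librosa", "sounddevice", "soundfile", "appdirs"):
    try:
        print(f"{pkg}: {version(pkg)}")
    except PackageNotFoundError:
        print(f"{pkg}: not installed")
```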

--- abstractvoice-0.4.1/abstractvoice/model_manager.py
+++ /dev/null
@@ -1,384 +0,0 @@
-"""Model management utilities for AbstractVoice.
-
-This module provides utilities for downloading, caching, and managing TTS models
-to ensure offline functionality and better user experience.
-"""
-
-import os
-import sys
-import time
-import threading
-from typing import List, Optional, Dict, Any
-from pathlib import Path
-
-
-def _import_tts():
-    """Import TTS with helpful error message if dependencies missing."""
-    try:
-        from TTS.api import TTS
-        from TTS.utils.manage import ModelManager
-        return TTS, ModelManager
-    except ImportError as e:
-        raise ImportError(
-            "TTS functionality requires coqui-tts. Install with:\n"
-            "  pip install abstractvoice[tts]  # For TTS only\n"
-            "  pip install abstractvoice[voice-full]  # For complete voice functionality\n"
-            "  pip install abstractvoice[all]  # For all features\n"
-            f"Original error: {e}"
-        ) from e
-
-
-class ModelManager:
-    """Manages TTS model downloading, caching, and offline availability."""
-
-    # Essential models for immediate functionality
-    ESSENTIAL_MODELS = [
-        "tts_models/en/ljspeech/fast_pitch",      # Lightweight, no espeak dependency
-        "tts_models/en/ljspeech/tacotron2-DDC",   # Reliable fallback
-    ]
-
-    # Premium models for best quality (downloaded on-demand)
-    PREMIUM_MODELS = [
-        "tts_models/en/ljspeech/vits",        # Best quality English
-        "tts_models/fr/css10/vits",           # Best quality French
-        "tts_models/es/mai/tacotron2-DDC",    # Best quality Spanish
-        "tts_models/de/thorsten/vits",        # Best quality German
-        "tts_models/it/mai_male/vits",        # Best quality Italian
-    ]
-
-    # All supported models
-    ALL_MODELS = ESSENTIAL_MODELS + PREMIUM_MODELS
-
-    def __init__(self, debug_mode: bool = False):
-        self.debug_mode = debug_mode
-        self._cache_dir = None
-        self._model_manager = None
-
-    @property
-    def cache_dir(self) -> str:
-        """Get the TTS model cache directory."""
-        if self._cache_dir is None:
-            # Check common cache locations
-            import appdirs
-            potential_dirs = [
-                os.path.expanduser("~/.cache/tts"),
-                appdirs.user_data_dir("tts"),
-                os.path.expanduser("~/.local/share/tts"),
-            ]
-
-            # Find existing cache or use default
-            for cache_dir in potential_dirs:
-                if os.path.exists(cache_dir):
-                    self._cache_dir = cache_dir
-                    break
-            else:
-                # Use appdirs default
-                self._cache_dir = appdirs.user_data_dir("tts")
-
-        return self._cache_dir
-
-    @property
-    def model_manager(self):
-        """Get TTS ModelManager instance."""
-        if self._model_manager is None:
-            _, ModelManagerClass = _import_tts()
-            self._model_manager = ModelManagerClass()
-        return self._model_manager
-
-    def check_model_cache(self, model_name: str) -> bool:
-        """Check if a model is already cached locally."""
-        try:
-            # Look for model files in cache
-            model_path = self._get_model_path(model_name)
-            if model_path and os.path.exists(model_path):
-                # Check for essential model files
-                model_files = ["model.pth", "config.json"]
-                return any(
-                    os.path.exists(os.path.join(model_path, f))
-                    for f in model_files
-                )
-            return False
-        except Exception as e:
-            if self.debug_mode:
-                print(f"Error checking cache for {model_name}: {e}")
-            return False
-
-    def _get_model_path(self, model_name: str) -> Optional[str]:
-        """Get the expected cache path for a model."""
-        # Convert model name to cache directory structure
-        # e.g., "tts_models/en/ljspeech/vits" -> "tts_models--en--ljspeech--vits"
-        cache_name = model_name.replace("/", "--")
-        return os.path.join(self.cache_dir, cache_name)
-
-    def get_cached_models(self) -> List[str]:
-        """Get list of models that are cached locally."""
-        if not os.path.exists(self.cache_dir):
-            return []
-
-        cached = []
-        try:
-            for item in os.listdir(self.cache_dir):
-                if item.startswith("tts_models--"):
-                    # Convert cache name back to model name
-                    model_name = item.replace("--", "/")
-                    if self.check_model_cache(model_name):
-                        cached.append(model_name)
-        except Exception as e:
-            if self.debug_mode:
-                print(f"Error listing cached models: {e}")
-
-        return cached
-
-    def download_model(self, model_name: str, force: bool = False) -> bool:
-        """Download a specific model."""
-        if not force and self.check_model_cache(model_name):
-            if self.debug_mode:
-                print(f"✅ {model_name} already cached")
-            return True
-
-        try:
-            TTS, _ = _import_tts()
-
-            print(f"📥 Downloading {model_name}...")
-            start_time = time.time()
-
-            # Initialize TTS to trigger download
-            tts = TTS(model_name=model_name, progress_bar=True)
-
-            download_time = time.time() - start_time
-            print(f"✅ Downloaded {model_name} in {download_time:.1f}s")
-            return True
-
-        except Exception as e:
-            print(f"❌ Failed to download {model_name}: {e}")
-            return False
-
-    def download_all_models(self) -> bool:
-        """Download all supported models."""
-        print("📦 Downloading all TTS models...")
-
-        success_count = 0
-        for model in self.ALL_MODELS:
-            if self.download_model(model):
-                success_count += 1
-
-        print(f"✅ Downloaded {success_count}/{len(self.ALL_MODELS)} models")
-        return success_count > 0
-
-    def get_offline_model(self, preferred_models: List[str]) -> Optional[str]:
-        """Get the best available cached model from a preference list."""
-        cached_models = self.get_cached_models()
-
-        # Return first preferred model that's cached
-        for model in preferred_models:
-            if model in cached_models:
-                return model
-
-        # Fallback to any cached model
-        if cached_models:
-            return cached_models[0]
-
-        return None
-
-    def print_status(self):
-        """Print current model cache status."""
-        print("🎭 TTS Model Cache Status")
-        print("=" * 50)
-
-        cached_models = self.get_cached_models()
-
-        if not cached_models:
-            print("❌ No models cached - first use will require internet")
-            print("\nTo download essential models for offline use:")
-            print("  abstractvoice download-models")
-            return
-
-        print(f"✅ {len(cached_models)} models cached for offline use:")
-
-        # Group by category
-        essential_cached = [m for m in cached_models if m in self.ESSENTIAL_MODELS]
-        premium_cached = [m for m in cached_models if m in self.PREMIUM_MODELS]
-        other_cached = [m for m in cached_models if m not in self.ALL_MODELS]
-
-        if essential_cached:
-            print(f"\n📦 Essential Models ({len(essential_cached)}):")
-            for model in essential_cached:
-                print(f"  ✅ {model}")
-
-        if premium_cached:
-            print(f"\n✨ Premium Models ({len(premium_cached)}):")
-            for model in premium_cached:
-                print(f"  ✅ {model}")
-
-        if other_cached:
-            print(f"\n🔧 Other Models ({len(other_cached)}):")
-            for model in other_cached:
-                print(f"  ✅ {model}")
-
-        print(f"\n💾 Cache location: {self.cache_dir}")
-
-        # Check cache size
-        try:
-            total_size = 0
-            for root, dirs, files in os.walk(self.cache_dir):
-                for file in files:
-                    total_size += os.path.getsize(os.path.join(root, file))
-            size_mb = total_size / (1024 * 1024)
-            print(f"💽 Total cache size: {size_mb:.1f} MB")
-        except:
-            pass
-
-    def clear_cache(self, confirm: bool = False) -> bool:
-        """Clear the model cache."""
-        if not confirm:
-            print("⚠️ This will delete all cached TTS models.")
-            print("Use clear_cache(confirm=True) to proceed.")
-            return False
-
-        try:
-            import shutil
-            if os.path.exists(self.cache_dir):
-                shutil.rmtree(self.cache_dir)
-                print(f"✅ Cleared model cache: {self.cache_dir}")
-                return True
-            else:
-                print("ℹ️ No cache to clear")
-                return True
-        except Exception as e:
-            print(f"❌ Failed to clear cache: {e}")
-            return False
-
-
-def download_models_cli():
-    """CLI entry point for downloading models."""
-    import argparse
-    import sys
-
-    parser = argparse.ArgumentParser(description="Download TTS models for offline use")
-    parser.add_argument("--essential", action="store_true",
-                        help="Download only essential models (recommended)")
-    parser.add_argument("--all", action="store_true",
-                        help="Download all supported models")
-    parser.add_argument("--model", type=str,
-                        help="Download specific model by name")
-    parser.add_argument("--language", type=str,
-                        help="Download models for specific language (en, fr, es, de, it)")
-    parser.add_argument("--status", action="store_true",
-                        help="Show current cache status")
-    parser.add_argument("--clear", action="store_true",
-                        help="Clear model cache")
-    parser.add_argument("--debug", action="store_true",
-                        help="Enable debug output")
-
-    args = parser.parse_args()
-
-    # Use VoiceManager for consistent programmatic API
-    from abstractvoice.voice_manager import VoiceManager
-
-    vm = VoiceManager(debug_mode=args.debug)
-
-    if args.status:
-        # Use VoiceManager's model status
-        status = vm.get_cache_status()
-        print("🎭 TTS Model Cache Status")
-        print("=" * 50)
-
-        if status['total_cached'] == 0:
-            print("❌ No models cached - first use will require internet")
-            print("\nTo download essential models for offline use:")
-            print("  abstractvoice download-models --essential")
-            return
-
-        print(f"✅ {status['total_cached']} models cached for offline use")
-        print(f"📦 Essential model cached: {status['essential_model_cached']}")
-        print(f"🌐 Ready for offline: {status['ready_for_offline']}")
-        print(f"💾 Cache location: {status['cache_dir']}")
-        print(f"💽 Total cache size: {status['total_size_mb']} MB")
-
-        # Show cached models
-        cached_models = status['cached_models']
-        essential_model = status['essential_model']
-
-        print(f"\n📦 Essential Model:")
-        if essential_model in cached_models:
-            print(f"  ✅ {essential_model}")
-        else:
-            print(f"  📥 {essential_model} (not cached)")
-
-        print(f"\n📋 All Cached Models ({len(cached_models)}):")
-        for model in sorted(cached_models)[:10]:  # Show first 10
-            print(f"  ✅ {model}")
-        if len(cached_models) > 10:
-            print(f"  ... and {len(cached_models) - 10} more")
-        return
-
-    if args.clear:
-        # Use ModelManager for low-level cache operations
-        manager = ModelManager(debug_mode=args.debug)
-        manager.clear_cache(confirm=True)
-        return
-
-    if args.model:
-        # Use ModelManager for direct model download
-        manager = ModelManager(debug_mode=args.debug)
-        success = manager.download_model(args.model)
-        sys.exit(0 if success else 1)
-
-    if args.language:
-        # Use simple model download for language-specific models
-        print(f"📦 Downloading models for {args.language}...")
-
-        # Get available models for this language
-        models = vm.list_available_models(args.language)
-        if args.language not in models:
-            print(f"❌ Language '{args.language}' not supported")
-            print(f"   Available languages: {list(vm.list_available_models().keys())}")
-            sys.exit(1)
-
-        # Download the default model for this language
-        language_models = models[args.language]
-        default_model = None
-        for voice_id, voice_info in language_models.items():
-            if voice_info.get('default', False):
-                default_model = f"{args.language}.{voice_id}"
-                break
-
-        if not default_model:
-            # Take the first available model
-            first_voice = list(language_models.keys())[0]
-            default_model = f"{args.language}.{first_voice}"
-
-        print(f"  📥 Downloading {default_model}...")
-        success = vm.download_model(default_model)
-
-        if success:
-            print(f"✅ Downloaded {default_model}")
-            print(f"✅ {args.language.upper()} voice is now ready!")
-        else:
-            print(f"❌ Failed to download {default_model}")
-        sys.exit(0 if success else 1)
-
-    if args.all:
-        # Use ModelManager for downloading all models
-        manager = ModelManager(debug_mode=args.debug)
-        success = manager.download_all_models()
-        sys.exit(0 if success else 1)
-
-    # Default to essential models via VoiceManager
-    if args.essential or (not args.all and not args.model and not args.language):
-        print("📦 Downloading essential TTS model for offline use...")
-
-        # Use the simple ensure_ready method
-        success = vm.ensure_ready(auto_download=True)
-
-        if success:
-            print("✅ Essential model downloaded successfully!")
-            print("🎉 AbstractVoice is now ready for offline use!")
-        else:
-            print("❌ Essential model download failed")
-            print("   Check your internet connection")
-        sys.exit(0 if success else 1)
-
-
-if __name__ == "__main__":
-    download_models_cli()

All other files are unchanged between 0.4.1 and 0.4.6 (the 17 entries marked +0 -0 in the file list above).