abstractvoice 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -38,8 +38,8 @@ class VoiceManager:
38
38
  # Smart language configuration - high quality stable defaults
39
39
  LANGUAGES = {
40
40
  'en': {
41
- 'default': 'tts_models/en/ljspeech/vits', # High quality premium voice
42
- 'premium': 'tts_models/en/ljspeech/vits', # Use same stable model
41
+ 'default': 'tts_models/en/ljspeech/tacotron2-DDC', # Reliable, compatible voice
42
+ 'premium': 'tts_models/en/ljspeech/vits', # High quality (requires espeak)
43
43
  'name': 'English'
44
44
  },
45
45
  'fr': {
@@ -70,15 +70,39 @@ class VoiceManager:
70
70
  # Complete voice catalog with metadata
71
71
  VOICE_CATALOG = {
72
72
  'en': {
73
- 'vits_premium': {
74
- 'model': 'tts_models/en/ljspeech/vits',
75
- 'quality': 'premium',
73
+ 'tacotron2': {
74
+ 'model': 'tts_models/en/ljspeech/tacotron2-DDC',
75
+ 'quality': 'good',
76
76
  'gender': 'female',
77
77
  'accent': 'US English',
78
78
  'license': 'Open source (LJSpeech)',
79
- 'requires': 'espeak-ng'
79
+ 'requires': 'none'
80
+ },
81
+ 'jenny': {
82
+ 'model': 'tts_models/en/jenny/jenny',
83
+ 'quality': 'excellent',
84
+ 'gender': 'female',
85
+ 'accent': 'US English',
86
+ 'license': 'Open source (Jenny)',
87
+ 'requires': 'none'
88
+ },
89
+ 'ek1': {
90
+ 'model': 'tts_models/en/ek1/tacotron2',
91
+ 'quality': 'excellent',
92
+ 'gender': 'male',
93
+ 'accent': 'British English',
94
+ 'license': 'Open source (EK1)',
95
+ 'requires': 'none'
96
+ },
97
+ 'sam': {
98
+ 'model': 'tts_models/en/sam/tacotron-DDC',
99
+ 'quality': 'good',
100
+ 'gender': 'male',
101
+ 'accent': 'US English',
102
+ 'license': 'Open source (Sam)',
103
+ 'requires': 'none'
80
104
  },
81
- 'fast_pitch_reliable': {
105
+ 'fast_pitch': {
82
106
  'model': 'tts_models/en/ljspeech/fast_pitch',
83
107
  'quality': 'good',
84
108
  'gender': 'female',
@@ -86,12 +110,12 @@ class VoiceManager:
86
110
  'license': 'Open source (LJSpeech)',
87
111
  'requires': 'none'
88
112
  },
89
- 'vctk_multi': {
90
- 'model': 'tts_models/en/vctk/vits',
113
+ 'vits': {
114
+ 'model': 'tts_models/en/ljspeech/vits',
91
115
  'quality': 'premium',
92
- 'gender': 'multiple',
93
- 'accent': 'British English',
94
- 'license': 'Open source (VCTK)',
116
+ 'gender': 'female',
117
+ 'accent': 'US English',
118
+ 'license': 'Open source (LJSpeech)',
95
119
  'requires': 'espeak-ng'
96
120
  }
97
121
  },
@@ -191,6 +215,20 @@ class VoiceManager:
191
215
  lang_name = self.LANGUAGES[self.language]['name']
192
216
  print(f"🌍 Using {lang_name} voice: {tts_model}")
193
217
 
218
+ # Initialize TTS engine with instant setup for new users
219
+ from .instant_setup import ensure_instant_tts, get_instant_model, is_model_cached
220
+
221
+ # If using default VITS model but it's not cached, use instant setup
222
+ if tts_model == "tts_models/en/ljspeech/vits" and not is_model_cached(tts_model):
223
+ if debug_mode:
224
+ print("🚀 First-time setup: ensuring instant TTS availability...")
225
+
226
+ # Try instant setup with lightweight model
227
+ if ensure_instant_tts():
228
+ tts_model = get_instant_model() # Use fast_pitch instead
229
+ if debug_mode:
230
+ print(f"✅ Using essential model: {tts_model}")
231
+
194
232
  # Initialize TTS engine using lazy import
195
233
  TTSEngine = _import_tts_engine()
196
234
  self.tts_engine = TTSEngine(
@@ -391,32 +429,68 @@ class VoiceManager:
391
429
  return self.speed
392
430
 
393
431
  def set_tts_model(self, model_name):
394
- """Change the TTS model.
395
-
432
+ """Change the TTS model safely without memory conflicts.
433
+
396
434
  Available models (all pure Python, cross-platform):
397
435
  - "tts_models/en/ljspeech/fast_pitch" (default, recommended)
398
436
  - "tts_models/en/ljspeech/glow-tts" (alternative)
399
437
  - "tts_models/en/ljspeech/tacotron2-DDC" (legacy)
400
-
438
+
401
439
  Args:
402
440
  model_name: TTS model name to use
403
-
441
+
404
442
  Returns:
405
443
  True if successful
406
-
444
+
407
445
  Example:
408
446
  vm.set_tts_model("tts_models/en/ljspeech/glow-tts")
409
447
  """
410
448
  # Stop any current speech
411
449
  self.stop_speaking()
412
-
450
+
451
+ # CRITICAL: Crash-safe cleanup of old TTS engine
452
+ if hasattr(self, 'tts_engine') and self.tts_engine:
453
+ try:
454
+ # Stop all audio and cleanup player
455
+ if hasattr(self.tts_engine, 'audio_player') and self.tts_engine.audio_player:
456
+ # Try stop method if available
457
+ if hasattr(self.tts_engine.audio_player, 'stop'):
458
+ self.tts_engine.audio_player.stop()
459
+ self.tts_engine.audio_player.cleanup()
460
+
461
+ # Force cleanup of TTS object and release GPU memory
462
+ if hasattr(self.tts_engine, 'tts') and self.tts_engine.tts:
463
+ # Clear CUDA cache if using GPU
464
+ try:
465
+ import torch
466
+ if torch.cuda.is_available():
467
+ torch.cuda.empty_cache()
468
+ except:
469
+ pass
470
+
471
+ del self.tts_engine.tts
472
+
473
+ # Clear the engine itself
474
+ del self.tts_engine
475
+ self.tts_engine = None
476
+
477
+ # Force garbage collection to prevent memory leaks
478
+ import gc
479
+ gc.collect()
480
+
481
+ except Exception as e:
482
+ if self.debug_mode:
483
+ print(f"Warning: TTS cleanup issue: {e}")
484
+ # Force clear even if cleanup failed
485
+ self.tts_engine = None
486
+
413
487
  # Reinitialize TTS engine with new model using lazy import
414
488
  TTSEngine = _import_tts_engine()
415
489
  self.tts_engine = TTSEngine(
416
490
  model_name=model_name,
417
491
  debug_mode=self.debug_mode
418
492
  )
419
-
493
+
420
494
  # Restore callbacks
421
495
  self.tts_engine.on_playback_start = self._on_tts_start
422
496
  self.tts_engine.on_playback_end = self._on_tts_end
@@ -474,13 +548,37 @@ class VoiceManager:
474
548
 
475
549
  # Select best model for this language
476
550
  selected_model = self._select_best_model(language)
477
- models_to_try = [selected_model, self.SAFE_FALLBACK]
551
+
552
+ # CRITICAL FIX: Check if model is available, download if not
553
+ from .instant_setup import is_model_cached
554
+ from .simple_model_manager import download_model
555
+
556
+ if not is_model_cached(selected_model):
557
+ if self.debug_mode:
558
+ print(f"📥 Model {selected_model} not cached, downloading...")
559
+
560
+ # Try to download the model
561
+ success = download_model(selected_model)
562
+ if not success:
563
+ if self.debug_mode:
564
+ print(f"❌ Failed to download {selected_model}")
565
+ # If download fails and it's not English, we have a problem
566
+ if language != 'en':
567
+ print(f"❌ Cannot switch to {self.LANGUAGES[language]['name']}: Model download failed")
568
+ print(f" Try: abstractvoice download-models --language {language}")
569
+ return False
570
+
571
+ models_to_try = [selected_model]
572
+
573
+ # Only add fallback if it's different from selected
574
+ if selected_model != self.SAFE_FALLBACK:
575
+ models_to_try.append(self.SAFE_FALLBACK)
478
576
 
479
577
  for model_name in models_to_try:
480
578
  try:
481
579
  if self.debug_mode:
482
580
  lang_name = self.LANGUAGES[language]['name']
483
- print(f"🌍 Switching to {lang_name} voice: {model_name}")
581
+ print(f"🌍 Loading {lang_name} voice: {model_name}")
484
582
 
485
583
  # Reinitialize TTS engine
486
584
  TTSEngine = _import_tts_engine()
@@ -505,12 +603,16 @@ class VoiceManager:
505
603
 
506
604
  except Exception as e:
507
605
  if self.debug_mode:
508
- print(f"⚠️ Model {model_name} failed: {e}")
606
+ print(f"⚠️ Model {model_name} failed to load: {e}")
607
+ # Don't silently continue - report the failure
608
+ if model_name == selected_model and language != 'en':
609
+ print(f"❌ Failed to load {lang_name} voice model")
610
+ print(f" The model might be corrupted. Try:")
611
+ print(f" abstractvoice download-models --language {language}")
509
612
  continue
510
613
 
511
614
  # All models failed
512
- if self.debug_mode:
513
- print(f"❌ All models failed for language '{language}'")
615
+ print(f"❌ Cannot switch to {self.LANGUAGES[language]['name']}: No working models")
514
616
  return False
515
617
 
516
618
  def get_language(self):
@@ -782,15 +884,29 @@ class VoiceManager:
782
884
  return False
783
885
 
784
886
  voice_info = self.VOICE_CATALOG[language][voice_id]
887
+ model_name = voice_info['model']
888
+
889
+ # CRITICAL FIX: Download model if not cached
890
+ from .instant_setup import is_model_cached
891
+ from .simple_model_manager import download_model
785
892
 
786
- # Check compatibility
787
- if voice_info['requires'] == 'espeak-ng' and not self._test_model_compatibility(voice_info['model']):
893
+ if not is_model_cached(model_name):
894
+ print(f"📥 Voice model '{voice_id}' not cached, downloading...")
895
+ success = download_model(model_name)
896
+ if not success:
897
+ print(f"❌ Failed to download voice '{voice_id}'")
898
+ print(f" Check your internet connection and try again")
899
+ return False
900
+ print(f"✅ Voice model '{voice_id}' downloaded successfully")
901
+
902
+ # Check compatibility after download
903
+ if voice_info['requires'] == 'espeak-ng' and not self._test_model_compatibility(model_name):
788
904
  if self.debug_mode:
789
905
  print(f"⚠️ Voice '{voice_id}' requires espeak-ng. Install it for premium quality.")
790
- return False
906
+ # Don't fail - try to load anyway
907
+ # return False
791
908
 
792
909
  # Set the specific voice
793
- model_name = voice_info['model']
794
910
  if self.debug_mode:
795
911
  print(f"🎭 Setting {language} voice to: {voice_id}")
796
912
  print(f" Model: {model_name}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: abstractvoice
3
- Version: 0.4.1
3
+ Version: 0.5.0
4
4
  Summary: A modular Python library for voice interactions with AI systems
5
5
  Author-email: Laurent-Philippe Albou <contact@abstractcore.ai>
6
6
  License-Expression: MIT
@@ -19,6 +19,14 @@ Description-Content-Type: text/markdown
19
19
  License-File: LICENSE
20
20
  Requires-Dist: numpy>=1.24.0
21
21
  Requires-Dist: requests>=2.31.0
22
+ Requires-Dist: appdirs>=1.4.0
23
+ Requires-Dist: coqui-tts<0.30.0,>=0.27.0
24
+ Requires-Dist: torch<2.4.0,>=2.0.0
25
+ Requires-Dist: torchvision<0.19.0,>=0.15.0
26
+ Requires-Dist: torchaudio<2.4.0,>=2.0.0
27
+ Requires-Dist: librosa>=0.10.0
28
+ Requires-Dist: sounddevice>=0.4.6
29
+ Requires-Dist: soundfile>=0.12.1
22
30
  Provides-Extra: voice
23
31
  Requires-Dist: sounddevice>=0.4.6; extra == "voice"
24
32
  Requires-Dist: webrtcvad>=2.0.10; extra == "voice"
@@ -164,38 +172,51 @@ AbstractVoice automatically detects espeak-ng and upgrades to premium quality vo
164
172
 
165
173
  ## Quick Start
166
174
 
167
- ### ⚡ Instant TTS (v0.4.0+)
175
+ ### ⚡ Instant TTS (v0.5.0+)
168
176
 
169
177
  ```python
170
178
  from abstractvoice import VoiceManager
171
179
 
172
- # Initialize voice manager - automatically downloads essential model if needed
180
+ # Initialize voice manager - works immediately with included dependencies
173
181
  vm = VoiceManager()
174
182
 
175
- # Text-to-speech works immediately!
183
+ # Text-to-speech works right away!
176
184
  vm.speak("Hello! TTS works out of the box!")
185
+
186
+ # Language switching with automatic model download
187
+ vm.set_language('fr')
188
+ vm.speak("Bonjour! Le français fonctionne aussi!")
177
189
  ```
178
190
 
179
- **That's it!** AbstractVoice v0.4.0+ automatically:
180
- - ✅ Downloads essential English model (107MB) on first use
181
- - ✅ Caches models permanently for offline use
182
- - ✅ Works immediately after first setup
191
+ **That's it!** AbstractVoice v0.5.0+ automatically:
192
+ - ✅ Includes essential TTS dependencies in base installation
193
+ - ✅ Downloads models automatically when switching languages/voices
194
+ - ✅ Works immediately after `pip install abstractvoice`
195
+ - ✅ No silent failures — clear error messages if a download fails
183
196
  - ✅ No complex configuration needed
184
197
 
185
- ### 🌍 Multi-Language Support
198
+ ### 🌍 Multi-Language Support (Auto-Download in v0.5.0+)
186
199
 
187
200
  ```python
188
- # Download and use French voice
189
- vm.download_model('fr.css10_vits') # Downloads automatically
201
+ # Simply switch language - downloads model automatically if needed!
190
202
  vm.set_language('fr')
191
203
  vm.speak("Bonjour! Je parle français maintenant.")
192
204
 
193
- # Download and use German voice
194
- vm.download_model('de.thorsten_vits')
205
+ # Switch to German - no manual download needed
195
206
  vm.set_language('de')
196
207
  vm.speak("Hallo! Ich spreche jetzt Deutsch.")
208
+
209
+ # Spanish, Italian also supported
210
+ vm.set_language('es')
211
+ vm.speak("¡Hola! Hablo español ahora.")
212
+
213
+ # If download fails, you'll get clear error messages with instructions
214
+ # Example: "❌ Cannot switch to French: Model download failed"
215
+ # " Try: abstractvoice download-models --language fr"
197
216
  ```
198
217
 
218
+ **New in v0.5.0:** No more manual `download_model()` calls! Language switching handles downloads automatically.
219
+
199
220
  ### 🔧 Check System Status
200
221
 
201
222
  ```python
@@ -1363,20 +1384,22 @@ abstractvoice check-deps
1363
1384
 
1364
1385
  ### CLI Voice Commands
1365
1386
 
1366
- In the CLI REPL, use these commands:
1387
+ In the CLI REPL, use these commands (v0.5.0+):
1367
1388
 
1368
1389
  ```bash
1369
1390
  # List all available voices with download status
1370
1391
  /setvoice
1371
1392
 
1372
- # Download and set specific voice
1373
- /setvoice fr.css10_vits # French CSS10 VITS
1374
- /setvoice de.thorsten_vits # German Thorsten
1375
- /setvoice it.mai_male_vits # Italian Male
1393
+ # Automatically download and set specific voice (NEW in v0.5.0!)
1394
+ /setvoice fr.css10_vits # Downloads French CSS10 if needed
1395
+ /setvoice de.thorsten_vits # Downloads German Thorsten if needed
1396
+ /setvoice it.mai_male_vits # Downloads Italian Male if needed
1397
+ /setvoice en.jenny # Downloads Jenny voice if needed
1376
1398
 
1377
- # Change language
1378
- /language fr
1379
- /language de
1399
+ # Change language (automatically downloads models if needed - NEW!)
1400
+ /language fr # Switches to French, downloads if needed
1401
+ /language de # Switches to German, downloads if needed
1402
+ /language es # Switches to Spanish, downloads if needed
1380
1403
 
1381
1404
  # Voice controls
1382
1405
  /pause # Pause current speech
@@ -1387,6 +1410,8 @@ In the CLI REPL, use these commands:
1387
1410
  /exit
1388
1411
  ```
1389
1412
 
1413
+ **New in v0.5.0:** Language and voice commands now automatically download missing models with progress indicators. No more silent failures!
1414
+
1390
1415
  ## Perspectives
1391
1416
 
1392
1417
  This is a test project that I designed with examples to work with Ollama, but I will adapt the examples and abstractvoice to work with any LLM provider (Anthropic, OpenAI, etc.).
@@ -1,23 +1,23 @@
1
- abstractvoice/__init__.py,sha256=HZYSCQ-xztoj7gWr5dVLBsGh4AYrViTYe8-ze4b-ynY,1011
1
+ abstractvoice/__init__.py,sha256=EM_gfVmMvSOGeYPfMd8-AFThLrKwWh8TN4JFqdn0deU,1011
2
2
  abstractvoice/__main__.py,sha256=e6jhoONg3uwwPUCdnr68bSRTT1RrpWy2DrOJ6ozMJVc,4775
3
3
  abstractvoice/dependency_check.py,sha256=BUUADz4un4_FCZzNpgwk1qpJ6yqVi5Pvjfd3JLS8hAI,10045
4
- abstractvoice/model_manager.py,sha256=hnN3PTaY109mjTjgBuOB8yfAYVlMpqtMVBljLASRUi4,14275
4
+ abstractvoice/instant_setup.py,sha256=_Q8T6tcMSor--1XPlgdOya3lvC-VtClHz4FSgDOXFNI,2667
5
5
  abstractvoice/recognition.py,sha256=4KtDUDFixEYuBUMDH2fWaD9csKlwA9tqXkMAkyQMSMo,11259
6
- abstractvoice/simple_model_manager.py,sha256=DTvEBEPtfu9zJA6V3S8SaWQ_pDYFlK_SoOMlnnRjBtk,13801
7
- abstractvoice/voice_manager.py,sha256=n7QHZPR1LWh3RjEBQ3LVrBKoOr5zccc3soKah5CBrac,32584
6
+ abstractvoice/simple_model_manager.py,sha256=DfmrF3t3-V6rIWs4IvJmB_0Cck0LBY3YMvMzHGsHg4Q,19423
7
+ abstractvoice/voice_manager.py,sha256=iw8Lu4VhsSMk0HQYlUU2NWAQR2C4dDiCAsiFjaeldiU,37664
8
8
  abstractvoice/examples/__init__.py,sha256=94vpKJDlfOrEBIUETg-57Q5Z7fYDidg6v4UzV7V_lZA,60
9
9
  abstractvoice/examples/cli_repl.py,sha256=kIgvgrGfyejX8-VFeFhvAVqrp3X-s-K3Ul861aM4Bh8,44220
10
- abstractvoice/examples/voice_cli.py,sha256=SYnFkz9KWWTISLgS2beJzb2tzLoz4dXpHQBWpKgS0sc,11585
10
+ abstractvoice/examples/voice_cli.py,sha256=VdgDT01wly8HjWF53t_hDLkJoZc9FWQq2I-nxcSIAp8,11592
11
11
  abstractvoice/examples/web_api.py,sha256=0g5LKJpl7fZepPQJL25AcdaevV-xv34VqqyWGYYchPk,6376
12
12
  abstractvoice/stt/__init__.py,sha256=PFc6la3tTkxT4TJYwb0PnMIahM_hFtU4pNQdeKmbooo,120
13
13
  abstractvoice/stt/transcriber.py,sha256=GdaH1OsCHu4Vu9rUsQlzH6X9bfcnoiK5tGz1AW_uj6Q,5481
14
14
  abstractvoice/tts/__init__.py,sha256=WgJrxqdc_qaRyfFt1jbgMQD9S757jYuBpDzMRB02TFs,122
15
- abstractvoice/tts/tts_engine.py,sha256=HstJMwxTbZJx87Q-CY4mWeKHKbj17DhdvDdlch3xUNQ,49725
15
+ abstractvoice/tts/tts_engine.py,sha256=iTa9eBH9vPH8VR2qoJX6nmNi6yERvA4Uz1jPu4OXrTA,55074
16
16
  abstractvoice/vad/__init__.py,sha256=RIIbFw25jNHgel06E4VvTWJnXjwjeFZ98m1Vx9hVjuo,119
17
17
  abstractvoice/vad/voice_detector.py,sha256=ghrhpDFlIR5TsMB2gpigXY6t5c_1yZ7vEX1imAMgWjc,3166
18
- abstractvoice-0.4.1.dist-info/licenses/LICENSE,sha256=TiDPM5WcFRQPoC5e46jGMeMppZ-eu0eFx_HytjE49bk,1105
19
- abstractvoice-0.4.1.dist-info/METADATA,sha256=AN_KjRcI2ZaetOIuAb9JOf4dvFWJI7AckQpV25wq2tI,40713
20
- abstractvoice-0.4.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
21
- abstractvoice-0.4.1.dist-info/entry_points.txt,sha256=3bDX2dNOGvrsTx1wZ_o_hVgmM_a2zbcHc1ZkL154rN4,72
22
- abstractvoice-0.4.1.dist-info/top_level.txt,sha256=a1qyxqgF1O8cJtPKpcJuImGZ_uXqPNghbLZ9gp-UiOo,14
23
- abstractvoice-0.4.1.dist-info/RECORD,,
18
+ abstractvoice-0.5.0.dist-info/licenses/LICENSE,sha256=TiDPM5WcFRQPoC5e46jGMeMppZ-eu0eFx_HytjE49bk,1105
19
+ abstractvoice-0.5.0.dist-info/METADATA,sha256=cH2FfruHztr3vENO6zo93yuKHkf6KmqI0-C1GnHCYA0,42154
20
+ abstractvoice-0.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
21
+ abstractvoice-0.5.0.dist-info/entry_points.txt,sha256=3bDX2dNOGvrsTx1wZ_o_hVgmM_a2zbcHc1ZkL154rN4,72
22
+ abstractvoice-0.5.0.dist-info/top_level.txt,sha256=a1qyxqgF1O8cJtPKpcJuImGZ_uXqPNghbLZ9gp-UiOo,14
23
+ abstractvoice-0.5.0.dist-info/RECORD,,