abstractvoice 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
abstractvoice/__main__.py CHANGED
@@ -15,20 +15,25 @@ def print_examples():
15
15
  print(" cli - Command-line REPL example")
16
16
  print(" web - Web API example")
17
17
  print(" simple - Simple usage example")
18
- print("\nUsage: python -m abstractvoice <example> [args...]")
18
+ print("\nUsage: python -m abstractvoice <example> [--language <lang>] [args...]")
19
+ print("\nSupported languages: en, fr, es, de, it, ru, multilingual")
20
+ print("\nExamples:")
21
+ print(" python -m abstractvoice cli --language fr # French CLI")
22
+ print(" python -m abstractvoice simple --language ru # Russian simple example")
19
23
 
20
24
 
21
25
  def simple_example():
22
26
  """Run a simple example demonstrating basic usage."""
23
27
  from abstractvoice import VoiceManager
24
28
  import time
25
-
29
+
26
30
  print("Simple AbstractVoice Example")
27
31
  print("============================")
28
32
  print("This example demonstrates basic TTS and STT functionality.")
33
+ print("(Use --language argument to test different languages)")
29
34
  print()
30
-
31
- # Initialize voice manager
35
+
36
+ # Initialize voice manager (can be overridden with --language)
32
37
  manager = VoiceManager(debug_mode=True)
33
38
 
34
39
  try:
@@ -91,17 +96,22 @@ def main():
91
96
  """Main entry point."""
92
97
  parser = argparse.ArgumentParser(description="AbstractVoice examples")
93
98
  parser.add_argument("example", nargs="?", help="Example to run (cli, web, simple)")
94
-
95
- # Parse just the first argument
99
+ parser.add_argument("--language", "--lang", default="en",
100
+ choices=["en", "fr", "es", "de", "it", "ru", "multilingual"],
101
+ help="Voice language for examples")
102
+
103
+ # Parse just the first argument and language
96
104
  args, remaining = parser.parse_known_args()
97
-
105
+
98
106
  if not args.example:
99
107
  print_examples()
100
108
  return
101
-
102
- # Set remaining args as sys.argv for the examples
109
+
110
+ # Set remaining args as sys.argv for the examples, including language
111
+ if args.language != "en":
112
+ remaining = ["--language", args.language] + remaining
103
113
  sys.argv = [sys.argv[0]] + remaining
104
-
114
+
105
115
  if args.example == "cli":
106
116
  from abstractvoice.examples.cli_repl import main
107
117
  main()
@@ -37,21 +37,28 @@ class VoiceREPL(cmd.Cmd):
37
37
  ruler = "" # No horizontal rule line
38
38
  use_rawinput = True
39
39
 
40
- def __init__(self, api_url="http://localhost:11434/api/chat",
41
- model="granite3.3:2b", debug_mode=False):
40
+ def __init__(self, api_url="http://localhost:11434/api/chat",
41
+ model="granite3.3:2b", debug_mode=False, language="en", tts_model=None):
42
42
  super().__init__()
43
-
43
+
44
44
  # Debug mode
45
45
  self.debug_mode = debug_mode
46
-
46
+
47
47
  # API settings
48
48
  self.api_url = api_url
49
49
  self.model = model
50
50
  self.temperature = 0.4
51
51
  self.max_tokens = 4096
52
-
53
- # Initialize voice manager
54
- self.voice_manager = VoiceManager(debug_mode=debug_mode)
52
+
53
+ # Language settings
54
+ self.current_language = language
55
+
56
+ # Initialize voice manager with language support
57
+ self.voice_manager = VoiceManager(
58
+ language=language,
59
+ tts_model=tts_model,
60
+ debug_mode=debug_mode
61
+ )
55
62
 
56
63
  # Settings
57
64
  self.use_tts = True
@@ -83,10 +90,12 @@ class VoiceREPL(cmd.Cmd):
83
90
  def _get_intro(self):
84
91
  """Generate intro message with help."""
85
92
  intro = f"\n{Colors.BOLD}Welcome to AbstractVoice CLI REPL{Colors.END}\n"
86
- intro += f"API: {self.api_url} | Model: {self.model}\n"
93
+ lang_name = self.voice_manager.get_language_name()
94
+ intro += f"API: {self.api_url} | Model: {self.model} | Voice: {lang_name}\n"
87
95
  intro += f"\n{Colors.CYAN}Quick Start:{Colors.END}\n"
88
96
  intro += " • Type messages to chat with the LLM\n"
89
97
  intro += " • Use /voice <mode> to enable voice input\n"
98
+ intro += " • Use /language <lang> to switch voice language\n"
90
99
  intro += " • Type /help for full command list\n"
91
100
  intro += " • Type /exit or /q to quit\n"
92
101
  return intro
@@ -278,7 +287,172 @@ class VoiceREPL(cmd.Cmd):
278
287
  text = re.sub(pattern, "", text, flags=re.DOTALL)
279
288
 
280
289
  return text.strip()
281
-
290
+
291
+ def do_language(self, args):
292
+ """Switch voice language.
293
+
294
+ Usage: /language <lang>
295
+ Available languages: en, fr, es, de, it
296
+ """
297
+ if not args:
298
+ current_name = self.voice_manager.get_language_name()
299
+ current_code = self.voice_manager.get_language()
300
+ print(f"Current language: {current_name} ({current_code})")
301
+
302
+ print("Available languages:")
303
+ for code in self.voice_manager.get_supported_languages():
304
+ name = self.voice_manager.get_language_name(code)
305
+ print(f" {code} - {name}")
306
+ return
307
+
308
+ language = args.strip().lower()
309
+
310
+ # Stop any current voice activity
311
+ if self.voice_mode_active:
312
+ self._voice_stop_callback()
313
+ was_active = True
314
+ else:
315
+ was_active = False
316
+
317
+ # Switch language
318
+ old_lang = self.current_language
319
+ if self.voice_manager.set_language(language):
320
+ self.current_language = language
321
+ old_name = self.voice_manager.get_language_name(old_lang)
322
+ new_name = self.voice_manager.get_language_name(language)
323
+ print(f"🌍 Language changed: {old_name} → {new_name}")
324
+
325
+ # Test the new language with localized message
326
+ test_messages = {
327
+ 'en': "Language switched to English.",
328
+ 'fr': "Langue changée en français.",
329
+ 'es': "Idioma cambiado a español.",
330
+ 'de': "Sprache auf Deutsch umgestellt.",
331
+ 'it': "Lingua cambiata in italiano."
332
+ }
333
+ test_msg = test_messages.get(language, "Language switched.")
334
+ self.voice_manager.speak(test_msg)
335
+
336
+ # Restart voice mode if it was active
337
+ if was_active:
338
+ self.do_voice(self.voice_mode)
339
+ else:
340
+ supported = ', '.join(self.voice_manager.get_supported_languages())
341
+ print(f"Failed to switch to language: {language}")
342
+ print(f"Supported languages: {supported}")
343
+ if self.debug_mode:
344
+ import traceback
345
+ traceback.print_exc()
346
+
347
+ def do_setvoice(self, args):
348
+ """Set a specific voice model.
349
+
350
+ Usage:
351
+ /setvoice # Show all available voices
352
+ /setvoice <voice_id> # Set voice (format: language.voice_id)
353
+
354
+ Examples:
355
+ /setvoice # List all voices
356
+ /setvoice fr.css10_vits # Set French CSS10 VITS voice
357
+ /setvoice it.mai_male_vits # Set Italian male VITS voice
358
+ """
359
+ if not args:
360
+ # Show all available voices organized by language
361
+ print(f"\n{Colors.CYAN}Available Voice Models:{Colors.END}")
362
+ self.voice_manager.list_voices()
363
+
364
+ print(f"\n{Colors.YELLOW}Usage:{Colors.END}")
365
+ print(" /setvoice <language>.<voice_id>")
366
+ print(" Example: /setvoice fr.css10_vits")
367
+ return
368
+
369
+ voice_spec = args.strip()
370
+
371
+ # Parse language.voice_id format
372
+ if '.' not in voice_spec:
373
+ print(f"❌ Invalid format. Use: language.voice_id")
374
+ print(f" Example: /setvoice fr.css10_vits")
375
+ print(f" Run '/setvoice' to see available voices")
376
+ return
377
+
378
+ try:
379
+ language, voice_id = voice_spec.split('.', 1)
380
+ except ValueError:
381
+ print(f"❌ Invalid format. Use: language.voice_id")
382
+ return
383
+
384
+ # Stop any current voice activity
385
+ if self.voice_mode_active:
386
+ self._voice_stop_callback()
387
+ was_active = True
388
+ else:
389
+ was_active = False
390
+
391
+ # Set the specific voice
392
+ try:
393
+ success = self.voice_manager.set_voice(language, voice_id)
394
+ if success:
395
+ # Update current language to match the voice
396
+ self.current_language = language
397
+
398
+ # Get voice info for confirmation
399
+ voice_info = self.voice_manager.VOICE_CATALOG.get(language, {}).get(voice_id, {})
400
+ lang_name = self.voice_manager.get_language_name(language)
401
+
402
+ print(f"✅ Voice changed successfully!")
403
+ print(f" Language: {lang_name} ({language})")
404
+ print(f" Voice: {voice_id}")
405
+ if voice_info:
406
+ quality_icon = "✨" if voice_info.get('quality') == 'premium' else "🔧"
407
+ gender_icon = {"male": "👨", "female": "👩", "multiple": "👥"}.get(voice_info.get('gender'), "🗣️")
408
+ print(f" Details: {quality_icon} {gender_icon} {voice_info.get('accent', 'Unknown accent')}")
409
+
410
+ # Test the new voice
411
+ test_messages = {
412
+ 'en': "Voice changed to English.",
413
+ 'fr': "Voix changée en français.",
414
+ 'es': "Voz cambiada al español.",
415
+ 'de': "Stimme auf Deutsch geändert.",
416
+ 'it': "Voce cambiata in italiano."
417
+ }
418
+ test_msg = test_messages.get(language, "Voice changed successfully.")
419
+ self.voice_manager.speak(test_msg)
420
+
421
+ # Restart voice mode if it was active
422
+ if was_active:
423
+ self.do_voice(self.voice_mode)
424
+ else:
425
+ print(f"❌ Failed to set voice: {voice_spec}")
426
+ print(f" Run '/setvoice' to see available voices")
427
+
428
+ except Exception as e:
429
+ print(f"❌ Error setting voice: {e}")
430
+ if self.debug_mode:
431
+ import traceback
432
+ traceback.print_exc()
433
+
434
+ def do_lang_info(self, args):
435
+ """Show current language information."""
436
+ info = self.voice_manager.get_language_info()
437
+ print(f"\n{Colors.CYAN}Current Language Information:{Colors.END}")
438
+ print(f" Language: {info['name']} ({info['code']})")
439
+ print(f" Model: {info['model']}")
440
+ print(f" Available models: {list(info['available_models'].keys())}")
441
+
442
+ # Check if XTTS supports multiple languages
443
+ if 'xtts' in (info['model'] or '').lower():
444
+ print(f" ✅ Supports multilingual synthesis")
445
+ else:
446
+ print(f" ℹ️ Monolingual model")
447
+
448
+ def do_list_languages(self, args):
449
+ """List all supported languages."""
450
+ print(f"\n{Colors.CYAN}Supported Languages:{Colors.END}")
451
+ for lang in self.voice_manager.get_supported_languages():
452
+ name = self.voice_manager.get_language_name(lang)
453
+ current = " (current)" if lang == self.current_language else ""
454
+ print(f" {lang} - {name}{current}")
455
+
282
456
  def do_voice(self, arg):
283
457
  """Control voice input mode.
284
458
 
@@ -554,6 +728,10 @@ class VoiceREPL(cmd.Cmd):
554
728
  print(" /clear Clear history")
555
729
  print(" /tts on|off Toggle TTS")
556
730
  print(" /voice <mode> Voice input: off|full|wait|stop|ptt")
731
+ print(" /language <lang> Switch voice language (en, fr, es, de, it)")
732
+ print(" /setvoice [id] List voices or set specific voice (lang.voice_id)")
733
+ print(" /lang_info Show current language information")
734
+ print(" /list_languages List all supported languages")
557
735
  print(" /speed <number> Set TTS speed (0.5-2.0, default: 1.0, pitch preserved)")
558
736
  print(" /tts_model <model> Switch TTS model: vits(best)|fast_pitch|glow-tts|tacotron2-DDC")
559
737
  print(" /whisper <model> Switch Whisper model: tiny|base|small|medium|large")
@@ -831,10 +1009,15 @@ def parse_args():
831
1009
  """Parse command line arguments."""
832
1010
  parser = argparse.ArgumentParser(description="AbstractVoice CLI Example")
833
1011
  parser.add_argument("--debug", action="store_true", help="Enable debug mode")
834
- parser.add_argument("--api", default="http://localhost:11434/api/chat",
1012
+ parser.add_argument("--api", default="http://localhost:11434/api/chat",
835
1013
  help="LLM API URL")
836
- parser.add_argument("--model", default="granite3.3:2b",
1014
+ parser.add_argument("--model", default="granite3.3:2b",
837
1015
  help="LLM model name")
1016
+ parser.add_argument("--language", "--lang", default="en",
1017
+ choices=["en", "fr", "es", "de", "it", "ru", "multilingual"],
1018
+ help="Voice language (en=English, fr=French, es=Spanish, de=German, it=Italian, ru=Russian, multilingual=All)")
1019
+ parser.add_argument("--tts-model",
1020
+ help="Specific TTS model to use (overrides language default)")
838
1021
  return parser.parse_args()
839
1022
 
840
1023
 
@@ -844,11 +1027,13 @@ def main():
844
1027
  # Parse command line arguments
845
1028
  args = parse_args()
846
1029
 
847
- # Initialize and run REPL
1030
+ # Initialize and run REPL with language support
848
1031
  repl = VoiceREPL(
849
1032
  api_url=args.api,
850
1033
  model=args.model,
851
- debug_mode=args.debug
1034
+ debug_mode=args.debug,
1035
+ language=args.language,
1036
+ tts_model=args.tts_model
852
1037
  )
853
1038
  repl.cmdloop()
854
1039
  except KeyboardInterrupt:
@@ -13,20 +13,25 @@ def parse_args():
13
13
  """Parse command line arguments."""
14
14
  parser = argparse.ArgumentParser(description="AbstractVoice Voice Mode")
15
15
  parser.add_argument("--debug", action="store_true", help="Enable debug mode")
16
- parser.add_argument("--api", default="http://localhost:11434/api/chat",
16
+ parser.add_argument("--api", default="http://localhost:11434/api/chat",
17
17
  help="LLM API URL")
18
- parser.add_argument("--model", default="granite3.3:2b",
18
+ parser.add_argument("--model", default="granite3.3:2b",
19
19
  help="LLM model name")
20
20
  parser.add_argument("--whisper", default="tiny",
21
21
  help="Whisper model to use (tiny, base, small, medium, large)")
22
22
  parser.add_argument("--no-listening", action="store_true",
23
23
  help="Disable speech-to-text (listening), TTS still works")
24
- parser.add_argument("--system",
24
+ parser.add_argument("--system",
25
25
  help="Custom system prompt")
26
26
  parser.add_argument("--temperature", type=float, default=0.4,
27
27
  help="Set temperature (0.0-2.0) for the LLM")
28
28
  parser.add_argument("--max-tokens", type=int, default=4096,
29
29
  help="Set maximum tokens for the LLM response")
30
+ parser.add_argument("--language", "--lang", default="en",
31
+ choices=["en", "fr", "es", "de", "it", "ru", "multilingual"],
32
+ help="Voice language (en=English, fr=French, es=Spanish, de=German, it=Italian, ru=Russian, multilingual=All)")
33
+ parser.add_argument("--tts-model",
34
+ help="Specific TTS model to use (overrides language default)")
30
35
  return parser.parse_args()
31
36
 
32
37
  def main():
@@ -35,13 +40,22 @@ def main():
35
40
  # Parse command line arguments
36
41
  args = parse_args()
37
42
 
38
- print("Starting AbstractVoice voice interface...")
43
+ # Show language information
44
+ language_names = {
45
+ 'en': 'English', 'fr': 'French', 'es': 'Spanish',
46
+ 'de': 'German', 'it': 'Italian', 'ru': 'Russian',
47
+ 'multilingual': 'Multilingual'
48
+ }
49
+ lang_name = language_names.get(args.language, args.language)
50
+ print(f"Starting AbstractVoice voice interface ({lang_name})...")
39
51
 
40
- # Initialize REPL
52
+ # Initialize REPL with language support
41
53
  repl = VoiceREPL(
42
54
  api_url=args.api,
43
55
  model=args.model,
44
- debug_mode=args.debug
56
+ debug_mode=args.debug,
57
+ language=args.language,
58
+ tts_model=args.tts_model
45
59
  )
46
60
 
47
61
  # Set custom system prompt if provided
@@ -2,9 +2,50 @@
2
2
 
3
3
  import threading
4
4
  import time
5
- import pyaudio
6
- from .vad import VoiceDetector
7
- from .stt import Transcriber
5
+
6
+ # Lazy imports for heavy dependencies
7
+ def _import_audio_deps():
8
+ """Import audio dependencies with helpful error message if missing."""
9
+ try:
10
+ import pyaudio
11
+ return pyaudio
12
+ except ImportError as e:
13
+ raise ImportError(
14
+ "Audio functionality requires optional dependencies. Install with:\n"
15
+ " pip install abstractvoice[voice] # For basic audio\n"
16
+ " pip install abstractvoice[all] # For all features\n"
17
+ f"Original error: {e}"
18
+ ) from e
19
+
20
+ def _import_vad():
21
+ """Import VoiceDetector with helpful error message if dependencies missing."""
22
+ try:
23
+ from .vad import VoiceDetector
24
+ return VoiceDetector
25
+ except ImportError as e:
26
+ if "webrtcvad" in str(e):
27
+ raise ImportError(
28
+ "Voice activity detection requires optional dependencies. Install with:\n"
29
+ " pip install abstractvoice[voice] # For basic audio\n"
30
+ " pip install abstractvoice[all] # For all features\n"
31
+ f"Original error: {e}"
32
+ ) from e
33
+ raise
34
+
35
+ def _import_transcriber():
36
+ """Import Transcriber with helpful error message if dependencies missing."""
37
+ try:
38
+ from .stt import Transcriber
39
+ return Transcriber
40
+ except ImportError as e:
41
+ if "whisper" in str(e) or "tiktoken" in str(e):
42
+ raise ImportError(
43
+ "Speech recognition functionality requires optional dependencies. Install with:\n"
44
+ " pip install abstractvoice[stt] # For speech recognition only\n"
45
+ " pip install abstractvoice[all] # For all features\n"
46
+ f"Original error: {e}"
47
+ ) from e
48
+ raise
8
49
 
9
50
 
10
51
  class VoiceRecognizer:
@@ -40,13 +81,15 @@ class VoiceRecognizer:
40
81
  self.min_speech_chunks = int(min_speech_duration / chunk_duration)
41
82
  self.silence_timeout_chunks = int(silence_timeout / chunk_duration)
42
83
 
43
- # Initialize components
84
+ # Initialize components using lazy imports
85
+ VoiceDetector = _import_vad()
44
86
  self.voice_detector = VoiceDetector(
45
87
  aggressiveness=vad_aggressiveness,
46
88
  sample_rate=sample_rate,
47
89
  debug_mode=debug_mode
48
90
  )
49
-
91
+
92
+ Transcriber = _import_transcriber()
50
93
  self.transcriber = Transcriber(
51
94
  model_name=whisper_model,
52
95
  min_transcription_length=min_transcription_length,
@@ -109,8 +152,8 @@ class VoiceRecognizer:
109
152
 
110
153
  def _recognition_loop(self):
111
154
  """Main recognition loop."""
112
- import pyaudio
113
-
155
+ pyaudio = _import_audio_deps()
156
+
114
157
  self.pyaudio = pyaudio.PyAudio()
115
158
  self.stream = self.pyaudio.open(
116
159
  format=pyaudio.paInt16,
@@ -1,11 +1,24 @@
1
1
  """Speech-to-text transcription using OpenAI's Whisper."""
2
2
 
3
- import whisper
4
3
  import numpy as np
5
4
  import os
6
5
  import sys
7
6
  import logging
8
7
 
8
+ # Lazy import for heavy dependencies
9
+ def _import_whisper():
10
+ """Import whisper with helpful error message if dependencies missing."""
11
+ try:
12
+ import whisper
13
+ return whisper
14
+ except ImportError as e:
15
+ raise ImportError(
16
+ "Speech recognition functionality requires optional dependencies. Install with:\n"
17
+ " pip install abstractvoice[stt] # For speech recognition only\n"
18
+ " pip install abstractvoice[all] # For all features\n"
19
+ f"Original error: {e}"
20
+ ) from e
21
+
9
22
 
10
23
  class Transcriber:
11
24
  """Transcribes audio using OpenAI's Whisper model."""
@@ -38,7 +51,8 @@ class Transcriber:
38
51
  null_out = open(os.devnull, 'w')
39
52
  sys.stdout = null_out
40
53
 
41
- # Load the Whisper model
54
+ # Load the Whisper model using lazy import
55
+ whisper = _import_whisper()
42
56
  self.model = whisper.load_model(model_name)
43
57
  finally:
44
58
  # Restore stdout if we redirected it
@@ -120,6 +134,7 @@ class Transcriber:
120
134
  sys.stdout = null_out
121
135
 
122
136
  try:
137
+ whisper = _import_whisper()
123
138
  self.model = whisper.load_model(model_name)
124
139
  self.model_name = model_name
125
140
  finally: