abstractvoice 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractvoice/__main__.py +20 -10
- abstractvoice/examples/cli_repl.py +198 -13
- abstractvoice/examples/voice_cli.py +20 -6
- abstractvoice/recognition.py +50 -7
- abstractvoice/stt/transcriber.py +17 -2
- abstractvoice/tts/tts_engine.py +84 -32
- abstractvoice/vad/voice_detector.py +16 -2
- abstractvoice/voice_manager.py +558 -16
- {abstractvoice-0.1.1.dist-info → abstractvoice-0.2.0.dist-info}/METADATA +228 -50
- abstractvoice-0.2.0.dist-info/RECORD +20 -0
- {abstractvoice-0.1.1.dist-info → abstractvoice-0.2.0.dist-info}/licenses/LICENSE +1 -1
- abstractvoice-0.1.1.dist-info/RECORD +0 -20
- {abstractvoice-0.1.1.dist-info → abstractvoice-0.2.0.dist-info}/WHEEL +0 -0
- {abstractvoice-0.1.1.dist-info → abstractvoice-0.2.0.dist-info}/entry_points.txt +0 -0
- {abstractvoice-0.1.1.dist-info → abstractvoice-0.2.0.dist-info}/top_level.txt +0 -0
abstractvoice/__main__.py
CHANGED
|
@@ -15,20 +15,25 @@ def print_examples():
|
|
|
15
15
|
print(" cli - Command-line REPL example")
|
|
16
16
|
print(" web - Web API example")
|
|
17
17
|
print(" simple - Simple usage example")
|
|
18
|
-
print("\nUsage: python -m abstractvoice <example> [args...]")
|
|
18
|
+
print("\nUsage: python -m abstractvoice <example> [--language <lang>] [args...]")
|
|
19
|
+
print("\nSupported languages: en, fr, es, de, it, ru, multilingual")
|
|
20
|
+
print("\nExamples:")
|
|
21
|
+
print(" python -m abstractvoice cli --language fr # French CLI")
|
|
22
|
+
print(" python -m abstractvoice simple --language ru # Russian simple example")
|
|
19
23
|
|
|
20
24
|
|
|
21
25
|
def simple_example():
|
|
22
26
|
"""Run a simple example demonstrating basic usage."""
|
|
23
27
|
from abstractvoice import VoiceManager
|
|
24
28
|
import time
|
|
25
|
-
|
|
29
|
+
|
|
26
30
|
print("Simple AbstractVoice Example")
|
|
27
31
|
print("============================")
|
|
28
32
|
print("This example demonstrates basic TTS and STT functionality.")
|
|
33
|
+
print("(Use --language argument to test different languages)")
|
|
29
34
|
print()
|
|
30
|
-
|
|
31
|
-
# Initialize voice manager
|
|
35
|
+
|
|
36
|
+
# Initialize voice manager (can be overridden with --language)
|
|
32
37
|
manager = VoiceManager(debug_mode=True)
|
|
33
38
|
|
|
34
39
|
try:
|
|
@@ -91,17 +96,22 @@ def main():
|
|
|
91
96
|
"""Main entry point."""
|
|
92
97
|
parser = argparse.ArgumentParser(description="AbstractVoice examples")
|
|
93
98
|
parser.add_argument("example", nargs="?", help="Example to run (cli, web, simple)")
|
|
94
|
-
|
|
95
|
-
|
|
99
|
+
parser.add_argument("--language", "--lang", default="en",
|
|
100
|
+
choices=["en", "fr", "es", "de", "it", "ru", "multilingual"],
|
|
101
|
+
help="Voice language for examples")
|
|
102
|
+
|
|
103
|
+
# Parse just the first argument and language
|
|
96
104
|
args, remaining = parser.parse_known_args()
|
|
97
|
-
|
|
105
|
+
|
|
98
106
|
if not args.example:
|
|
99
107
|
print_examples()
|
|
100
108
|
return
|
|
101
|
-
|
|
102
|
-
# Set remaining args as sys.argv for the examples
|
|
109
|
+
|
|
110
|
+
# Set remaining args as sys.argv for the examples, including language
|
|
111
|
+
if args.language != "en":
|
|
112
|
+
remaining = ["--language", args.language] + remaining
|
|
103
113
|
sys.argv = [sys.argv[0]] + remaining
|
|
104
|
-
|
|
114
|
+
|
|
105
115
|
if args.example == "cli":
|
|
106
116
|
from abstractvoice.examples.cli_repl import main
|
|
107
117
|
main()
|
|
@@ -37,21 +37,28 @@ class VoiceREPL(cmd.Cmd):
|
|
|
37
37
|
ruler = "" # No horizontal rule line
|
|
38
38
|
use_rawinput = True
|
|
39
39
|
|
|
40
|
-
def __init__(self, api_url="http://localhost:11434/api/chat",
|
|
41
|
-
model="granite3.3:2b", debug_mode=False):
|
|
40
|
+
def __init__(self, api_url="http://localhost:11434/api/chat",
|
|
41
|
+
model="granite3.3:2b", debug_mode=False, language="en", tts_model=None):
|
|
42
42
|
super().__init__()
|
|
43
|
-
|
|
43
|
+
|
|
44
44
|
# Debug mode
|
|
45
45
|
self.debug_mode = debug_mode
|
|
46
|
-
|
|
46
|
+
|
|
47
47
|
# API settings
|
|
48
48
|
self.api_url = api_url
|
|
49
49
|
self.model = model
|
|
50
50
|
self.temperature = 0.4
|
|
51
51
|
self.max_tokens = 4096
|
|
52
|
-
|
|
53
|
-
#
|
|
54
|
-
self.
|
|
52
|
+
|
|
53
|
+
# Language settings
|
|
54
|
+
self.current_language = language
|
|
55
|
+
|
|
56
|
+
# Initialize voice manager with language support
|
|
57
|
+
self.voice_manager = VoiceManager(
|
|
58
|
+
language=language,
|
|
59
|
+
tts_model=tts_model,
|
|
60
|
+
debug_mode=debug_mode
|
|
61
|
+
)
|
|
55
62
|
|
|
56
63
|
# Settings
|
|
57
64
|
self.use_tts = True
|
|
@@ -83,10 +90,12 @@ class VoiceREPL(cmd.Cmd):
|
|
|
83
90
|
def _get_intro(self):
|
|
84
91
|
"""Generate intro message with help."""
|
|
85
92
|
intro = f"\n{Colors.BOLD}Welcome to AbstractVoice CLI REPL{Colors.END}\n"
|
|
86
|
-
|
|
93
|
+
lang_name = self.voice_manager.get_language_name()
|
|
94
|
+
intro += f"API: {self.api_url} | Model: {self.model} | Voice: {lang_name}\n"
|
|
87
95
|
intro += f"\n{Colors.CYAN}Quick Start:{Colors.END}\n"
|
|
88
96
|
intro += " • Type messages to chat with the LLM\n"
|
|
89
97
|
intro += " • Use /voice <mode> to enable voice input\n"
|
|
98
|
+
intro += " • Use /language <lang> to switch voice language\n"
|
|
90
99
|
intro += " • Type /help for full command list\n"
|
|
91
100
|
intro += " • Type /exit or /q to quit\n"
|
|
92
101
|
return intro
|
|
@@ -278,7 +287,172 @@ class VoiceREPL(cmd.Cmd):
|
|
|
278
287
|
text = re.sub(pattern, "", text, flags=re.DOTALL)
|
|
279
288
|
|
|
280
289
|
return text.strip()
|
|
281
|
-
|
|
290
|
+
|
|
291
|
+
def do_language(self, args):
|
|
292
|
+
"""Switch voice language.
|
|
293
|
+
|
|
294
|
+
Usage: /language <lang>
|
|
295
|
+
Available languages: en, fr, es, de, it
|
|
296
|
+
"""
|
|
297
|
+
if not args:
|
|
298
|
+
current_name = self.voice_manager.get_language_name()
|
|
299
|
+
current_code = self.voice_manager.get_language()
|
|
300
|
+
print(f"Current language: {current_name} ({current_code})")
|
|
301
|
+
|
|
302
|
+
print("Available languages:")
|
|
303
|
+
for code in self.voice_manager.get_supported_languages():
|
|
304
|
+
name = self.voice_manager.get_language_name(code)
|
|
305
|
+
print(f" {code} - {name}")
|
|
306
|
+
return
|
|
307
|
+
|
|
308
|
+
language = args.strip().lower()
|
|
309
|
+
|
|
310
|
+
# Stop any current voice activity
|
|
311
|
+
if self.voice_mode_active:
|
|
312
|
+
self._voice_stop_callback()
|
|
313
|
+
was_active = True
|
|
314
|
+
else:
|
|
315
|
+
was_active = False
|
|
316
|
+
|
|
317
|
+
# Switch language
|
|
318
|
+
old_lang = self.current_language
|
|
319
|
+
if self.voice_manager.set_language(language):
|
|
320
|
+
self.current_language = language
|
|
321
|
+
old_name = self.voice_manager.get_language_name(old_lang)
|
|
322
|
+
new_name = self.voice_manager.get_language_name(language)
|
|
323
|
+
print(f"🌍 Language changed: {old_name} → {new_name}")
|
|
324
|
+
|
|
325
|
+
# Test the new language with localized message
|
|
326
|
+
test_messages = {
|
|
327
|
+
'en': "Language switched to English.",
|
|
328
|
+
'fr': "Langue changée en français.",
|
|
329
|
+
'es': "Idioma cambiado a español.",
|
|
330
|
+
'de': "Sprache auf Deutsch umgestellt.",
|
|
331
|
+
'it': "Lingua cambiata in italiano."
|
|
332
|
+
}
|
|
333
|
+
test_msg = test_messages.get(language, "Language switched.")
|
|
334
|
+
self.voice_manager.speak(test_msg)
|
|
335
|
+
|
|
336
|
+
# Restart voice mode if it was active
|
|
337
|
+
if was_active:
|
|
338
|
+
self.do_voice(self.voice_mode)
|
|
339
|
+
else:
|
|
340
|
+
supported = ', '.join(self.voice_manager.get_supported_languages())
|
|
341
|
+
print(f"Failed to switch to language: {language}")
|
|
342
|
+
print(f"Supported languages: {supported}")
|
|
343
|
+
if self.debug_mode:
|
|
344
|
+
import traceback
|
|
345
|
+
traceback.print_exc()
|
|
346
|
+
|
|
347
|
+
def do_setvoice(self, args):
|
|
348
|
+
"""Set a specific voice model.
|
|
349
|
+
|
|
350
|
+
Usage:
|
|
351
|
+
/setvoice # Show all available voices
|
|
352
|
+
/setvoice <voice_id> # Set voice (format: language.voice_id)
|
|
353
|
+
|
|
354
|
+
Examples:
|
|
355
|
+
/setvoice # List all voices
|
|
356
|
+
/setvoice fr.css10_vits # Set French CSS10 VITS voice
|
|
357
|
+
/setvoice it.mai_male_vits # Set Italian male VITS voice
|
|
358
|
+
"""
|
|
359
|
+
if not args:
|
|
360
|
+
# Show all available voices organized by language
|
|
361
|
+
print(f"\n{Colors.CYAN}Available Voice Models:{Colors.END}")
|
|
362
|
+
self.voice_manager.list_voices()
|
|
363
|
+
|
|
364
|
+
print(f"\n{Colors.YELLOW}Usage:{Colors.END}")
|
|
365
|
+
print(" /setvoice <language>.<voice_id>")
|
|
366
|
+
print(" Example: /setvoice fr.css10_vits")
|
|
367
|
+
return
|
|
368
|
+
|
|
369
|
+
voice_spec = args.strip()
|
|
370
|
+
|
|
371
|
+
# Parse language.voice_id format
|
|
372
|
+
if '.' not in voice_spec:
|
|
373
|
+
print(f"❌ Invalid format. Use: language.voice_id")
|
|
374
|
+
print(f" Example: /setvoice fr.css10_vits")
|
|
375
|
+
print(f" Run '/setvoice' to see available voices")
|
|
376
|
+
return
|
|
377
|
+
|
|
378
|
+
try:
|
|
379
|
+
language, voice_id = voice_spec.split('.', 1)
|
|
380
|
+
except ValueError:
|
|
381
|
+
print(f"❌ Invalid format. Use: language.voice_id")
|
|
382
|
+
return
|
|
383
|
+
|
|
384
|
+
# Stop any current voice activity
|
|
385
|
+
if self.voice_mode_active:
|
|
386
|
+
self._voice_stop_callback()
|
|
387
|
+
was_active = True
|
|
388
|
+
else:
|
|
389
|
+
was_active = False
|
|
390
|
+
|
|
391
|
+
# Set the specific voice
|
|
392
|
+
try:
|
|
393
|
+
success = self.voice_manager.set_voice(language, voice_id)
|
|
394
|
+
if success:
|
|
395
|
+
# Update current language to match the voice
|
|
396
|
+
self.current_language = language
|
|
397
|
+
|
|
398
|
+
# Get voice info for confirmation
|
|
399
|
+
voice_info = self.voice_manager.VOICE_CATALOG.get(language, {}).get(voice_id, {})
|
|
400
|
+
lang_name = self.voice_manager.get_language_name(language)
|
|
401
|
+
|
|
402
|
+
print(f"✅ Voice changed successfully!")
|
|
403
|
+
print(f" Language: {lang_name} ({language})")
|
|
404
|
+
print(f" Voice: {voice_id}")
|
|
405
|
+
if voice_info:
|
|
406
|
+
quality_icon = "✨" if voice_info.get('quality') == 'premium' else "🔧"
|
|
407
|
+
gender_icon = {"male": "👨", "female": "👩", "multiple": "👥"}.get(voice_info.get('gender'), "🗣️")
|
|
408
|
+
print(f" Details: {quality_icon} {gender_icon} {voice_info.get('accent', 'Unknown accent')}")
|
|
409
|
+
|
|
410
|
+
# Test the new voice
|
|
411
|
+
test_messages = {
|
|
412
|
+
'en': "Voice changed to English.",
|
|
413
|
+
'fr': "Voix changée en français.",
|
|
414
|
+
'es': "Voz cambiada al español.",
|
|
415
|
+
'de': "Stimme auf Deutsch geändert.",
|
|
416
|
+
'it': "Voce cambiata in italiano."
|
|
417
|
+
}
|
|
418
|
+
test_msg = test_messages.get(language, "Voice changed successfully.")
|
|
419
|
+
self.voice_manager.speak(test_msg)
|
|
420
|
+
|
|
421
|
+
# Restart voice mode if it was active
|
|
422
|
+
if was_active:
|
|
423
|
+
self.do_voice(self.voice_mode)
|
|
424
|
+
else:
|
|
425
|
+
print(f"❌ Failed to set voice: {voice_spec}")
|
|
426
|
+
print(f" Run '/setvoice' to see available voices")
|
|
427
|
+
|
|
428
|
+
except Exception as e:
|
|
429
|
+
print(f"❌ Error setting voice: {e}")
|
|
430
|
+
if self.debug_mode:
|
|
431
|
+
import traceback
|
|
432
|
+
traceback.print_exc()
|
|
433
|
+
|
|
434
|
+
def do_lang_info(self, args):
|
|
435
|
+
"""Show current language information."""
|
|
436
|
+
info = self.voice_manager.get_language_info()
|
|
437
|
+
print(f"\n{Colors.CYAN}Current Language Information:{Colors.END}")
|
|
438
|
+
print(f" Language: {info['name']} ({info['code']})")
|
|
439
|
+
print(f" Model: {info['model']}")
|
|
440
|
+
print(f" Available models: {list(info['available_models'].keys())}")
|
|
441
|
+
|
|
442
|
+
# Check if XTTS supports multiple languages
|
|
443
|
+
if 'xtts' in (info['model'] or '').lower():
|
|
444
|
+
print(f" ✅ Supports multilingual synthesis")
|
|
445
|
+
else:
|
|
446
|
+
print(f" ℹ️ Monolingual model")
|
|
447
|
+
|
|
448
|
+
def do_list_languages(self, args):
|
|
449
|
+
"""List all supported languages."""
|
|
450
|
+
print(f"\n{Colors.CYAN}Supported Languages:{Colors.END}")
|
|
451
|
+
for lang in self.voice_manager.get_supported_languages():
|
|
452
|
+
name = self.voice_manager.get_language_name(lang)
|
|
453
|
+
current = " (current)" if lang == self.current_language else ""
|
|
454
|
+
print(f" {lang} - {name}{current}")
|
|
455
|
+
|
|
282
456
|
def do_voice(self, arg):
|
|
283
457
|
"""Control voice input mode.
|
|
284
458
|
|
|
@@ -554,6 +728,10 @@ class VoiceREPL(cmd.Cmd):
|
|
|
554
728
|
print(" /clear Clear history")
|
|
555
729
|
print(" /tts on|off Toggle TTS")
|
|
556
730
|
print(" /voice <mode> Voice input: off|full|wait|stop|ptt")
|
|
731
|
+
print(" /language <lang> Switch voice language (en, fr, es, de, it)")
|
|
732
|
+
print(" /setvoice [id] List voices or set specific voice (lang.voice_id)")
|
|
733
|
+
print(" /lang_info Show current language information")
|
|
734
|
+
print(" /list_languages List all supported languages")
|
|
557
735
|
print(" /speed <number> Set TTS speed (0.5-2.0, default: 1.0, pitch preserved)")
|
|
558
736
|
print(" /tts_model <model> Switch TTS model: vits(best)|fast_pitch|glow-tts|tacotron2-DDC")
|
|
559
737
|
print(" /whisper <model> Switch Whisper model: tiny|base|small|medium|large")
|
|
@@ -831,10 +1009,15 @@ def parse_args():
|
|
|
831
1009
|
"""Parse command line arguments."""
|
|
832
1010
|
parser = argparse.ArgumentParser(description="AbstractVoice CLI Example")
|
|
833
1011
|
parser.add_argument("--debug", action="store_true", help="Enable debug mode")
|
|
834
|
-
parser.add_argument("--api", default="http://localhost:11434/api/chat",
|
|
1012
|
+
parser.add_argument("--api", default="http://localhost:11434/api/chat",
|
|
835
1013
|
help="LLM API URL")
|
|
836
|
-
parser.add_argument("--model", default="granite3.3:2b",
|
|
1014
|
+
parser.add_argument("--model", default="granite3.3:2b",
|
|
837
1015
|
help="LLM model name")
|
|
1016
|
+
parser.add_argument("--language", "--lang", default="en",
|
|
1017
|
+
choices=["en", "fr", "es", "de", "it", "ru", "multilingual"],
|
|
1018
|
+
help="Voice language (en=English, fr=French, es=Spanish, de=German, it=Italian, ru=Russian, multilingual=All)")
|
|
1019
|
+
parser.add_argument("--tts-model",
|
|
1020
|
+
help="Specific TTS model to use (overrides language default)")
|
|
838
1021
|
return parser.parse_args()
|
|
839
1022
|
|
|
840
1023
|
|
|
@@ -844,11 +1027,13 @@ def main():
|
|
|
844
1027
|
# Parse command line arguments
|
|
845
1028
|
args = parse_args()
|
|
846
1029
|
|
|
847
|
-
# Initialize and run REPL
|
|
1030
|
+
# Initialize and run REPL with language support
|
|
848
1031
|
repl = VoiceREPL(
|
|
849
1032
|
api_url=args.api,
|
|
850
1033
|
model=args.model,
|
|
851
|
-
debug_mode=args.debug
|
|
1034
|
+
debug_mode=args.debug,
|
|
1035
|
+
language=args.language,
|
|
1036
|
+
tts_model=args.tts_model
|
|
852
1037
|
)
|
|
853
1038
|
repl.cmdloop()
|
|
854
1039
|
except KeyboardInterrupt:
|
|
@@ -13,20 +13,25 @@ def parse_args():
|
|
|
13
13
|
"""Parse command line arguments."""
|
|
14
14
|
parser = argparse.ArgumentParser(description="AbstractVoice Voice Mode")
|
|
15
15
|
parser.add_argument("--debug", action="store_true", help="Enable debug mode")
|
|
16
|
-
parser.add_argument("--api", default="http://localhost:11434/api/chat",
|
|
16
|
+
parser.add_argument("--api", default="http://localhost:11434/api/chat",
|
|
17
17
|
help="LLM API URL")
|
|
18
|
-
parser.add_argument("--model", default="granite3.3:2b",
|
|
18
|
+
parser.add_argument("--model", default="granite3.3:2b",
|
|
19
19
|
help="LLM model name")
|
|
20
20
|
parser.add_argument("--whisper", default="tiny",
|
|
21
21
|
help="Whisper model to use (tiny, base, small, medium, large)")
|
|
22
22
|
parser.add_argument("--no-listening", action="store_true",
|
|
23
23
|
help="Disable speech-to-text (listening), TTS still works")
|
|
24
|
-
parser.add_argument("--system",
|
|
24
|
+
parser.add_argument("--system",
|
|
25
25
|
help="Custom system prompt")
|
|
26
26
|
parser.add_argument("--temperature", type=float, default=0.4,
|
|
27
27
|
help="Set temperature (0.0-2.0) for the LLM")
|
|
28
28
|
parser.add_argument("--max-tokens", type=int, default=4096,
|
|
29
29
|
help="Set maximum tokens for the LLM response")
|
|
30
|
+
parser.add_argument("--language", "--lang", default="en",
|
|
31
|
+
choices=["en", "fr", "es", "de", "it", "ru", "multilingual"],
|
|
32
|
+
help="Voice language (en=English, fr=French, es=Spanish, de=German, it=Italian, ru=Russian, multilingual=All)")
|
|
33
|
+
parser.add_argument("--tts-model",
|
|
34
|
+
help="Specific TTS model to use (overrides language default)")
|
|
30
35
|
return parser.parse_args()
|
|
31
36
|
|
|
32
37
|
def main():
|
|
@@ -35,13 +40,22 @@ def main():
|
|
|
35
40
|
# Parse command line arguments
|
|
36
41
|
args = parse_args()
|
|
37
42
|
|
|
38
|
-
|
|
43
|
+
# Show language information
|
|
44
|
+
language_names = {
|
|
45
|
+
'en': 'English', 'fr': 'French', 'es': 'Spanish',
|
|
46
|
+
'de': 'German', 'it': 'Italian', 'ru': 'Russian',
|
|
47
|
+
'multilingual': 'Multilingual'
|
|
48
|
+
}
|
|
49
|
+
lang_name = language_names.get(args.language, args.language)
|
|
50
|
+
print(f"Starting AbstractVoice voice interface ({lang_name})...")
|
|
39
51
|
|
|
40
|
-
# Initialize REPL
|
|
52
|
+
# Initialize REPL with language support
|
|
41
53
|
repl = VoiceREPL(
|
|
42
54
|
api_url=args.api,
|
|
43
55
|
model=args.model,
|
|
44
|
-
debug_mode=args.debug
|
|
56
|
+
debug_mode=args.debug,
|
|
57
|
+
language=args.language,
|
|
58
|
+
tts_model=args.tts_model
|
|
45
59
|
)
|
|
46
60
|
|
|
47
61
|
# Set custom system prompt if provided
|
abstractvoice/recognition.py
CHANGED
|
@@ -2,9 +2,50 @@
|
|
|
2
2
|
|
|
3
3
|
import threading
|
|
4
4
|
import time
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
5
|
+
|
|
6
|
+
# Lazy imports for heavy dependencies
|
|
7
|
+
def _import_audio_deps():
|
|
8
|
+
"""Import audio dependencies with helpful error message if missing."""
|
|
9
|
+
try:
|
|
10
|
+
import pyaudio
|
|
11
|
+
return pyaudio
|
|
12
|
+
except ImportError as e:
|
|
13
|
+
raise ImportError(
|
|
14
|
+
"Audio functionality requires optional dependencies. Install with:\n"
|
|
15
|
+
" pip install abstractvoice[voice] # For basic audio\n"
|
|
16
|
+
" pip install abstractvoice[all] # For all features\n"
|
|
17
|
+
f"Original error: {e}"
|
|
18
|
+
) from e
|
|
19
|
+
|
|
20
|
+
def _import_vad():
|
|
21
|
+
"""Import VoiceDetector with helpful error message if dependencies missing."""
|
|
22
|
+
try:
|
|
23
|
+
from .vad import VoiceDetector
|
|
24
|
+
return VoiceDetector
|
|
25
|
+
except ImportError as e:
|
|
26
|
+
if "webrtcvad" in str(e):
|
|
27
|
+
raise ImportError(
|
|
28
|
+
"Voice activity detection requires optional dependencies. Install with:\n"
|
|
29
|
+
" pip install abstractvoice[voice] # For basic audio\n"
|
|
30
|
+
" pip install abstractvoice[all] # For all features\n"
|
|
31
|
+
f"Original error: {e}"
|
|
32
|
+
) from e
|
|
33
|
+
raise
|
|
34
|
+
|
|
35
|
+
def _import_transcriber():
|
|
36
|
+
"""Import Transcriber with helpful error message if dependencies missing."""
|
|
37
|
+
try:
|
|
38
|
+
from .stt import Transcriber
|
|
39
|
+
return Transcriber
|
|
40
|
+
except ImportError as e:
|
|
41
|
+
if "whisper" in str(e) or "tiktoken" in str(e):
|
|
42
|
+
raise ImportError(
|
|
43
|
+
"Speech recognition functionality requires optional dependencies. Install with:\n"
|
|
44
|
+
" pip install abstractvoice[stt] # For speech recognition only\n"
|
|
45
|
+
" pip install abstractvoice[all] # For all features\n"
|
|
46
|
+
f"Original error: {e}"
|
|
47
|
+
) from e
|
|
48
|
+
raise
|
|
8
49
|
|
|
9
50
|
|
|
10
51
|
class VoiceRecognizer:
|
|
@@ -40,13 +81,15 @@ class VoiceRecognizer:
|
|
|
40
81
|
self.min_speech_chunks = int(min_speech_duration / chunk_duration)
|
|
41
82
|
self.silence_timeout_chunks = int(silence_timeout / chunk_duration)
|
|
42
83
|
|
|
43
|
-
# Initialize components
|
|
84
|
+
# Initialize components using lazy imports
|
|
85
|
+
VoiceDetector = _import_vad()
|
|
44
86
|
self.voice_detector = VoiceDetector(
|
|
45
87
|
aggressiveness=vad_aggressiveness,
|
|
46
88
|
sample_rate=sample_rate,
|
|
47
89
|
debug_mode=debug_mode
|
|
48
90
|
)
|
|
49
|
-
|
|
91
|
+
|
|
92
|
+
Transcriber = _import_transcriber()
|
|
50
93
|
self.transcriber = Transcriber(
|
|
51
94
|
model_name=whisper_model,
|
|
52
95
|
min_transcription_length=min_transcription_length,
|
|
@@ -109,8 +152,8 @@ class VoiceRecognizer:
|
|
|
109
152
|
|
|
110
153
|
def _recognition_loop(self):
|
|
111
154
|
"""Main recognition loop."""
|
|
112
|
-
|
|
113
|
-
|
|
155
|
+
pyaudio = _import_audio_deps()
|
|
156
|
+
|
|
114
157
|
self.pyaudio = pyaudio.PyAudio()
|
|
115
158
|
self.stream = self.pyaudio.open(
|
|
116
159
|
format=pyaudio.paInt16,
|
abstractvoice/stt/transcriber.py
CHANGED
|
@@ -1,11 +1,24 @@
|
|
|
1
1
|
"""Speech-to-text transcription using OpenAI's Whisper."""
|
|
2
2
|
|
|
3
|
-
import whisper
|
|
4
3
|
import numpy as np
|
|
5
4
|
import os
|
|
6
5
|
import sys
|
|
7
6
|
import logging
|
|
8
7
|
|
|
8
|
+
# Lazy import for heavy dependencies
|
|
9
|
+
def _import_whisper():
|
|
10
|
+
"""Import whisper with helpful error message if dependencies missing."""
|
|
11
|
+
try:
|
|
12
|
+
import whisper
|
|
13
|
+
return whisper
|
|
14
|
+
except ImportError as e:
|
|
15
|
+
raise ImportError(
|
|
16
|
+
"Speech recognition functionality requires optional dependencies. Install with:\n"
|
|
17
|
+
" pip install abstractvoice[stt] # For speech recognition only\n"
|
|
18
|
+
" pip install abstractvoice[all] # For all features\n"
|
|
19
|
+
f"Original error: {e}"
|
|
20
|
+
) from e
|
|
21
|
+
|
|
9
22
|
|
|
10
23
|
class Transcriber:
|
|
11
24
|
"""Transcribes audio using OpenAI's Whisper model."""
|
|
@@ -38,7 +51,8 @@ class Transcriber:
|
|
|
38
51
|
null_out = open(os.devnull, 'w')
|
|
39
52
|
sys.stdout = null_out
|
|
40
53
|
|
|
41
|
-
# Load the Whisper model
|
|
54
|
+
# Load the Whisper model using lazy import
|
|
55
|
+
whisper = _import_whisper()
|
|
42
56
|
self.model = whisper.load_model(model_name)
|
|
43
57
|
finally:
|
|
44
58
|
# Restore stdout if we redirected it
|
|
@@ -120,6 +134,7 @@ class Transcriber:
|
|
|
120
134
|
sys.stdout = null_out
|
|
121
135
|
|
|
122
136
|
try:
|
|
137
|
+
whisper = _import_whisper()
|
|
123
138
|
self.model = whisper.load_model(model_name)
|
|
124
139
|
self.model_name = model_name
|
|
125
140
|
finally:
|