npcpy 1.3.20__tar.gz → 1.3.22__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {npcpy-1.3.20/npcpy.egg-info → npcpy-1.3.22}/PKG-INFO +1 -1
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/data/audio.py +58 -286
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/gen/audio_gen.py +172 -2
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/llm_funcs.py +2 -1
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/npc_sysenv.py +24 -2
- {npcpy-1.3.20 → npcpy-1.3.22/npcpy.egg-info}/PKG-INFO +1 -1
- {npcpy-1.3.20 → npcpy-1.3.22}/setup.py +1 -1
- {npcpy-1.3.20 → npcpy-1.3.22}/LICENSE +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/MANIFEST.in +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/README.md +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/__init__.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/build_funcs.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/data/__init__.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/data/data_models.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/data/image.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/data/load.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/data/text.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/data/video.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/data/web.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/ft/__init__.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/ft/diff.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/ft/ge.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/ft/memory_trainer.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/ft/model_ensembler.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/ft/rl.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/ft/sft.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/ft/usft.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/gen/__init__.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/gen/embeddings.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/gen/image_gen.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/gen/ocr.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/gen/response.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/gen/video_gen.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/gen/world_gen.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/main.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/memory/__init__.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/memory/command_history.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/memory/kg_vis.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/memory/knowledge_graph.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/memory/memory_processor.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/memory/search.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/mix/__init__.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/mix/debate.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/ml_funcs.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/npc_array.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/npc_compiler.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/npcs.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/serve.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/sql/__init__.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/sql/ai_function_tools.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/sql/database_ai_adapters.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/sql/database_ai_functions.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/sql/model_runner.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/sql/npcsql.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/sql/sql_model_compiler.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/tools.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/work/__init__.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/work/browser.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/work/desktop.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/work/plan.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/work/trigger.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy.egg-info/SOURCES.txt +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy.egg-info/dependency_links.txt +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy.egg-info/requires.txt +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/npcpy.egg-info/top_level.txt +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/setup.cfg +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_audio.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_browser.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_build_funcs.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_command_history.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_data_models.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_diff.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_documentation_examples.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_genetic_evolver.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_image.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_llm_funcs.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_load.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_memory_processor.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_ml_funcs.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_model_runner.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_npc_array.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_npc_compiler.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_npc_sysenv.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_npcsql.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_response.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_serve.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_sql_adapters.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_sql_compiler.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_sql_functions.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_text.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_tools.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_video.py +0 -0
- {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_web.py +0 -0
|
@@ -6,45 +6,22 @@ import time
|
|
|
6
6
|
import queue
|
|
7
7
|
import re
|
|
8
8
|
import json
|
|
9
|
-
|
|
10
9
|
import subprocess
|
|
10
|
+
import logging
|
|
11
11
|
|
|
12
|
+
from typing import Optional, List, Dict, Any
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
# Audio constants
|
|
12
17
|
try:
|
|
13
|
-
import torch
|
|
14
18
|
import pyaudio
|
|
15
|
-
import wave
|
|
16
|
-
from typing import Optional, List, Dict, Any
|
|
17
|
-
from gtts import gTTS
|
|
18
|
-
from faster_whisper import WhisperModel
|
|
19
|
-
os.environ['PYGAME_HIDE_SUPPORT_PROMPT'] = "hide"
|
|
20
|
-
|
|
21
|
-
import pygame
|
|
22
|
-
|
|
23
19
|
FORMAT = pyaudio.paInt16
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
is_speaking = False
|
|
30
|
-
should_stop_speaking = False
|
|
31
|
-
tts_sequence = 0
|
|
32
|
-
recording_data = []
|
|
33
|
-
buffer_data = []
|
|
34
|
-
is_recording = False
|
|
35
|
-
last_speech_time = 0
|
|
36
|
-
running = True
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
audio_queue = queue.Queue()
|
|
40
|
-
tts_queue = queue.PriorityQueue()
|
|
41
|
-
cleanup_files = []
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
pygame.mixer.quit()
|
|
45
|
-
pygame.mixer.init(frequency=44100, size=-16, channels=2, buffer=512)
|
|
46
|
-
except:
|
|
47
|
-
print("audio dependencies not installed")
|
|
20
|
+
except ImportError:
|
|
21
|
+
FORMAT = 8 # paInt16 value fallback
|
|
22
|
+
CHANNELS = 1
|
|
23
|
+
RATE = 16000
|
|
24
|
+
CHUNK = 512
|
|
48
25
|
|
|
49
26
|
|
|
50
27
|
def convert_mp3_to_wav(mp3_file, wav_file):
|
|
@@ -90,49 +67,9 @@ def check_ffmpeg():
|
|
|
90
67
|
return False
|
|
91
68
|
|
|
92
69
|
|
|
93
|
-
def get_context_string():
|
|
94
|
-
context = []
|
|
95
|
-
for exchange in history:
|
|
96
|
-
context.append(f"User: {exchange['user']}")
|
|
97
|
-
context.append(f"Assistant: {exchange['assistant']}")
|
|
98
|
-
return "\n".join(context)
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
def cleanup_temp_files():
|
|
103
|
-
global cleanup_files
|
|
104
|
-
for file in list(cleanup_files):
|
|
105
|
-
try:
|
|
106
|
-
if os.path.exists(file):
|
|
107
|
-
os.remove(file)
|
|
108
|
-
cleanup_files.remove(file)
|
|
109
|
-
except Exception:
|
|
110
|
-
pass
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
def interrupt_speech():
|
|
114
|
-
global should_stop_speaking
|
|
115
|
-
should_stop_speaking = True
|
|
116
|
-
pygame.mixer.music.stop()
|
|
117
|
-
pygame.mixer.music.unload()
|
|
118
|
-
|
|
119
|
-
while not tts_queue.empty():
|
|
120
|
-
try:
|
|
121
|
-
_, temp_filename = tts_queue.get_nowait()
|
|
122
|
-
try:
|
|
123
|
-
if os.path.exists(temp_filename):
|
|
124
|
-
os.remove(temp_filename)
|
|
125
|
-
except:
|
|
126
|
-
if temp_filename not in cleanup_files:
|
|
127
|
-
cleanup_files.append(temp_filename)
|
|
128
|
-
except queue.Empty:
|
|
129
|
-
break
|
|
130
|
-
|
|
131
|
-
global tts_sequence
|
|
132
|
-
tts_sequence = 0
|
|
133
|
-
|
|
134
|
-
|
|
135
70
|
def audio_callback(in_data, frame_count, time_info, status):
|
|
71
|
+
import pyaudio
|
|
72
|
+
audio_queue = queue.Queue()
|
|
136
73
|
audio_queue.put(in_data)
|
|
137
74
|
return (in_data, pyaudio.paContinue)
|
|
138
75
|
|
|
@@ -571,218 +508,67 @@ def get_available_stt_engines() -> dict:
|
|
|
571
508
|
|
|
572
509
|
|
|
573
510
|
|
|
574
|
-
def load_history():
|
|
575
|
-
global history
|
|
576
|
-
try:
|
|
577
|
-
if os.path.exists(memory_file):
|
|
578
|
-
with open(memory_file, "r") as f:
|
|
579
|
-
history = json.load(f)
|
|
580
|
-
except Exception as e:
|
|
581
|
-
print(f"Error loading conversation history: {e}")
|
|
582
|
-
history = []
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
def save_history():
|
|
586
|
-
try:
|
|
587
|
-
with open(memory_file, "w") as f:
|
|
588
|
-
json.dump(history, f)
|
|
589
|
-
except Exception as e:
|
|
590
|
-
print(f"Error saving conversation history: {e}")
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
def add_exchange(user_input, assistant_response):
|
|
594
|
-
global history
|
|
595
|
-
exchange = {
|
|
596
|
-
"user": user_input,
|
|
597
|
-
"assistant": assistant_response,
|
|
598
|
-
"timestamp": time.time(),
|
|
599
|
-
}
|
|
600
|
-
history.append(exchange)
|
|
601
|
-
if len(history) > max_history:
|
|
602
|
-
history.pop(0)
|
|
603
|
-
save_history()
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
def get_context_string():
|
|
607
|
-
context = []
|
|
608
|
-
for exchange in history:
|
|
609
|
-
context.append(f"User: {exchange['user']}")
|
|
610
|
-
context.append(f"Assistant: {exchange['assistant']}")
|
|
611
|
-
return "\n".join(context)
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
def cleanup_temp_files():
|
|
616
|
-
global cleanup_files
|
|
617
|
-
for file in list(cleanup_files):
|
|
618
|
-
try:
|
|
619
|
-
if os.path.exists(file):
|
|
620
|
-
os.remove(file)
|
|
621
|
-
cleanup_files.remove(file)
|
|
622
|
-
except Exception:
|
|
623
|
-
pass
|
|
624
|
-
|
|
625
511
|
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
pygame.mixer.music.stop()
|
|
630
|
-
pygame.mixer.music.unload()
|
|
631
|
-
|
|
632
|
-
while not tts_queue.empty():
|
|
633
|
-
try:
|
|
634
|
-
_, temp_filename = tts_queue.get_nowait()
|
|
635
|
-
try:
|
|
636
|
-
if os.path.exists(temp_filename):
|
|
637
|
-
os.remove(temp_filename)
|
|
638
|
-
except:
|
|
639
|
-
if temp_filename not in cleanup_files:
|
|
640
|
-
cleanup_files.append(temp_filename)
|
|
641
|
-
except queue.Empty:
|
|
642
|
-
break
|
|
643
|
-
|
|
644
|
-
tts_sequence = 0
|
|
645
|
-
is_speaking = False
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
def audio_callback(in_data, frame_count, time_info, status):
|
|
649
|
-
audio_queue.put(in_data)
|
|
650
|
-
return (in_data, pyaudio.paContinue)
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
def play_audio_from_queue():
|
|
655
|
-
global is_speaking, cleanup_files, should_stop_speaking
|
|
656
|
-
next_sequence = 0
|
|
657
|
-
|
|
658
|
-
while True:
|
|
659
|
-
if should_stop_speaking:
|
|
660
|
-
pygame.mixer.music.stop()
|
|
661
|
-
pygame.mixer.music.unload()
|
|
662
|
-
|
|
663
|
-
while not tts_queue.empty():
|
|
664
|
-
try:
|
|
665
|
-
_, temp_filename = tts_queue.get_nowait()
|
|
666
|
-
try:
|
|
667
|
-
if os.path.exists(temp_filename):
|
|
668
|
-
os.remove(temp_filename)
|
|
669
|
-
except:
|
|
670
|
-
if temp_filename not in cleanup_files:
|
|
671
|
-
cleanup_files.append(temp_filename)
|
|
672
|
-
except queue.Empty:
|
|
673
|
-
break
|
|
674
|
-
|
|
675
|
-
next_sequence = 0
|
|
676
|
-
is_speaking = False
|
|
677
|
-
should_stop_speaking = False
|
|
678
|
-
time.sleep(0.1)
|
|
679
|
-
continue
|
|
680
|
-
|
|
681
|
-
try:
|
|
682
|
-
if not tts_queue.empty():
|
|
683
|
-
sequence, temp_filename = tts_queue.queue[0]
|
|
684
|
-
|
|
685
|
-
if sequence == next_sequence:
|
|
686
|
-
sequence, temp_filename = tts_queue.get()
|
|
687
|
-
is_speaking = True
|
|
688
|
-
|
|
689
|
-
try:
|
|
690
|
-
if len(cleanup_files) > 0 and not pygame.mixer.music.get_busy():
|
|
691
|
-
cleanup_temp_files()
|
|
692
|
-
|
|
693
|
-
if should_stop_speaking:
|
|
694
|
-
continue
|
|
695
|
-
|
|
696
|
-
pygame.mixer.music.load(temp_filename)
|
|
697
|
-
pygame.mixer.music.play()
|
|
698
|
-
|
|
699
|
-
while (
|
|
700
|
-
pygame.mixer.music.get_busy() and not should_stop_speaking
|
|
701
|
-
):
|
|
702
|
-
pygame.time.wait(50)
|
|
703
|
-
|
|
704
|
-
pygame.mixer.music.unload()
|
|
705
|
-
|
|
706
|
-
except Exception as e:
|
|
707
|
-
print(f"Audio playback error: {str(e)}")
|
|
708
|
-
finally:
|
|
709
|
-
try:
|
|
710
|
-
if os.path.exists(temp_filename):
|
|
711
|
-
os.remove(temp_filename)
|
|
712
|
-
except:
|
|
713
|
-
if temp_filename not in cleanup_files:
|
|
714
|
-
cleanup_files.append(temp_filename)
|
|
715
|
-
|
|
716
|
-
if not should_stop_speaking:
|
|
717
|
-
next_sequence += 1
|
|
718
|
-
is_speaking = False
|
|
719
|
-
|
|
720
|
-
time.sleep(0.05)
|
|
721
|
-
except Exception:
|
|
722
|
-
time.sleep(0.05)
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
import pygame
|
|
726
|
-
from gtts import gTTS
|
|
727
|
-
import tempfile
|
|
728
|
-
import os
|
|
729
|
-
import logging
|
|
730
|
-
|
|
731
|
-
logging.basicConfig(level=logging.ERROR)
|
|
732
|
-
logger = logging.getLogger(__name__)
|
|
733
|
-
|
|
734
|
-
import pyaudio
|
|
735
|
-
import wave
|
|
736
|
-
from gtts import gTTS
|
|
737
|
-
import tempfile
|
|
738
|
-
import os
|
|
739
|
-
import logging
|
|
512
|
+
# =============================================================================
|
|
513
|
+
# TTS Playback Helpers (use unified audio_gen.text_to_speech)
|
|
514
|
+
# =============================================================================
|
|
740
515
|
|
|
741
|
-
|
|
742
|
-
|
|
516
|
+
def create_and_queue_audio(text, state, engine="kokoro", voice=None):
|
|
517
|
+
"""Create and play TTS audio using the unified engine interface.
|
|
743
518
|
|
|
519
|
+
Args:
|
|
520
|
+
text: Text to speak
|
|
521
|
+
state: Dict with 'tts_is_speaking', 'tts_just_finished', 'running' keys
|
|
522
|
+
engine: TTS engine name (kokoro, qwen3, elevenlabs, openai, gemini, gtts)
|
|
523
|
+
voice: Voice ID (engine-specific)
|
|
524
|
+
"""
|
|
525
|
+
import wave
|
|
526
|
+
import uuid
|
|
744
527
|
|
|
745
|
-
def create_and_queue_audio(text, state):
|
|
746
|
-
"""Create and queue audio with state awareness for TTS/recording coordination"""
|
|
747
|
-
|
|
748
528
|
state["tts_is_speaking"] = True
|
|
749
529
|
|
|
750
530
|
if not text.strip():
|
|
751
|
-
print("Empty text, skipping TTS")
|
|
752
531
|
state["tts_is_speaking"] = False
|
|
753
532
|
return
|
|
754
533
|
|
|
755
534
|
try:
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
wav_file = os.path.join(temp_dir, f"temp_{unique_id}.wav")
|
|
535
|
+
from npcpy.gen.audio_gen import text_to_speech
|
|
536
|
+
|
|
537
|
+
audio_bytes = text_to_speech(text, engine=engine, voice=voice)
|
|
760
538
|
|
|
761
|
-
|
|
762
|
-
|
|
539
|
+
# Write to temp file and play
|
|
540
|
+
suffix = '.mp3' if engine in ('elevenlabs', 'gtts') else '.wav'
|
|
541
|
+
tmp_path = os.path.join(tempfile.gettempdir(), f"npc_tts_{uuid.uuid4()}{suffix}")
|
|
542
|
+
with open(tmp_path, 'wb') as f:
|
|
543
|
+
f.write(audio_bytes)
|
|
763
544
|
|
|
764
|
-
|
|
545
|
+
play_path = tmp_path
|
|
546
|
+
if suffix == '.mp3':
|
|
547
|
+
wav_path = tmp_path.replace('.mp3', '.wav')
|
|
548
|
+
convert_mp3_to_wav(tmp_path, wav_path)
|
|
549
|
+
play_path = wav_path
|
|
765
550
|
|
|
766
|
-
|
|
767
|
-
|
|
551
|
+
play_audio(play_path, state)
|
|
552
|
+
|
|
553
|
+
for p in set([tmp_path, play_path]):
|
|
554
|
+
try:
|
|
555
|
+
if os.path.exists(p):
|
|
556
|
+
os.remove(p)
|
|
557
|
+
except Exception:
|
|
558
|
+
pass
|
|
768
559
|
except Exception as e:
|
|
769
|
-
|
|
560
|
+
logger.error(f"TTS error: {e}")
|
|
770
561
|
finally:
|
|
771
|
-
|
|
772
562
|
state["tts_is_speaking"] = False
|
|
773
563
|
state["tts_just_finished"] = True
|
|
774
564
|
|
|
775
|
-
for file in [mp3_file, wav_file]:
|
|
776
|
-
try:
|
|
777
|
-
if os.path.exists(file):
|
|
778
|
-
os.remove(file)
|
|
779
|
-
except Exception as e:
|
|
780
|
-
print(f"Error removing temporary file {file}: {e}")
|
|
781
|
-
|
|
782
565
|
|
|
783
566
|
def play_audio(filename, state):
|
|
784
|
-
"""Play
|
|
785
|
-
|
|
567
|
+
"""Play a WAV file via pyaudio with state awareness."""
|
|
568
|
+
import pyaudio
|
|
569
|
+
import wave
|
|
570
|
+
|
|
571
|
+
PLAY_CHUNK = 4096
|
|
786
572
|
|
|
787
573
|
wf = wave.open(filename, "rb")
|
|
788
574
|
p = pyaudio.PyAudio()
|
|
@@ -794,33 +580,19 @@ def play_audio(filename, state):
|
|
|
794
580
|
output=True,
|
|
795
581
|
)
|
|
796
582
|
|
|
797
|
-
data = wf.readframes(
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
while data and state["running"]:
|
|
583
|
+
data = wf.readframes(PLAY_CHUNK)
|
|
584
|
+
while data and state.get("running", True):
|
|
801
585
|
stream.write(data)
|
|
802
|
-
data = wf.readframes(
|
|
586
|
+
data = wf.readframes(PLAY_CHUNK)
|
|
803
587
|
|
|
804
588
|
stream.stop_stream()
|
|
805
589
|
stream.close()
|
|
806
590
|
p.terminate()
|
|
807
591
|
|
|
808
|
-
try:
|
|
809
|
-
os.unlink(filename)
|
|
810
|
-
except:
|
|
811
|
-
pass
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
def process_response_chunk(text_chunk):
|
|
816
|
-
if not text_chunk.strip():
|
|
817
|
-
return
|
|
818
|
-
processed_text = process_text_for_tts(text_chunk)
|
|
819
|
-
create_and_queue_audio(processed_text)
|
|
820
|
-
|
|
821
592
|
|
|
822
593
|
def process_text_for_tts(text):
|
|
823
|
-
text
|
|
594
|
+
"""Clean text for TTS consumption."""
|
|
595
|
+
text = re.sub(r"[*<>{}()\[\]&%#@^~`]", "", text)
|
|
824
596
|
text = text.strip()
|
|
825
597
|
text = re.sub(r"(\w)\.(\w)\.", r"\1 \2 ", text)
|
|
826
598
|
text = re.sub(r"([.!?])(\w)", r"\1 \2", text)
|
|
@@ -4,6 +4,7 @@ Supports multiple TTS engines including real-time voice APIs.
|
|
|
4
4
|
|
|
5
5
|
TTS Engines:
|
|
6
6
|
- Kokoro: Local neural TTS (default)
|
|
7
|
+
- Qwen3-TTS: Local high-quality multilingual TTS (0.6B/1.7B)
|
|
7
8
|
- ElevenLabs: Cloud TTS with streaming
|
|
8
9
|
- OpenAI: Realtime voice API
|
|
9
10
|
- Gemini: Live API for real-time voice
|
|
@@ -13,6 +14,7 @@ Usage:
|
|
|
13
14
|
from npcpy.gen.audio_gen import text_to_speech
|
|
14
15
|
|
|
15
16
|
audio = text_to_speech("Hello world", engine="kokoro", voice="af_heart")
|
|
17
|
+
audio = text_to_speech("Hello world", engine="qwen3", voice="ryan")
|
|
16
18
|
|
|
17
19
|
For STT, see npcpy.data.audio
|
|
18
20
|
"""
|
|
@@ -477,6 +479,155 @@ def get_gemini_voices() -> list:
|
|
|
477
479
|
]
|
|
478
480
|
|
|
479
481
|
|
|
482
|
+
# =============================================================================
|
|
483
|
+
# Qwen3-TTS (Local High-Quality Multilingual)
|
|
484
|
+
# =============================================================================
|
|
485
|
+
|
|
486
|
+
_qwen3_model_cache = {}
|
|
487
|
+
|
|
488
|
+
def _get_qwen3_model(
|
|
489
|
+
model_size: str = "1.7B",
|
|
490
|
+
model_type: str = "custom_voice",
|
|
491
|
+
device: str = "auto",
|
|
492
|
+
):
|
|
493
|
+
"""Load and cache a Qwen3-TTS model."""
|
|
494
|
+
cache_key = (model_size, model_type, device)
|
|
495
|
+
if cache_key in _qwen3_model_cache:
|
|
496
|
+
return _qwen3_model_cache[cache_key]
|
|
497
|
+
|
|
498
|
+
import torch
|
|
499
|
+
from huggingface_hub import snapshot_download
|
|
500
|
+
|
|
501
|
+
if device == "auto":
|
|
502
|
+
if torch.cuda.is_available():
|
|
503
|
+
device = "cuda"
|
|
504
|
+
elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
|
|
505
|
+
device = "mps"
|
|
506
|
+
else:
|
|
507
|
+
device = "cpu"
|
|
508
|
+
|
|
509
|
+
dtype = torch.bfloat16 if device != "cpu" else torch.float32
|
|
510
|
+
|
|
511
|
+
size_tag = "0.6B" if "0.6" in model_size else "1.7B"
|
|
512
|
+
type_map = {
|
|
513
|
+
"custom_voice": f"Qwen/Qwen3-TTS-12Hz-{size_tag}-CustomVoice",
|
|
514
|
+
"base": f"Qwen/Qwen3-TTS-12Hz-{size_tag}-Base",
|
|
515
|
+
"voice_design": f"Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign",
|
|
516
|
+
}
|
|
517
|
+
|
|
518
|
+
repo_id = type_map.get(model_type, type_map["custom_voice"])
|
|
519
|
+
|
|
520
|
+
# Try local cache first, then download
|
|
521
|
+
cache_dir = os.path.join(os.path.expanduser("~"), ".cache", "qwen-tts")
|
|
522
|
+
model_dir = os.path.join(cache_dir, repo_id.split("/")[-1])
|
|
523
|
+
|
|
524
|
+
if not os.path.exists(os.path.join(model_dir, "config.json")):
|
|
525
|
+
os.makedirs(cache_dir, exist_ok=True)
|
|
526
|
+
snapshot_download(repo_id=repo_id, local_dir=model_dir)
|
|
527
|
+
|
|
528
|
+
# Import the model class
|
|
529
|
+
try:
|
|
530
|
+
from qwen_tts import Qwen3TTSModel
|
|
531
|
+
except ImportError:
|
|
532
|
+
raise ImportError(
|
|
533
|
+
"qwen_tts package not found. Install from: "
|
|
534
|
+
"https://github.com/QwenLM/Qwen3-TTS or pip install qwen-tts"
|
|
535
|
+
)
|
|
536
|
+
|
|
537
|
+
model = Qwen3TTSModel.from_pretrained(
|
|
538
|
+
model_dir, device_map=device, dtype=dtype
|
|
539
|
+
)
|
|
540
|
+
|
|
541
|
+
# Clear old entries if switching configs
|
|
542
|
+
_qwen3_model_cache.clear()
|
|
543
|
+
_qwen3_model_cache[cache_key] = model
|
|
544
|
+
return model
|
|
545
|
+
|
|
546
|
+
|
|
547
|
+
def tts_qwen3(
|
|
548
|
+
text: str,
|
|
549
|
+
voice: str = "ryan",
|
|
550
|
+
language: str = "auto",
|
|
551
|
+
model_size: str = "1.7B",
|
|
552
|
+
device: str = "auto",
|
|
553
|
+
speed: float = 1.0,
|
|
554
|
+
ref_audio: str = None,
|
|
555
|
+
ref_text: str = None,
|
|
556
|
+
instruct: str = None,
|
|
557
|
+
) -> bytes:
|
|
558
|
+
"""
|
|
559
|
+
Generate speech using Qwen3-TTS local model.
|
|
560
|
+
|
|
561
|
+
Supports three modes based on arguments:
|
|
562
|
+
- Custom voice (default): Use a preset speaker name
|
|
563
|
+
- Voice clone: Provide ref_audio (path) to clone a voice
|
|
564
|
+
- Voice design: Provide instruct (text description) to design a voice
|
|
565
|
+
|
|
566
|
+
Args:
|
|
567
|
+
text: Text to synthesize
|
|
568
|
+
voice: Speaker name for custom voice mode
|
|
569
|
+
(aiden, dylan, eric, ono_anna, ryan, serena, sohee, uncle_fu, vivian)
|
|
570
|
+
language: Language (auto, chinese, english, japanese, korean, french, etc.)
|
|
571
|
+
model_size: '0.6B' or '1.7B'
|
|
572
|
+
device: 'auto', 'cuda', 'mps', 'cpu'
|
|
573
|
+
speed: Speech speed (not directly supported, reserved)
|
|
574
|
+
ref_audio: Path to reference audio for voice cloning
|
|
575
|
+
ref_text: Transcript of reference audio (recommended for cloning)
|
|
576
|
+
instruct: Natural language voice description for voice design mode
|
|
577
|
+
|
|
578
|
+
Returns:
|
|
579
|
+
WAV audio bytes
|
|
580
|
+
"""
|
|
581
|
+
import numpy as np
|
|
582
|
+
import soundfile as sf
|
|
583
|
+
|
|
584
|
+
if ref_audio:
|
|
585
|
+
model = _get_qwen3_model(model_size, "base", device)
|
|
586
|
+
wavs, sr = model.generate_voice_clone(
|
|
587
|
+
text=text,
|
|
588
|
+
language=language,
|
|
589
|
+
ref_audio=ref_audio,
|
|
590
|
+
ref_text=ref_text,
|
|
591
|
+
)
|
|
592
|
+
elif instruct:
|
|
593
|
+
model = _get_qwen3_model(model_size, "voice_design", device)
|
|
594
|
+
wavs, sr = model.generate_voice_design(
|
|
595
|
+
text=text,
|
|
596
|
+
language=language,
|
|
597
|
+
instruct=instruct,
|
|
598
|
+
)
|
|
599
|
+
else:
|
|
600
|
+
model = _get_qwen3_model(model_size, "custom_voice", device)
|
|
601
|
+
wavs, sr = model.generate_custom_voice(
|
|
602
|
+
text=text,
|
|
603
|
+
language=language,
|
|
604
|
+
speaker=voice.lower().replace(" ", "_"),
|
|
605
|
+
)
|
|
606
|
+
|
|
607
|
+
if not wavs:
|
|
608
|
+
raise ValueError("Qwen3-TTS generated no audio")
|
|
609
|
+
|
|
610
|
+
wav_buffer = io.BytesIO()
|
|
611
|
+
sf.write(wav_buffer, wavs[0], sr, format='WAV')
|
|
612
|
+
wav_buffer.seek(0)
|
|
613
|
+
return wav_buffer.read()
|
|
614
|
+
|
|
615
|
+
|
|
616
|
+
def get_qwen3_voices() -> list:
|
|
617
|
+
"""Get available Qwen3-TTS preset voices."""
|
|
618
|
+
return [
|
|
619
|
+
{"id": "aiden", "name": "Aiden", "gender": "male"},
|
|
620
|
+
{"id": "dylan", "name": "Dylan", "gender": "male"},
|
|
621
|
+
{"id": "eric", "name": "Eric", "gender": "male"},
|
|
622
|
+
{"id": "ryan", "name": "Ryan", "gender": "male"},
|
|
623
|
+
{"id": "serena", "name": "Serena", "gender": "female"},
|
|
624
|
+
{"id": "vivian", "name": "Vivian", "gender": "female"},
|
|
625
|
+
{"id": "sohee", "name": "Sohee", "gender": "female"},
|
|
626
|
+
{"id": "ono_anna", "name": "Ono Anna", "gender": "female"},
|
|
627
|
+
{"id": "uncle_fu", "name": "Uncle Fu", "gender": "male"},
|
|
628
|
+
]
|
|
629
|
+
|
|
630
|
+
|
|
480
631
|
# =============================================================================
|
|
481
632
|
# gTTS (Google Text-to-Speech) - Fallback
|
|
482
633
|
# =============================================================================
|
|
@@ -527,7 +678,7 @@ def text_to_speech(
|
|
|
527
678
|
|
|
528
679
|
Args:
|
|
529
680
|
text: Text to synthesize
|
|
530
|
-
engine: TTS engine (kokoro, elevenlabs, openai, gemini, gtts)
|
|
681
|
+
engine: TTS engine (kokoro, qwen3, elevenlabs, openai, gemini, gtts)
|
|
531
682
|
voice: Voice ID (engine-specific)
|
|
532
683
|
**kwargs: Engine-specific options
|
|
533
684
|
|
|
@@ -542,6 +693,10 @@ def text_to_speech(
|
|
|
542
693
|
lang_code = voices.get(voice, {}).get("lang", "a")
|
|
543
694
|
return tts_kokoro(text, voice=voice, lang_code=lang_code, **kwargs)
|
|
544
695
|
|
|
696
|
+
elif engine in ("qwen3", "qwen3-tts", "qwen"):
|
|
697
|
+
voice = voice or "ryan"
|
|
698
|
+
return tts_qwen3(text, voice=voice, **kwargs)
|
|
699
|
+
|
|
545
700
|
elif engine == "elevenlabs":
|
|
546
701
|
voice = voice or "JBFqnCBsd6RMkjVDRZzb"
|
|
547
702
|
return tts_elevenlabs(text, voice_id=voice, **kwargs)
|
|
@@ -568,6 +723,8 @@ def get_available_voices(engine: str = "kokoro") -> list:
|
|
|
568
723
|
|
|
569
724
|
if engine == "kokoro":
|
|
570
725
|
return get_kokoro_voices()
|
|
726
|
+
elif engine in ("qwen3", "qwen3-tts", "qwen"):
|
|
727
|
+
return get_qwen3_voices()
|
|
571
728
|
elif engine == "elevenlabs":
|
|
572
729
|
return get_elevenlabs_voices()
|
|
573
730
|
elif engine == "openai":
|
|
@@ -590,6 +747,13 @@ def get_available_engines() -> dict:
|
|
|
590
747
|
"description": "Local neural TTS (82M params)",
|
|
591
748
|
"install": "pip install kokoro soundfile"
|
|
592
749
|
},
|
|
750
|
+
"qwen3": {
|
|
751
|
+
"name": "Qwen3-TTS",
|
|
752
|
+
"type": "local",
|
|
753
|
+
"available": False,
|
|
754
|
+
"description": "Local high-quality multilingual TTS (0.6B/1.7B)",
|
|
755
|
+
"install": "pip install qwen-tts torch torchaudio transformers"
|
|
756
|
+
},
|
|
593
757
|
"elevenlabs": {
|
|
594
758
|
"name": "ElevenLabs",
|
|
595
759
|
"type": "cloud",
|
|
@@ -615,7 +779,7 @@ def get_available_engines() -> dict:
|
|
|
615
779
|
"name": "Google TTS",
|
|
616
780
|
"type": "cloud",
|
|
617
781
|
"available": False,
|
|
618
|
-
"description": "Free Google TTS"
|
|
782
|
+
"description": "Free Google TTS (fallback)"
|
|
619
783
|
}
|
|
620
784
|
}
|
|
621
785
|
|
|
@@ -625,6 +789,12 @@ def get_available_engines() -> dict:
|
|
|
625
789
|
except ImportError:
|
|
626
790
|
pass
|
|
627
791
|
|
|
792
|
+
try:
|
|
793
|
+
from qwen_tts import Qwen3TTSModel
|
|
794
|
+
engines["qwen3"]["available"] = True
|
|
795
|
+
except ImportError:
|
|
796
|
+
pass
|
|
797
|
+
|
|
628
798
|
if os.environ.get('ELEVENLABS_API_KEY'):
|
|
629
799
|
engines["elevenlabs"]["available"] = True
|
|
630
800
|
|
|
@@ -242,7 +242,8 @@ def get_llm_response(
|
|
|
242
242
|
base_model, base_provider, base_api_url = _resolve_model_provider(npc, team, model, provider)
|
|
243
243
|
|
|
244
244
|
def _run_single(run_model, run_provider, run_npc, run_team, run_context, extra_kwargs):
|
|
245
|
-
|
|
245
|
+
_tool_capable = bool(extra_kwargs.get("tools"))
|
|
246
|
+
system_message = get_system_message(run_npc, run_team, tool_capable=_tool_capable) if run_npc is not None else "You are a helpful assistant."
|
|
246
247
|
ctx_suffix = _context_suffix(run_context)
|
|
247
248
|
run_messages = _build_messages(messages, system_message, prompt, ctx_suffix)
|
|
248
249
|
return get_litellm_response(
|
|
@@ -1010,7 +1010,7 @@ def print_and_process_stream(response, model, provider):
|
|
|
1010
1010
|
|
|
1011
1011
|
|
|
1012
1012
|
return thinking_str+str_output
|
|
1013
|
-
def get_system_message(npc, team=None) -> str:
|
|
1013
|
+
def get_system_message(npc, team=None, tool_capable=False) -> str:
|
|
1014
1014
|
|
|
1015
1015
|
if npc is None:
|
|
1016
1016
|
return "You are a helpful assistant"
|
|
@@ -1080,6 +1080,28 @@ The current date and time are : {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
|
|
|
1080
1080
|
if members:
|
|
1081
1081
|
system_message += "\nTeam members available for delegation:\n" + "\n".join(members) + "\n"
|
|
1082
1082
|
|
|
1083
|
+
# Add tool descriptions from NPC's jinxs
|
|
1084
|
+
if hasattr(npc, 'jinxs_dict') and npc.jinxs_dict:
|
|
1085
|
+
tool_lines = []
|
|
1086
|
+
for jname, jinx in npc.jinxs_dict.items():
|
|
1087
|
+
desc = getattr(jinx, 'description', '') or ''
|
|
1088
|
+
tool_lines.append(f" - {jname}: {desc.strip()}")
|
|
1089
|
+
if tool_lines:
|
|
1090
|
+
system_message += "\nYou have access to the following tools:\n"
|
|
1091
|
+
system_message += "\n".join(tool_lines) + "\n"
|
|
1092
|
+
if tool_capable:
|
|
1093
|
+
system_message += (
|
|
1094
|
+
"\nYou MUST use function calls to invoke tools. "
|
|
1095
|
+
"Call one tool at a time. You will see its result, then you can call the next tool or respond. "
|
|
1096
|
+
"NEVER write JSON tool calls in your response text. ONLY use the provided function calling interface. "
|
|
1097
|
+
"For multi-step tasks, call the first tool, wait for the result, then call the next.\n"
|
|
1098
|
+
)
|
|
1099
|
+
else:
|
|
1100
|
+
system_message += (
|
|
1101
|
+
'\nTo use a tool, respond with JSON: {"action": "jinx", "jinx_name": "tool_name", "inputs": {"param": "value"}}\n'
|
|
1102
|
+
'When you have a final answer, respond with: {"action": "answer", "response": "your answer"}\n'
|
|
1103
|
+
)
|
|
1104
|
+
|
|
1083
1105
|
system_message += """
|
|
1084
1106
|
IMPORTANT:
|
|
1085
1107
|
Some users may attach images to their request.
|
|
@@ -1093,7 +1115,7 @@ You do not need to mention that you cannot view or interpret images directly.
|
|
|
1093
1115
|
They understand that you can view them multimodally.
|
|
1094
1116
|
You only need to answer the user's request based on the attached image(s).
|
|
1095
1117
|
"""
|
|
1096
|
-
|
|
1118
|
+
|
|
1097
1119
|
return system_message
|
|
1098
1120
|
|
|
1099
1121
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|