npcpy 1.3.20__tar.gz → 1.3.22__tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. {npcpy-1.3.20/npcpy.egg-info → npcpy-1.3.22}/PKG-INFO +1 -1
  2. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/data/audio.py +58 -286
  3. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/gen/audio_gen.py +172 -2
  4. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/llm_funcs.py +2 -1
  5. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/npc_sysenv.py +24 -2
  6. {npcpy-1.3.20 → npcpy-1.3.22/npcpy.egg-info}/PKG-INFO +1 -1
  7. {npcpy-1.3.20 → npcpy-1.3.22}/setup.py +1 -1
  8. {npcpy-1.3.20 → npcpy-1.3.22}/LICENSE +0 -0
  9. {npcpy-1.3.20 → npcpy-1.3.22}/MANIFEST.in +0 -0
  10. {npcpy-1.3.20 → npcpy-1.3.22}/README.md +0 -0
  11. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/__init__.py +0 -0
  12. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/build_funcs.py +0 -0
  13. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/data/__init__.py +0 -0
  14. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/data/data_models.py +0 -0
  15. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/data/image.py +0 -0
  16. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/data/load.py +0 -0
  17. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/data/text.py +0 -0
  18. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/data/video.py +0 -0
  19. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/data/web.py +0 -0
  20. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/ft/__init__.py +0 -0
  21. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/ft/diff.py +0 -0
  22. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/ft/ge.py +0 -0
  23. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/ft/memory_trainer.py +0 -0
  24. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/ft/model_ensembler.py +0 -0
  25. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/ft/rl.py +0 -0
  26. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/ft/sft.py +0 -0
  27. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/ft/usft.py +0 -0
  28. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/gen/__init__.py +0 -0
  29. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/gen/embeddings.py +0 -0
  30. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/gen/image_gen.py +0 -0
  31. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/gen/ocr.py +0 -0
  32. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/gen/response.py +0 -0
  33. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/gen/video_gen.py +0 -0
  34. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/gen/world_gen.py +0 -0
  35. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/main.py +0 -0
  36. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/memory/__init__.py +0 -0
  37. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/memory/command_history.py +0 -0
  38. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/memory/kg_vis.py +0 -0
  39. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/memory/knowledge_graph.py +0 -0
  40. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/memory/memory_processor.py +0 -0
  41. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/memory/search.py +0 -0
  42. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/mix/__init__.py +0 -0
  43. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/mix/debate.py +0 -0
  44. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/ml_funcs.py +0 -0
  45. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/npc_array.py +0 -0
  46. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/npc_compiler.py +0 -0
  47. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/npcs.py +0 -0
  48. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/serve.py +0 -0
  49. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/sql/__init__.py +0 -0
  50. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/sql/ai_function_tools.py +0 -0
  51. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/sql/database_ai_adapters.py +0 -0
  52. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/sql/database_ai_functions.py +0 -0
  53. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/sql/model_runner.py +0 -0
  54. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/sql/npcsql.py +0 -0
  55. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/sql/sql_model_compiler.py +0 -0
  56. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/tools.py +0 -0
  57. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/work/__init__.py +0 -0
  58. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/work/browser.py +0 -0
  59. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/work/desktop.py +0 -0
  60. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/work/plan.py +0 -0
  61. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy/work/trigger.py +0 -0
  62. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy.egg-info/SOURCES.txt +0 -0
  63. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy.egg-info/dependency_links.txt +0 -0
  64. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy.egg-info/requires.txt +0 -0
  65. {npcpy-1.3.20 → npcpy-1.3.22}/npcpy.egg-info/top_level.txt +0 -0
  66. {npcpy-1.3.20 → npcpy-1.3.22}/setup.cfg +0 -0
  67. {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_audio.py +0 -0
  68. {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_browser.py +0 -0
  69. {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_build_funcs.py +0 -0
  70. {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_command_history.py +0 -0
  71. {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_data_models.py +0 -0
  72. {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_diff.py +0 -0
  73. {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_documentation_examples.py +0 -0
  74. {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_genetic_evolver.py +0 -0
  75. {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_image.py +0 -0
  76. {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_llm_funcs.py +0 -0
  77. {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_load.py +0 -0
  78. {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_memory_processor.py +0 -0
  79. {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_ml_funcs.py +0 -0
  80. {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_model_runner.py +0 -0
  81. {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_npc_array.py +0 -0
  82. {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_npc_compiler.py +0 -0
  83. {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_npc_sysenv.py +0 -0
  84. {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_npcsql.py +0 -0
  85. {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_response.py +0 -0
  86. {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_serve.py +0 -0
  87. {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_sql_adapters.py +0 -0
  88. {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_sql_compiler.py +0 -0
  89. {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_sql_functions.py +0 -0
  90. {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_text.py +0 -0
  91. {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_tools.py +0 -0
  92. {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_video.py +0 -0
  93. {npcpy-1.3.20 → npcpy-1.3.22}/tests/test_web.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: npcpy
3
- Version: 1.3.20
3
+ Version: 1.3.22
4
4
  Summary: npcpy is the premier open-source library for integrating LLMs and Agents into python systems.
5
5
  Home-page: https://github.com/NPC-Worldwide/npcpy
6
6
  Author: Christopher Agostino
@@ -6,45 +6,22 @@ import time
6
6
  import queue
7
7
  import re
8
8
  import json
9
-
10
9
  import subprocess
10
+ import logging
11
11
 
12
+ from typing import Optional, List, Dict, Any
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ # Audio constants
12
17
  try:
13
- import torch
14
18
  import pyaudio
15
- import wave
16
- from typing import Optional, List, Dict, Any
17
- from gtts import gTTS
18
- from faster_whisper import WhisperModel
19
- os.environ['PYGAME_HIDE_SUPPORT_PROMPT'] = "hide"
20
-
21
- import pygame
22
-
23
19
  FORMAT = pyaudio.paInt16
24
- CHANNELS = 1
25
- RATE = 16000
26
- CHUNK = 512
27
-
28
-
29
- is_speaking = False
30
- should_stop_speaking = False
31
- tts_sequence = 0
32
- recording_data = []
33
- buffer_data = []
34
- is_recording = False
35
- last_speech_time = 0
36
- running = True
37
-
38
-
39
- audio_queue = queue.Queue()
40
- tts_queue = queue.PriorityQueue()
41
- cleanup_files = []
42
-
43
-
44
- pygame.mixer.quit()
45
- pygame.mixer.init(frequency=44100, size=-16, channels=2, buffer=512)
46
- except:
47
- print("audio dependencies not installed")
20
+ except ImportError:
21
+ FORMAT = 8 # paInt16 value fallback
22
+ CHANNELS = 1
23
+ RATE = 16000
24
+ CHUNK = 512
48
25
 
49
26
 
50
27
  def convert_mp3_to_wav(mp3_file, wav_file):
@@ -90,49 +67,9 @@ def check_ffmpeg():
90
67
  return False
91
68
 
92
69
 
93
- def get_context_string():
94
- context = []
95
- for exchange in history:
96
- context.append(f"User: {exchange['user']}")
97
- context.append(f"Assistant: {exchange['assistant']}")
98
- return "\n".join(context)
99
-
100
-
101
-
102
- def cleanup_temp_files():
103
- global cleanup_files
104
- for file in list(cleanup_files):
105
- try:
106
- if os.path.exists(file):
107
- os.remove(file)
108
- cleanup_files.remove(file)
109
- except Exception:
110
- pass
111
-
112
-
113
- def interrupt_speech():
114
- global should_stop_speaking
115
- should_stop_speaking = True
116
- pygame.mixer.music.stop()
117
- pygame.mixer.music.unload()
118
-
119
- while not tts_queue.empty():
120
- try:
121
- _, temp_filename = tts_queue.get_nowait()
122
- try:
123
- if os.path.exists(temp_filename):
124
- os.remove(temp_filename)
125
- except:
126
- if temp_filename not in cleanup_files:
127
- cleanup_files.append(temp_filename)
128
- except queue.Empty:
129
- break
130
-
131
- global tts_sequence
132
- tts_sequence = 0
133
-
134
-
135
70
  def audio_callback(in_data, frame_count, time_info, status):
71
+ import pyaudio
72
+ audio_queue = queue.Queue()
136
73
  audio_queue.put(in_data)
137
74
  return (in_data, pyaudio.paContinue)
138
75
 
@@ -571,218 +508,67 @@ def get_available_stt_engines() -> dict:
571
508
 
572
509
 
573
510
 
574
- def load_history():
575
- global history
576
- try:
577
- if os.path.exists(memory_file):
578
- with open(memory_file, "r") as f:
579
- history = json.load(f)
580
- except Exception as e:
581
- print(f"Error loading conversation history: {e}")
582
- history = []
583
-
584
-
585
- def save_history():
586
- try:
587
- with open(memory_file, "w") as f:
588
- json.dump(history, f)
589
- except Exception as e:
590
- print(f"Error saving conversation history: {e}")
591
-
592
-
593
- def add_exchange(user_input, assistant_response):
594
- global history
595
- exchange = {
596
- "user": user_input,
597
- "assistant": assistant_response,
598
- "timestamp": time.time(),
599
- }
600
- history.append(exchange)
601
- if len(history) > max_history:
602
- history.pop(0)
603
- save_history()
604
-
605
-
606
- def get_context_string():
607
- context = []
608
- for exchange in history:
609
- context.append(f"User: {exchange['user']}")
610
- context.append(f"Assistant: {exchange['assistant']}")
611
- return "\n".join(context)
612
-
613
-
614
-
615
- def cleanup_temp_files():
616
- global cleanup_files
617
- for file in list(cleanup_files):
618
- try:
619
- if os.path.exists(file):
620
- os.remove(file)
621
- cleanup_files.remove(file)
622
- except Exception:
623
- pass
624
-
625
511
 
626
- def interrupt_speech():
627
- global should_stop_speaking, response_generator, is_speaking, tts_sequence
628
- should_stop_speaking = True
629
- pygame.mixer.music.stop()
630
- pygame.mixer.music.unload()
631
-
632
- while not tts_queue.empty():
633
- try:
634
- _, temp_filename = tts_queue.get_nowait()
635
- try:
636
- if os.path.exists(temp_filename):
637
- os.remove(temp_filename)
638
- except:
639
- if temp_filename not in cleanup_files:
640
- cleanup_files.append(temp_filename)
641
- except queue.Empty:
642
- break
643
-
644
- tts_sequence = 0
645
- is_speaking = False
646
-
647
-
648
- def audio_callback(in_data, frame_count, time_info, status):
649
- audio_queue.put(in_data)
650
- return (in_data, pyaudio.paContinue)
651
-
652
-
653
-
654
- def play_audio_from_queue():
655
- global is_speaking, cleanup_files, should_stop_speaking
656
- next_sequence = 0
657
-
658
- while True:
659
- if should_stop_speaking:
660
- pygame.mixer.music.stop()
661
- pygame.mixer.music.unload()
662
-
663
- while not tts_queue.empty():
664
- try:
665
- _, temp_filename = tts_queue.get_nowait()
666
- try:
667
- if os.path.exists(temp_filename):
668
- os.remove(temp_filename)
669
- except:
670
- if temp_filename not in cleanup_files:
671
- cleanup_files.append(temp_filename)
672
- except queue.Empty:
673
- break
674
-
675
- next_sequence = 0
676
- is_speaking = False
677
- should_stop_speaking = False
678
- time.sleep(0.1)
679
- continue
680
-
681
- try:
682
- if not tts_queue.empty():
683
- sequence, temp_filename = tts_queue.queue[0]
684
-
685
- if sequence == next_sequence:
686
- sequence, temp_filename = tts_queue.get()
687
- is_speaking = True
688
-
689
- try:
690
- if len(cleanup_files) > 0 and not pygame.mixer.music.get_busy():
691
- cleanup_temp_files()
692
-
693
- if should_stop_speaking:
694
- continue
695
-
696
- pygame.mixer.music.load(temp_filename)
697
- pygame.mixer.music.play()
698
-
699
- while (
700
- pygame.mixer.music.get_busy() and not should_stop_speaking
701
- ):
702
- pygame.time.wait(50)
703
-
704
- pygame.mixer.music.unload()
705
-
706
- except Exception as e:
707
- print(f"Audio playback error: {str(e)}")
708
- finally:
709
- try:
710
- if os.path.exists(temp_filename):
711
- os.remove(temp_filename)
712
- except:
713
- if temp_filename not in cleanup_files:
714
- cleanup_files.append(temp_filename)
715
-
716
- if not should_stop_speaking:
717
- next_sequence += 1
718
- is_speaking = False
719
-
720
- time.sleep(0.05)
721
- except Exception:
722
- time.sleep(0.05)
723
-
724
-
725
- import pygame
726
- from gtts import gTTS
727
- import tempfile
728
- import os
729
- import logging
730
-
731
- logging.basicConfig(level=logging.ERROR)
732
- logger = logging.getLogger(__name__)
733
-
734
- import pyaudio
735
- import wave
736
- from gtts import gTTS
737
- import tempfile
738
- import os
739
- import logging
512
+ # =============================================================================
513
+ # TTS Playback Helpers (use unified audio_gen.text_to_speech)
514
+ # =============================================================================
740
515
 
741
- import tempfile
742
- import uuid
516
+ def create_and_queue_audio(text, state, engine="kokoro", voice=None):
517
+ """Create and play TTS audio using the unified engine interface.
743
518
 
519
+ Args:
520
+ text: Text to speak
521
+ state: Dict with 'tts_is_speaking', 'tts_just_finished', 'running' keys
522
+ engine: TTS engine name (kokoro, qwen3, elevenlabs, openai, gemini, gtts)
523
+ voice: Voice ID (engine-specific)
524
+ """
525
+ import wave
526
+ import uuid
744
527
 
745
- def create_and_queue_audio(text, state):
746
- """Create and queue audio with state awareness for TTS/recording coordination"""
747
-
748
528
  state["tts_is_speaking"] = True
749
529
 
750
530
  if not text.strip():
751
- print("Empty text, skipping TTS")
752
531
  state["tts_is_speaking"] = False
753
532
  return
754
533
 
755
534
  try:
756
- unique_id = uuid.uuid4()
757
- with tempfile.TemporaryDirectory() as temp_dir:
758
- mp3_file = os.path.join(temp_dir, f"temp_{unique_id}.mp3")
759
- wav_file = os.path.join(temp_dir, f"temp_{unique_id}.wav")
535
+ from npcpy.gen.audio_gen import text_to_speech
536
+
537
+ audio_bytes = text_to_speech(text, engine=engine, voice=voice)
760
538
 
761
- tts = gTTS(text=text, lang="en", slow=False)
762
- tts.save(mp3_file)
539
+ # Write to temp file and play
540
+ suffix = '.mp3' if engine in ('elevenlabs', 'gtts') else '.wav'
541
+ tmp_path = os.path.join(tempfile.gettempdir(), f"npc_tts_{uuid.uuid4()}{suffix}")
542
+ with open(tmp_path, 'wb') as f:
543
+ f.write(audio_bytes)
763
544
 
764
- convert_mp3_to_wav(mp3_file, wav_file)
545
+ play_path = tmp_path
546
+ if suffix == '.mp3':
547
+ wav_path = tmp_path.replace('.mp3', '.wav')
548
+ convert_mp3_to_wav(tmp_path, wav_path)
549
+ play_path = wav_path
765
550
 
766
-
767
- play_audio(wav_file, state)
551
+ play_audio(play_path, state)
552
+
553
+ for p in set([tmp_path, play_path]):
554
+ try:
555
+ if os.path.exists(p):
556
+ os.remove(p)
557
+ except Exception:
558
+ pass
768
559
  except Exception as e:
769
- print(f"Error in TTS process: {e}")
560
+ logger.error(f"TTS error: {e}")
770
561
  finally:
771
-
772
562
  state["tts_is_speaking"] = False
773
563
  state["tts_just_finished"] = True
774
564
 
775
- for file in [mp3_file, wav_file]:
776
- try:
777
- if os.path.exists(file):
778
- os.remove(file)
779
- except Exception as e:
780
- print(f"Error removing temporary file {file}: {e}")
781
-
782
565
 
783
566
  def play_audio(filename, state):
784
- """Play audio with state awareness for TTS/recording coordination"""
785
- CHUNK = 4096
567
+ """Play a WAV file via pyaudio with state awareness."""
568
+ import pyaudio
569
+ import wave
570
+
571
+ PLAY_CHUNK = 4096
786
572
 
787
573
  wf = wave.open(filename, "rb")
788
574
  p = pyaudio.PyAudio()
@@ -794,33 +580,19 @@ def play_audio(filename, state):
794
580
  output=True,
795
581
  )
796
582
 
797
- data = wf.readframes(CHUNK)
798
-
799
-
800
- while data and state["running"]:
583
+ data = wf.readframes(PLAY_CHUNK)
584
+ while data and state.get("running", True):
801
585
  stream.write(data)
802
- data = wf.readframes(CHUNK)
586
+ data = wf.readframes(PLAY_CHUNK)
803
587
 
804
588
  stream.stop_stream()
805
589
  stream.close()
806
590
  p.terminate()
807
591
 
808
- try:
809
- os.unlink(filename)
810
- except:
811
- pass
812
-
813
-
814
-
815
- def process_response_chunk(text_chunk):
816
- if not text_chunk.strip():
817
- return
818
- processed_text = process_text_for_tts(text_chunk)
819
- create_and_queue_audio(processed_text)
820
-
821
592
 
822
593
  def process_text_for_tts(text):
823
- text = re.sub(r"[*<>{}()\[\]&%")
594
+ """Clean text for TTS consumption."""
595
+ text = re.sub(r"[*<>{}()\[\]&%#@^~`]", "", text)
824
596
  text = text.strip()
825
597
  text = re.sub(r"(\w)\.(\w)\.", r"\1 \2 ", text)
826
598
  text = re.sub(r"([.!?])(\w)", r"\1 \2", text)
@@ -4,6 +4,7 @@ Supports multiple TTS engines including real-time voice APIs.
4
4
 
5
5
  TTS Engines:
6
6
  - Kokoro: Local neural TTS (default)
7
+ - Qwen3-TTS: Local high-quality multilingual TTS (0.6B/1.7B)
7
8
  - ElevenLabs: Cloud TTS with streaming
8
9
  - OpenAI: Realtime voice API
9
10
  - Gemini: Live API for real-time voice
@@ -13,6 +14,7 @@ Usage:
13
14
  from npcpy.gen.audio_gen import text_to_speech
14
15
 
15
16
  audio = text_to_speech("Hello world", engine="kokoro", voice="af_heart")
17
+ audio = text_to_speech("Hello world", engine="qwen3", voice="ryan")
16
18
 
17
19
  For STT, see npcpy.data.audio
18
20
  """
@@ -477,6 +479,155 @@ def get_gemini_voices() -> list:
477
479
  ]
478
480
 
479
481
 
482
+ # =============================================================================
483
+ # Qwen3-TTS (Local High-Quality Multilingual)
484
+ # =============================================================================
485
+
486
+ _qwen3_model_cache = {}
487
+
488
+ def _get_qwen3_model(
489
+ model_size: str = "1.7B",
490
+ model_type: str = "custom_voice",
491
+ device: str = "auto",
492
+ ):
493
+ """Load and cache a Qwen3-TTS model."""
494
+ cache_key = (model_size, model_type, device)
495
+ if cache_key in _qwen3_model_cache:
496
+ return _qwen3_model_cache[cache_key]
497
+
498
+ import torch
499
+ from huggingface_hub import snapshot_download
500
+
501
+ if device == "auto":
502
+ if torch.cuda.is_available():
503
+ device = "cuda"
504
+ elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
505
+ device = "mps"
506
+ else:
507
+ device = "cpu"
508
+
509
+ dtype = torch.bfloat16 if device != "cpu" else torch.float32
510
+
511
+ size_tag = "0.6B" if "0.6" in model_size else "1.7B"
512
+ type_map = {
513
+ "custom_voice": f"Qwen/Qwen3-TTS-12Hz-{size_tag}-CustomVoice",
514
+ "base": f"Qwen/Qwen3-TTS-12Hz-{size_tag}-Base",
515
+ "voice_design": f"Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign",
516
+ }
517
+
518
+ repo_id = type_map.get(model_type, type_map["custom_voice"])
519
+
520
+ # Try local cache first, then download
521
+ cache_dir = os.path.join(os.path.expanduser("~"), ".cache", "qwen-tts")
522
+ model_dir = os.path.join(cache_dir, repo_id.split("/")[-1])
523
+
524
+ if not os.path.exists(os.path.join(model_dir, "config.json")):
525
+ os.makedirs(cache_dir, exist_ok=True)
526
+ snapshot_download(repo_id=repo_id, local_dir=model_dir)
527
+
528
+ # Import the model class
529
+ try:
530
+ from qwen_tts import Qwen3TTSModel
531
+ except ImportError:
532
+ raise ImportError(
533
+ "qwen_tts package not found. Install from: "
534
+ "https://github.com/QwenLM/Qwen3-TTS or pip install qwen-tts"
535
+ )
536
+
537
+ model = Qwen3TTSModel.from_pretrained(
538
+ model_dir, device_map=device, dtype=dtype
539
+ )
540
+
541
+ # Clear old entries if switching configs
542
+ _qwen3_model_cache.clear()
543
+ _qwen3_model_cache[cache_key] = model
544
+ return model
545
+
546
+
547
+ def tts_qwen3(
548
+ text: str,
549
+ voice: str = "ryan",
550
+ language: str = "auto",
551
+ model_size: str = "1.7B",
552
+ device: str = "auto",
553
+ speed: float = 1.0,
554
+ ref_audio: str = None,
555
+ ref_text: str = None,
556
+ instruct: str = None,
557
+ ) -> bytes:
558
+ """
559
+ Generate speech using Qwen3-TTS local model.
560
+
561
+ Supports three modes based on arguments:
562
+ - Custom voice (default): Use a preset speaker name
563
+ - Voice clone: Provide ref_audio (path) to clone a voice
564
+ - Voice design: Provide instruct (text description) to design a voice
565
+
566
+ Args:
567
+ text: Text to synthesize
568
+ voice: Speaker name for custom voice mode
569
+ (aiden, dylan, eric, ono_anna, ryan, serena, sohee, uncle_fu, vivian)
570
+ language: Language (auto, chinese, english, japanese, korean, french, etc.)
571
+ model_size: '0.6B' or '1.7B'
572
+ device: 'auto', 'cuda', 'mps', 'cpu'
573
+ speed: Speech speed (not directly supported, reserved)
574
+ ref_audio: Path to reference audio for voice cloning
575
+ ref_text: Transcript of reference audio (recommended for cloning)
576
+ instruct: Natural language voice description for voice design mode
577
+
578
+ Returns:
579
+ WAV audio bytes
580
+ """
581
+ import numpy as np
582
+ import soundfile as sf
583
+
584
+ if ref_audio:
585
+ model = _get_qwen3_model(model_size, "base", device)
586
+ wavs, sr = model.generate_voice_clone(
587
+ text=text,
588
+ language=language,
589
+ ref_audio=ref_audio,
590
+ ref_text=ref_text,
591
+ )
592
+ elif instruct:
593
+ model = _get_qwen3_model(model_size, "voice_design", device)
594
+ wavs, sr = model.generate_voice_design(
595
+ text=text,
596
+ language=language,
597
+ instruct=instruct,
598
+ )
599
+ else:
600
+ model = _get_qwen3_model(model_size, "custom_voice", device)
601
+ wavs, sr = model.generate_custom_voice(
602
+ text=text,
603
+ language=language,
604
+ speaker=voice.lower().replace(" ", "_"),
605
+ )
606
+
607
+ if not wavs:
608
+ raise ValueError("Qwen3-TTS generated no audio")
609
+
610
+ wav_buffer = io.BytesIO()
611
+ sf.write(wav_buffer, wavs[0], sr, format='WAV')
612
+ wav_buffer.seek(0)
613
+ return wav_buffer.read()
614
+
615
+
616
+ def get_qwen3_voices() -> list:
617
+ """Get available Qwen3-TTS preset voices."""
618
+ return [
619
+ {"id": "aiden", "name": "Aiden", "gender": "male"},
620
+ {"id": "dylan", "name": "Dylan", "gender": "male"},
621
+ {"id": "eric", "name": "Eric", "gender": "male"},
622
+ {"id": "ryan", "name": "Ryan", "gender": "male"},
623
+ {"id": "serena", "name": "Serena", "gender": "female"},
624
+ {"id": "vivian", "name": "Vivian", "gender": "female"},
625
+ {"id": "sohee", "name": "Sohee", "gender": "female"},
626
+ {"id": "ono_anna", "name": "Ono Anna", "gender": "female"},
627
+ {"id": "uncle_fu", "name": "Uncle Fu", "gender": "male"},
628
+ ]
629
+
630
+
480
631
  # =============================================================================
481
632
  # gTTS (Google Text-to-Speech) - Fallback
482
633
  # =============================================================================
@@ -527,7 +678,7 @@ def text_to_speech(
527
678
 
528
679
  Args:
529
680
  text: Text to synthesize
530
- engine: TTS engine (kokoro, elevenlabs, openai, gemini, gtts)
681
+ engine: TTS engine (kokoro, qwen3, elevenlabs, openai, gemini, gtts)
531
682
  voice: Voice ID (engine-specific)
532
683
  **kwargs: Engine-specific options
533
684
 
@@ -542,6 +693,10 @@ def text_to_speech(
542
693
  lang_code = voices.get(voice, {}).get("lang", "a")
543
694
  return tts_kokoro(text, voice=voice, lang_code=lang_code, **kwargs)
544
695
 
696
+ elif engine in ("qwen3", "qwen3-tts", "qwen"):
697
+ voice = voice or "ryan"
698
+ return tts_qwen3(text, voice=voice, **kwargs)
699
+
545
700
  elif engine == "elevenlabs":
546
701
  voice = voice or "JBFqnCBsd6RMkjVDRZzb"
547
702
  return tts_elevenlabs(text, voice_id=voice, **kwargs)
@@ -568,6 +723,8 @@ def get_available_voices(engine: str = "kokoro") -> list:
568
723
 
569
724
  if engine == "kokoro":
570
725
  return get_kokoro_voices()
726
+ elif engine in ("qwen3", "qwen3-tts", "qwen"):
727
+ return get_qwen3_voices()
571
728
  elif engine == "elevenlabs":
572
729
  return get_elevenlabs_voices()
573
730
  elif engine == "openai":
@@ -590,6 +747,13 @@ def get_available_engines() -> dict:
590
747
  "description": "Local neural TTS (82M params)",
591
748
  "install": "pip install kokoro soundfile"
592
749
  },
750
+ "qwen3": {
751
+ "name": "Qwen3-TTS",
752
+ "type": "local",
753
+ "available": False,
754
+ "description": "Local high-quality multilingual TTS (0.6B/1.7B)",
755
+ "install": "pip install qwen-tts torch torchaudio transformers"
756
+ },
593
757
  "elevenlabs": {
594
758
  "name": "ElevenLabs",
595
759
  "type": "cloud",
@@ -615,7 +779,7 @@ def get_available_engines() -> dict:
615
779
  "name": "Google TTS",
616
780
  "type": "cloud",
617
781
  "available": False,
618
- "description": "Free Google TTS"
782
+ "description": "Free Google TTS (fallback)"
619
783
  }
620
784
  }
621
785
 
@@ -625,6 +789,12 @@ def get_available_engines() -> dict:
625
789
  except ImportError:
626
790
  pass
627
791
 
792
+ try:
793
+ from qwen_tts import Qwen3TTSModel
794
+ engines["qwen3"]["available"] = True
795
+ except ImportError:
796
+ pass
797
+
628
798
  if os.environ.get('ELEVENLABS_API_KEY'):
629
799
  engines["elevenlabs"]["available"] = True
630
800
 
@@ -242,7 +242,8 @@ def get_llm_response(
242
242
  base_model, base_provider, base_api_url = _resolve_model_provider(npc, team, model, provider)
243
243
 
244
244
  def _run_single(run_model, run_provider, run_npc, run_team, run_context, extra_kwargs):
245
- system_message = get_system_message(run_npc, run_team) if run_npc is not None else "You are a helpful assistant."
245
+ _tool_capable = bool(extra_kwargs.get("tools"))
246
+ system_message = get_system_message(run_npc, run_team, tool_capable=_tool_capable) if run_npc is not None else "You are a helpful assistant."
246
247
  ctx_suffix = _context_suffix(run_context)
247
248
  run_messages = _build_messages(messages, system_message, prompt, ctx_suffix)
248
249
  return get_litellm_response(
@@ -1010,7 +1010,7 @@ def print_and_process_stream(response, model, provider):
1010
1010
 
1011
1011
 
1012
1012
  return thinking_str+str_output
1013
- def get_system_message(npc, team=None) -> str:
1013
+ def get_system_message(npc, team=None, tool_capable=False) -> str:
1014
1014
 
1015
1015
  if npc is None:
1016
1016
  return "You are a helpful assistant"
@@ -1080,6 +1080,28 @@ The current date and time are : {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
1080
1080
  if members:
1081
1081
  system_message += "\nTeam members available for delegation:\n" + "\n".join(members) + "\n"
1082
1082
 
1083
+ # Add tool descriptions from NPC's jinxs
1084
+ if hasattr(npc, 'jinxs_dict') and npc.jinxs_dict:
1085
+ tool_lines = []
1086
+ for jname, jinx in npc.jinxs_dict.items():
1087
+ desc = getattr(jinx, 'description', '') or ''
1088
+ tool_lines.append(f" - {jname}: {desc.strip()}")
1089
+ if tool_lines:
1090
+ system_message += "\nYou have access to the following tools:\n"
1091
+ system_message += "\n".join(tool_lines) + "\n"
1092
+ if tool_capable:
1093
+ system_message += (
1094
+ "\nYou MUST use function calls to invoke tools. "
1095
+ "Call one tool at a time. You will see its result, then you can call the next tool or respond. "
1096
+ "NEVER write JSON tool calls in your response text. ONLY use the provided function calling interface. "
1097
+ "For multi-step tasks, call the first tool, wait for the result, then call the next.\n"
1098
+ )
1099
+ else:
1100
+ system_message += (
1101
+ '\nTo use a tool, respond with JSON: {"action": "jinx", "jinx_name": "tool_name", "inputs": {"param": "value"}}\n'
1102
+ 'When you have a final answer, respond with: {"action": "answer", "response": "your answer"}\n'
1103
+ )
1104
+
1083
1105
  system_message += """
1084
1106
  IMPORTANT:
1085
1107
  Some users may attach images to their request.
@@ -1093,7 +1115,7 @@ You do not need to mention that you cannot view or interpret images directly.
1093
1115
  They understand that you can view them multimodally.
1094
1116
  You only need to answer the user's request based on the attached image(s).
1095
1117
  """
1096
-
1118
+
1097
1119
  return system_message
1098
1120
 
1099
1121
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: npcpy
3
- Version: 1.3.20
3
+ Version: 1.3.22
4
4
  Summary: npcpy is the premier open-source library for integrating LLMs and Agents into python systems.
5
5
  Home-page: https://github.com/NPC-Worldwide/npcpy
6
6
  Author: Christopher Agostino
@@ -84,7 +84,7 @@ extra_files = package_files("npcpy/npc_team/")
84
84
 
85
85
  setup(
86
86
  name="npcpy",
87
- version="1.3.20",
87
+ version="1.3.22",
88
88
  packages=find_packages(exclude=["tests*"]),
89
89
  install_requires=base_requirements,
90
90
  extras_require={
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes