PyPI - npcpy - Versions diffs - 1.3.20__tar.gz → 1.3.22__tar.gz - Mend

npcpy 1.3.20.tar.gz → 1.3.22.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (93)

{npcpy-1.3.20/npcpy.egg-info → npcpy-1.3.22}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: npcpy
-Version: 1.3.20
+Version: 1.3.22
 Summary: npcpy is the premier open-source library for integrating LLMs and Agents into python systems.
 Home-page: https://github.com/NPC-Worldwide/npcpy
 Author: Christopher Agostino

{npcpy-1.3.20 → npcpy-1.3.22}/npcpy/data/audio.py RENAMED Viewed

@@ -6,45 +6,22 @@ import time
 import queue
 import re
 import json
 import subprocess
+import logging
+from typing import Optional, List, Dict, Any
+logger = logging.getLogger(__name__)
+# Audio constants
 try:
-    import torch
     import pyaudio
-    import wave
-    from typing import Optional, List, Dict, Any
-    from gtts import gTTS
-    from faster_whisper import WhisperModel
-    os.environ['PYGAME_HIDE_SUPPORT_PROMPT'] = "hide"
-    import pygame
     FORMAT = pyaudio.paInt16
-    CHANNELS = 1
-    RATE = 16000
-    CHUNK = 512
-    is_speaking = False
-    should_stop_speaking = False
-    tts_sequence = 0
-    recording_data = []
-    buffer_data = []
-    is_recording = False
-    last_speech_time = 0
-    running = True
-    audio_queue = queue.Queue()
-    tts_queue = queue.PriorityQueue()
-    cleanup_files = []
-    pygame.mixer.quit()
-    pygame.mixer.init(frequency=44100, size=-16, channels=2, buffer=512)
-except:
-    print("audio dependencies not installed")
+except ImportError:
+    FORMAT = 8  # paInt16 value fallback
+CHANNELS = 1
+RATE = 16000
+CHUNK = 512
 def convert_mp3_to_wav(mp3_file, wav_file):
@@ -90,49 +67,9 @@ def check_ffmpeg():
         return False
-def get_context_string():
-    context = []
-    for exchange in history:
-        context.append(f"User: {exchange['user']}")
-        context.append(f"Assistant: {exchange['assistant']}")
-    return "\n".join(context)
-def cleanup_temp_files():
-    global cleanup_files
-    for file in list(cleanup_files):
-        try:
-            if os.path.exists(file):
-                os.remove(file)
-                cleanup_files.remove(file)
-        except Exception:
-            pass
-def interrupt_speech():
-    global should_stop_speaking
-    should_stop_speaking = True
-    pygame.mixer.music.stop()
-    pygame.mixer.music.unload()
-    while not tts_queue.empty():
-        try:
-            _, temp_filename = tts_queue.get_nowait()
-            try:
-                if os.path.exists(temp_filename):
-                    os.remove(temp_filename)
-            except:
-                if temp_filename not in cleanup_files:
-                    cleanup_files.append(temp_filename)
-        except queue.Empty:
-            break
-    global tts_sequence
-    tts_sequence = 0
 def audio_callback(in_data, frame_count, time_info, status):
+    import pyaudio
+    audio_queue = queue.Queue()
     audio_queue.put(in_data)
     return (in_data, pyaudio.paContinue)
@@ -571,218 +508,67 @@ def get_available_stt_engines() -> dict:
-def load_history():
-    global history
-    try:
-        if os.path.exists(memory_file):
-            with open(memory_file, "r") as f:
-                history = json.load(f)
-    except Exception as e:
-        print(f"Error loading conversation history: {e}")
-        history = []
-def save_history():
-    try:
-        with open(memory_file, "w") as f:
-            json.dump(history, f)
-    except Exception as e:
-        print(f"Error saving conversation history: {e}")
-def add_exchange(user_input, assistant_response):
-    global history
-    exchange = {
-        "user": user_input,
-        "assistant": assistant_response,
-        "timestamp": time.time(),
-    }
-    history.append(exchange)
-    if len(history) > max_history:
-        history.pop(0)
-    save_history()
-def get_context_string():
-    context = []
-    for exchange in history:
-        context.append(f"User: {exchange['user']}")
-        context.append(f"Assistant: {exchange['assistant']}")
-    return "\n".join(context)
-def cleanup_temp_files():
-    global cleanup_files
-    for file in list(cleanup_files):
-        try:
-            if os.path.exists(file):
-                os.remove(file)
-                cleanup_files.remove(file)
-        except Exception:
-            pass
-def interrupt_speech():
-    global should_stop_speaking, response_generator, is_speaking, tts_sequence
-    should_stop_speaking = True
-    pygame.mixer.music.stop()
-    pygame.mixer.music.unload()
-    while not tts_queue.empty():
-        try:
-            _, temp_filename = tts_queue.get_nowait()
-            try:
-                if os.path.exists(temp_filename):
-                    os.remove(temp_filename)
-            except:
-                if temp_filename not in cleanup_files:
-                    cleanup_files.append(temp_filename)
-        except queue.Empty:
-            break
-    tts_sequence = 0
-    is_speaking = False
-def audio_callback(in_data, frame_count, time_info, status):
-    audio_queue.put(in_data)
-    return (in_data, pyaudio.paContinue)
-def play_audio_from_queue():
-    global is_speaking, cleanup_files, should_stop_speaking
-    next_sequence = 0
-    while True:
-        if should_stop_speaking:
-            pygame.mixer.music.stop()
-            pygame.mixer.music.unload()
-            while not tts_queue.empty():
-                try:
-                    _, temp_filename = tts_queue.get_nowait()
-                    try:
-                        if os.path.exists(temp_filename):
-                            os.remove(temp_filename)
-                    except:
-                        if temp_filename not in cleanup_files:
-                            cleanup_files.append(temp_filename)
-                except queue.Empty:
-                    break
-            next_sequence = 0
-            is_speaking = False
-            should_stop_speaking = False
-            time.sleep(0.1)
-            continue
-        try:
-            if not tts_queue.empty():
-                sequence, temp_filename = tts_queue.queue[0]
-                if sequence == next_sequence:
-                    sequence, temp_filename = tts_queue.get()
-                    is_speaking = True
-                    try:
-                        if len(cleanup_files) > 0 and not pygame.mixer.music.get_busy():
-                            cleanup_temp_files()
-                        if should_stop_speaking:
-                            continue
-                        pygame.mixer.music.load(temp_filename)
-                        pygame.mixer.music.play()
-                        while (
-                            pygame.mixer.music.get_busy() and not should_stop_speaking
-                        ):
-                            pygame.time.wait(50)
-                        pygame.mixer.music.unload()
-                    except Exception as e:
-                        print(f"Audio playback error: {str(e)}")
-                    finally:
-                        try:
-                            if os.path.exists(temp_filename):
-                                os.remove(temp_filename)
-                        except:
-                            if temp_filename not in cleanup_files:
-                                cleanup_files.append(temp_filename)
-                        if not should_stop_speaking:
-                            next_sequence += 1
-                        is_speaking = False
-            time.sleep(0.05)
-        except Exception:
-            time.sleep(0.05)
-import pygame
-from gtts import gTTS
-import tempfile
-import os
-import logging
-logging.basicConfig(level=logging.ERROR)
-logger = logging.getLogger(__name__)
-import pyaudio
-import wave
-from gtts import gTTS
-import tempfile
-import os
-import logging
+# =============================================================================
+# TTS Playback Helpers (use unified audio_gen.text_to_speech)
+# =============================================================================
-import tempfile
-import uuid
+def create_and_queue_audio(text, state, engine="kokoro", voice=None):
+    """Create and play TTS audio using the unified engine interface.
+    Args:
+        text: Text to speak
+        state: Dict with 'tts_is_speaking', 'tts_just_finished', 'running' keys
+        engine: TTS engine name (kokoro, qwen3, elevenlabs, openai, gemini, gtts)
+        voice: Voice ID (engine-specific)
+    """
+    import wave
+    import uuid
-def create_and_queue_audio(text, state):
-    """Create and queue audio with state awareness for TTS/recording coordination"""
     state["tts_is_speaking"] = True
     if not text.strip():
-        print("Empty text, skipping TTS")
         state["tts_is_speaking"] = False
         return
     try:
-        unique_id = uuid.uuid4()
-        with tempfile.TemporaryDirectory() as temp_dir:
-            mp3_file = os.path.join(temp_dir, f"temp_{unique_id}.mp3")
-            wav_file = os.path.join(temp_dir, f"temp_{unique_id}.wav")
+        from npcpy.gen.audio_gen import text_to_speech
+        audio_bytes = text_to_speech(text, engine=engine, voice=voice)
-            tts = gTTS(text=text, lang="en", slow=False)
-            tts.save(mp3_file)
+        # Write to temp file and play
+        suffix = '.mp3' if engine in ('elevenlabs', 'gtts') else '.wav'
+        tmp_path = os.path.join(tempfile.gettempdir(), f"npc_tts_{uuid.uuid4()}{suffix}")
+        with open(tmp_path, 'wb') as f:
+            f.write(audio_bytes)
-            convert_mp3_to_wav(mp3_file, wav_file)
+        play_path = tmp_path
+        if suffix == '.mp3':
+            wav_path = tmp_path.replace('.mp3', '.wav')
+            convert_mp3_to_wav(tmp_path, wav_path)
+            play_path = wav_path
-            play_audio(wav_file, state)
+        play_audio(play_path, state)
+        for p in set([tmp_path, play_path]):
+            try:
+                if os.path.exists(p):
+                    os.remove(p)
+            except Exception:
+                pass
     except Exception as e:
-        print(f"Error in TTS process: {e}")
+        logger.error(f"TTS error: {e}")
     finally:
         state["tts_is_speaking"] = False
         state["tts_just_finished"] = True
-        for file in [mp3_file, wav_file]:
-            try:
-                if os.path.exists(file):
-                    os.remove(file)
-            except Exception as e:
-                print(f"Error removing temporary file {file}: {e}")
 def play_audio(filename, state):
-    """Play audio with state awareness for TTS/recording coordination"""
-    CHUNK = 4096
+    """Play a WAV file via pyaudio with state awareness."""
+    import pyaudio
+    import wave
+    PLAY_CHUNK = 4096
     wf = wave.open(filename, "rb")
     p = pyaudio.PyAudio()
@@ -794,33 +580,19 @@ def play_audio(filename, state):
         output=True,
     )
-    data = wf.readframes(CHUNK)
-    while data and state["running"]:
+    data = wf.readframes(PLAY_CHUNK)
+    while data and state.get("running", True):
         stream.write(data)
-        data = wf.readframes(CHUNK)
+        data = wf.readframes(PLAY_CHUNK)
     stream.stop_stream()
     stream.close()
     p.terminate()
-    try:
-        os.unlink(filename)
-    except:
-        pass
-def process_response_chunk(text_chunk):
-    if not text_chunk.strip():
-        return
-    processed_text = process_text_for_tts(text_chunk)
-    create_and_queue_audio(processed_text)
 def process_text_for_tts(text):
-    text = re.sub(r"[*<>{}()\[\]&%")
+    """Clean text for TTS consumption."""
+    text = re.sub(r"[*<>{}()\[\]&%#@^~`]", "", text)
     text = text.strip()
     text = re.sub(r"(\w)\.(\w)\.", r"\1 \2 ", text)
     text = re.sub(r"([.!?])(\w)", r"\1 \2", text)

{npcpy-1.3.20 → npcpy-1.3.22}/npcpy/gen/audio_gen.py RENAMED Viewed

@@ -4,6 +4,7 @@ Supports multiple TTS engines including real-time voice APIs.
 TTS Engines:
 - Kokoro: Local neural TTS (default)
+- Qwen3-TTS: Local high-quality multilingual TTS (0.6B/1.7B)
 - ElevenLabs: Cloud TTS with streaming
 - OpenAI: Realtime voice API
 - Gemini: Live API for real-time voice
@@ -13,6 +14,7 @@ Usage:
     from npcpy.gen.audio_gen import text_to_speech
     audio = text_to_speech("Hello world", engine="kokoro", voice="af_heart")
+    audio = text_to_speech("Hello world", engine="qwen3", voice="ryan")
 For STT, see npcpy.data.audio
 """
@@ -477,6 +479,155 @@ def get_gemini_voices() -> list:
     ]
+# =============================================================================
+# Qwen3-TTS (Local High-Quality Multilingual)
+# =============================================================================
+_qwen3_model_cache = {}
+def _get_qwen3_model(
+    model_size: str = "1.7B",
+    model_type: str = "custom_voice",
+    device: str = "auto",
+):
+    """Load and cache a Qwen3-TTS model."""
+    cache_key = (model_size, model_type, device)
+    if cache_key in _qwen3_model_cache:
+        return _qwen3_model_cache[cache_key]
+    import torch
+    from huggingface_hub import snapshot_download
+    if device == "auto":
+        if torch.cuda.is_available():
+            device = "cuda"
+        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
+            device = "mps"
+        else:
+            device = "cpu"
+    dtype = torch.bfloat16 if device != "cpu" else torch.float32
+    size_tag = "0.6B" if "0.6" in model_size else "1.7B"
+    type_map = {
+        "custom_voice": f"Qwen/Qwen3-TTS-12Hz-{size_tag}-CustomVoice",
+        "base": f"Qwen/Qwen3-TTS-12Hz-{size_tag}-Base",
+        "voice_design": f"Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign",
+    }
+    repo_id = type_map.get(model_type, type_map["custom_voice"])
+    # Try local cache first, then download
+    cache_dir = os.path.join(os.path.expanduser("~"), ".cache", "qwen-tts")
+    model_dir = os.path.join(cache_dir, repo_id.split("/")[-1])
+    if not os.path.exists(os.path.join(model_dir, "config.json")):
+        os.makedirs(cache_dir, exist_ok=True)
+        snapshot_download(repo_id=repo_id, local_dir=model_dir)
+    # Import the model class
+    try:
+        from qwen_tts import Qwen3TTSModel
+    except ImportError:
+        raise ImportError(
+            "qwen_tts package not found. Install from: "
+            "https://github.com/QwenLM/Qwen3-TTS or pip install qwen-tts"
+        )
+    model = Qwen3TTSModel.from_pretrained(
+        model_dir, device_map=device, dtype=dtype
+    )
+    # Clear old entries if switching configs
+    _qwen3_model_cache.clear()
+    _qwen3_model_cache[cache_key] = model
+    return model
+def tts_qwen3(
+    text: str,
+    voice: str = "ryan",
+    language: str = "auto",
+    model_size: str = "1.7B",
+    device: str = "auto",
+    speed: float = 1.0,
+    ref_audio: str = None,
+    ref_text: str = None,
+    instruct: str = None,
+) -> bytes:
+    """
+    Generate speech using Qwen3-TTS local model.
+    Supports three modes based on arguments:
+    - Custom voice (default): Use a preset speaker name
+    - Voice clone: Provide ref_audio (path) to clone a voice
+    - Voice design: Provide instruct (text description) to design a voice
+    Args:
+        text: Text to synthesize
+        voice: Speaker name for custom voice mode
+            (aiden, dylan, eric, ono_anna, ryan, serena, sohee, uncle_fu, vivian)
+        language: Language (auto, chinese, english, japanese, korean, french, etc.)
+        model_size: '0.6B' or '1.7B'
+        device: 'auto', 'cuda', 'mps', 'cpu'
+        speed: Speech speed (not directly supported, reserved)
+        ref_audio: Path to reference audio for voice cloning
+        ref_text: Transcript of reference audio (recommended for cloning)
+        instruct: Natural language voice description for voice design mode
+    Returns:
+        WAV audio bytes
+    """
+    import numpy as np
+    import soundfile as sf
+    if ref_audio:
+        model = _get_qwen3_model(model_size, "base", device)
+        wavs, sr = model.generate_voice_clone(
+            text=text,
+            language=language,
+            ref_audio=ref_audio,
+            ref_text=ref_text,
+        )
+    elif instruct:
+        model = _get_qwen3_model(model_size, "voice_design", device)
+        wavs, sr = model.generate_voice_design(
+            text=text,
+            language=language,
+            instruct=instruct,
+        )
+    else:
+        model = _get_qwen3_model(model_size, "custom_voice", device)
+        wavs, sr = model.generate_custom_voice(
+            text=text,
+            language=language,
+            speaker=voice.lower().replace(" ", "_"),
+        )
+    if not wavs:
+        raise ValueError("Qwen3-TTS generated no audio")
+    wav_buffer = io.BytesIO()
+    sf.write(wav_buffer, wavs[0], sr, format='WAV')
+    wav_buffer.seek(0)
+    return wav_buffer.read()
+def get_qwen3_voices() -> list:
+    """Get available Qwen3-TTS preset voices."""
+    return [
+        {"id": "aiden", "name": "Aiden", "gender": "male"},
+        {"id": "dylan", "name": "Dylan", "gender": "male"},
+        {"id": "eric", "name": "Eric", "gender": "male"},
+        {"id": "ryan", "name": "Ryan", "gender": "male"},
+        {"id": "serena", "name": "Serena", "gender": "female"},
+        {"id": "vivian", "name": "Vivian", "gender": "female"},
+        {"id": "sohee", "name": "Sohee", "gender": "female"},
+        {"id": "ono_anna", "name": "Ono Anna", "gender": "female"},
+        {"id": "uncle_fu", "name": "Uncle Fu", "gender": "male"},
+    ]
 # =============================================================================
 # gTTS (Google Text-to-Speech) - Fallback
 # =============================================================================
@@ -527,7 +678,7 @@ def text_to_speech(
     Args:
         text: Text to synthesize
-        engine: TTS engine (kokoro, elevenlabs, openai, gemini, gtts)
+        engine: TTS engine (kokoro, qwen3, elevenlabs, openai, gemini, gtts)
         voice: Voice ID (engine-specific)
         **kwargs: Engine-specific options
@@ -542,6 +693,10 @@ def text_to_speech(
         lang_code = voices.get(voice, {}).get("lang", "a")
         return tts_kokoro(text, voice=voice, lang_code=lang_code, **kwargs)
+    elif engine in ("qwen3", "qwen3-tts", "qwen"):
+        voice = voice or "ryan"
+        return tts_qwen3(text, voice=voice, **kwargs)
     elif engine == "elevenlabs":
         voice = voice or "JBFqnCBsd6RMkjVDRZzb"
         return tts_elevenlabs(text, voice_id=voice, **kwargs)
@@ -568,6 +723,8 @@ def get_available_voices(engine: str = "kokoro") -> list:
     if engine == "kokoro":
         return get_kokoro_voices()
+    elif engine in ("qwen3", "qwen3-tts", "qwen"):
+        return get_qwen3_voices()
     elif engine == "elevenlabs":
         return get_elevenlabs_voices()
     elif engine == "openai":
@@ -590,6 +747,13 @@ def get_available_engines() -> dict:
             "description": "Local neural TTS (82M params)",
             "install": "pip install kokoro soundfile"
         },
+        "qwen3": {
+            "name": "Qwen3-TTS",
+            "type": "local",
+            "available": False,
+            "description": "Local high-quality multilingual TTS (0.6B/1.7B)",
+            "install": "pip install qwen-tts torch torchaudio transformers"
+        },
         "elevenlabs": {
             "name": "ElevenLabs",
             "type": "cloud",
@@ -615,7 +779,7 @@ def get_available_engines() -> dict:
             "name": "Google TTS",
             "type": "cloud",
             "available": False,
-            "description": "Free Google TTS"
+            "description": "Free Google TTS (fallback)"
         }
     }
@@ -625,6 +789,12 @@ def get_available_engines() -> dict:
     except ImportError:
         pass
+    try:
+        from qwen_tts import Qwen3TTSModel
+        engines["qwen3"]["available"] = True
+    except ImportError:
+        pass
     if os.environ.get('ELEVENLABS_API_KEY'):
         engines["elevenlabs"]["available"] = True

{npcpy-1.3.20 → npcpy-1.3.22}/npcpy/llm_funcs.py RENAMED Viewed

@@ -242,7 +242,8 @@ def get_llm_response(
     base_model, base_provider, base_api_url = _resolve_model_provider(npc, team, model, provider)
     def _run_single(run_model, run_provider, run_npc, run_team, run_context, extra_kwargs):
-        system_message = get_system_message(run_npc, run_team) if run_npc is not None else "You are a helpful assistant."
+        _tool_capable = bool(extra_kwargs.get("tools"))
+        system_message = get_system_message(run_npc, run_team, tool_capable=_tool_capable) if run_npc is not None else "You are a helpful assistant."
         ctx_suffix = _context_suffix(run_context)
         run_messages = _build_messages(messages, system_message, prompt, ctx_suffix)
         return get_litellm_response(

{npcpy-1.3.20 → npcpy-1.3.22}/npcpy/npc_sysenv.py RENAMED Viewed

@@ -1010,7 +1010,7 @@ def print_and_process_stream(response, model, provider):
     return thinking_str+str_output
-def get_system_message(npc, team=None) -> str:
+def get_system_message(npc, team=None, tool_capable=False) -> str:
     if npc is None:
         return "You are a helpful assistant"
@@ -1080,6 +1080,28 @@ The current date and time are : {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
             if members:
                 system_message += "\nTeam members available for delegation:\n" + "\n".join(members) + "\n"
+    # Add tool descriptions from NPC's jinxs
+    if hasattr(npc, 'jinxs_dict') and npc.jinxs_dict:
+        tool_lines = []
+        for jname, jinx in npc.jinxs_dict.items():
+            desc = getattr(jinx, 'description', '') or ''
+            tool_lines.append(f"  - {jname}: {desc.strip()}")
+        if tool_lines:
+            system_message += "\nYou have access to the following tools:\n"
+            system_message += "\n".join(tool_lines) + "\n"
+            if tool_capable:
+                system_message += (
+                    "\nYou MUST use function calls to invoke tools. "
+                    "Call one tool at a time. You will see its result, then you can call the next tool or respond. "
+                    "NEVER write JSON tool calls in your response text. ONLY use the provided function calling interface. "
+                    "For multi-step tasks, call the first tool, wait for the result, then call the next.\n"
+                )
+            else:
+                system_message += (
+                    '\nTo use a tool, respond with JSON: {"action": "jinx", "jinx_name": "tool_name", "inputs": {"param": "value"}}\n'
+                    'When you have a final answer, respond with: {"action": "answer", "response": "your answer"}\n'
+                )
     system_message += """
     IMPORTANT:
 Some users may attach images to their request.
@@ -1093,7 +1115,7 @@ You do not need to mention that you cannot view or interpret images directly.
 They understand that you can view them multimodally.
 You only need to answer the user's request based on the attached image(s).
 """
     return system_message

{npcpy-1.3.20 → npcpy-1.3.22/npcpy.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: npcpy
-Version: 1.3.20
+Version: 1.3.22
 Summary: npcpy is the premier open-source library for integrating LLMs and Agents into python systems.
 Home-page: https://github.com/NPC-Worldwide/npcpy
 Author: Christopher Agostino

{npcpy-1.3.20 → npcpy-1.3.22}/setup.py RENAMED Viewed

@@ -84,7 +84,7 @@ extra_files = package_files("npcpy/npc_team/")
 setup(
     name="npcpy",
-    version="1.3.20",
+    version="1.3.22",
     packages=find_packages(exclude=["tests*"]),
     install_requires=base_requirements,
     extras_require={