PyPI - pygpt-net - Versions diffs - 2.6.29__py3-none-any.whl → 2.6.31__py3-none-any.whl - Mend

pygpt-net 2.6.29__py3-none-any.whl → 2.6.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (182) hide show

pygpt_net/CHANGELOG.txt +15 -0
pygpt_net/__init__.py +3 -3
pygpt_net/app.py +4 -0
pygpt_net/{container.py → app_core.py} +5 -6
pygpt_net/controller/__init__.py +5 -2
pygpt_net/controller/access/control.py +1 -9
pygpt_net/controller/assistant/assistant.py +4 -4
pygpt_net/controller/assistant/batch.py +7 -7
pygpt_net/controller/assistant/files.py +4 -4
pygpt_net/controller/assistant/threads.py +3 -3
pygpt_net/controller/attachment/attachment.py +4 -7
pygpt_net/controller/audio/audio.py +25 -1
pygpt_net/controller/audio/ui.py +2 -2
pygpt_net/controller/chat/audio.py +1 -8
pygpt_net/controller/chat/common.py +30 -4
pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
pygpt_net/controller/chat/output.py +8 -3
pygpt_net/controller/chat/stream.py +4 -405
pygpt_net/controller/chat/text.py +3 -2
pygpt_net/controller/chat/vision.py +11 -19
pygpt_net/controller/config/placeholder.py +1 -1
pygpt_net/controller/ctx/ctx.py +1 -1
pygpt_net/controller/ctx/summarizer.py +1 -1
pygpt_net/controller/kernel/kernel.py +11 -3
pygpt_net/controller/kernel/reply.py +5 -1
pygpt_net/controller/mode/mode.py +21 -12
pygpt_net/controller/plugins/settings.py +3 -2
pygpt_net/controller/presets/editor.py +112 -99
pygpt_net/controller/realtime/__init__.py +12 -0
pygpt_net/controller/realtime/manager.py +53 -0
pygpt_net/controller/realtime/realtime.py +268 -0
pygpt_net/controller/theme/theme.py +3 -2
pygpt_net/controller/ui/mode.py +7 -0
pygpt_net/controller/ui/ui.py +19 -1
pygpt_net/controller/ui/vision.py +4 -4
pygpt_net/core/agents/legacy.py +2 -2
pygpt_net/core/agents/runners/openai_workflow.py +2 -2
pygpt_net/core/assistants/files.py +5 -5
pygpt_net/core/assistants/store.py +4 -4
pygpt_net/core/audio/audio.py +6 -1
pygpt_net/core/audio/backend/native/__init__.py +12 -0
pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
pygpt_net/core/audio/backend/native/player.py +139 -0
pygpt_net/core/audio/backend/native/realtime.py +250 -0
pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
pygpt_net/core/audio/backend/shared/__init__.py +38 -0
pygpt_net/core/audio/backend/shared/conversions.py +211 -0
pygpt_net/core/audio/backend/shared/envelope.py +38 -0
pygpt_net/core/audio/backend/shared/player.py +137 -0
pygpt_net/core/audio/backend/shared/rt.py +52 -0
pygpt_net/core/audio/capture.py +5 -0
pygpt_net/core/audio/output.py +13 -2
pygpt_net/core/audio/whisper.py +6 -2
pygpt_net/core/bridge/bridge.py +4 -3
pygpt_net/core/bridge/worker.py +31 -9
pygpt_net/core/debug/console/console.py +2 -2
pygpt_net/core/debug/presets.py +2 -2
pygpt_net/core/dispatcher/dispatcher.py +37 -1
pygpt_net/core/events/__init__.py +2 -1
pygpt_net/core/events/realtime.py +55 -0
pygpt_net/core/experts/experts.py +2 -2
pygpt_net/core/image/image.py +51 -1
pygpt_net/core/modes/modes.py +2 -2
pygpt_net/core/presets/presets.py +3 -3
pygpt_net/core/realtime/options.py +87 -0
pygpt_net/core/realtime/shared/__init__.py +0 -0
pygpt_net/core/realtime/shared/audio.py +213 -0
pygpt_net/core/realtime/shared/loop.py +64 -0
pygpt_net/core/realtime/shared/session.py +59 -0
pygpt_net/core/realtime/shared/text.py +37 -0
pygpt_net/core/realtime/shared/tools.py +276 -0
pygpt_net/core/realtime/shared/turn.py +38 -0
pygpt_net/core/realtime/shared/types.py +16 -0
pygpt_net/core/realtime/worker.py +164 -0
pygpt_net/core/tokens/tokens.py +4 -4
pygpt_net/core/types/__init__.py +1 -0
pygpt_net/core/types/image.py +48 -0
pygpt_net/core/types/mode.py +5 -2
pygpt_net/core/vision/analyzer.py +1 -1
pygpt_net/data/config/config.json +13 -4
pygpt_net/data/config/models.json +219 -101
pygpt_net/data/config/modes.json +3 -9
pygpt_net/data/config/settings.json +135 -27
pygpt_net/data/config/settings_section.json +2 -2
pygpt_net/data/locale/locale.de.ini +7 -7
pygpt_net/data/locale/locale.en.ini +25 -12
pygpt_net/data/locale/locale.es.ini +7 -7
pygpt_net/data/locale/locale.fr.ini +7 -7
pygpt_net/data/locale/locale.it.ini +7 -7
pygpt_net/data/locale/locale.pl.ini +8 -8
pygpt_net/data/locale/locale.uk.ini +7 -7
pygpt_net/data/locale/locale.zh.ini +3 -3
pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
pygpt_net/item/model.py +23 -3
pygpt_net/plugin/audio_input/plugin.py +37 -4
pygpt_net/plugin/audio_input/simple.py +57 -8
pygpt_net/plugin/cmd_files/worker.py +3 -0
pygpt_net/plugin/openai_dalle/plugin.py +4 -4
pygpt_net/plugin/openai_vision/plugin.py +12 -13
pygpt_net/provider/agents/openai/agent.py +5 -5
pygpt_net/provider/agents/openai/agent_b2b.py +5 -5
pygpt_net/provider/agents/openai/agent_planner.py +5 -6
pygpt_net/provider/agents/openai/agent_with_experts.py +5 -5
pygpt_net/provider/agents/openai/agent_with_experts_feedback.py +4 -4
pygpt_net/provider/agents/openai/agent_with_feedback.py +4 -4
pygpt_net/provider/agents/openai/bot_researcher.py +2 -2
pygpt_net/provider/agents/openai/bots/research_bot/agents/planner_agent.py +1 -1
pygpt_net/provider/agents/openai/bots/research_bot/agents/search_agent.py +1 -1
pygpt_net/provider/agents/openai/bots/research_bot/agents/writer_agent.py +1 -1
pygpt_net/provider/agents/openai/evolve.py +5 -5
pygpt_net/provider/agents/openai/supervisor.py +4 -4
pygpt_net/provider/api/__init__.py +27 -0
pygpt_net/provider/api/anthropic/__init__.py +68 -0
pygpt_net/provider/api/google/__init__.py +295 -0
pygpt_net/provider/api/google/audio.py +121 -0
pygpt_net/provider/api/google/chat.py +591 -0
pygpt_net/provider/api/google/image.py +427 -0
pygpt_net/provider/api/google/realtime/__init__.py +12 -0
pygpt_net/provider/api/google/realtime/client.py +1945 -0
pygpt_net/provider/api/google/realtime/realtime.py +186 -0
pygpt_net/provider/api/google/tools.py +222 -0
pygpt_net/provider/api/google/vision.py +129 -0
pygpt_net/provider/{gpt → api/openai}/__init__.py +24 -4
pygpt_net/provider/api/openai/agents/__init__.py +0 -0
pygpt_net/provider/{gpt → api/openai}/agents/computer.py +1 -1
pygpt_net/provider/{gpt → api/openai}/agents/experts.py +1 -1
pygpt_net/provider/{gpt → api/openai}/agents/response.py +1 -1
pygpt_net/provider/{gpt → api/openai}/assistants.py +1 -1
pygpt_net/provider/{gpt → api/openai}/chat.py +15 -8
pygpt_net/provider/{gpt → api/openai}/completion.py +1 -1
pygpt_net/provider/{gpt → api/openai}/image.py +1 -1
pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
pygpt_net/provider/api/openai/realtime/client.py +1828 -0
pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
pygpt_net/provider/{gpt → api/openai}/remote_tools.py +1 -1
pygpt_net/provider/{gpt → api/openai}/responses.py +34 -20
pygpt_net/provider/{gpt → api/openai}/store.py +2 -2
pygpt_net/provider/{gpt → api/openai}/vision.py +1 -1
pygpt_net/provider/api/openai/worker/__init__.py +0 -0
pygpt_net/provider/{gpt → api/openai}/worker/assistants.py +4 -4
pygpt_net/provider/{gpt → api/openai}/worker/importer.py +10 -10
pygpt_net/provider/audio_input/google_genai.py +103 -0
pygpt_net/provider/audio_input/openai_whisper.py +1 -1
pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
pygpt_net/provider/audio_output/openai_tts.py +9 -6
pygpt_net/provider/core/config/patch.py +26 -0
pygpt_net/provider/core/model/patch.py +20 -0
pygpt_net/provider/core/preset/json_file.py +2 -4
pygpt_net/provider/llms/anthropic.py +2 -5
pygpt_net/provider/llms/base.py +4 -3
pygpt_net/provider/llms/google.py +8 -9
pygpt_net/provider/llms/openai.py +1 -1
pygpt_net/provider/loaders/hub/image_vision/base.py +1 -1
pygpt_net/ui/dialog/preset.py +71 -55
pygpt_net/ui/layout/toolbox/footer.py +16 -0
pygpt_net/ui/layout/toolbox/image.py +5 -0
pygpt_net/ui/main.py +6 -4
pygpt_net/ui/widget/option/combo.py +15 -1
pygpt_net/utils.py +9 -0
{pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +55 -55
{pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +181 -135
pygpt_net/core/audio/backend/pyaudio.py +0 -554
/pygpt_net/{provider/gpt/agents → controller/chat/handler}/__init__.py +0 -0
/pygpt_net/{provider/gpt/worker → core/realtime}/__init__.py +0 -0
/pygpt_net/provider/{gpt → api/openai}/agents/client.py +0 -0
/pygpt_net/provider/{gpt → api/openai}/agents/remote_tools.py +0 -0
/pygpt_net/provider/{gpt → api/openai}/agents/utils.py +0 -0
/pygpt_net/provider/{gpt → api/openai}/audio.py +0 -0
/pygpt_net/provider/{gpt → api/openai}/computer.py +0 -0
/pygpt_net/provider/{gpt → api/openai}/container.py +0 -0
/pygpt_net/provider/{gpt → api/openai}/summarizer.py +0 -0
/pygpt_net/provider/{gpt → api/openai}/tools.py +0 -0
/pygpt_net/provider/{gpt → api/openai}/utils.py +0 -0
{pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
{pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
{pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0

pygpt_net/core/dispatcher/dispatcher.py CHANGED Viewed

@@ -6,7 +6,7 @@
 # GitHub:  https://github.com/szczyglis-dev/py-gpt   #
 # MIT License                                        #
 # Created By  : Marcin Szczygliński                  #
-# Updated Date: 2025.08.23 15:00:00                  #
+# Updated Date: 2025.08.30 06:00:00                  #
 # ================================================== #
 from typing import List, Tuple
@@ -17,6 +17,7 @@ from pygpt_net.core.events import (
     ControlEvent,
     AppEvent,
     RenderEvent,
+    RealtimeEvent,
 )
@@ -71,6 +72,14 @@ class Dispatcher:
         handled = False
+        # realtime first, if it's a realtime event
+        if isinstance(event, RealtimeEvent):
+            controller.realtime.handle(event)
+            if log_event:
+                debug.info(f"[event] Dispatch end: {event.full_name} ({event.call_id})")
+            self.call_id += 1
+            return [], event
         # kernel
         if isinstance(event, KernelEvent):
             kernel_auto = (KernelEvent.INIT, KernelEvent.RESTART, KernelEvent.STOP, KernelEvent.TERMINATE)
@@ -96,20 +105,47 @@ class Dispatcher:
         if handled:
             return [], event
+        # realtime
+        controller.realtime.handle(event)
+        if event.stop:
+            if log_event:
+                debug.info(f"[event] Skipping... (stopped): {event.name}")
+            return [], event
         # agents
         controller.agent.handle(event)
+        if event.stop:
+            if log_event:
+                debug.info(f"[event] Skipping... (stopped): {event.name}")
+            return [], event
         # ctx
         controller.ctx.handle(event)
+        if event.stop:
+            if log_event:
+                debug.info(f"[event] Skipping... (stopped): {event.name}")
+            return [], event
         # model
         controller.model.handle(event)
+        if event.stop:
+            if log_event:
+                debug.info(f"[event] Skipping... (stopped): {event.name}")
+            return [], event
         # idx
         controller.idx.handle(event)
+        if event.stop:
+            if log_event:
+                debug.info(f"[event] Skipping... (stopped): {event.name}")
+            return [], event
         # ui
         controller.ui.handle(event)
+        if event.stop:
+            if log_event:
+                debug.info(f"[event] Skipping... (stopped): {event.name}")
+            return [], event
         # access
         if isinstance(event, (ControlEvent, AppEvent)):

pygpt_net/core/events/__init__.py CHANGED Viewed

@@ -6,7 +6,7 @@
 # GitHub:  https://github.com/szczyglis-dev/py-gpt   #
 # MIT License                                        #
 # Created By  : Marcin Szczygliński                  #
-# Updated Date: 2024.11.20 03:00:00                  #
+# Updated Date: 2025.08.30 06:00:00                  #
 # ================================================== #
 from .base import BaseEvent
@@ -14,4 +14,5 @@ from .app import AppEvent
 from .control import ControlEvent
 from .event import Event
 from .kernel import KernelEvent
+from .realtime import RealtimeEvent
 from .render import RenderEvent

pygpt_net/core/events/realtime.py ADDED Viewed

@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ================================================== #
+# This file is a part of PYGPT package               #
+# Website: https://pygpt.net                         #
+# GitHub:  https://github.com/szczyglis-dev/py-gpt   #
+# MIT License                                        #
+# Created By  : Marcin Szczygliński                  #
+# Updated Date: 2025.08.30 06:00:00                  #
+# ================================================== #
+from typing import Optional
+from .base import BaseEvent
+class RealtimeEvent(BaseEvent):
+    """
+    Event type used by the realtime pipeline.
+    Event names described by the original author:
+    - RT_OUTPUT_AUDIO_DELTA - audio output chunk (with payload)
+    - RT_OUTPUT_READY - audio output is ready (STREAM_BEGIN)
+    - RT_OUTPUT_TEXT_DELTA - text chunk (delta)
+    - RT_OUTPUT_AUDIO_END - audio output ended (STREAM_END)
+    - RT_OUTPUT_TURN_END - audio output turn ended (TURN_END)
+    - RT_OUTPUT_AUDIO_ERROR - audio output error (STREAM_ERROR)
+    - RT_OUTPUT_AUDIO_VOLUME_CHANGED - audio output volume changed (volume level)
+    Event names declared here without a description (meaning is presumed
+    from the identifier only - confirm against the code that emits them):
+    - RT_OUTPUT_AUDIO_COMMIT
+    - RT_INPUT_AUDIO_DELTA
+    - RT_INPUT_AUDIO_MANUAL_START
+    - RT_INPUT_AUDIO_MANUAL_STOP
+    """
+    # output-side event names
+    RT_OUTPUT_READY = "rt.output.audio.ready"
+    RT_OUTPUT_AUDIO_DELTA = "rt.output.audio.delta"
+    RT_OUTPUT_AUDIO_COMMIT = "rt.output.audio.commit"
+    RT_OUTPUT_AUDIO_END = "rt.output.audio.end"
+    RT_OUTPUT_AUDIO_ERROR = "rt.output.audio.error"
+    RT_OUTPUT_AUDIO_VOLUME_CHANGED = "rt.output.audio.volume.changed"
+    RT_OUTPUT_TEXT_DELTA = "rt.output.text.delta"
+    RT_OUTPUT_TURN_END = "rt.output.turn.end"
+    # input-side event names
+    RT_INPUT_AUDIO_DELTA = "rt.input.audio.delta"
+    RT_INPUT_AUDIO_MANUAL_START = "rt.input.audio.manual.start"
+    RT_INPUT_AUDIO_MANUAL_STOP = "rt.input.audio.manual.stop"
+    def __init__(
+            self,
+            name: Optional[str] = None,
+            data: Optional[dict] = None,
+    ):
+        """
+        Create a realtime event and tag it with the "RealtimeEvent" id.
+        :param name: event name (one of the RT_* constants above)
+        :param data: event data, forwarded unchanged to BaseEvent
+        """
+        super().__init__(name, data)
+        self.id = "RealtimeEvent"

pygpt_net/core/experts/experts.py CHANGED Viewed

@@ -6,7 +6,7 @@
 # GitHub:  https://github.com/szczyglis-dev/py-gpt   #
 # MIT License                                        #
 # Created By  : Marcin Szczygliński                  #
-# Updated Date: 2025.08.23 15:00:00                  #
+# Updated Date: 2025.08.28 09:00:00                  #
 # ================================================== #
 import json
@@ -49,7 +49,7 @@ class Experts:
         self.allowed_modes = [
             MODE_CHAT,
             MODE_COMPLETION,
-            MODE_VISION,
+            # MODE_VISION,
             # MODE_LANGCHAIN,
             MODE_LLAMA_INDEX,
             MODE_AUDIO,

pygpt_net/core/image/image.py CHANGED Viewed

@@ -12,10 +12,11 @@
 import os
 import uuid
 from time import strftime
-from typing import List
+from typing import List, Dict
 from PySide6.QtCore import Slot, QObject
+from pygpt_net.core.types import IMAGE_AVAILABLE_RESOLUTIONS
 from pygpt_net.item.ctx import CtxItem
 from pygpt_net.utils import trans
@@ -141,3 +142,52 @@ class Image(QObject):
         img_dir = self.window.core.config.get_user_dir("img")
         filename = f"{dt_prefix}_{img_id}.png"
         return os.path.join(img_dir, filename)
+    def get_resolution_option(self) -> dict:
+        """
+        Build the option descriptor for the image resolution selector.
+        The available keys come from get_available_resolutions() called
+        without a model, i.e. the merged set for all models.
+        :return: option dict (type, slider, label, value, keys)
+        """
+        option = {
+            "type": "combo",
+            "slider": True,
+            "label": "img_resolution",
+            "value": "1024x1024",
+        }
+        option["keys"] = self.get_available_resolutions()
+        return option
+    def get_available_resolutions(self, model: str = None) -> Dict[str, str]:
+        """
+        Get available image resolutions.
+        When a model is given, its normalized name is matched by prefix
+        against the keys of IMAGE_AVAILABLE_RESOLUTIONS and the first match
+        (in table order) wins. Without a model, or without a match, the
+        resolutions of all entries are merged into one dict.
+        :param model: model name (optional)
+        :return: dict of available resolutions
+        """
+        table = IMAGE_AVAILABLE_RESOLUTIONS
+        if model:
+            wanted = self._normalize_model_name(model)
+            matched = next((name for name in table if wanted.startswith(name)), None)
+            if matched is not None:
+                return table[matched]
+        # no model-specific entry: merge everything, later entries override duplicates
+        merged = {}
+        for per_model in table.values():
+            merged.update(per_model)
+        return merged
+    def _normalize_model_name(self, model: str) -> str:
+        """
+        Normalize model id by keeping only the part after the last "/"
+        (e.g. strips an optional 'models/' prefix).
+        :param model: model id
+        :return: normalized id, or the input unchanged if it cannot be split
+        """
+        try:
+            tail = model.rsplit("/", 1)[-1]
+        except Exception:
+            # not a string-like value: hand it back untouched
+            return model
+        return tail

pygpt_net/core/modes/modes.py CHANGED Viewed

@@ -6,7 +6,7 @@
 # GitHub:  https://github.com/szczyglis-dev/py-gpt   #
 # MIT License                                        #
 # Created By  : Marcin Szczygliński                  #
-# Updated Date: 2025.08.15 23:00:00                  #
+# Updated Date: 2025.08.28 09:00:00                  #
 # ================================================== #
 from typing import Dict, List
@@ -53,7 +53,7 @@ class Modes:
             MODE_IMAGE,
             # MODE_LANGCHAIN,
             MODE_LLAMA_INDEX,
-            MODE_VISION,
+            # MODE_VISION,
             MODE_RESEARCH,
             MODE_COMPUTER,
         )

pygpt_net/core/presets/presets.py CHANGED Viewed

@@ -6,7 +6,7 @@
 # GitHub:  https://github.com/szczyglis-dev/py-gpt   #
 # MIT License                                        #
 # Created By  : Marcin Szczygliński                  #
-# Updated Date: 2025.08.15 23:00:00                  #
+# Updated Date: 2025.08.28 09:00:00                  #
 # ================================================== #
 import copy
@@ -165,8 +165,8 @@ class Presets:
             return MODE_COMPLETION
         if preset.img:
             return MODE_IMAGE
-        if preset.vision:
-            return MODE_VISION
+        # if preset.vision:
+            # return MODE_VISION
         # if preset.langchain:
             # return MODE_LANGCHAIN
         if preset.assistant:

pygpt_net/core/realtime/options.py ADDED Viewed

@@ -0,0 +1,87 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ================================================== #
+# This file is a part of PYGPT package               #
+# Website: https://pygpt.net                         #
+# GitHub:  https://github.com/szczyglis-dev/py-gpt   #
+# MIT License                                        #
+# Created By  : Marcin Szczygliński                  #
+# Updated Date: 2025.08.31 23:00:00                  #
+# ================================================== #
+from dataclasses import dataclass, field
+from typing import Optional
+@dataclass
+class RealtimeOptions:
+    """
+    Options for starting a realtime session.
+    :param provider: Provider name ("google" or "openai")
+    :param model: Model name
+    :param system_prompt: System prompt text
+    :param prompt: User prompt text
+    :param voice: Voice name for TTS
+    :param audio_data: Optional input audio data (bytes)
+    :param audio_format: Format of the input audio (e.g., "pcm16", "wav")
+    :param audio_rate: Sample rate of the input audio (e.g., 16000)
+    :param vad: Voice Activity Detection mode (e.g., "server_vad" or None for manual)
+    :param vad_end_silence_ms: VAD end silence in ms
+    :param vad_prefix_padding_ms: VAD prefix padding in ms
+    :param rt_signals: Real-time signals for event handling (opaque object, not serialized by to_dict)
+    :param tools: Tools list
+    :param remote_tools: Remote tools list
+    :param auto_turn: Auto-turn enable/disable
+    :param transcribe: Transcript enable/disable
+    :param rt_session_id: Last session ID
+    :param extra: Free-form dictionary for extra parameters
+    """
+    provider: str = "openai"  # "google" | "openai"
+    model: Optional[str] = None
+    system_prompt: Optional[str] = None
+    prompt: Optional[str] = None
+    voice: Optional[str] = None
+    # Optional input audio
+    audio_data: Optional[bytes] = None
+    audio_format: Optional[str] = None  # e.g., "pcm16", "wav"
+    audio_rate: Optional[int] = None    # e.g., 16000
+    # Provider-specific VAD flag (use None for manual mode)
+    vad: Optional[str] = None           # e.g., "server_vad"
+    vad_end_silence_ms: Optional[int] = 2000  # VAD end silence in ms
+    vad_prefix_padding_ms: Optional[int] = 300  # VAD prefix padding in ms
+    # Real-time signals; the annotation was a dataclasses.Field instance
+    # (`field()`), which is not a type - use a plain optional object instead
+    rt_signals: Optional[object] = None  # RT signals
+    # Tools and remote tools
+    tools: Optional[list] = None
+    remote_tools: Optional[list] = None
+    # Auto-turn enable/disable
+    auto_turn: Optional[bool] = False
+    # Transcript enable/disable
+    transcribe: Optional[bool] = True
+    # Last session ID
+    rt_session_id: Optional[str] = None
+    # Extra parameters
+    extra: dict = field(default_factory=dict)
+    def to_dict(self):
+        """
+        Return the options as a plain dict.
+        Raw audio bytes are replaced by their length under the key
+        "audio_data (len)" and rt_signals is left out.
+        :return: dict with option values
+        """
+        return {
+            "provider": self.provider,
+            "model": self.model,
+            "system_prompt": self.system_prompt,
+            "prompt": self.prompt,
+            "voice": self.voice,
+            "audio_data (len)": len(self.audio_data) if self.audio_data else 0,
+            "audio_format": self.audio_format,
+            "audio_rate": self.audio_rate,
+            "vad": self.vad,
+            "vad_end_silence_ms": self.vad_end_silence_ms,
+            "vad_prefix_padding_ms": self.vad_prefix_padding_ms,
+            "tools": self.tools,
+            "remote_tools": self.remote_tools,
+            "auto_turn": self.auto_turn,
+            "transcribe": self.transcribe,
+            "rt_session_id": self.rt_session_id,
+            "extra": self.extra,
+        }

pygpt_net/core/realtime/shared/__init__.py ADDED Viewed

File without changes

pygpt_net/core/realtime/shared/audio.py ADDED Viewed

@@ -0,0 +1,213 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ================================================== #
+# This file is a part of PYGPT package               #
+# Website: https://pygpt.net                         #
+# GitHub:  https://github.com/szczyglis-dev/py-gpt   #
+# MIT License                                        #
+# Created By  : Marcin Szczygliński                  #
+# Updated Date: 2025.08.31 23:00:00                  #
+# ================================================== #
+import io
+import math
+import os
+import wave
+import audioop
+from array import array
+import struct
+from typing import Optional, Tuple, List
+DEFAULT_24K = 24000
+def coerce_to_pcm16_mono(data: bytes, fmt: Optional[str], rate_hint: Optional[int], fallback_rate: int = DEFAULT_24K) -> Tuple[int, int, bytes]:
+    """
+    Convert input audio (PCM16 raw or WAV) to PCM16 mono bytes.
+    Raw input ("pcm16", "pcm", "raw") is passed through untouched. Anything
+    else is parsed as WAV; if parsing or conversion fails (e.g. float WAV),
+    the input is returned as-is, treated as raw (best effort).
+    :param data: input audio bytes
+    :param fmt: input format name, case-insensitive (None = try WAV)
+    :param rate_hint: sample rate of raw input (used when no WAV header is read)
+    :param fallback_rate: sample rate used when nothing else is known
+    :return: tuple (sample_rate, channels=1, pcm16_bytes)
+    """
+    if not data:
+        return fallback_rate, 1, b""
+    fmt = (fmt or "").lower().strip()
+    if fmt in ("pcm16", "pcm", "raw"):
+        sr = int(rate_hint) if rate_hint else fallback_rate
+        return sr, 1, data
+    # WAV path
+    try:
+        with wave.open(io.BytesIO(data), "rb") as wf:
+            sr = wf.getframerate() or fallback_rate
+            ch = wf.getnchannels() or 1
+            sw = wf.getsampwidth() or 2
+            frames = wf.readframes(wf.getnframes())
+        if sw == 1:
+            # 8-bit WAV samples are unsigned while audioop works on signed
+            # samples: shift by 128 (wraps around) before widening
+            frames = audioop.bias(frames, 1, 128)
+        if sw != 2:
+            frames = audioop.lin2lin(frames, sw, 2)
+        if ch == 2:
+            frames = audioop.tomono(frames, 2, 0.5, 0.5)
+        elif ch > 2:
+            # audioop.tomono() only understands stereo; for N interleaved
+            # channels keep the first channel by taking every N-th sample
+            usable = len(frames) - (len(frames) % (2 * ch))
+            samples = array("h")
+            samples.frombytes(frames[:usable])
+            frames = samples[0::ch].tobytes()
+        return sr, 1, frames
+    except Exception:
+        # not a readable WAV: hand the bytes back as raw audio
+        sr = int(rate_hint) if rate_hint else fallback_rate
+        return sr, 1, data
+def float32_to_int16_bytes(b: bytes) -> bytes:
+    """
+    Convert little-endian float32 PCM [-1.0, 1.0] to little-endian int16 PCM.
+    Trailing bytes that do not form a whole float32 are ignored. Values
+    outside the range are clamped to the int16 limits and NaN becomes
+    silence (0), so a single bad sample no longer discards the whole buffer.
+    :param b: float32 sample bytes
+    :return: int16 sample bytes (empty on empty or unusable input)
+    """
+    if not b:
+        return b""
+    n = len(b) // 4
+    if n == 0:
+        return b""
+    try:
+        values = struct.unpack(f"<{n}f", b[: n * 4])
+    except (struct.error, TypeError):
+        return b""
+    out = []
+    for x in values:
+        if x != x:  # NaN never equals itself
+            out.append(0)
+            continue
+        scaled = x * 32767.0
+        # compare before converting: int(round(inf)) would raise
+        if scaled >= 32767.0:
+            out.append(32767)
+        elif scaled <= -32768.0:
+            out.append(-32768)
+        else:
+            out.append(int(round(scaled)))
+    return struct.pack(f"<{n}h", *out)
+def parse_wav_fmt(data: bytes) -> Optional[dict]:
+    """
+    Minimal WAV "fmt " chunk parser to detect float/int format.
+    Walks the RIFF chunks after the 12-byte header until the "fmt " chunk
+    is found and decodes its fixed fields.
+    :param data: complete WAV file bytes
+    :return: dict with format_tag, channels, sample_rate, bits_per_sample and
+             subformat_tag (only set for WAVE_FORMAT_EXTENSIBLE), or None if
+             the data is not RIFF/WAVE or has no usable "fmt " chunk
+    """
+    try:
+        total = len(data)
+        if total < 12 or data[0:4] != b"RIFF" or data[8:12] != b"WAVE":
+            return None
+        offset = 12
+        while offset + 8 <= total:
+            chunk_id = data[offset:offset + 4]
+            chunk_size = int.from_bytes(data[offset + 4:offset + 8], "little", signed=False)
+            body_start = offset + 8
+            if chunk_id != b"fmt ":
+                # chunks are word-aligned: skip the body plus an optional pad byte
+                offset = body_start + ((chunk_size + 1) & ~1)
+                continue
+            body = data[body_start:body_start + chunk_size]
+            if len(body) < 16:
+                return None
+            format_tag, channels, sample_rate = struct.unpack_from("<HHI", body, 0)
+            (bits_per_sample,) = struct.unpack_from("<H", body, 14)
+            subformat = None
+            if format_tag == 65534 and chunk_size >= 40:  # WAVE_FORMAT_EXTENSIBLE
+                # first two bytes of the SubFormat GUID carry the real format tag
+                subformat = int.from_bytes(body[24:26], "little", signed=False)
+            return {
+                "format_tag": format_tag,
+                "channels": channels,
+                "sample_rate": sample_rate,
+                "bits_per_sample": bits_per_sample,
+                "subformat_tag": subformat,
+            }
+        return None
+    except Exception:
+        return None
+def to_pcm16_mono(data: bytes, fmt: Optional[str], rate_hint: Optional[int], target_rate: int) -> Tuple[bytes, int]:
+    """
+    Normalize any input audio (RAW/WAV, int/float) to PCM16 mono at target_rate.
+    Raw input ("pcm16", "pcm", "raw") is assumed to be PCM16 mono already and
+    is only resampled. Anything else is parsed as WAV, converted to 16-bit,
+    mixed down to mono and resampled. Any failure yields empty audio.
+    :param data: input audio bytes
+    :param fmt: input format name, case-insensitive (None = try WAV)
+    :param rate_hint: sample rate of raw input (defaults to target_rate)
+    :param target_rate: requested output sample rate
+    :return: tuple (pcm16_bytes, target_rate)
+    """
+    if not data:
+        return b"", target_rate
+    fmt = (fmt or "").lower().strip()
+    if fmt in ("pcm16", "pcm", "raw"):
+        src_rate = int(rate_hint) if rate_hint else target_rate
+        pcm16 = data
+        if src_rate != target_rate:
+            try:
+                pcm16, _ = audioop.ratecv(pcm16, 2, 1, src_rate, target_rate, None)
+            except Exception:
+                return b"", target_rate
+        return pcm16, target_rate
+    # WAV path with float support
+    try:
+        fmt_info = parse_wav_fmt(data) or {}
+        with wave.open(io.BytesIO(data), "rb") as wf:
+            sr = wf.getframerate() or target_rate
+            ch = wf.getnchannels() or 1
+            sw = wf.getsampwidth() or 2
+            frames = wf.readframes(wf.getnframes())
+        format_tag = fmt_info.get("format_tag", 1)
+        is_float = format_tag == 3 or (format_tag == 65534 and fmt_info.get("subformat_tag") == 3)
+        # float32 -> int16
+        if is_float:
+            frames16 = float32_to_int16_bytes(frames)
+        elif sw == 2:
+            frames16 = frames
+        else:
+            if sw == 1:
+                # 8-bit WAV samples are unsigned while audioop works on
+                # signed samples: shift by 128 (wraps around) before widening
+                frames = audioop.bias(frames, 1, 128)
+            frames16 = audioop.lin2lin(frames, sw, 2)
+        # mixdown to mono
+        if ch > 1:
+            mono = None
+            if ch == 2:
+                try:
+                    mono = audioop.tomono(frames16, 2, 0.5, 0.5)
+                except Exception:
+                    mono = None
+            if mono is None:
+                # audioop.tomono() only understands stereo; keep the first
+                # channel by taking every N-th 16-bit sample (whole frames only)
+                usable = len(frames16) - (len(frames16) % (2 * ch))
+                samples = array("h")
+                samples.frombytes(frames16[:usable])
+                mono = samples[0::ch].tobytes()
+            frames16 = mono
+        # resample
+        if sr != target_rate:
+            try:
+                frames16, _ = audioop.ratecv(frames16, 2, 1, sr, target_rate, None)
+            except Exception:
+                return b"", target_rate
+        return frames16, target_rate
+    except Exception:
+        return b"", target_rate
+def resample_pcm16_mono(pcm: bytes, src_rate: int, dst_rate: int) -> bytes:
+    """
+    Resample PCM16 mono audio from src_rate to dst_rate.
+    :param pcm: PCM16 mono bytes
+    :param src_rate: current sample rate
+    :param dst_rate: requested sample rate
+    :return: resampled bytes; the input unchanged when the rates match,
+             the input is empty, or the conversion fails
+    """
+    unchanged = src_rate == dst_rate or not pcm
+    if unchanged:
+        return pcm
+    try:
+        converted = audioop.ratecv(pcm, 2, 1, src_rate, dst_rate, None)[0]
+    except Exception:
+        return pcm
+    return converted
+def iter_pcm_chunks(pcm: bytes, sr: int, ms: int = 50) -> List[bytes]:
+    """
+    Split PCM16 mono stream into ~ms byte chunks.
+    The chunk size is computed in whole samples, so a chunk boundary never
+    falls in the middle of a 16-bit sample (only a trailing odd byte of the
+    input itself can end up alone in the last chunk).
+    :param pcm: PCM16 mono bytes
+    :param sr: sample rate in Hz
+    :param ms: approximate chunk duration in milliseconds
+    :return: list of chunks (empty list for empty input)
+    """
+    # at least one sample per chunk, even for tiny or non-positive sr/ms
+    samples_per_chunk = max(int(sr) * int(ms) // 1000, 1)
+    n = samples_per_chunk * 2
+    return [pcm[i:i + n] for i in range(0, len(pcm), n)]
+def dump_wav(path: str, sample_rate: int, pcm16_mono: bytes):
+    """
+    Write PCM16 mono audio to a WAV file (best effort).
+    The parent directory is created when possible. Every error is swallowed,
+    so the caller gets no indication of whether the file was written.
+    :param path: target file path
+    :param sample_rate: sample rate in Hz
+    :param pcm16_mono: PCM16 mono bytes
+    """
+    # step 1: make sure the target directory exists (failure is not fatal)
+    try:
+        parent = os.path.dirname(path)
+        os.makedirs(parent, exist_ok=True)
+    except Exception:
+        pass
+    # step 2: write a 1-channel, 16-bit WAV
+    try:
+        with wave.open(path, "wb") as writer:
+            writer.setnchannels(1)
+            writer.setsampwidth(2)
+            writer.setframerate(int(sample_rate))
+            writer.writeframes(pcm16_mono)
+    except Exception:
+        pass
+def pcm16_stats(pcm16_mono: bytes, sample_rate: int) -> dict:
+    """
+    Compute basic level statistics for PCM16 mono audio.
+    :param pcm16_mono: PCM16 mono bytes
+    :param sample_rate: sample rate in Hz (0/None is treated as 1)
+    :return: dict with duration_s, samples, rms, peak, dc_offset and dbfs
+             (-999.0 when rms is 0), or an empty dict on any error
+    """
+    try:
+        sample_count = len(pcm16_mono) // 2
+        duration = sample_count / float(sample_rate or 1)
+        rms = audioop.rms(pcm16_mono, 2)
+        peak = 0
+        if pcm16_mono:
+            peak = audioop.max(pcm16_mono, 2)
+        try:
+            dc_offset = audioop.avg(pcm16_mono, 2)
+        except Exception:
+            dc_offset = 0
+        if rms == 0:
+            # log10(0) is undefined: use a sentinel for digital silence
+            dbfs = -999.0
+        else:
+            dbfs = 20.0 * math.log10(rms / 32768.0)
+        return {"duration_s": duration, "samples": sample_count, "rms": rms, "peak": peak, "dc_offset": dc_offset, "dbfs": dbfs}
+    except Exception:
+        return {}

pygpt_net/core/realtime/shared/loop.py ADDED Viewed

@@ -0,0 +1,64 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ================================================== #
+# This file is a part of PYGPT package               #
+# Website: https://pygpt.net                         #
+# GitHub:  https://github.com/szczyglis-dev/py-gpt   #
+# MIT License                                        #
+# Created By  : Marcin Szczygliński                  #
+# Updated Date: 2025.08.31 23:00:00                  #
+# ================================================== #
+import asyncio
+import threading
+from typing import Optional
+class BackgroundLoop:
+    """
+    Dedicated background asyncio loop running in its own daemon thread.
+    Offers cross-thread scheduling: an awaitable wrapper (run) and a
+    blocking wrapper (run_sync).
+    """
+    def __init__(self, name: str = "RT-Loop"):
+        """
+        :param name: name given to the worker thread
+        """
+        self._loop: Optional[asyncio.AbstractEventLoop] = None
+        self._thread: Optional[threading.Thread] = None
+        self._name = name
+    @property
+    def loop(self) -> Optional[asyncio.AbstractEventLoop]:
+        """Return the owned event loop, or None when not started/stopped."""
+        return self._loop
+    def ensure(self):
+        """
+        Start the loop thread if no loop is currently running.
+        Blocks (up to 5 seconds) until the loop is actually running, so that
+        a second ensure() or a run_sync() right after this call sees a
+        running loop instead of spawning a duplicate or returning None.
+        """
+        if self._loop and self._loop.is_running():
+            return
+        stale = self._loop
+        if stale is not None and not stale.is_closed():
+            # a previous loop that is no longer running would otherwise leak
+            try:
+                stale.close()
+            except RuntimeError:
+                pass
+        loop = asyncio.new_event_loop()
+        started = threading.Event()
+        def _runner(owned: asyncio.AbstractEventLoop):
+            asyncio.set_event_loop(owned)
+            # first callback executed by run_forever(): signals "running"
+            owned.call_soon(started.set)
+            owned.run_forever()
+        thread = threading.Thread(target=_runner, args=(loop,), name=self._name, daemon=True)
+        self._loop, self._thread = loop, thread
+        thread.start()
+        started.wait(timeout=5.0)
+    async def run(self, coro):
+        """
+        Schedule a coroutine on the owned loop and await its result from
+        another event loop.
+        :param coro: coroutine to run on the owned loop
+        :return: result of the coroutine
+        :raises RuntimeError: if no loop has been created
+        """
+        if not self._loop:
+            raise RuntimeError("Owner loop is not running")
+        cfut = asyncio.run_coroutine_threadsafe(coro, self._loop)
+        return await asyncio.wrap_future(cfut)
+    def run_sync(self, coro, timeout: float = 5.0):
+        """
+        Schedule a coroutine on the owned loop and block for its result.
+        :param coro: coroutine to run on the owned loop
+        :param timeout: seconds to wait for the result
+        :return: result of the coroutine, or None when the loop is not
+                 running, the wait times out, or the coroutine raises
+        """
+        if not self._loop or not self._loop.is_running():
+            return None
+        fut = asyncio.run_coroutine_threadsafe(coro, self._loop)
+        try:
+            return fut.result(timeout=timeout)
+        except Exception:
+            return None
+    def stop(self, timeout: float = 2.0):
+        """
+        Stop the loop, wait for the thread to finish and release the loop.
+        :param timeout: seconds to wait for the thread to exit
+        """
+        loop, thread = self._loop, self._thread
+        if loop and loop.is_running():
+            loop.call_soon_threadsafe(loop.stop)
+        if thread and thread.is_alive():
+            thread.join(timeout=timeout)
+        # close only once the loop has really stopped; closing a running
+        # loop raises RuntimeError
+        if loop and not loop.is_running() and not loop.is_closed():
+            try:
+                loop.close()
+            except RuntimeError:
+                pass
+        self._loop = None
+        self._thread = None

pygpt-net 2.6.29__py3-none-any.whl → 2.6.31__py3-none-any.whl

pygpt-net 2.6.29__py3-none-any.whl → 2.6.31__py3-none-any.whl