PyPI - pygpt-net - Versions diffs - 2.6.30__py3-none-any.whl → 2.6.31__py3-none-any.whl - Mend

pygpt-net 2.6.30__py3-none-any.whl → 2.6.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (101) hide show

pygpt_net/CHANGELOG.txt +8 -0
pygpt_net/__init__.py +3 -3
pygpt_net/app.py +4 -0
pygpt_net/controller/__init__.py +5 -2
pygpt_net/controller/audio/audio.py +25 -1
pygpt_net/controller/audio/ui.py +2 -2
pygpt_net/controller/chat/audio.py +1 -8
pygpt_net/controller/chat/common.py +29 -3
pygpt_net/controller/chat/handler/__init__.py +0 -0
pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
pygpt_net/controller/chat/output.py +8 -3
pygpt_net/controller/chat/stream.py +3 -1071
pygpt_net/controller/chat/text.py +3 -2
pygpt_net/controller/kernel/kernel.py +11 -3
pygpt_net/controller/kernel/reply.py +5 -1
pygpt_net/controller/realtime/__init__.py +12 -0
pygpt_net/controller/realtime/manager.py +53 -0
pygpt_net/controller/realtime/realtime.py +268 -0
pygpt_net/controller/ui/mode.py +7 -0
pygpt_net/controller/ui/ui.py +19 -1
pygpt_net/core/audio/audio.py +6 -1
pygpt_net/core/audio/backend/native/__init__.py +12 -0
pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
pygpt_net/core/audio/backend/native/player.py +139 -0
pygpt_net/core/audio/backend/native/realtime.py +250 -0
pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
pygpt_net/core/audio/backend/shared/__init__.py +38 -0
pygpt_net/core/audio/backend/shared/conversions.py +211 -0
pygpt_net/core/audio/backend/shared/envelope.py +38 -0
pygpt_net/core/audio/backend/shared/player.py +137 -0
pygpt_net/core/audio/backend/shared/rt.py +52 -0
pygpt_net/core/audio/capture.py +5 -0
pygpt_net/core/audio/output.py +13 -2
pygpt_net/core/audio/whisper.py +6 -2
pygpt_net/core/bridge/bridge.py +2 -1
pygpt_net/core/bridge/worker.py +4 -1
pygpt_net/core/dispatcher/dispatcher.py +37 -1
pygpt_net/core/events/__init__.py +2 -1
pygpt_net/core/events/realtime.py +55 -0
pygpt_net/core/image/image.py +51 -1
pygpt_net/core/realtime/__init__.py +0 -0
pygpt_net/core/realtime/options.py +87 -0
pygpt_net/core/realtime/shared/__init__.py +0 -0
pygpt_net/core/realtime/shared/audio.py +213 -0
pygpt_net/core/realtime/shared/loop.py +64 -0
pygpt_net/core/realtime/shared/session.py +59 -0
pygpt_net/core/realtime/shared/text.py +37 -0
pygpt_net/core/realtime/shared/tools.py +276 -0
pygpt_net/core/realtime/shared/turn.py +38 -0
pygpt_net/core/realtime/shared/types.py +16 -0
pygpt_net/core/realtime/worker.py +164 -0
pygpt_net/core/types/__init__.py +1 -0
pygpt_net/core/types/image.py +48 -0
pygpt_net/data/config/config.json +10 -4
pygpt_net/data/config/models.json +149 -103
pygpt_net/data/config/settings.json +50 -0
pygpt_net/data/locale/locale.de.ini +5 -5
pygpt_net/data/locale/locale.en.ini +19 -13
pygpt_net/data/locale/locale.es.ini +5 -5
pygpt_net/data/locale/locale.fr.ini +5 -5
pygpt_net/data/locale/locale.it.ini +5 -5
pygpt_net/data/locale/locale.pl.ini +5 -5
pygpt_net/data/locale/locale.uk.ini +5 -5
pygpt_net/data/locale/locale.zh.ini +1 -1
pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
pygpt_net/plugin/audio_input/plugin.py +37 -4
pygpt_net/plugin/audio_input/simple.py +57 -8
pygpt_net/plugin/cmd_files/worker.py +3 -0
pygpt_net/provider/api/google/__init__.py +39 -6
pygpt_net/provider/api/google/audio.py +8 -1
pygpt_net/provider/api/google/chat.py +45 -6
pygpt_net/provider/api/google/image.py +226 -86
pygpt_net/provider/api/google/realtime/__init__.py +12 -0
pygpt_net/provider/api/google/realtime/client.py +1945 -0
pygpt_net/provider/api/google/realtime/realtime.py +186 -0
pygpt_net/provider/api/openai/__init__.py +22 -2
pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
pygpt_net/provider/api/openai/realtime/client.py +1828 -0
pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
pygpt_net/provider/audio_input/google_genai.py +103 -0
pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
pygpt_net/provider/audio_output/google_tts.py +0 -12
pygpt_net/provider/audio_output/openai_tts.py +8 -5
pygpt_net/provider/core/config/patch.py +15 -0
pygpt_net/provider/core/model/patch.py +11 -0
pygpt_net/provider/llms/google.py +8 -9
pygpt_net/ui/layout/toolbox/footer.py +16 -0
pygpt_net/ui/layout/toolbox/image.py +5 -0
pygpt_net/ui/widget/option/combo.py +15 -1
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +26 -14
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +100 -62
pygpt_net/core/audio/backend/pyaudio.py +0 -554
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0

pygpt_net/provider/api/google/realtime/realtime.py ADDED Viewed

@@ -0,0 +1,186 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ================================================== #
+# This file is a part of PYGPT package               #
+# Website: https://pygpt.net                         #
+# GitHub:  https://github.com/szczyglis-dev/py-gpt   #
+# MIT License                                        #
+# Created By  : Marcin Szczygliński                  #
+# Updated Date: 2025.08.31 23:00:00                  #
+# ================================================== #
+import json
+from typing import Optional, Dict, Any
+from pygpt_net.core.events import RealtimeEvent
+from pygpt_net.core.realtime.options import RealtimeOptions
+from pygpt_net.core.bridge.context import BridgeContext
+from pygpt_net.core.realtime.shared.session import extract_last_session_id
+from pygpt_net.item.model import ModelItem
+from .client import GoogleLiveClient
+class Realtime:
+    PROVIDER = "google"
+    def __init__(self, window=None):
+        """
+        Google GenAI API realtime controller
+        :param window: Window instance
+        """
+        self.window = window
+        self.handler = GoogleLiveClient(window)
+        self.prev_auto_turn = False
+        self.prev_vad_silence = 2000
+        self.prev_vad_prefix = 300
+    def begin(
+            self,
+            context: BridgeContext,
+            model: Optional[ModelItem] = None,
+            extra: Optional[Dict[str, Any]] = None,
+            rt_signals=None
+    ) -> bool:
+        """
+        Begin realtime session if applicable
+        :param context: BridgeContext
+        :param model: Optional[ModelItem]
+        :param extra: Optional dict with extra parameters
+        :param rt_signals: Optional RealtimeSignals
+        :return: bool - True if realtime session started, False otherwise
+        """
+        # Build realtime options
+        mm = context.multimodal_ctx
+        audio_bytes = getattr(mm, "audio_data", None) if mm and getattr(mm, "is_audio_input", False) else None
+        audio_format = getattr(mm, "audio_format", None) if mm else None
+        audio_rate = getattr(mm, "audio_rate", None) if mm else None
+        is_debug = self.window.core.config.get("log.realtime", False)
+        auto_turn = self.window.core.config.get("audio.input.auto_turn", True)
+        opt_vad_silence = self.window.core.config.get("audio.input.vad.silence", 2000)
+        opt_vad_prefix = self.window.core.config.get("audio.input.vad.prefix", 300)
+        # setup manager
+        self.window.controller.realtime.set_current_active(self.PROVIDER)
+        self.window.controller.realtime.set_busy()
+        self.handler.set_debug(is_debug)
+        # handle sub-reply (tool results from tool calls)
+        if context.ctx.internal:
+            if context.ctx.prev_ctx and context.ctx.prev_ctx.extra.get("prev_tool_calls"):
+                tool_calls = context.ctx.prev_ctx.extra.get("prev_tool_calls", [])
+                tool_call_id = None
+                if isinstance(tool_calls, list) and len(tool_calls) > 0:
+                    tool_call_id = tool_calls[0].get("call_id", "")  # get first call_id
+                    if not tool_call_id:
+                        tool_call_id = tool_calls[0].get("id", "")  # fallback to id
+                if tool_call_id:
+                    tool_results = context.ctx.input
+                    try:
+                        tool_results = json.loads(tool_results)
+                    except Exception:
+                        pass
+                    self.handler.send_tool_results_sync({
+                        tool_call_id: tool_results
+                    })
+                    return True  # do not start new session, just send tool results
+        # update auto-turn in active session
+        if (self.handler.is_session_active()
+                and (auto_turn != self.prev_auto_turn
+                     or opt_vad_silence != self.prev_vad_silence
+                     or opt_vad_prefix != self.prev_vad_prefix)):
+            self.handler.update_session_autoturn_sync(auto_turn, opt_vad_silence, opt_vad_prefix)
+        # Tools
+        tools = self.window.core.api.google.tools.prepare(model, context.external_functions)
+        remote_tools = self.window.core.api.google.build_remote_tools(model)
+        if tools:
+            remote_tools = []  # in Google, remote tools are not allowed if function calling is used
+        # if auto-turn is enabled and prompt is empty, update session and context only
+        if auto_turn and self.handler.is_session_active() and (context.prompt.strip() == "" or context.prompt == "..."):
+            self.handler.update_session_tools_sync(tools, remote_tools)
+            self.handler.update_ctx(context.ctx)
+            return True  # do not send new request if session is active
+        # Last session ID
+        last_session_id = extract_last_session_id(context.history)
+        if is_debug:
+            print("[realtime session] Last ID", last_session_id)
+        # Voice
+        voice_name = "Kore"
+        try:
+            v = self.window.core.plugins.get_option("audio_output", "google_genai_tts_voice")
+            if v:
+                mapping = {"kore": "Kore", "puck": "Puck", "charon": "Charon", "verse": "Verse",
+                           "legend": "Legend"}
+                voice_name = mapping.get(str(v).strip().lower(), str(v))
+        except Exception:
+            pass
+        # Options
+        opts = RealtimeOptions(
+            provider=self.PROVIDER,
+            model=model.id,
+            system_prompt=context.system_prompt,
+            prompt=context.prompt,
+            voice=voice_name,
+            audio_data=audio_bytes,
+            audio_format=audio_format,
+            audio_rate=audio_rate,
+            vad=None,
+            extra=extra or {},
+            tools=tools,
+            remote_tools=remote_tools,
+            rt_signals=rt_signals,
+            rt_session_id=last_session_id,
+            auto_turn=auto_turn,
+            vad_end_silence_ms=opt_vad_silence,
+            vad_prefix_padding_ms=opt_vad_prefix,
+        )
+        # Start or append to realtime session via manager
+        try:
+            if is_debug:
+                print("[realtime] Starting session with options:", opts.to_dict())
+            rt = self.window.controller.realtime.manager
+            rt.start(context.ctx, opts)
+            self.prev_auto_turn = auto_turn
+            self.prev_vad_silence = opt_vad_silence
+            self.prev_vad_prefix = opt_vad_prefix
+            return True
+        except Exception as e:
+            self.window.core.debug.log(e)
+            return False  # fallback to non-live path
+    def handle_audio_input(self, event: RealtimeEvent):
+        """
+        Handle Realtime audio input event
+        :param event: RealtimeEvent
+        """
+        self.handler.rt_handle_audio_input_sync(event)
+    def manual_commit(self):
+        """Manually commit audio input to realtime session"""
+        self.handler.force_response_now_sync()
+    def shutdown(self):
+        """Shutdown realtime loops"""
+        if self.handler.is_session_active():
+            self.handler.close_session_sync()
+        try:
+            self.handler.stop_loop_sync()
+        except Exception:
+            pass
+    def reset(self):
+        """Close realtime session"""
+        if self.handler.is_session_active():
+            self.handler.close_session_sync()

pygpt_net/provider/api/openai/__init__.py CHANGED Viewed

@@ -6,7 +6,7 @@
 # GitHub:  https://github.com/szczyglis-dev/py-gpt   #
 # MIT License                                        #
 # Created By  : Marcin Szczygliński                  #
-# Updated Date: 2025.08.19 07:00:00                  #
+# Updated Date: 2025.08.30 06:00:00                  #
 # ================================================== #
 from openai import OpenAI
@@ -33,6 +33,7 @@ from .container import Container
 from .image import Image
 from .remote_tools import RemoteTools
 from .responses import Responses
+from .realtime import Realtime
 from .store import Store
 from .summarizer import Summarizer
 from .tools import Tools
@@ -57,6 +58,7 @@ class ApiOpenAI:
         self.image = Image(window)
         self.remote_tools = RemoteTools(window)
         self.responses = Responses(window)
+        self.realtime = Realtime(window)
         self.store = Store(window)
         self.summarizer = Summarizer(window)
         self.tools = Tools(window)
@@ -90,12 +92,18 @@ class ApiOpenAI:
         self.last_client_args = args
         return self.client
-    def call(self, context: BridgeContext, extra: dict = None) -> bool:
+    def call(
+            self,
+            context: BridgeContext,
+            extra: dict = None,
+            rt_signals = None
+    ) -> bool:
         """
         Call OpenAI API
         :param context: Bridge context
         :param extra: Extra arguments
+        :param rt_signals: Realtime signals for audio streaming
         :return: result
         """
         mode = context.mode
@@ -145,6 +153,18 @@ class ApiOpenAI:
             MODE_RESEARCH,
             MODE_COMPUTER,
         ]:
+            if mode == MODE_AUDIO and stream:
+                # Realtime API for audio streaming
+                is_realtime = self.realtime.begin(
+                    context=context,
+                    model=model,
+                    extra=extra or {},
+                    rt_signals=rt_signals
+                )
+                if is_realtime:
+                    return True
             # responses API
             if use_responses_api:
                 response = self.responses.send(

pygpt_net/provider/api/openai/realtime/__init__.py ADDED Viewed

@@ -0,0 +1,12 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ================================================== #
+# This file is a part of PYGPT package               #
+# Website: https://pygpt.net                         #
+# GitHub:  https://github.com/szczyglis-dev/py-gpt   #
+# MIT License                                        #
+# Created By  : Marcin Szczygliński                  #
+# Updated Date: 2025.08.31 23:00:00                  #
+# ================================================== #
+from .realtime import Realtime

pygpt-net 2.6.30__py3-none-any.whl → 2.6.31__py3-none-any.whl

pygpt-net 2.6.30__py3-none-any.whl → 2.6.31__py3-none-any.whl