PyPI - pygpt-net - Versions diffs - 2.6.30__py3-none-any.whl → 2.6.31__py3-none-any.whl - Mend

pygpt-net 2.6.30__py3-none-any.whl → 2.6.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (101) hide show

pygpt_net/CHANGELOG.txt +8 -0
pygpt_net/__init__.py +3 -3
pygpt_net/app.py +4 -0
pygpt_net/controller/__init__.py +5 -2
pygpt_net/controller/audio/audio.py +25 -1
pygpt_net/controller/audio/ui.py +2 -2
pygpt_net/controller/chat/audio.py +1 -8
pygpt_net/controller/chat/common.py +29 -3
pygpt_net/controller/chat/handler/__init__.py +0 -0
pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
pygpt_net/controller/chat/output.py +8 -3
pygpt_net/controller/chat/stream.py +3 -1071
pygpt_net/controller/chat/text.py +3 -2
pygpt_net/controller/kernel/kernel.py +11 -3
pygpt_net/controller/kernel/reply.py +5 -1
pygpt_net/controller/realtime/__init__.py +12 -0
pygpt_net/controller/realtime/manager.py +53 -0
pygpt_net/controller/realtime/realtime.py +268 -0
pygpt_net/controller/ui/mode.py +7 -0
pygpt_net/controller/ui/ui.py +19 -1
pygpt_net/core/audio/audio.py +6 -1
pygpt_net/core/audio/backend/native/__init__.py +12 -0
pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
pygpt_net/core/audio/backend/native/player.py +139 -0
pygpt_net/core/audio/backend/native/realtime.py +250 -0
pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
pygpt_net/core/audio/backend/shared/__init__.py +38 -0
pygpt_net/core/audio/backend/shared/conversions.py +211 -0
pygpt_net/core/audio/backend/shared/envelope.py +38 -0
pygpt_net/core/audio/backend/shared/player.py +137 -0
pygpt_net/core/audio/backend/shared/rt.py +52 -0
pygpt_net/core/audio/capture.py +5 -0
pygpt_net/core/audio/output.py +13 -2
pygpt_net/core/audio/whisper.py +6 -2
pygpt_net/core/bridge/bridge.py +2 -1
pygpt_net/core/bridge/worker.py +4 -1
pygpt_net/core/dispatcher/dispatcher.py +37 -1
pygpt_net/core/events/__init__.py +2 -1
pygpt_net/core/events/realtime.py +55 -0
pygpt_net/core/image/image.py +51 -1
pygpt_net/core/realtime/__init__.py +0 -0
pygpt_net/core/realtime/options.py +87 -0
pygpt_net/core/realtime/shared/__init__.py +0 -0
pygpt_net/core/realtime/shared/audio.py +213 -0
pygpt_net/core/realtime/shared/loop.py +64 -0
pygpt_net/core/realtime/shared/session.py +59 -0
pygpt_net/core/realtime/shared/text.py +37 -0
pygpt_net/core/realtime/shared/tools.py +276 -0
pygpt_net/core/realtime/shared/turn.py +38 -0
pygpt_net/core/realtime/shared/types.py +16 -0
pygpt_net/core/realtime/worker.py +164 -0
pygpt_net/core/types/__init__.py +1 -0
pygpt_net/core/types/image.py +48 -0
pygpt_net/data/config/config.json +10 -4
pygpt_net/data/config/models.json +149 -103
pygpt_net/data/config/settings.json +50 -0
pygpt_net/data/locale/locale.de.ini +5 -5
pygpt_net/data/locale/locale.en.ini +19 -13
pygpt_net/data/locale/locale.es.ini +5 -5
pygpt_net/data/locale/locale.fr.ini +5 -5
pygpt_net/data/locale/locale.it.ini +5 -5
pygpt_net/data/locale/locale.pl.ini +5 -5
pygpt_net/data/locale/locale.uk.ini +5 -5
pygpt_net/data/locale/locale.zh.ini +1 -1
pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
pygpt_net/plugin/audio_input/plugin.py +37 -4
pygpt_net/plugin/audio_input/simple.py +57 -8
pygpt_net/plugin/cmd_files/worker.py +3 -0
pygpt_net/provider/api/google/__init__.py +39 -6
pygpt_net/provider/api/google/audio.py +8 -1
pygpt_net/provider/api/google/chat.py +45 -6
pygpt_net/provider/api/google/image.py +226 -86
pygpt_net/provider/api/google/realtime/__init__.py +12 -0
pygpt_net/provider/api/google/realtime/client.py +1945 -0
pygpt_net/provider/api/google/realtime/realtime.py +186 -0
pygpt_net/provider/api/openai/__init__.py +22 -2
pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
pygpt_net/provider/api/openai/realtime/client.py +1828 -0
pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
pygpt_net/provider/audio_input/google_genai.py +103 -0
pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
pygpt_net/provider/audio_output/google_tts.py +0 -12
pygpt_net/provider/audio_output/openai_tts.py +8 -5
pygpt_net/provider/core/config/patch.py +15 -0
pygpt_net/provider/core/model/patch.py +11 -0
pygpt_net/provider/llms/google.py +8 -9
pygpt_net/ui/layout/toolbox/footer.py +16 -0
pygpt_net/ui/layout/toolbox/image.py +5 -0
pygpt_net/ui/widget/option/combo.py +15 -1
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +26 -14
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +100 -62
pygpt_net/core/audio/backend/pyaudio.py +0 -554
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0

pygpt_net/data/locale/plugin.audio_output.en.ini CHANGED Viewed

@@ -15,6 +15,10 @@ eleven_labs_voice.description = Specify the Voice ID.
 eleven_labs_voice.label = Voice ID
 google_api_key.description = You can obtain your own API key here: https://console.cloud.google.com/apis/library/texttospeech.googleapis.com
 google_api_key.label = Google Cloud Text-to-speech API Key
+google_genai_tts_model.description = Specify Gemini TTS model, e.g.: gemini-2.5-flash-preview-tts or gemini-2.5-pro-preview-tts
+google_genai_tts_model.label = Model
+google_genai_tts_voice.description = Specify voice, e.g.: Puck, Kore, Charon, Leda, Zephyr... (case-sensitive)
+google_genai_tts_voice.label = Voice
 google_lang.description = Specify the language code.
 google_lang.label = Language code
 google_voice.description = Specify the voice.

pygpt_net/plugin/audio_input/plugin.py CHANGED Viewed

@@ -6,7 +6,7 @@
 # GitHub:  https://github.com/szczyglis-dev/py-gpt   #
 # MIT License                                        #
 # Created By  : Marcin Szczygliński                  #
-# Updated Date: 2024.11.26 19:00:00                  #
+# Updated Date: 2025.08.31 23:00:00                  #
 # ================================================== #
 import os
@@ -23,6 +23,7 @@ from pygpt_net.utils import trans
 from .config import Config
 from .worker import Worker
 from .simple import Simple
+from ...core.types import MODE_AUDIO
 class Plugin(BasePlugin):
@@ -124,13 +125,31 @@ class Plugin(BasePlugin):
             words = [x.strip() for x in words]  # remove white-spaces
         return words
-    def toggle_recording_simple(self):
+    def toggle_recording_simple(
+            self,
+            state: bool = None,
+            auto: bool = False
+    ):
         """
         Event: AUDIO_INPUT_RECORD_TOGGLE
         Toggle recording
+        :param state: state to set
+        :param auto: True if called automatically (not by user)
+        """
+        if self.window.controller.realtime.is_enabled():
+            self.handler_simple.toggle_realtime(state=state, auto=auto)
+            return
+        self.handler_simple.toggle_recording(state=state)
+    def is_recording(self) -> bool:
         """
-        self.handler_simple.toggle_recording()
+        Check if is recording (simple mode)
+        :return: True if is recording
+        """
+        return self.handler_simple.is_recording
     def toggle_speech(self, state: bool):
         """
@@ -214,7 +233,9 @@ class Plugin(BasePlugin):
             self.toggle_speech(data['value'])
         elif name == Event.AUDIO_INPUT_RECORD_TOGGLE:
-            self.toggle_recording_simple()
+            state = data['state'] if 'value' in data else None
+            auto = data['auto'] if 'auto' in data else False
+            self.toggle_recording_simple(state=state, auto=auto)
         elif name == Event.AUDIO_INPUT_STOP:
             self.on_stop()
@@ -492,6 +513,18 @@ class Plugin(BasePlugin):
                 self.window.dispatch(event)  # send text, input clear in send method
                 self.set_status('')
+    def handle_realtime_stopped(self):
+        """Handle realtime stopped"""
+        context = BridgeContext()
+        context.prompt = "..."
+        extra = {}
+        event = KernelEvent(KernelEvent.INPUT_SYSTEM, {
+            'context': context,
+            'extra': extra,
+        })
+        self.window.dispatch(event)  # send text, input clear in send method
+        self.set_status('')
     @Slot(object)
     def handle_status(self, data: str):
         """

pygpt_net/plugin/audio_input/simple.py CHANGED Viewed

@@ -6,14 +6,14 @@
 # GitHub:  https://github.com/szczyglis-dev/py-gpt   #
 # MIT License                                        #
 # Created By  : Marcin Szczygliński                  #
-# Updated Date: 2025.08.27 07:00:00                  #
+# Updated Date: 2025.08.31 23:00:00                  #
 # ================================================== #
 import os
 from PySide6.QtCore import QTimer
-from pygpt_net.core.events import AppEvent
+from pygpt_net.core.events import AppEvent, RealtimeEvent
 from pygpt_net.core.tabs.tab import Tab
 from pygpt_net.utils import trans
@@ -32,8 +32,46 @@ class Simple:
         self.is_recording = False
         self.timer = None
-    def toggle_recording(self):
-        """Toggle recording"""
+    def toggle_realtime(
+            self,
+            state: bool = None,
+            auto: bool = False
+    ):
+        """
+        Toggle recording
+        :param state: True to start recording, False to stop recording, None to toggle
+        :param auto: True if called automatically (not by user)
+        """
+        if state is not None:
+            if state and not self.is_recording:
+                self.start_recording(realtime=True)
+            elif not state:
+                self.force_stop()
+            else:
+                self.force_stop()
+            return
+        if self.is_recording:
+            self.stop_recording(realtime=True)
+            if not auto:
+                self.plugin.window.dispatch(RealtimeEvent(RealtimeEvent.RT_INPUT_AUDIO_MANUAL_STOP))
+        else:
+            self.start_recording(realtime=True)
+            if not auto:
+                self.plugin.window.dispatch(RealtimeEvent(RealtimeEvent.RT_INPUT_AUDIO_MANUAL_START))
+    def toggle_recording(self, state: bool = None):
+        """
+        Toggle recording
+        :param state: True to start recording, False to stop recording, None to toggle
+        """
+        if state is not None:
+            if state and not self.is_recording:
+                self.start_recording()
+            elif not state:
+                self.force_stop()
+            return
         if self.is_recording:
             self.stop_recording()
         else:
@@ -51,11 +89,12 @@ class Simple:
         """Stop timeout"""
         self.stop_recording(timeout=True)
-    def start_recording(self, force: bool = False):
+    def start_recording(self, force: bool = False, realtime: bool = False):
         """
         Start recording
         :param force: True to force recording
+        :param realtime: True if called from realtime callback
         """
         # display snap warning if not displayed yet
         if (not self.plugin.window.core.config.get("audio.input.snap", False)
@@ -89,7 +128,7 @@ class Simple:
             # disable in continuous mode
             timeout = int(self.plugin.window.core.config.get('audio.input.timeout', 120) or 0) # get timeout
             timeout_continuous = self.plugin.window.core.config.get('audio.input.timeout.continuous', False) # enable continuous timeout
-            if timeout > 0:
+            if timeout > 0 and not realtime:
                 if self.timer is None and (not continuous_enabled or timeout_continuous):
                     self.timer = QTimer()
                     self.timer.timeout.connect(self.stop_timeout)
@@ -119,11 +158,12 @@ class Simple:
                 )
             self.switch_btn_start()  # switch button to start
-    def stop_recording(self, timeout: bool = False):
+    def stop_recording(self, timeout: bool = False, realtime: bool = False):
         """
         Stop recording
         :param timeout: True if stopped due to timeout
+        :param realtime: True if called from realtime callback
         """
         self.plugin.window.core.audio.capture.reset_audio_level()
         self.is_recording = False
@@ -143,7 +183,7 @@ class Simple:
                 return
             if self.plugin.window.core.audio.capture.has_frames():
-                if not self.plugin.window.core.audio.capture.has_min_frames():
+                if not self.plugin.window.core.audio.capture.has_min_frames() and not realtime:
                     self.plugin.window.update_status(trans("status.audio.too_short"))
                     self.plugin.window.dispatch(AppEvent(AppEvent.VOICE_CONTROL_STOPPED))  # app event
                     return
@@ -152,6 +192,15 @@ class Simple:
         else:
             self.plugin.window.update_status("")
+    def force_stop(self):
+        """Stop recording"""
+        self.is_recording = False
+        self.plugin.window.dispatch(AppEvent(AppEvent.INPUT_VOICE_LISTEN_STOPPED))  # app event
+        self.switch_btn_start()  # switch button to start
+        if self.plugin.window.core.audio.capture.has_source():
+            self.plugin.window.core.audio.capture.stop()  # stop recording
+            return
     def on_stop(self):
         """Handle auto-transcribe"""
         path = os.path.join(self.plugin.window.core.config.path, self.plugin.input_file)

pygpt_net/plugin/cmd_files/worker.py CHANGED Viewed

@@ -920,6 +920,9 @@ class Worker(BaseWorker):
         :param context: context data
         :return: extra data
         """
+        # disabled in v2.6.31
+        # reason: do not duplicate context in chat
+        return {}
         cmd = item["cmd"]
         extra = {
             'plugin': "cmd_files",

pygpt_net/provider/api/google/__init__.py CHANGED Viewed

@@ -6,13 +6,14 @@
 # GitHub:  https://github.com/szczyglis-dev/py-gpt   #
 # MIT License                                        #
 # Created By  : Marcin Szczygliński                  #
-# Updated Date: 2025.08.28 20:00:00                  #
+# Updated Date: 2025.08.30 06:00:00                  #
 # ================================================== #
 from typing import Optional, Dict, Any
 from google.genai import types as gtypes
 from google import genai
 from pygpt_net.core.types import (
     MODE_ASSISTANT,
     MODE_AUDIO,
@@ -29,7 +30,7 @@ from .vision import Vision
 from .tools import Tools
 from .audio import Audio
 from .image import Image
+from .realtime import Realtime
 class ApiGoogle:
     def __init__(self, window=None):
@@ -44,6 +45,7 @@ class ApiGoogle:
         self.tools = Tools(window)
         self.audio = Audio(window)
         self.image = Image(window)
+        self.realtime = Realtime(window)
         self.client: Optional[genai.Client] = None
         self.locked = False
         self.last_client_args: Optional[Dict[str, Any]] = None
@@ -72,12 +74,18 @@ class ApiGoogle:
         self.last_client_args = filtered
         return self.client
-    def call(self, context: BridgeContext, extra: dict = None) -> bool:
+    def call(
+            self,
+            context: BridgeContext,
+            extra: dict = None,
+            rt_signals = None
+    ) -> bool:
         """
         Make an API call to Google GenAI
         :param context: BridgeContext
         :param extra: Extra parameters
+        :param rt_signals: Realtime signals for audio streaming
         :return: True if successful, False otherwise
         """
         mode = context.mode
@@ -94,6 +102,18 @@ class ApiGoogle:
         response = None
         if mode in [MODE_COMPLETION, MODE_CHAT, MODE_AUDIO, MODE_RESEARCH]:
+            # Live API for audio streaming
+            if mode == MODE_AUDIO and stream:
+                is_realtime = self.realtime.begin(
+                    context=context,
+                    model=model,
+                    extra=extra or {},
+                    rt_signals=rt_signals
+                )
+                if is_realtime:
+                    return True
             response = self.chat.send(context=context, extra=extra)
             used_tokens = self.chat.get_used_tokens()
             if ctx:
@@ -135,7 +155,11 @@ class ApiGoogle:
                 pass
         return True
-    def quick_call(self, context: BridgeContext, extra: dict = None) -> str:
+    def quick_call(
+            self,
+            context: BridgeContext,
+            extra: dict = None
+    ) -> str:
         """
         Make a quick API call to Google GenAI and return the output text
@@ -206,9 +230,9 @@ class ApiGoogle:
     def build_remote_tools(self, model: ModelItem = None) -> list:
         """
         Build Google GenAI remote tools based on config flags.
-        - google_tool_search: enables grounding via Google Search (Gemini 2.x)
+        - remote_tools.google.web_search: enables grounding via Google Search (Gemini 2.x)
           or GoogleSearchRetrieval (Gemini 1.5 fallback).
-        - google_tool_code_execution: enables code execution tool.
+        - remote_tools.google.code_interpreter: enables code execution tool.
         Returns a list of gtypes.Tool objects (can be empty).
@@ -242,6 +266,15 @@ class ApiGoogle:
             except Exception as e:
                 self.window.core.debug.log(e)
+        # URL Context tool
+        if cfg.get("remote_tools.google.url_ctx") and "image" not in model.id:
+            try:
+                # Supported on Gemini 2.x+ models (not on 1.5)
+                if not model_id.startswith("gemini-1.5") and not model_id.startswith("models/gemini-1.5"):
+                    tools.append(gtypes.Tool(url_context=gtypes.UrlContext))
+            except Exception as e:
+                self.window.core.debug.log(e)
         return tools

pygpt_net/provider/api/google/audio.py CHANGED Viewed

@@ -24,6 +24,8 @@ class Audio:
         Audio helpers for Google GenAI.
         - Build audio input parts for requests
         - Convert Google PCM output to WAV (base64) for UI compatibility
+        :param window: Window instance
         """
         self.window = window
@@ -103,7 +105,12 @@ class Audio:
     @staticmethod
     def _ensure_bytes(data) -> Optional[bytes]:
-        """Return raw bytes from inline_data.data (bytes or base64 string)."""
+        """
+        Return raw bytes from inline_data.data (bytes or base64 string).
+        :param data: bytes or base64 string
+        :return: bytes or None
+        """
         try:
             if isinstance(data, (bytes, bytearray)):
                 return bytes(data)

pygpt_net/provider/api/google/chat.py CHANGED Viewed

@@ -29,9 +29,17 @@ class Chat:
         self.window = window
         self.input_tokens = 0
-    def send(self, context: BridgeContext, extra: Optional[Dict[str, Any]] = None):
+    def send(
+            self,
+            context: BridgeContext,
+            extra: Optional[Dict[str, Any]] = None
+    ):
         """
         Call Google GenAI for chat / multimodal / audio.
+        :param context: BridgeContext with prompt, model, history, mode, etc.
+        :param extra: Extra parameters (not used currently)
+        :return: Response object or generator (if streaming)
         """
         prompt = context.prompt
         stream = context.stream
@@ -110,9 +118,13 @@ class Chat:
         # Tools -> merge app-defined tools with remote tools
         base_tools = self.window.core.api.google.tools.prepare(model, functions)
         remote_tools = self.window.core.api.google.build_remote_tools(model)
+        # Check tools compatibility
         if base_tools:
-            remote_tools = [] # do not mix local and remote tools
+            remote_tools = [] # remote tools are not allowed if function calling is used
         tools = (base_tools or []) + (remote_tools or [])
+        if "-image" in model.id:
+            tools = None  # function calling is not supported for image models
         # Sampling
         temperature = self.window.core.config.get('temperature')
@@ -144,7 +156,7 @@ class Chat:
             # Voice selection (case-sensitive name)
             voice_name = "Kore"
             try:
-                tmp = self.window.core.plugins.get_option("audio_output", "google_voice_native")
+                tmp = self.window.core.plugins.get_option("audio_output", "google_genai_tts_voice")
                 if tmp:
                     name = str(tmp).strip()
                     mapping = {"kore": "Kore", "puck": "Puck", "charon": "Charon", "verse": "Verse", "legend": "Legend"}
@@ -169,9 +181,17 @@ class Chat:
         else:
             return client.models.generate_content(**params)
-    def unpack_response(self, mode: str, response, ctx: CtxItem):
+    def unpack_response(
+            self,
+            mode: str,
+            response, ctx: CtxItem
+    ):
         """
         Unpack non-streaming response from Google GenAI and set context.
+        :param mode: MODE_CHAT or MODE_AUDIO
+        :param response: Response object
+        :param ctx: CtxItem to set output, audio_output, tokens, tool_calls
         """
         if mode == MODE_AUDIO:
             # Prefer audio if present
@@ -229,6 +249,11 @@ class Chat:
     def extract_text(self, response) -> str:
         """
         Extract output text.
+        Prefer response.text (Python SDK), then fallback to parts[].text.
+        :param response: Response object
+        :return: Extracted text
         """
         txt = getattr(response, "text", None) or getattr(response, "output_text", None)
         if txt:
@@ -332,11 +357,17 @@ class Chat:
         return out
-    def _extract_inline_images_and_links(self, response, ctx: CtxItem) -> None:
+    def _extract_inline_images_and_links(
+            self,
+            response, ctx: CtxItem
+    ) -> None:
         """
         Extract inline image parts (Gemini image output) and file links.
         - Saves inline_data (image/*) bytes to files and appends paths to ctx.images.
         - Appends HTTP(S) image URIs from file_data to ctx.urls.
+        :param response: Response object
+        :param ctx: CtxItem to set images and urls
         """
         images: list[str] = []
         urls: list[str] = []
@@ -386,7 +417,12 @@ class Chat:
     @staticmethod
     def _ensure_bytes(data) -> bytes | None:
-        """Return raw bytes from SDK part.inline_data.data which can be bytes or base64 string."""
+        """
+        Return raw bytes from SDK part.inline_data.data which can be bytes or base64 string.
+        :param data: bytes or str
+        :return: bytes or None
+        """
         try:
             if isinstance(data, (bytes, bytearray)):
                 return bytes(data)
@@ -545,6 +581,9 @@ class Chat:
         Heuristic check if the model supports native TTS.
         - Official TTS models contain '-tts' in id (e.g. 'gemini-2.5-flash-preview-tts').
         - Future/preview names may contain 'native-audio'.
+        :param model_id: Model ID
+        :return: True if supports TTS, False otherwise
         """
         if not model_id:
             return False

pygpt-net 2.6.30__py3-none-any.whl → 2.6.31__py3-none-any.whl

pygpt-net 2.6.30__py3-none-any.whl → 2.6.31__py3-none-any.whl