PyPI - pygpt-net - Versions diffs - 2.6.30__py3-none-any.whl → 2.6.31__py3-none-any.whl - Mend

pygpt-net 2.6.30__py3-none-any.whl → 2.6.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (101) hide show

pygpt_net/CHANGELOG.txt +8 -0
pygpt_net/__init__.py +3 -3
pygpt_net/app.py +4 -0
pygpt_net/controller/__init__.py +5 -2
pygpt_net/controller/audio/audio.py +25 -1
pygpt_net/controller/audio/ui.py +2 -2
pygpt_net/controller/chat/audio.py +1 -8
pygpt_net/controller/chat/common.py +29 -3
pygpt_net/controller/chat/handler/__init__.py +0 -0
pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
pygpt_net/controller/chat/output.py +8 -3
pygpt_net/controller/chat/stream.py +3 -1071
pygpt_net/controller/chat/text.py +3 -2
pygpt_net/controller/kernel/kernel.py +11 -3
pygpt_net/controller/kernel/reply.py +5 -1
pygpt_net/controller/realtime/__init__.py +12 -0
pygpt_net/controller/realtime/manager.py +53 -0
pygpt_net/controller/realtime/realtime.py +268 -0
pygpt_net/controller/ui/mode.py +7 -0
pygpt_net/controller/ui/ui.py +19 -1
pygpt_net/core/audio/audio.py +6 -1
pygpt_net/core/audio/backend/native/__init__.py +12 -0
pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
pygpt_net/core/audio/backend/native/player.py +139 -0
pygpt_net/core/audio/backend/native/realtime.py +250 -0
pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
pygpt_net/core/audio/backend/shared/__init__.py +38 -0
pygpt_net/core/audio/backend/shared/conversions.py +211 -0
pygpt_net/core/audio/backend/shared/envelope.py +38 -0
pygpt_net/core/audio/backend/shared/player.py +137 -0
pygpt_net/core/audio/backend/shared/rt.py +52 -0
pygpt_net/core/audio/capture.py +5 -0
pygpt_net/core/audio/output.py +13 -2
pygpt_net/core/audio/whisper.py +6 -2
pygpt_net/core/bridge/bridge.py +2 -1
pygpt_net/core/bridge/worker.py +4 -1
pygpt_net/core/dispatcher/dispatcher.py +37 -1
pygpt_net/core/events/__init__.py +2 -1
pygpt_net/core/events/realtime.py +55 -0
pygpt_net/core/image/image.py +51 -1
pygpt_net/core/realtime/__init__.py +0 -0
pygpt_net/core/realtime/options.py +87 -0
pygpt_net/core/realtime/shared/__init__.py +0 -0
pygpt_net/core/realtime/shared/audio.py +213 -0
pygpt_net/core/realtime/shared/loop.py +64 -0
pygpt_net/core/realtime/shared/session.py +59 -0
pygpt_net/core/realtime/shared/text.py +37 -0
pygpt_net/core/realtime/shared/tools.py +276 -0
pygpt_net/core/realtime/shared/turn.py +38 -0
pygpt_net/core/realtime/shared/types.py +16 -0
pygpt_net/core/realtime/worker.py +164 -0
pygpt_net/core/types/__init__.py +1 -0
pygpt_net/core/types/image.py +48 -0
pygpt_net/data/config/config.json +10 -4
pygpt_net/data/config/models.json +149 -103
pygpt_net/data/config/settings.json +50 -0
pygpt_net/data/locale/locale.de.ini +5 -5
pygpt_net/data/locale/locale.en.ini +19 -13
pygpt_net/data/locale/locale.es.ini +5 -5
pygpt_net/data/locale/locale.fr.ini +5 -5
pygpt_net/data/locale/locale.it.ini +5 -5
pygpt_net/data/locale/locale.pl.ini +5 -5
pygpt_net/data/locale/locale.uk.ini +5 -5
pygpt_net/data/locale/locale.zh.ini +1 -1
pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
pygpt_net/plugin/audio_input/plugin.py +37 -4
pygpt_net/plugin/audio_input/simple.py +57 -8
pygpt_net/plugin/cmd_files/worker.py +3 -0
pygpt_net/provider/api/google/__init__.py +39 -6
pygpt_net/provider/api/google/audio.py +8 -1
pygpt_net/provider/api/google/chat.py +45 -6
pygpt_net/provider/api/google/image.py +226 -86
pygpt_net/provider/api/google/realtime/__init__.py +12 -0
pygpt_net/provider/api/google/realtime/client.py +1945 -0
pygpt_net/provider/api/google/realtime/realtime.py +186 -0
pygpt_net/provider/api/openai/__init__.py +22 -2
pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
pygpt_net/provider/api/openai/realtime/client.py +1828 -0
pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
pygpt_net/provider/audio_input/google_genai.py +103 -0
pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
pygpt_net/provider/audio_output/google_tts.py +0 -12
pygpt_net/provider/audio_output/openai_tts.py +8 -5
pygpt_net/provider/core/config/patch.py +15 -0
pygpt_net/provider/core/model/patch.py +11 -0
pygpt_net/provider/llms/google.py +8 -9
pygpt_net/ui/layout/toolbox/footer.py +16 -0
pygpt_net/ui/layout/toolbox/image.py +5 -0
pygpt_net/ui/widget/option/combo.py +15 -1
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +26 -14
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +100 -62
pygpt_net/core/audio/backend/pyaudio.py +0 -554
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0

pygpt_net/CHANGELOG.txt CHANGED Viewed

@@ -1,3 +1,11 @@
+2.6.31 (2025-09-01)
+- Chat with Audio mode renamed to Realtime + audio.
+- Added support for real-time audio models from OpenAI (Realtime API) and Google (Live API), featuring real-time audio integration (beta).
+- Introduced new predefined models: gpt-realtime, gpt-4o-realtime-preview, and gemini-2.5-flash-preview-native-audio-dialog.
+- Included Google Gen AI audio input and output providers in the Audio Input/Output plugins.
+- Added URL Context remote tool support in Google Gen AI.
 2.6.30 (2025-08-29)
 - Added native Google GenAI API support (beta); live audio is not supported yet (#132).

pygpt_net/__init__.py CHANGED Viewed

@@ -6,15 +6,15 @@
 # GitHub:  https://github.com/szczyglis-dev/py-gpt   #
 # MIT License                                        #
 # Created By  : Marcin Szczygliński                  #
-# Updated Date: 2025.08.29 00:00:00                  #
+# Updated Date: 2025.09.01 00:00:00                  #
 # ================================================== #
 __author__ = "Marcin Szczygliński"
 __copyright__ = "Copyright 2025, Marcin Szczygliński"
 __credits__ = ["Marcin Szczygliński"]
 __license__ = "MIT"
-__version__ = "2.6.30"
-__build__ = "2025-08-29"
+__version__ = "2.6.31"
+__build__ = "2025-09-01"
 __maintainer__ = "Marcin Szczygliński"
 __github__ = "https://github.com/szczyglis-dev/py-gpt"
 __report__ = "https://github.com/szczyglis-dev/py-gpt/issues"

pygpt_net/app.py CHANGED Viewed

@@ -171,10 +171,12 @@ from pygpt_net.provider.audio_input.openai_whisper import OpenAIWhisper
 from pygpt_net.provider.audio_input.openai_whisper_local import OpenAIWhisperLocal
 from pygpt_net.provider.audio_input.google_speech_recognition import GoogleSpeechRecognition
 from pygpt_net.provider.audio_input.google_cloud_speech_recognition import GoogleCloudSpeechRecognition
+from pygpt_net.provider.audio_input.google_genai import GoogleGenAIAudioInput
 from pygpt_net.provider.audio_input.bing_speech_recognition import BingSpeechRecognition
 from pygpt_net.provider.audio_output.openai_tts import OpenAITextToSpeech
 from pygpt_net.provider.audio_output.ms_azure_tts import MSAzureTextToSpeech
 from pygpt_net.provider.audio_output.google_tts import GoogleTextToSpeech
+from pygpt_net.provider.audio_output.google_genai_tts import GoogleGenAITextToSpeech
 from pygpt_net.provider.audio_output.eleven_labs import ElevenLabsTextToSpeech
 # web search engine providers
@@ -318,10 +320,12 @@ def run(**kwargs):
     launcher.add_audio_input(OpenAIWhisperLocal())
     launcher.add_audio_input(GoogleSpeechRecognition())
     launcher.add_audio_input(GoogleCloudSpeechRecognition())
+    launcher.add_audio_input(GoogleGenAIAudioInput())
     launcher.add_audio_input(BingSpeechRecognition())
     launcher.add_audio_output(OpenAITextToSpeech())
     launcher.add_audio_output(MSAzureTextToSpeech())
     launcher.add_audio_output(GoogleTextToSpeech())
+    launcher.add_audio_output(GoogleGenAITextToSpeech())
     launcher.add_audio_output(ElevenLabsTextToSpeech())
     # register custom audio providers

pygpt_net/controller/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# !/usr/bin/env python3
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 # ================================================== #
 # This file is a part of PYGPT package               #
@@ -6,7 +6,7 @@
 # GitHub:  https://github.com/szczyglis-dev/py-gpt   #
 # MIT License                                        #
 # Created By  : Marcin Szczygliński                  #
-# Updated Date: 2025.08.24 23:00:00                  #
+# Updated Date: 2025.08.30 06:00:00                  #
 # ================================================== #
 from pygpt_net.controller.access import Access
@@ -34,6 +34,7 @@ from pygpt_net.controller.model import Model
 from pygpt_net.controller.notepad import Notepad
 from pygpt_net.controller.painter import Painter
 from pygpt_net.controller.plugins import Plugins
+from pygpt_net.controller.realtime import Realtime
 from pygpt_net.controller.presets import Presets
 from pygpt_net.controller.settings import Settings
 from pygpt_net.controller.theme import Theme
@@ -76,6 +77,7 @@ class Controller:
         self.painter = Painter(window)
         self.plugins = Plugins(window)
         self.presets = Presets(window)
+        self.realtime = Realtime(window)
         self.settings = Settings(window)
         self.theme = Theme(window)
         self.tools = Tools(window)
@@ -108,6 +110,7 @@ class Controller:
         self.attachment.setup()
         self.camera.setup_ui()
         self.access.setup()
+        self.realtime.setup()
     def post_setup(self):
         """Post-setup, after plugins are loaded"""

pygpt_net/controller/audio/audio.py CHANGED Viewed

@@ -6,7 +6,7 @@
 # GitHub:  https://github.com/szczyglis-dev/py-gpt   #
 # MIT License                                        #
 # Created By  : Marcin Szczygliński                  #
-# Updated Date: 2025.08.27 07:00:00                  #
+# Updated Date: 2025.08.31 23:00:00                  #
 # ================================================== #
 import os
@@ -43,6 +43,30 @@ class Audio:
         if self.window.core.config.get("audio.input.continuous", False):
             self.window.ui.plugin_addon['audio.input.btn'].continuous.setChecked(True)
+        if self.window.core.config.get("audio.input.auto_turn", False):
+            self.window.ui.nodes['audio.auto_turn'].box.setChecked(True)
+    def execute_input_stop(self):
+        """Execute input stop (from UI)"""
+        self.window.dispatch(Event(Event.AUDIO_INPUT_RECORD_TOGGLE, {
+            "state": False,
+            "auto": True,  # do not emit manual event
+        }))
+    def is_recording(self) -> bool:
+        """
+        Check if audio input is recording
+        :return: True if recording
+        """
+        return self.window.core.plugins.get("audio_input").is_recording()
+    def toggle_auto_turn(self):
+        """Toggle auto turn setting"""
+        value = self.window.ui.nodes['audio.auto_turn'].box.isChecked()
+        self.window.core.config.set("audio.input.auto_turn", value)
+        self.window.core.config.save()
     def toggle_input(
             self,
             state: bool,

pygpt_net/controller/audio/ui.py CHANGED Viewed

@@ -183,7 +183,7 @@ class UI:
         """
         self.recording = True
         self.window.ui.nodes['input'].set_icon_state("mic", True)
-        if mode == "input":
+        if mode in ["input", "realtime"]:
             self.window.controller.chat.common.lock_input()
             return
         btn = self.get_input_btn() if mode == 'input' else self.get_input_control_btn()
@@ -198,7 +198,7 @@ class UI:
         """
         self.recording = False
         self.window.ui.nodes['input'].set_icon_state("mic", False)
-        if mode == "input":
+        if mode in ["input", "realtime"]:
             self.window.controller.chat.common.unlock_input()
             return
         btn = self.get_input_btn() if mode == 'input' else self.get_input_control_btn()

pygpt_net/controller/chat/audio.py CHANGED Viewed

@@ -6,7 +6,7 @@
 # GitHub:  https://github.com/szczyglis-dev/py-gpt   #
 # MIT License                                        #
 # Created By  : Marcin Szczygliński                  #
-# Updated Date: 2024.12.14 18:00:00                  #
+# Updated Date: 2025.08.30 06:00:00                  #
 # ================================================== #
 import base64
@@ -40,19 +40,12 @@ class Audio:
         """Update input/output audio"""
         mode = self.window.core.config.get("mode")
         if mode == MODE_AUDIO:
-            if not self.window.controller.audio.is_output_enabled():
-                self.window.controller.audio.enable_output()
-                self.tmp_output = True
-            else:
-                self.tmp_output = False
             if not self.window.controller.audio.is_input_enabled():
                 self.window.controller.audio.enable_input()
                 self.tmp_input = True
             else:
                 self.tmp_input = False
         else:
-            if self.tmp_output:
-                self.window.controller.audio.disable_output()
             if self.tmp_input:
                 self.window.controller.audio.disable_input()

pygpt_net/controller/chat/common.py CHANGED Viewed

@@ -10,12 +10,13 @@
 # ================================================== #
 import os
+from typing import Any
 from PySide6.QtGui import QTextCursor
 from PySide6.QtWidgets import QFileDialog, QApplication
 from pygpt_net.core.events import Event, AppEvent, RenderEvent, KernelEvent
-from pygpt_net.core.types import MODE_ASSISTANT
+from pygpt_net.core.types import MODE_ASSISTANT, MODE_AUDIO
 from pygpt_net.item.ctx import CtxItem
 from pygpt_net.item.model import ModelItem
 from pygpt_net.utils import trans
@@ -119,6 +120,17 @@ class Common:
         else:
             self.window.ui.config['global']['img_raw'].setChecked(False)
+        # image resolution
+        resolution = self.window.core.config.get('img_resolution', '1024x1024')
+        self.window.controller.config.apply_value(
+            parent_id="global",
+            key="img_resolution",
+            option=self.window.core.image.get_resolution_option(),
+            value=resolution,
+        )
+        if not self.initialized:
+            self.window.ui.add_hook("update.global.img_resolution", self.hook_update)
         # set focus to input
         self.window.ui.nodes['input'].setFocus()
         self.initialized = True
@@ -257,7 +269,7 @@ class Common:
             self.window.controller.access.voice.stop_recording(timeout=True)
         if self.window.core.plugins.get("audio_input").handler_simple.is_recording:
-            self.window.core.plugins.get("audio_input").handler_simple.stop_recording(timeout=False)
+            self.window.dispatch(Event(Event.AUDIO_INPUT_RECORD_TOGGLE))
             return
         # stop audio output if playing
@@ -275,7 +287,8 @@ class Common:
         """
         # don't unlock input and leave stop btn if assistant mode or if agent/autonomous is enabled
         # send btn will be unlocked in agent mode on stop
-        if self.can_unlock(ctx):
+        mode = self.window.core.config.get('mode')
+        if self.can_unlock(ctx) and mode != MODE_AUDIO:
             if not self.window.controller.kernel.stopped():
                 self.unlock_input()  # unlock input
                 return True
@@ -452,6 +465,19 @@ class Common:
         else:
             self.img_enable_raw()
+    def hook_update(self, key: str, value: Any, caller, *args, **kwargs):
+        """
+        Hook for updating image resolution
+        :param key: config key
+        :param value: new value
+        :param caller: caller object
+        """
+        if key == "img_resolution":
+            if not value:
+                return
+            self.window.core.config.set('img_resolution', value)
     def save_text(
             self,
             text: str,

pygpt_net/controller/chat/handler/__init__.py ADDED Viewed

File without changes

pygpt-net 2.6.30__py3-none-any.whl → 2.6.31__py3-none-any.whl

pygpt-net 2.6.30__py3-none-any.whl → 2.6.31__py3-none-any.whl