pygpt-net 2.6.30__py3-none-any.whl → 2.6.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. pygpt_net/CHANGELOG.txt +15 -0
  2. pygpt_net/__init__.py +3 -3
  3. pygpt_net/app.py +7 -1
  4. pygpt_net/app_core.py +3 -1
  5. pygpt_net/config.py +3 -1
  6. pygpt_net/controller/__init__.py +9 -2
  7. pygpt_net/controller/audio/audio.py +38 -1
  8. pygpt_net/controller/audio/ui.py +2 -2
  9. pygpt_net/controller/chat/audio.py +1 -8
  10. pygpt_net/controller/chat/common.py +23 -62
  11. pygpt_net/controller/chat/handler/__init__.py +0 -0
  12. pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
  13. pygpt_net/controller/chat/output.py +8 -3
  14. pygpt_net/controller/chat/stream.py +3 -1071
  15. pygpt_net/controller/chat/text.py +3 -2
  16. pygpt_net/controller/kernel/kernel.py +11 -3
  17. pygpt_net/controller/kernel/reply.py +5 -1
  18. pygpt_net/controller/lang/custom.py +2 -2
  19. pygpt_net/controller/media/__init__.py +12 -0
  20. pygpt_net/controller/media/media.py +115 -0
  21. pygpt_net/controller/realtime/__init__.py +12 -0
  22. pygpt_net/controller/realtime/manager.py +53 -0
  23. pygpt_net/controller/realtime/realtime.py +293 -0
  24. pygpt_net/controller/ui/mode.py +23 -2
  25. pygpt_net/controller/ui/ui.py +19 -1
  26. pygpt_net/core/audio/audio.py +6 -1
  27. pygpt_net/core/audio/backend/native/__init__.py +12 -0
  28. pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
  29. pygpt_net/core/audio/backend/native/player.py +139 -0
  30. pygpt_net/core/audio/backend/native/realtime.py +250 -0
  31. pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
  32. pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
  33. pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
  34. pygpt_net/core/audio/backend/pyaudio/realtime.py +312 -0
  35. pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
  36. pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
  37. pygpt_net/core/audio/backend/shared/__init__.py +38 -0
  38. pygpt_net/core/audio/backend/shared/conversions.py +211 -0
  39. pygpt_net/core/audio/backend/shared/envelope.py +38 -0
  40. pygpt_net/core/audio/backend/shared/player.py +137 -0
  41. pygpt_net/core/audio/backend/shared/rt.py +52 -0
  42. pygpt_net/core/audio/capture.py +5 -0
  43. pygpt_net/core/audio/output.py +14 -2
  44. pygpt_net/core/audio/whisper.py +6 -2
  45. pygpt_net/core/bridge/bridge.py +2 -1
  46. pygpt_net/core/bridge/worker.py +4 -1
  47. pygpt_net/core/dispatcher/dispatcher.py +37 -1
  48. pygpt_net/core/events/__init__.py +2 -1
  49. pygpt_net/core/events/realtime.py +55 -0
  50. pygpt_net/core/image/image.py +56 -5
  51. pygpt_net/core/realtime/__init__.py +0 -0
  52. pygpt_net/core/realtime/options.py +87 -0
  53. pygpt_net/core/realtime/shared/__init__.py +0 -0
  54. pygpt_net/core/realtime/shared/audio.py +213 -0
  55. pygpt_net/core/realtime/shared/loop.py +64 -0
  56. pygpt_net/core/realtime/shared/session.py +59 -0
  57. pygpt_net/core/realtime/shared/text.py +37 -0
  58. pygpt_net/core/realtime/shared/tools.py +276 -0
  59. pygpt_net/core/realtime/shared/turn.py +38 -0
  60. pygpt_net/core/realtime/shared/types.py +16 -0
  61. pygpt_net/core/realtime/worker.py +160 -0
  62. pygpt_net/core/render/web/body.py +24 -3
  63. pygpt_net/core/text/utils.py +54 -2
  64. pygpt_net/core/types/__init__.py +1 -0
  65. pygpt_net/core/types/image.py +54 -0
  66. pygpt_net/core/video/__init__.py +12 -0
  67. pygpt_net/core/video/video.py +290 -0
  68. pygpt_net/data/config/config.json +26 -5
  69. pygpt_net/data/config/models.json +221 -103
  70. pygpt_net/data/config/settings.json +244 -6
  71. pygpt_net/data/css/web-blocks.css +6 -0
  72. pygpt_net/data/css/web-chatgpt.css +6 -0
  73. pygpt_net/data/css/web-chatgpt_wide.css +6 -0
  74. pygpt_net/data/locale/locale.de.ini +35 -7
  75. pygpt_net/data/locale/locale.en.ini +56 -17
  76. pygpt_net/data/locale/locale.es.ini +35 -7
  77. pygpt_net/data/locale/locale.fr.ini +35 -7
  78. pygpt_net/data/locale/locale.it.ini +35 -7
  79. pygpt_net/data/locale/locale.pl.ini +38 -7
  80. pygpt_net/data/locale/locale.uk.ini +35 -7
  81. pygpt_net/data/locale/locale.zh.ini +31 -3
  82. pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
  83. pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
  84. pygpt_net/data/locale/plugin.cmd_web.en.ini +8 -0
  85. pygpt_net/item/model.py +22 -1
  86. pygpt_net/plugin/audio_input/plugin.py +37 -4
  87. pygpt_net/plugin/audio_input/simple.py +57 -8
  88. pygpt_net/plugin/cmd_files/worker.py +3 -0
  89. pygpt_net/provider/api/google/__init__.py +76 -7
  90. pygpt_net/provider/api/google/audio.py +8 -1
  91. pygpt_net/provider/api/google/chat.py +45 -6
  92. pygpt_net/provider/api/google/image.py +226 -86
  93. pygpt_net/provider/api/google/realtime/__init__.py +12 -0
  94. pygpt_net/provider/api/google/realtime/client.py +1945 -0
  95. pygpt_net/provider/api/google/realtime/realtime.py +186 -0
  96. pygpt_net/provider/api/google/video.py +364 -0
  97. pygpt_net/provider/api/openai/__init__.py +22 -2
  98. pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
  99. pygpt_net/provider/api/openai/realtime/client.py +1828 -0
  100. pygpt_net/provider/api/openai/realtime/realtime.py +193 -0
  101. pygpt_net/provider/audio_input/google_genai.py +103 -0
  102. pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
  103. pygpt_net/provider/audio_output/google_tts.py +0 -12
  104. pygpt_net/provider/audio_output/openai_tts.py +8 -5
  105. pygpt_net/provider/core/config/patch.py +241 -178
  106. pygpt_net/provider/core/model/patch.py +28 -2
  107. pygpt_net/provider/llms/google.py +8 -9
  108. pygpt_net/provider/web/duckduck_search.py +212 -0
  109. pygpt_net/ui/layout/toolbox/audio.py +55 -0
  110. pygpt_net/ui/layout/toolbox/footer.py +14 -42
  111. pygpt_net/ui/layout/toolbox/image.py +7 -13
  112. pygpt_net/ui/layout/toolbox/raw.py +52 -0
  113. pygpt_net/ui/layout/toolbox/split.py +48 -0
  114. pygpt_net/ui/layout/toolbox/toolbox.py +8 -8
  115. pygpt_net/ui/layout/toolbox/video.py +49 -0
  116. pygpt_net/ui/widget/option/combo.py +15 -1
  117. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/METADATA +46 -22
  118. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/RECORD +121 -73
  119. pygpt_net/core/audio/backend/pyaudio.py +0 -554
  120. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/LICENSE +0 -0
  121. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/WHEEL +0 -0
  122. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/entry_points.txt +0 -0

pygpt_net/controller/chat/text.py
@@ -6,7 +6,7 @@
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
  # MIT License #
  # Created By : Marcin Szczygliński #
- # Updated Date: 2025.08.23 15:00:00 #
+ # Updated Date: 2025.08.30 06:00:00 #
  # ================================================== #

  from typing import Optional
@@ -111,6 +111,7 @@ class Text:
  # if prev ctx is not empty, then copy input name to current ctx
  if prev_ctx is not None and prev_ctx.sub_call is True: # sub_call = sent from expert
  ctx.input_name = prev_ctx.input_name
+
  if reply:
  ctx.extra["sub_reply"] = True # mark as sub reply in extra data

@@ -238,7 +239,7 @@ class Text:
  """
  core = self.window.core
  stream = core.config.get("stream")
- if mode in (MODE_AGENT_LLAMA, MODE_AUDIO):
+ if mode in (MODE_AGENT_LLAMA):
  return False # TODO: check if this is correct in agent
  elif mode == MODE_LLAMA_INDEX:
  if core.config.get("llama.idx.mode") == "retrieval":

pygpt_net/controller/kernel/kernel.py
@@ -6,13 +6,13 @@
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
  # MIT License #
  # Created By : Marcin Szczygliński #
- # Updated Date: 2025.08.06 19:00:00 #
+ # Updated Date: 2025.08.31 23:00:00 #
  # ================================================== #

  import threading
  from typing import Any, Dict, Optional, Union, List

- from PySide6.QtCore import QObject, Slot
+ from PySide6.QtCore import Slot
  from PySide6.QtWidgets import QApplication

  from pygpt_net.core.types import (
@@ -23,7 +23,7 @@ from pygpt_net.core.types import (
  MODE_EXPERT,
  MODE_LLAMA_INDEX,
  )
- from pygpt_net.core.events import KernelEvent, RenderEvent, BaseEvent
+ from pygpt_net.core.events import KernelEvent, RenderEvent, BaseEvent, RealtimeEvent, Event
  from pygpt_net.core.bridge.context import BridgeContext
  from pygpt_net.item.ctx import CtxItem
  from pygpt_net.utils import trans
@@ -95,6 +95,13 @@ class Kernel:
  KernelEvent.INPUT_USER,
  KernelEvent.FORCE_CALL,
  KernelEvent.STATUS,
+ Event.AUDIO_INPUT_RECORD_TOGGLE,
+ RealtimeEvent.RT_INPUT_AUDIO_DELTA,
+ RealtimeEvent.RT_INPUT_AUDIO_MANUAL_STOP,
+ RealtimeEvent.RT_INPUT_AUDIO_MANUAL_START,
+ RealtimeEvent.RT_OUTPUT_AUDIO_COMMIT,
+ RealtimeEvent.RT_OUTPUT_TURN_END,
+ RealtimeEvent.RT_OUTPUT_READY,
  ]

  def init(self):
@@ -281,6 +288,7 @@
  self.window.dispatch(KernelEvent(KernelEvent.TERMINATE))
  self.stop(exit=True)
  self.window.controller.plugins.destroy()
+ self.window.controller.realtime.shutdown()

  def stop(self, exit: bool = False):
  """

pygpt_net/controller/kernel/reply.py
@@ -6,7 +6,7 @@
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
  # MIT License #
  # Created By : Marcin Szczygliński #
- # Updated Date: 2025.08.23 15:00:00 #
+ # Updated Date: 2025.08.31 23:00:00 #
  # ================================================== #

  import json
@@ -109,6 +109,10 @@ class Reply:
  core.ctx.update_item(self.reply_ctx) # update context in db
  self.window.update_status('...')

+ # append tool calls from previous context (used for tool results handling)
+ if self.reply_ctx.tool_calls:
+ prev_ctx.extra["prev_tool_calls"] = self.reply_ctx.tool_calls
+
  # tool output append
  dispatch(RenderEvent(RenderEvent.TOOL_UPDATE, {
  "meta": self.reply_ctx.meta,

pygpt_net/controller/lang/custom.py
@@ -55,8 +55,8 @@ class Custom:
  self.window.ui.config['preset'][MODE_CHAT].box.setText(trans("preset.chat"))
  self.window.ui.config['preset'][MODE_COMPLETION].box.setText(trans("preset.completion"))
  self.window.ui.config['preset'][MODE_IMAGE].box.setText(trans("preset.img"))
- self.window.ui.config['preset'][MODE_VISION].box.setText(trans("preset.vision"))
- #self.window.ui.config['preset'][MODE_LANGCHAIN].box.setText(trans("preset.langchain"))
+ # self.window.ui.config['preset'][MODE_VISION].box.setText(trans("preset.vision"))
+ # self.window.ui.config['preset'][MODE_LANGCHAIN].box.setText(trans("preset.langchain"))
  self.window.ui.config['preset'][MODE_LLAMA_INDEX].box.setText(trans("preset.llama_index"))
  self.window.ui.config['preset'][MODE_AGENT].box.setText(trans("preset.agent"))
  self.window.ui.config['preset'][MODE_AGENT_LLAMA].box.setText(trans("preset.agent_llama"))

pygpt_net/controller/media/__init__.py
@@ -0,0 +1,12 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ # ================================================== #
+ # This file is a part of PYGPT package #
+ # Website: https://pygpt.net #
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
+ # MIT License #
+ # Created By : Marcin Szczygliński #
+ # Updated Date: 2025.09.01 23:00:00 #
+ # ================================================== #
+
+ from .media import Media

pygpt_net/controller/media/media.py
@@ -0,0 +1,115 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ # ================================================== #
+ # This file is a part of PYGPT package #
+ # Website: https://pygpt.net #
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
+ # MIT License #
+ # Created By : Marcin Szczygliński #
+ # Updated Date: 2025.09.01 23:00:00 #
+ # ================================================== #
+
+ from typing import Any
+
+
+ class Media:
+     def __init__(self, window=None):
+         """
+         Media (video, image, music) controller
+
+         :param window: Window instance
+         """
+         self.window = window
+         self.initialized = False
+
+     def setup(self):
+         """Setup UI"""
+         # raw mode for images/video
+         if self.window.core.config.get('img_raw'):
+             self.window.ui.config['global']['img_raw'].setChecked(True)
+         else:
+             self.window.ui.config['global']['img_raw'].setChecked(False)
+
+         # image: resolution
+         resolution = self.window.core.config.get('img_resolution', '1024x1024')
+         self.window.controller.config.apply_value(
+             parent_id="global",
+             key="img_resolution",
+             option=self.window.core.image.get_resolution_option(),
+             value=resolution,
+         )
+
+         # video: aspect ratio
+         aspect_ratio = self.window.core.config.get('video.aspect_ratio', '16:9')
+         self.window.controller.config.apply_value(
+             parent_id="global",
+             key="video.aspect_ratio",
+             option=self.window.core.video.get_aspect_ratio_option(),
+             value=aspect_ratio,
+         )
+
+         # -- add hooks --
+         if not self.initialized:
+             self.window.ui.add_hook("update.global.img_resolution", self.hook_update)
+             self.window.ui.add_hook("update.global.video.aspect_ratio", self.hook_update)
+
+     def reload(self):
+         """Reload UI"""
+         self.setup()
+
+     def hook_update(self, key: str, value: Any, caller, *args, **kwargs):
+         """
+         Hook for updating media options
+
+         :param key: config key
+         :param value: new value
+         :param caller: caller object
+         """
+         if key == "img_resolution":
+             if not value:
+                 return
+             self.window.core.config.set('img_resolution', value)
+         elif key == "video.aspect_ratio":
+             if not value:
+                 return
+             self.window.core.config.set('video.aspect_ratio', value)
+
+     def enable_raw(self):
+         """Enable prompt enhancement for images"""
+         self.window.core.config.set('img_raw', True)
+         self.window.core.config.save()
+
+     def disable_raw(self):
+         """Disable prompt enhancement for images"""
+         self.window.core.config.set('img_raw', False)
+         self.window.core.config.save()
+
+     def toggle_raw(self):
+         """Save prompt enhancement option for images"""
+         state = self.window.ui.config['global']['img_raw'].isChecked()
+         if not state:
+             self.disable_raw()
+         else:
+             self.enable_raw()
+
+     def is_image_model(self) -> bool:
+         """
+         Check if the model is an image generation model
+
+         :return: True if the model is an image generation model
+         """
+         current = self.window.core.config.get("model")
+         model_data = self.window.core.models.get(current)
+         if model_data:
+             return model_data.is_image_output()
+
+     def is_video_model(self) -> bool:
+         """
+         Check if the model is a video generation model
+
+         :return: True if the model is a video generation model
+         """
+         current = self.window.core.config.get("model")
+         model_data = self.window.core.models.get(current)
+         if model_data:
+             return model_data.is_video_output()
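
The new Media controller ties the image/video toolbox widgets to the stored configuration: setup() pushes img_raw, img_resolution and video.aspect_ratio from the config into the global UI options, and hook_update() writes edits back when the "update.global.*" hooks fire. A minimal sketch of that round trip, assuming the hook system delivers the key without its "update.global." prefix (as the key checks above suggest) and using throwaway stubs in place of the real Window object:

    # Illustrative sketch only: FakeConfig/FakeCore/FakeWindow are stand-ins,
    # not part of pygpt-net; only the attributes hook_update() touches are stubbed.
    from pygpt_net.controller.media import Media

    class FakeConfig:
        def __init__(self):
            self.data = {"img_resolution": "1024x1024", "video.aspect_ratio": "16:9"}
        def get(self, key, default=None):
            return self.data.get(key, default)
        def set(self, key, value):
            self.data[key] = value
        def save(self):
            pass

    class FakeCore:
        def __init__(self):
            self.config = FakeConfig()

    class FakeWindow:
        def __init__(self):
            self.core = FakeCore()

    media = Media(window=FakeWindow())
    # simulate the "update.global.video.aspect_ratio" hook firing after a combo change
    media.hook_update("video.aspect_ratio", "9:16", caller=None)
    assert media.window.core.config.get("video.aspect_ratio") == "9:16"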

pygpt_net/controller/realtime/__init__.py
@@ -0,0 +1,12 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ # ================================================== #
+ # This file is a part of PYGPT package #
+ # Website: https://pygpt.net #
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
+ # MIT License #
+ # Created By : Marcin Szczygliński #
+ # Updated Date: 2025.08.30 06:00:00 #
+ # ================================================== #
+
+ from .realtime import Realtime

pygpt_net/controller/realtime/manager.py
@@ -0,0 +1,53 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ # ================================================== #
+ # This file is a part of PYGPT package #
+ # Website: https://pygpt.net #
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
+ # MIT License #
+ # Created By : Marcin Szczygliński #
+ # Updated Date: 2025.08.31 23:00:00 #
+ # ================================================== #
+
+ from typing import Optional
+
+ from pygpt_net.core.realtime.worker import RealtimeWorker, RealtimeOptions
+ from pygpt_net.item.ctx import CtxItem
+
+ class Manager:
+     """
+     Manager that mirrors chat.stream controller shape.
+
+     Starts a RealtimeWorker and routes text events and lifecycle to the UI.
+     Audio is forwarded by the main-thread via RT_OUTPUT_AUDIO_DELTA events.
+     """
+     def __init__(self, window=None):
+         self.window = window
+         self.worker: Optional[RealtimeWorker] = None
+         self.ctx: Optional[CtxItem] = None
+         self.provider: Optional[str] = None
+         self.opts: Optional[RealtimeOptions] = None
+
+     def start(
+             self,
+             ctx: CtxItem,
+             opts: RealtimeOptions
+     ):
+         """
+         Start realtime worker
+
+         :param ctx: CtxItem
+         :param opts: RealtimeOptions
+         """
+         self.ctx = ctx
+         self.opts = opts
+         self.provider = opts.provider
+
+         worker = RealtimeWorker(self.window, ctx, opts)
+         self.worker = worker
+         self.window.core.debug.info(f"[realtime] Begin: provider={opts.provider}, model={opts.model}")
+         self.window.threadpool.start(worker)
+
+     def shutdown(self):
+         """Shutdown realtime worker"""
+         self.worker = None
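
Manager.start() wraps a realtime session in a RealtimeWorker and hands it to the application's Qt thread pool, so a caller only supplies a context item and a RealtimeOptions object. A hedged usage sketch: the RealtimeOptions keyword arguments below are assumptions (this hunk only shows opts.provider and opts.model being read), and the controller path comes from the Realtime controller shown next in this diff:

    from pygpt_net.core.realtime.worker import RealtimeOptions  # import path as used above
    from pygpt_net.item.ctx import CtxItem

    def start_realtime_turn(window, ctx: CtxItem):
        # assumed constructor fields: Manager only reads opts.provider and opts.model
        opts = RealtimeOptions(
            provider="openai",                      # or "google"
            model=window.core.config.get("model"),  # currently selected model
        )
        # window.controller.realtime.manager is the Manager instance owned by
        # the Realtime controller (see realtime.py below)
        window.controller.realtime.manager.start(ctx, opts)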

pygpt_net/controller/realtime/realtime.py
@@ -0,0 +1,293 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ # ================================================== #
+ # This file is a part of PYGPT package #
+ # Website: https://pygpt.net #
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
+ # MIT License #
+ # Created By : Marcin Szczygliński #
+ # Updated Date: 2025.08.31 23:00:00 #
+ # ================================================== #
+
+ from PySide6.QtCore import Slot, QTimer
+
+ from pygpt_net.core.events import (
+     RealtimeEvent,
+     RenderEvent,
+     BaseEvent,
+     AppEvent,
+     KernelEvent,
+     Event,
+ )
+ from pygpt_net.core.realtime.worker import RealtimeSignals
+ from pygpt_net.core.types import MODE_AUDIO
+ from pygpt_net.utils import trans
+ from pygpt_net.core.tabs import Tab
+
+ from .manager import Manager
+
+ class Realtime:
+     def __init__(self, window=None):
+         """
+         Realtime controller
+
+         :param window: Window instance
+         """
+         self.window = window
+         self.manager = Manager(window)
+         self.signals = RealtimeSignals()
+         self.signals.response.connect(self.handle_response)
+         self.current_active = None # openai | google
+         self.allowed_modes = [MODE_AUDIO]
+         self.manual_commit_sent = False
+
+     def setup(self):
+         """Setup realtime core, signals, etc. in main thread"""
+         self.window.core.audio.setup() # setup RT signals in audio input/output core
+
+     def is_enabled(self) -> bool:
+         """
+         Check if realtime is enabled in settings
+
+         :return: True if enabled, False otherwise
+         """
+         mode = self.window.core.config.get("mode")
+         if mode == MODE_AUDIO:
+             if self.window.controller.ui.tabs.get_current_type() != Tab.TAB_NOTEPAD:
+                 return True
+         return False
+
+     @Slot(object)
+     def handle(self, event: BaseEvent):
+         """
+         Handle realtime event (returned from dispatcher)
+
+         :param event: RealtimeEvent instance
+         """
+         # check if mode is supported
+         if not self.is_supported() and isinstance(event, RealtimeEvent):
+             event.stop = True # stop further propagation
+             return # ignore if not in realtime mode
+
+         # ----------------------------------------------------
+
+         # audio output chunk: send to audio output handler
+         if event.name == RealtimeEvent.RT_OUTPUT_AUDIO_DELTA:
+             self.set_idle()
+             payload = event.data.get("payload", None)
+             if payload:
+                 self.window.core.audio.output.handle_realtime(payload, self.signals)
+
+         # audio input chunk: send to the active realtime client
+         elif event.name == RealtimeEvent.RT_INPUT_AUDIO_DELTA:
+             self.set_idle()
+             if self.current_active == "google":
+                 self.window.core.api.google.realtime.handle_audio_input(event)
+             elif self.current_active == "openai":
+                 self.window.core.api.openai.realtime.handle_audio_input(event)
+
+         # begin: first text chunk or audio chunk received, start rendering
+         elif event.name == RealtimeEvent.RT_OUTPUT_READY:
+             ctx = event.data.get('ctx', None)
+             if ctx:
+                 self.window.dispatch(RenderEvent(RenderEvent.STREAM_BEGIN, {
+                     "meta": ctx.meta,
+                     "ctx": ctx,
+                 }))
+             self.set_busy()
+
+         # commit: audio buffer sent, stop audio input and finalize the response
+         elif event.name == RealtimeEvent.RT_OUTPUT_AUDIO_COMMIT:
+             self.set_busy()
+             if self.manual_commit_sent:
+                 self.manual_commit_sent = False
+                 return # abort if manual commit was already sent
+             self.window.controller.audio.execute_input_stop()
+
+         elif event.name == RealtimeEvent.RT_INPUT_AUDIO_MANUAL_STOP:
+             self.manual_commit_sent = True
+             self.set_busy()
+             QTimer.singleShot(0, lambda: self.manual_commit())
+
+         elif event.name == RealtimeEvent.RT_INPUT_AUDIO_MANUAL_START:
+             self.set_idle()
+             self.window.controller.chat.input.execute("...", force=True)
+             self.window.dispatch(KernelEvent(KernelEvent.STATUS, {
+                 'status': trans("speech.listening"),
+             }))
+
+         # text delta: append text chunk to the response
+         elif event.name == RealtimeEvent.RT_OUTPUT_TEXT_DELTA:
+             self.set_idle()
+             ctx = event.data.get('ctx', None)
+             chunk = event.data.get('chunk', "")
+             if chunk and ctx:
+                 self.window.dispatch(RenderEvent(RenderEvent.STREAM_APPEND, {
+                     "meta": ctx.meta,
+                     "ctx": ctx,
+                     "chunk": chunk,
+                     "begin": False,
+                 }))
+
+         # audio end: on stop audio playback
+         elif event.name == RealtimeEvent.RT_OUTPUT_AUDIO_END:
+             self.set_idle()
+             self.window.controller.chat.common.unlock_input()
+             if self.is_loop():
+                 QTimer.singleShot(500, lambda: self.next_turn()) # wait a bit before next turn
+
+         # end of turn: finalize the response
+         elif event.name == RealtimeEvent.RT_OUTPUT_TURN_END:
+             self.set_idle()
+             ctx = event.data.get('ctx', None)
+             if ctx:
+                 self.end_turn(ctx)
+             if self.window.controller.audio.is_recording():
+                 self.window.update_status(trans("speech.listening"))
+             self.window.controller.chat.common.unlock_input()
+
+         # volume change: update volume in audio output handler
+         elif event.name == RealtimeEvent.RT_OUTPUT_AUDIO_VOLUME_CHANGED:
+             volume = event.data.get("volume", 1.0)
+             self.window.controller.audio.ui.on_output_volume_change(volume)
+
+         # error: audio output error
+         elif event.name == RealtimeEvent.RT_OUTPUT_AUDIO_ERROR:
+             self.set_idle()
+             error = event.data.get("error")
+             self.window.core.debug.log(error)
+             self.window.controller.chat.common.unlock_input()
+
+         # -----------------------------------
+
+         # app events, always handled
+         elif event.name == AppEvent.MODE_SELECTED:
+             mode = self.window.core.config.get("mode")
+             if mode != MODE_AUDIO:
+                 QTimer.singleShot(0, lambda: self.reset())
+
+         elif event.name == AppEvent.CTX_CREATED:
+             QTimer.singleShot(0, lambda: self.reset())
+
+         elif event.name == AppEvent.CTX_SELECTED:
+             QTimer.singleShot(0, lambda: self.reset())
+
+     def next_turn(self):
+         """Start next turn in loop mode (if enabled)"""
+         self.window.dispatch(Event(Event.AUDIO_INPUT_RECORD_TOGGLE))
+         if self.window.controller.audio.is_recording():
+             QTimer.singleShot(100, lambda: self.window.update_status(trans("speech.listening")))
+
+     def is_loop(self) -> bool:
+         """
+         Check if loop recording is enabled
+
+         :return: True if loop recording is enabled, False otherwise
+         """
+         if self.window.controller.kernel.stopped():
+             return False
+         return self.window.core.config.get("audio.input.loop", False)
+
+     @Slot(object)
+     def handle_response(self, event: RealtimeEvent):
+         """
+         Handle response event (send to kernel -> dispatcher)
+
+         :param event: RealtimeEvent instance
+         """
+         self.window.controller.kernel.listener(event)
+
+     def is_auto_turn(self) -> bool:
+         """
+         Check if auto-turn is enabled
+
+         :return: True if auto-turn is enabled, False otherwise
+         """
+         return self.window.core.config.get("audio.input.auto_turn", True)
+
+     def manual_commit(self):
+         """Manually commit the response (end of turn)"""
+         if self.current_active == "google":
+             self.window.core.api.google.realtime.manual_commit()
+         elif self.current_active == "openai":
+             self.window.core.api.openai.realtime.manual_commit()
+
+     def end_turn(self, ctx):
+         """
+         End of realtime turn - finalize the response
+
+         :param ctx: Context instance
+         """
+         self.set_idle()
+         if not ctx:
+             return
+         self.window.controller.chat.output.handle_after(
+             ctx=ctx,
+             mode=MODE_AUDIO,
+             stream=True,
+         )
+         self.window.controller.chat.output.post_handle(
+             ctx=ctx,
+             mode=MODE_AUDIO,
+             stream=True,
+         )
+         self.window.controller.chat.output.handle_end(
+             ctx=ctx,
+             mode=MODE_AUDIO,
+         )
+         self.window.controller.chat.common.show_response_tokens(ctx)
+
+     def shutdown(self):
+         """Shutdown all realtime threads and async loops"""
+         try:
+             self.window.core.api.openai.realtime.shutdown()
+         except Exception as e:
+             self.window.core.debug.log(f"[openai] Realtime shutdown error: {e}")
+         try:
+             self.window.core.api.google.realtime.shutdown()
+         except Exception as e:
+             self.window.core.debug.log(f"[google] Realtime shutdown error: {e}")
+         try:
+             self.manager.shutdown()
+         except Exception as e:
+             self.window.core.debug.log(f"[manager] Realtime shutdown error: {e}")
+
+     def reset(self):
+         """Reset realtime session"""
+         try:
+             self.window.core.api.openai.realtime.reset()
+         except Exception as e:
+             self.window.core.debug.log(f"[openai] Realtime reset error: {e}")
+         try:
+             self.window.core.api.google.realtime.reset()
+         except Exception as e:
+             self.window.core.debug.log(f"[google] Realtime reset error: {e}")
+
+     def is_supported(self) -> bool:
+         """
+         Check if current mode supports realtime
+
+         :return: True if mode supports realtime, False otherwise
+         """
+         mode = self.window.core.config.get("mode")
+         return mode in self.allowed_modes
+
+     def set_current_active(self, provider: str):
+         """
+         Set the current active realtime provider
+
+         :param provider: Provider name (openai, google)
+         """
+         self.current_active = provider.lower() if provider else None
+
+     def set_idle(self):
+         """Set kernel state to IDLE"""
+         QTimer.singleShot(0, lambda: self.window.dispatch(KernelEvent(KernelEvent.STATE_IDLE, {
+             "id": "realtime",
+         })))
+
+     def set_busy(self):
+         """Set kernel state to BUSY"""
+         QTimer.singleShot(0, lambda: self.window.dispatch(KernelEvent(KernelEvent.STATE_BUSY, {
+             "id": "realtime",
+         })))
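
Provider clients never touch the UI directly: they emit RealtimeEvent objects on RealtimeSignals.response, handle_response() relays them to the kernel listener, and the dispatcher routes them back into handle() on the main thread. A sketch of how a worker-side client could report a text delta, assuming RealtimeEvent uses the same (name, data) constructor as the KernelEvent/RenderEvent calls above; the payload keys "ctx" and "chunk" match what the RT_OUTPUT_TEXT_DELTA branch of handle() reads:

    from pygpt_net.core.events import RealtimeEvent

    def emit_text_delta(signals, ctx, chunk: str):
        # `signals` is the RealtimeSignals instance owned by the Realtime controller;
        # its `response` signal is connected to handle_response() in __init__ above
        event = RealtimeEvent(RealtimeEvent.RT_OUTPUT_TEXT_DELTA, {
            "ctx": ctx,      # CtxItem for the current turn
            "chunk": chunk,  # text fragment to append to the rendered response
        })
        signals.response.emit(event)  # queued back to the main thread by Qt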

pygpt_net/controller/ui/mode.py
@@ -6,7 +6,7 @@
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
  # MIT License #
  # Created By : Marcin Szczygliński #
- # Updated Date: 2025.08.15 23:00:00 #
+ # Updated Date: 2025.09.01 23:00:00 #
  # ================================================== #

  from pygpt_net.core.types import (
@@ -20,6 +20,7 @@ from pygpt_net.core.types import (
  MODE_COMPUTER,
  MODE_AGENT_OPENAI,
  MODE_COMPLETION,
+ MODE_AUDIO,
  )
  from pygpt_net.core.tabs.tab import Tab
  from pygpt_net.core.events import Event
@@ -55,6 +56,14 @@ class Mode:
  is_image = mode == MODE_IMAGE
  is_llama_index = mode == MODE_LLAMA_INDEX
  is_completion = mode == MODE_COMPLETION
+ is_audio = mode == MODE_AUDIO
+
+ if not is_audio:
+ self.window.ui.nodes['audio.auto_turn'].setVisible(False)
+ self.window.ui.nodes["audio.loop"].setVisible(False)
+ else:
+ self.window.ui.nodes['audio.auto_turn'].setVisible(True)
+ self.window.ui.nodes["audio.loop"].setVisible(True)

  if not is_assistant:
  ui_nodes['presets.widget'].setVisible(True)
@@ -131,9 +140,21 @@ class Mode:
  ui_tabs['preset.editor.extra'].setTabText(0, trans("preset.prompt"))

  if is_image:
- ui_nodes['dalle.options'].setVisible(True)
+ ui_nodes['media.raw'].setVisible(True)
+ if ctrl.media.is_video_model():
+ ui_nodes['video.options'].setVisible(True)
+ ui_nodes['dalle.options'].setVisible(False)
+ elif ctrl.media.is_image_model():
+ ui_nodes['dalle.options'].setVisible(True)
+ ui_nodes['video.options'].setVisible(False)
+ else:
+ ui_nodes['media.raw'].setVisible(False)
+ ui_nodes['dalle.options'].setVisible(False)
+ ui_nodes['video.options'].setVisible(False)
  else:
+ ui_nodes['media.raw'].setVisible(False)
  ui_nodes['dalle.options'].setVisible(False)
+ ui_nodes['video.options'].setVisible(False)

  if is_agent:
  ui_nodes['agent.options'].setVisible(True)

pygpt_net/controller/ui/ui.py
@@ -13,6 +13,7 @@ from typing import Optional

  from PySide6.QtGui import QColor

+ from pygpt_net.core.types import MODE_IMAGE
  from pygpt_net.core.events import BaseEvent, Event
  from pygpt_net.utils import trans

@@ -64,6 +65,7 @@ class UI:
  self.update_tokens()
  self.vision.update()
  self.window.controller.agent.legacy.update()
+ self.img_update_available_resolutions()

  def handle(self, event: BaseEvent):
  """
@@ -215,4 +217,20 @@
  def on_global_stop(self):
  """Global stop button action"""
  if self.stop_action == "idx":
- self.window.controller.idx.force_stop()
+ self.window.controller.idx.force_stop()
+
+ def img_update_available_resolutions(self):
+ """Update available resolutions for images"""
+ mode = self.window.core.config.get('mode')
+ if mode != MODE_IMAGE:
+ return
+ model = self.window.core.config.get('model')
+ keys = self.window.core.image.get_available_resolutions(model)
+ current = self.window.core.config.get('img_resolution', '1024x1024')
+ self.window.ui.config['global']['img_resolution'].set_keys(keys, lock=False)
+ self.window.controller.config.apply_value(
+ parent_id="global",
+ key="img_resolution",
+ option=self.window.core.image.get_resolution_option(),
+ value=current,
+ )