pygpt-net 2.6.29__py3-none-any.whl → 2.6.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182)
  1. pygpt_net/CHANGELOG.txt +15 -0
  2. pygpt_net/__init__.py +3 -3
  3. pygpt_net/app.py +4 -0
  4. pygpt_net/{container.py → app_core.py} +5 -6
  5. pygpt_net/controller/__init__.py +5 -2
  6. pygpt_net/controller/access/control.py +1 -9
  7. pygpt_net/controller/assistant/assistant.py +4 -4
  8. pygpt_net/controller/assistant/batch.py +7 -7
  9. pygpt_net/controller/assistant/files.py +4 -4
  10. pygpt_net/controller/assistant/threads.py +3 -3
  11. pygpt_net/controller/attachment/attachment.py +4 -7
  12. pygpt_net/controller/audio/audio.py +25 -1
  13. pygpt_net/controller/audio/ui.py +2 -2
  14. pygpt_net/controller/chat/audio.py +1 -8
  15. pygpt_net/controller/chat/common.py +30 -4
  16. pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
  17. pygpt_net/controller/chat/output.py +8 -3
  18. pygpt_net/controller/chat/stream.py +4 -405
  19. pygpt_net/controller/chat/text.py +3 -2
  20. pygpt_net/controller/chat/vision.py +11 -19
  21. pygpt_net/controller/config/placeholder.py +1 -1
  22. pygpt_net/controller/ctx/ctx.py +1 -1
  23. pygpt_net/controller/ctx/summarizer.py +1 -1
  24. pygpt_net/controller/kernel/kernel.py +11 -3
  25. pygpt_net/controller/kernel/reply.py +5 -1
  26. pygpt_net/controller/mode/mode.py +21 -12
  27. pygpt_net/controller/plugins/settings.py +3 -2
  28. pygpt_net/controller/presets/editor.py +112 -99
  29. pygpt_net/controller/realtime/__init__.py +12 -0
  30. pygpt_net/controller/realtime/manager.py +53 -0
  31. pygpt_net/controller/realtime/realtime.py +268 -0
  32. pygpt_net/controller/theme/theme.py +3 -2
  33. pygpt_net/controller/ui/mode.py +7 -0
  34. pygpt_net/controller/ui/ui.py +19 -1
  35. pygpt_net/controller/ui/vision.py +4 -4
  36. pygpt_net/core/agents/legacy.py +2 -2
  37. pygpt_net/core/agents/runners/openai_workflow.py +2 -2
  38. pygpt_net/core/assistants/files.py +5 -5
  39. pygpt_net/core/assistants/store.py +4 -4
  40. pygpt_net/core/audio/audio.py +6 -1
  41. pygpt_net/core/audio/backend/native/__init__.py +12 -0
  42. pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
  43. pygpt_net/core/audio/backend/native/player.py +139 -0
  44. pygpt_net/core/audio/backend/native/realtime.py +250 -0
  45. pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
  46. pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
  47. pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
  48. pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
  49. pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
  50. pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
  51. pygpt_net/core/audio/backend/shared/__init__.py +38 -0
  52. pygpt_net/core/audio/backend/shared/conversions.py +211 -0
  53. pygpt_net/core/audio/backend/shared/envelope.py +38 -0
  54. pygpt_net/core/audio/backend/shared/player.py +137 -0
  55. pygpt_net/core/audio/backend/shared/rt.py +52 -0
  56. pygpt_net/core/audio/capture.py +5 -0
  57. pygpt_net/core/audio/output.py +13 -2
  58. pygpt_net/core/audio/whisper.py +6 -2
  59. pygpt_net/core/bridge/bridge.py +4 -3
  60. pygpt_net/core/bridge/worker.py +31 -9
  61. pygpt_net/core/debug/console/console.py +2 -2
  62. pygpt_net/core/debug/presets.py +2 -2
  63. pygpt_net/core/dispatcher/dispatcher.py +37 -1
  64. pygpt_net/core/events/__init__.py +2 -1
  65. pygpt_net/core/events/realtime.py +55 -0
  66. pygpt_net/core/experts/experts.py +2 -2
  67. pygpt_net/core/image/image.py +51 -1
  68. pygpt_net/core/modes/modes.py +2 -2
  69. pygpt_net/core/presets/presets.py +3 -3
  70. pygpt_net/core/realtime/options.py +87 -0
  71. pygpt_net/core/realtime/shared/__init__.py +0 -0
  72. pygpt_net/core/realtime/shared/audio.py +213 -0
  73. pygpt_net/core/realtime/shared/loop.py +64 -0
  74. pygpt_net/core/realtime/shared/session.py +59 -0
  75. pygpt_net/core/realtime/shared/text.py +37 -0
  76. pygpt_net/core/realtime/shared/tools.py +276 -0
  77. pygpt_net/core/realtime/shared/turn.py +38 -0
  78. pygpt_net/core/realtime/shared/types.py +16 -0
  79. pygpt_net/core/realtime/worker.py +164 -0
  80. pygpt_net/core/tokens/tokens.py +4 -4
  81. pygpt_net/core/types/__init__.py +1 -0
  82. pygpt_net/core/types/image.py +48 -0
  83. pygpt_net/core/types/mode.py +5 -2
  84. pygpt_net/core/vision/analyzer.py +1 -1
  85. pygpt_net/data/config/config.json +13 -4
  86. pygpt_net/data/config/models.json +219 -101
  87. pygpt_net/data/config/modes.json +3 -9
  88. pygpt_net/data/config/settings.json +135 -27
  89. pygpt_net/data/config/settings_section.json +2 -2
  90. pygpt_net/data/locale/locale.de.ini +7 -7
  91. pygpt_net/data/locale/locale.en.ini +25 -12
  92. pygpt_net/data/locale/locale.es.ini +7 -7
  93. pygpt_net/data/locale/locale.fr.ini +7 -7
  94. pygpt_net/data/locale/locale.it.ini +7 -7
  95. pygpt_net/data/locale/locale.pl.ini +8 -8
  96. pygpt_net/data/locale/locale.uk.ini +7 -7
  97. pygpt_net/data/locale/locale.zh.ini +3 -3
  98. pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
  99. pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
  100. pygpt_net/item/model.py +23 -3
  101. pygpt_net/plugin/audio_input/plugin.py +37 -4
  102. pygpt_net/plugin/audio_input/simple.py +57 -8
  103. pygpt_net/plugin/cmd_files/worker.py +3 -0
  104. pygpt_net/plugin/openai_dalle/plugin.py +4 -4
  105. pygpt_net/plugin/openai_vision/plugin.py +12 -13
  106. pygpt_net/provider/agents/openai/agent.py +5 -5
  107. pygpt_net/provider/agents/openai/agent_b2b.py +5 -5
  108. pygpt_net/provider/agents/openai/agent_planner.py +5 -6
  109. pygpt_net/provider/agents/openai/agent_with_experts.py +5 -5
  110. pygpt_net/provider/agents/openai/agent_with_experts_feedback.py +4 -4
  111. pygpt_net/provider/agents/openai/agent_with_feedback.py +4 -4
  112. pygpt_net/provider/agents/openai/bot_researcher.py +2 -2
  113. pygpt_net/provider/agents/openai/bots/research_bot/agents/planner_agent.py +1 -1
  114. pygpt_net/provider/agents/openai/bots/research_bot/agents/search_agent.py +1 -1
  115. pygpt_net/provider/agents/openai/bots/research_bot/agents/writer_agent.py +1 -1
  116. pygpt_net/provider/agents/openai/evolve.py +5 -5
  117. pygpt_net/provider/agents/openai/supervisor.py +4 -4
  118. pygpt_net/provider/api/__init__.py +27 -0
  119. pygpt_net/provider/api/anthropic/__init__.py +68 -0
  120. pygpt_net/provider/api/google/__init__.py +295 -0
  121. pygpt_net/provider/api/google/audio.py +121 -0
  122. pygpt_net/provider/api/google/chat.py +591 -0
  123. pygpt_net/provider/api/google/image.py +427 -0
  124. pygpt_net/provider/api/google/realtime/__init__.py +12 -0
  125. pygpt_net/provider/api/google/realtime/client.py +1945 -0
  126. pygpt_net/provider/api/google/realtime/realtime.py +186 -0
  127. pygpt_net/provider/api/google/tools.py +222 -0
  128. pygpt_net/provider/api/google/vision.py +129 -0
  129. pygpt_net/provider/{gpt → api/openai}/__init__.py +24 -4
  130. pygpt_net/provider/api/openai/agents/__init__.py +0 -0
  131. pygpt_net/provider/{gpt → api/openai}/agents/computer.py +1 -1
  132. pygpt_net/provider/{gpt → api/openai}/agents/experts.py +1 -1
  133. pygpt_net/provider/{gpt → api/openai}/agents/response.py +1 -1
  134. pygpt_net/provider/{gpt → api/openai}/assistants.py +1 -1
  135. pygpt_net/provider/{gpt → api/openai}/chat.py +15 -8
  136. pygpt_net/provider/{gpt → api/openai}/completion.py +1 -1
  137. pygpt_net/provider/{gpt → api/openai}/image.py +1 -1
  138. pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
  139. pygpt_net/provider/api/openai/realtime/client.py +1828 -0
  140. pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
  141. pygpt_net/provider/{gpt → api/openai}/remote_tools.py +1 -1
  142. pygpt_net/provider/{gpt → api/openai}/responses.py +34 -20
  143. pygpt_net/provider/{gpt → api/openai}/store.py +2 -2
  144. pygpt_net/provider/{gpt → api/openai}/vision.py +1 -1
  145. pygpt_net/provider/api/openai/worker/__init__.py +0 -0
  146. pygpt_net/provider/{gpt → api/openai}/worker/assistants.py +4 -4
  147. pygpt_net/provider/{gpt → api/openai}/worker/importer.py +10 -10
  148. pygpt_net/provider/audio_input/google_genai.py +103 -0
  149. pygpt_net/provider/audio_input/openai_whisper.py +1 -1
  150. pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
  151. pygpt_net/provider/audio_output/openai_tts.py +9 -6
  152. pygpt_net/provider/core/config/patch.py +26 -0
  153. pygpt_net/provider/core/model/patch.py +20 -0
  154. pygpt_net/provider/core/preset/json_file.py +2 -4
  155. pygpt_net/provider/llms/anthropic.py +2 -5
  156. pygpt_net/provider/llms/base.py +4 -3
  157. pygpt_net/provider/llms/google.py +8 -9
  158. pygpt_net/provider/llms/openai.py +1 -1
  159. pygpt_net/provider/loaders/hub/image_vision/base.py +1 -1
  160. pygpt_net/ui/dialog/preset.py +71 -55
  161. pygpt_net/ui/layout/toolbox/footer.py +16 -0
  162. pygpt_net/ui/layout/toolbox/image.py +5 -0
  163. pygpt_net/ui/main.py +6 -4
  164. pygpt_net/ui/widget/option/combo.py +15 -1
  165. pygpt_net/utils.py +9 -0
  166. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +55 -55
  167. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +181 -135
  168. pygpt_net/core/audio/backend/pyaudio.py +0 -554
  169. /pygpt_net/{provider/gpt/agents → controller/chat/handler}/__init__.py +0 -0
  170. /pygpt_net/{provider/gpt/worker → core/realtime}/__init__.py +0 -0
  171. /pygpt_net/provider/{gpt → api/openai}/agents/client.py +0 -0
  172. /pygpt_net/provider/{gpt → api/openai}/agents/remote_tools.py +0 -0
  173. /pygpt_net/provider/{gpt → api/openai}/agents/utils.py +0 -0
  174. /pygpt_net/provider/{gpt → api/openai}/audio.py +0 -0
  175. /pygpt_net/provider/{gpt → api/openai}/computer.py +0 -0
  176. /pygpt_net/provider/{gpt → api/openai}/container.py +0 -0
  177. /pygpt_net/provider/{gpt → api/openai}/summarizer.py +0 -0
  178. /pygpt_net/provider/{gpt → api/openai}/tools.py +0 -0
  179. /pygpt_net/provider/{gpt → api/openai}/utils.py +0 -0
  180. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
  181. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
  182. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# ================================================== #
# This file is a part of PYGPT package               #
# Website: https://pygpt.net                         #
# GitHub:  https://github.com/szczyglis-dev/py-gpt   #
# MIT License                                        #
# Created By  : Marcin Szczygliński                  #
# Updated Date: 2025.08.31 23:00:00                  #
# ================================================== #

import json
from typing import Optional, Dict, Any

from pygpt_net.core.bridge import BridgeContext
from pygpt_net.core.events import RealtimeEvent
from pygpt_net.core.realtime.options import RealtimeOptions
from pygpt_net.core.realtime.shared.session import extract_last_session_id
from pygpt_net.item.model import ModelItem
from pygpt_net.utils import trans

from .client import OpenAIRealtimeClient


class Realtime:

    PROVIDER = "openai"

    def __init__(self, window=None):
        """
        OpenAI API realtime controller

        :param window: Window instance
        """
        self.window = window
        self.handler = OpenAIRealtimeClient(window)
        # Cached auto-turn / VAD options from the previous request, used to
        # detect configuration changes so an active session can be updated
        # in place instead of being restarted.
        self.prev_auto_turn = False
        self.prev_vad_silence = 2000
        self.prev_vad_prefix = 300

    def begin(
            self,
            context: BridgeContext,
            model: Optional[ModelItem] = None,
            extra: Optional[Dict[str, Any]] = None,
            rt_signals=None
    ) -> bool:
        """
        Begin realtime session if applicable

        Depending on the current state this either: forwards tool results to
        an active session (internal sub-reply), updates an active session's
        tools/options, or starts a new realtime session via the manager.

        :param context: BridgeContext
        :param model: Optional[ModelItem]
        :param extra: Optional dict with extra parameters
        :param rt_signals: RealtimeSignals
        :return: True if realtime session started, False otherwise
        """
        # multimodal (audio) input, if provided by the caller
        mm = context.multimodal_ctx
        audio_bytes = getattr(mm, "audio_data", None) if mm and getattr(mm, "is_audio_input", False) else None
        audio_format = getattr(mm, "audio_format", None) if mm else None
        audio_rate = getattr(mm, "audio_rate", None) if mm else None
        is_debug = self.window.core.config.get("log.realtime", False)
        auto_turn = self.window.core.config.get("audio.input.auto_turn", True)
        opt_vad_silence = self.window.core.config.get("audio.input.vad.silence", 2000)
        opt_vad_prefix = self.window.core.config.get("audio.input.vad.prefix", 300)

        # setup manager
        self.window.controller.realtime.set_current_active(self.PROVIDER)
        self.window.controller.realtime.set_busy()
        self.handler.set_debug(is_debug)

        # tools
        tools = self.window.core.api.openai.tools.prepare(model, context.external_functions)

        # remote tools (start from an empty list, extended by provider)
        remote_tools = []
        remote_tools = self.window.core.api.openai.remote_tools.append_to_tools(
            mode=context.mode,
            model=model,
            stream=context.stream,
            is_expert_call=context.is_expert_call,
            tools=remote_tools,
            preset=context.preset,
        )

        # handle sub-reply (tool results from tool calls)
        if context.ctx.internal:
            if context.ctx.prev_ctx and context.ctx.prev_ctx.extra.get("prev_tool_calls"):
                tool_calls = context.ctx.prev_ctx.extra.get("prev_tool_calls", [])
                tool_call_id = None
                if isinstance(tool_calls, list) and len(tool_calls) > 0:
                    tool_call_id = tool_calls[0].get("call_id", "")  # get first call_id
                    if not tool_call_id:
                        tool_call_id = tool_calls[0].get("id", "")  # fallback to id
                if tool_call_id:
                    tool_results = context.ctx.input
                    try:
                        # tool results may be JSON-encoded; fall back to raw string
                        tool_results = json.loads(tool_results)
                    except Exception:
                        pass
                    self.handler.send_tool_results_sync({
                        tool_call_id: tool_results
                    })
                    self.handler.update_ctx(context.ctx)
                    return True  # do not start new session, just send tool results

        # update auto-turn in active session when options changed since last request
        if (self.handler.is_session_active()
                and (auto_turn != self.prev_auto_turn
                     or opt_vad_silence != self.prev_vad_silence
                     or opt_vad_prefix != self.prev_vad_prefix)):
            # FIX: was an unconditional leftover debug print ("updating");
            # gate diagnostics behind the realtime debug flag like the rest of this method
            if is_debug:
                print("[realtime] Updating session auto-turn / VAD options")
            self.handler.update_session_autoturn_sync(auto_turn, opt_vad_silence, opt_vad_prefix)

        # if auto-turn is enabled and prompt is empty, update session and context only
        if auto_turn and self.handler.is_session_active() and (context.prompt.strip() == "" or context.prompt == "..."):
            self.handler.update_session_tools_sync(tools, remote_tools)
            self.handler.update_ctx(context.ctx)
            self.window.update_status(trans("speech.listening"))
            return True  # do not send new request if session is active

        # Last session ID
        last_session_id = extract_last_session_id(context.history)
        if is_debug:
            print("[realtime session] Last ID", last_session_id)

        # Voice (from audio output plugin option, fallback to "alloy")
        voice = "alloy"
        try:
            v = self.window.core.plugins.get_option("audio_output", "openai_voice")
            if v:
                voice = str(v)
        except Exception:
            pass

        # Options
        opts = RealtimeOptions(
            provider=self.PROVIDER,
            model=context.model.id,
            system_prompt=context.system_prompt,
            prompt=context.prompt,
            voice=voice,
            audio_data=audio_bytes,
            audio_format=audio_format,
            audio_rate=audio_rate,
            vad="server_vad",
            extra=extra or {},
            tools=tools,
            remote_tools=remote_tools,
            rt_signals=rt_signals,
            rt_session_id=last_session_id,
            auto_turn=auto_turn,
            vad_end_silence_ms=opt_vad_silence,
            vad_prefix_padding_ms=opt_vad_prefix,
        )

        # Start or append to realtime session via manager
        try:
            if is_debug:
                print("[realtime] Starting session with options:", opts.to_dict())
            rt = self.window.controller.realtime.manager
            rt.start(context.ctx, opts)

            # remember options only after a successful start
            self.prev_auto_turn = auto_turn
            self.prev_vad_silence = opt_vad_silence
            self.prev_vad_prefix = opt_vad_prefix
            return True
        except Exception as e:
            self.window.core.debug.log(e)
            return False  # fallback to non-live path

    def handle_audio_input(self, event: RealtimeEvent):
        """
        Handle Realtime audio input event

        :param event: RealtimeEvent
        """
        self.handler.rt_handle_audio_input_sync(event)

    def manual_commit(self):
        """Manually commit audio input to realtime session"""
        self.handler.force_response_now_sync()

    def shutdown(self):
        """Shutdown realtime loops"""
        if self.handler.is_session_active():
            self.handler.close_session_sync()
        try:
            self.handler.stop_loop_sync()
        except Exception:
            pass

    def reset(self):
        """Close realtime session"""
        if self.handler.is_session_active():
            self.handler.close_session_sync()
@@ -113,7 +113,7 @@ class RemoteTools:
113
113
  # extend local tools with remote tools
114
114
  if enabled["computer_use"]:
115
115
  if not model.id in OPENAI_REMOTE_TOOL_DISABLE_COMPUTER_USE:
116
- tools.append(self.window.core.gpt.computer.get_tool())
116
+ tools.append(self.window.core.api.openai.computer.get_tool())
117
117
  else:
118
118
  if not model.id in OPENAI_REMOTE_TOOL_DISABLE_WEB_SEARCH:
119
119
  if enabled["web_search"]:
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.08.05 00:00:00 #
9
+ # Updated Date: 2025.08.28 09:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import base64
@@ -92,7 +92,7 @@ class Responses:
92
92
  user_name = ctx.input_name # from ctx
93
93
  ai_name = ctx.output_name # from ctx
94
94
 
95
- client = self.window.core.gpt.get_client(mode, model)
95
+ client = self.window.core.api.openai.get_client(mode, model)
96
96
 
97
97
  # build chat messages
98
98
  messages = self.build(
@@ -122,7 +122,7 @@ class Responses:
122
122
  response_kwargs = {}
123
123
 
124
124
  # tools / functions
125
- tools = self.window.core.gpt.tools.prepare_responses_api(model, functions)
125
+ tools = self.window.core.api.openai.tools.prepare_responses_api(model, functions)
126
126
 
127
127
  # extra arguments, o3 only
128
128
  if model.extra and "reasoning_effort" in model.extra:
@@ -130,7 +130,7 @@ class Responses:
130
130
  response_kwargs['reasoning']['effort'] = model.extra["reasoning_effort"]
131
131
 
132
132
  # append remote tools
133
- tools = self.window.core.gpt.remote_tools.append_to_tools(
133
+ tools = self.window.core.api.openai.remote_tools.append_to_tools(
134
134
  mode=mode,
135
135
  model=model,
136
136
  stream=stream,
@@ -250,20 +250,31 @@ class Responses:
250
250
  used_tokens,
251
251
  max_ctx_tokens,
252
252
  )
253
+
254
+ has_response_id_in_last_item = False
255
+ if items and len(items) > 0:
256
+ last_item = items[-1]
257
+ if last_item and last_item.msg_id:
258
+ has_response_id_in_last_item = True
259
+
253
260
  for item in items:
254
261
  # input
255
262
  if item.final_input is not None and item.final_input != "":
256
- messages.append({
257
- "role": "user",
258
- "content": item.final_input,
259
- })
263
+ if not has_response_id_in_last_item:
264
+ messages.append({
265
+ "role": "user",
266
+ "content": item.final_input,
267
+ })
260
268
 
261
269
  # output
262
270
  if item.final_output is not None and item.final_output != "":
263
- msg = {
264
- "role": "assistant",
265
- "content": item.final_output,
266
- }
271
+ if not has_response_id_in_last_item:
272
+ msg = {
273
+ "role": "assistant",
274
+ "content": item.final_output,
275
+ }
276
+ else:
277
+ msg = {}
267
278
  # append previous audio ID
268
279
  if MODE_AUDIO in model.mode:
269
280
  if item.audio_id:
@@ -281,7 +292,9 @@ class Responses:
281
292
  msg["audio"] = {
282
293
  "id": self.audio_prev_id
283
294
  }
284
- messages.append(msg)
295
+
296
+ if msg:
297
+ messages.append(msg)
285
298
 
286
299
  # ---- tool output ----
287
300
  is_tool_output = False # reset tool output flag
@@ -340,7 +353,7 @@ class Responses:
340
353
 
341
354
  # computer call output
342
355
  elif output_type == "computer_call":
343
- base64img = self.window.core.gpt.vision.get_attachment(attachments)
356
+ base64img = self.window.core.api.openai.vision.get_attachment(attachments)
344
357
  if base64img and "call_id" in tool_call:
345
358
  if tool_call["call_id"]:
346
359
  # tool output
@@ -382,13 +395,13 @@ class Responses:
382
395
  if (model.is_image_input()
383
396
  and mode != MODE_COMPUTER
384
397
  and not model.id.startswith("computer-use")):
385
- content = self.window.core.gpt.vision.build_content(
398
+ content = self.window.core.api.openai.vision.build_content(
386
399
  content=content,
387
400
  attachments=attachments,
388
401
  responses_api=True,
389
402
  )
390
403
  if model.is_audio_input():
391
- content = self.window.core.gpt.audio.build_content(
404
+ content = self.window.core.api.openai.audio.build_content(
392
405
  content=content,
393
406
  multimodal_ctx=multimodal_ctx,
394
407
  )
@@ -404,6 +417,7 @@ class Responses:
404
417
  messages,
405
418
  model.id,
406
419
  )
420
+
407
421
  return messages
408
422
 
409
423
  def reset_tokens(self):
@@ -431,7 +445,7 @@ class Responses:
431
445
 
432
446
  if mode in [
433
447
  MODE_CHAT,
434
- MODE_VISION,
448
+ # MODE_VISION,
435
449
  MODE_RESEARCH,
436
450
  MODE_COMPUTER,
437
451
  ]:
@@ -499,7 +513,7 @@ class Responses:
499
513
  id = output.id
500
514
  call_id = output.call_id
501
515
  action = output.action
502
- tool_calls, is_call = self.window.core.gpt.computer.handle_action(
516
+ tool_calls, is_call = self.window.core.api.openai.computer.handle_action(
503
517
  id=id,
504
518
  call_id=call_id,
505
519
  action=action,
@@ -567,7 +581,7 @@ class Responses:
567
581
  if files:
568
582
  self.window.core.debug.info("[chat] Container files found, downloading...")
569
583
  try:
570
- self.window.core.gpt.container.download_files(ctx, files)
584
+ self.window.core.api.openai.container.download_files(ctx, files)
571
585
  except Exception as e:
572
586
  self.window.core.debug.error(f"[chat] Error downloading container files: {e}")
573
587
 
@@ -631,7 +645,7 @@ class Responses:
631
645
  if files:
632
646
  self.window.core.debug.info("[chat] Container files found, downloading...")
633
647
  try:
634
- self.window.core.gpt.container.download_files(ctx, files)
648
+ self.window.core.api.openai.container.download_files(ctx, files)
635
649
  except Exception as e:
636
650
  self.window.core.debug.error(f"[chat] Error downloading container files: {e}")
637
651
 
@@ -30,7 +30,7 @@ class Store:
30
30
 
31
31
  :return: OpenAI client
32
32
  """
33
- return self.window.core.gpt.get_client()
33
+ return self.window.core.api.openai.get_client()
34
34
 
35
35
  def log(
36
36
  self,
@@ -92,7 +92,7 @@ class Store:
92
92
  :param file_id: file ID
93
93
  :param path: path to save file
94
94
  """
95
- client = self.window.core.gpt.get_client()
95
+ client = self.window.core.api.openai.get_client()
96
96
  content = client.files.content(file_id)
97
97
  data = content.read()
98
98
  with open(path, 'wb', ) as f:
@@ -54,7 +54,7 @@ class Vision:
54
54
  attachments = context.attachments
55
55
  model = context.model
56
56
  model_id = model.id
57
- client = self.window.core.gpt.get_client()
57
+ client = self.window.core.api.openai.get_client()
58
58
 
59
59
  # extra API kwargs
60
60
  response_kwargs = {}
File without changes
@@ -565,7 +565,7 @@ class Worker(QRunnable):
565
565
  """
566
566
  try:
567
567
  if self.stream: # stream mode
568
- run = self.window.core.gpt.assistants.run_create_stream(
568
+ run = self.window.core.api.openai.assistants.run_create_stream(
569
569
  self.signals,
570
570
  self.ctx,
571
571
  self.thread_id,
@@ -575,7 +575,7 @@ class Worker(QRunnable):
575
575
  )
576
576
  else:
577
577
  # not stream mode
578
- run = self.window.core.gpt.assistants.run_create(
578
+ run = self.window.core.api.openai.assistants.run_create(
579
579
  self.thread_id,
580
580
  self.assistant_id,
581
581
  self.model,
@@ -596,7 +596,7 @@ class Worker(QRunnable):
596
596
  :return: result
597
597
  """
598
598
  try:
599
- response = self.window.core.gpt.assistants.msg_send(
599
+ response = self.window.core.api.openai.assistants.msg_send(
600
600
  self.thread_id,
601
601
  self.prompt,
602
602
  self.file_ids,
@@ -615,7 +615,7 @@ class Worker(QRunnable):
615
615
  :return: result
616
616
  """
617
617
  try:
618
- run = self.window.core.gpt.assistants.run_submit_tool(self.ctx, self.tools_outputs)
618
+ run = self.window.core.api.openai.assistants.run_submit_tool(self.ctx, self.tools_outputs)
619
619
  if run is not None:
620
620
  self.ctx.run_id = run.id # update run id
621
621
  self.signals.finished.emit(self.ctx, run, False) # continue status check
@@ -237,7 +237,7 @@ class ImportWorker(QRunnable):
237
237
  self.log("Importing assistants...")
238
238
  self.window.core.assistants.clear()
239
239
  items = self.window.core.assistants.get_all()
240
- self.window.core.gpt.assistants.import_all(items, callback=self.callback)
240
+ self.window.core.api.openai.assistants.import_all(items, callback=self.callback)
241
241
  self.window.core.assistants.items = items
242
242
  self.window.core.assistants.save()
243
243
 
@@ -266,7 +266,7 @@ class ImportWorker(QRunnable):
266
266
  self.log("Importing vector stores...")
267
267
  self.window.core.assistants.store.clear()
268
268
  items = {}
269
- self.window.core.gpt.store.import_stores(items, callback=self.callback)
269
+ self.window.core.api.openai.store.import_stores(items, callback=self.callback)
270
270
  self.window.core.assistants.store.import_items(items)
271
271
  if not silent:
272
272
  self.signals.finished.emit("vector_stores", self.store_id, len(items))
@@ -285,7 +285,7 @@ class ImportWorker(QRunnable):
285
285
  """
286
286
  try:
287
287
  self.log("Truncating stores...")
288
- num = self.window.core.gpt.store.remove_all(callback=self.callback)
288
+ num = self.window.core.api.openai.store.remove_all(callback=self.callback)
289
289
  self.window.core.assistants.store.items = {}
290
290
  self.window.core.assistants.store.save()
291
291
  if not silent:
@@ -336,12 +336,12 @@ class ImportWorker(QRunnable):
336
336
  self.log("Truncating all files...")
337
337
  self.window.core.assistants.files.truncate() # clear all files
338
338
  # remove all files in API
339
- num = self.window.core.gpt.store.remove_files(callback=self.callback)
339
+ num = self.window.core.api.openai.store.remove_files(callback=self.callback)
340
340
  else:
341
341
  self.log("Truncating files for store: {}".format(self.store_id))
342
342
  self.window.core.assistants.files.truncate(self.store_id) # clear store files, remove from stores / DB
343
343
  # remove store files in API
344
- num = self.window.core.gpt.store.remove_store_files(
344
+ num = self.window.core.api.openai.store.remove_store_files(
345
345
  self.store_id,
346
346
  callback=self.callback,
347
347
  )
@@ -365,14 +365,14 @@ class ImportWorker(QRunnable):
365
365
  self.log("Uploading files...")
366
366
  for file in self.files:
367
367
  try:
368
- file_id = self.window.core.gpt.store.upload(file)
368
+ file_id = self.window.core.api.openai.store.upload(file)
369
369
  if file_id is not None:
370
- stored_file = self.window.core.gpt.store.add_file(
370
+ stored_file = self.window.core.api.openai.store.add_file(
371
371
  self.store_id,
372
372
  file_id,
373
373
  )
374
374
  if stored_file is not None:
375
- data = self.window.core.gpt.store.get_file(file_id)
375
+ data = self.window.core.api.openai.store.get_file(file_id)
376
376
  self.window.core.assistants.files.insert(self.store_id, data) # insert to DB
377
377
  msg = "Uploaded file: {}/{}".format((num + 1), len(self.files))
378
378
  self.signals.status.emit("upload_files", msg)
@@ -403,11 +403,11 @@ class ImportWorker(QRunnable):
403
403
  if self.store_id is None:
404
404
  self.log("Importing all files...")
405
405
  self.window.core.assistants.files.truncate_local() # clear local DB (all)
406
- num = self.window.core.gpt.store.import_stores_files(self.callback) # import all files
406
+ num = self.window.core.api.openai.store.import_stores_files(self.callback) # import all files
407
407
  else:
408
408
  self.log("Importing files for store: {}".format(self.store_id))
409
409
  self.window.core.assistants.files.truncate_local(self.store_id) # clear local DB (all)
410
- items = self.window.core.gpt.store.import_store_files(
410
+ items = self.window.core.api.openai.store.import_store_files(
411
411
  self.store_id,
412
412
  [],
413
413
  callback=self.callback,
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# ================================================== #
# This file is a part of PYGPT package               #
# Website: https://pygpt.net                         #
# GitHub:  https://github.com/szczyglis-dev/py-gpt   #
# MIT License                                        #
# Created By  : Marcin Szczygliński                  #
# Updated Date: 2025.08.29 18:00:00                  #
# ================================================== #

from .base import BaseProvider


class GoogleGenAIAudioInput(BaseProvider):

    # Default system instruction: force a plain, verbatim transcript
    PROMPT_TRANSCRIBE = (
        "You are a speech-to-text transcriber. "
        "Return only the verbatim transcript as plain text. "
        "Do not add any explanations, timestamps, labels or formatting."
    )

    def __init__(self, *args, **kwargs):
        """
        Google GenAI (Gemini) audio provider for transcription (via API).

        :param args: args
        :param kwargs: kwargs
        """
        super().__init__(*args, **kwargs)
        self.plugin = kwargs.get("plugin")
        self.id = "google_genai"
        self.name = "Google GenAI"

    def init_options(self):
        """Initialize plugin options (shape kept consistent with the Whisper provider)"""
        add_option = self.plugin.add_option
        add_option(
            "google_genai_audio_model",
            type="text",
            value="gemini-2.5-flash",
            label="Model",
            tab="google_genai",
            description="Specify Gemini model supporting audio, e.g., gemini-2.5-flash",
        )
        add_option(
            "google_genai_audio_prompt",
            type="textarea",
            value=self.PROMPT_TRANSCRIBE,
            label="System Prompt",
            tab="google_genai",
            description="System prompt to guide the transcription output",
            tooltip="System prompt for transcription",
            persist=True,
        )

    def transcribe(self, path: str) -> str:
        """
        Audio to text transcription using Google GenAI (Gemini).

        :param path: path to audio file to transcribe
        :return: transcribed text
        """
        # pre-configured GenAI client from the app core
        client = self.plugin.window.core.api.google.get_client()

        # push the audio file through the Files API
        uploaded = client.files.upload(file=path)

        # system instruction keeps the output a bare transcript; user-configured
        # prompt wins, default prompt is the fallback
        system_prompt = self.plugin.get_option_value("google_genai_audio_prompt") or self.PROMPT_TRANSCRIBE
        request_config = {
            "system_instruction": system_prompt,
            "temperature": 0.0,
        }

        # run the transcription with the configured Gemini model
        response = client.models.generate_content(
            model=self.plugin.get_option_value("google_genai_audio_model"),
            contents=[uploaded],
            config=request_config,
        )

        # unified .text property holds the plain transcript
        return response.text or ""

    def is_configured(self) -> bool:
        """
        Check if provider is configured

        :return: True if configured, False otherwise
        """
        api_key = self.plugin.window.core.config.get("api_key_google")
        if api_key is None:
            return False
        return api_key != ""

    def get_config_message(self) -> str:
        """
        Return message to display when provider is not configured

        :return: message
        """
        return "Google GenAI API key is not set yet. Please configure it in settings."
@@ -43,7 +43,7 @@ class OpenAIWhisper(BaseProvider):
43
43
  :param path: path to audio file to transcribe
44
44
  :return: transcribed text
45
45
  """
46
- client = self.plugin.window.core.gpt.get_client()
46
+ client = self.plugin.window.core.api.openai.get_client()
47
47
  with open(path, "rb") as audio_file:
48
48
  return client.audio.transcriptions.create(
49
49
  model=self.plugin.get_option_value('whisper_model'),