PyPI - pygpt-net - Versions diffs - 2.6.30__py3-none-any.whl → 2.6.32__py3-none-any.whl - Mend

pygpt-net 2.6.30__py3-none-any.whl → 2.6.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (122) hide show

pygpt_net/CHANGELOG.txt +15 -0
pygpt_net/__init__.py +3 -3
pygpt_net/app.py +7 -1
pygpt_net/app_core.py +3 -1
pygpt_net/config.py +3 -1
pygpt_net/controller/__init__.py +9 -2
pygpt_net/controller/audio/audio.py +38 -1
pygpt_net/controller/audio/ui.py +2 -2
pygpt_net/controller/chat/audio.py +1 -8
pygpt_net/controller/chat/common.py +23 -62
pygpt_net/controller/chat/handler/__init__.py +0 -0
pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
pygpt_net/controller/chat/output.py +8 -3
pygpt_net/controller/chat/stream.py +3 -1071
pygpt_net/controller/chat/text.py +3 -2
pygpt_net/controller/kernel/kernel.py +11 -3
pygpt_net/controller/kernel/reply.py +5 -1
pygpt_net/controller/lang/custom.py +2 -2
pygpt_net/controller/media/__init__.py +12 -0
pygpt_net/controller/media/media.py +115 -0
pygpt_net/controller/realtime/__init__.py +12 -0
pygpt_net/controller/realtime/manager.py +53 -0
pygpt_net/controller/realtime/realtime.py +293 -0
pygpt_net/controller/ui/mode.py +23 -2
pygpt_net/controller/ui/ui.py +19 -1
pygpt_net/core/audio/audio.py +6 -1
pygpt_net/core/audio/backend/native/__init__.py +12 -0
pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
pygpt_net/core/audio/backend/native/player.py +139 -0
pygpt_net/core/audio/backend/native/realtime.py +250 -0
pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
pygpt_net/core/audio/backend/pyaudio/realtime.py +312 -0
pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
pygpt_net/core/audio/backend/shared/__init__.py +38 -0
pygpt_net/core/audio/backend/shared/conversions.py +211 -0
pygpt_net/core/audio/backend/shared/envelope.py +38 -0
pygpt_net/core/audio/backend/shared/player.py +137 -0
pygpt_net/core/audio/backend/shared/rt.py +52 -0
pygpt_net/core/audio/capture.py +5 -0
pygpt_net/core/audio/output.py +14 -2
pygpt_net/core/audio/whisper.py +6 -2
pygpt_net/core/bridge/bridge.py +2 -1
pygpt_net/core/bridge/worker.py +4 -1
pygpt_net/core/dispatcher/dispatcher.py +37 -1
pygpt_net/core/events/__init__.py +2 -1
pygpt_net/core/events/realtime.py +55 -0
pygpt_net/core/image/image.py +56 -5
pygpt_net/core/realtime/__init__.py +0 -0
pygpt_net/core/realtime/options.py +87 -0
pygpt_net/core/realtime/shared/__init__.py +0 -0
pygpt_net/core/realtime/shared/audio.py +213 -0
pygpt_net/core/realtime/shared/loop.py +64 -0
pygpt_net/core/realtime/shared/session.py +59 -0
pygpt_net/core/realtime/shared/text.py +37 -0
pygpt_net/core/realtime/shared/tools.py +276 -0
pygpt_net/core/realtime/shared/turn.py +38 -0
pygpt_net/core/realtime/shared/types.py +16 -0
pygpt_net/core/realtime/worker.py +160 -0
pygpt_net/core/render/web/body.py +24 -3
pygpt_net/core/text/utils.py +54 -2
pygpt_net/core/types/__init__.py +1 -0
pygpt_net/core/types/image.py +54 -0
pygpt_net/core/video/__init__.py +12 -0
pygpt_net/core/video/video.py +290 -0
pygpt_net/data/config/config.json +26 -5
pygpt_net/data/config/models.json +221 -103
pygpt_net/data/config/settings.json +244 -6
pygpt_net/data/css/web-blocks.css +6 -0
pygpt_net/data/css/web-chatgpt.css +6 -0
pygpt_net/data/css/web-chatgpt_wide.css +6 -0
pygpt_net/data/locale/locale.de.ini +35 -7
pygpt_net/data/locale/locale.en.ini +56 -17
pygpt_net/data/locale/locale.es.ini +35 -7
pygpt_net/data/locale/locale.fr.ini +35 -7
pygpt_net/data/locale/locale.it.ini +35 -7
pygpt_net/data/locale/locale.pl.ini +38 -7
pygpt_net/data/locale/locale.uk.ini +35 -7
pygpt_net/data/locale/locale.zh.ini +31 -3
pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
pygpt_net/data/locale/plugin.cmd_web.en.ini +8 -0
pygpt_net/item/model.py +22 -1
pygpt_net/plugin/audio_input/plugin.py +37 -4
pygpt_net/plugin/audio_input/simple.py +57 -8
pygpt_net/plugin/cmd_files/worker.py +3 -0
pygpt_net/provider/api/google/__init__.py +76 -7
pygpt_net/provider/api/google/audio.py +8 -1
pygpt_net/provider/api/google/chat.py +45 -6
pygpt_net/provider/api/google/image.py +226 -86
pygpt_net/provider/api/google/realtime/__init__.py +12 -0
pygpt_net/provider/api/google/realtime/client.py +1945 -0
pygpt_net/provider/api/google/realtime/realtime.py +186 -0
pygpt_net/provider/api/google/video.py +364 -0
pygpt_net/provider/api/openai/__init__.py +22 -2
pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
pygpt_net/provider/api/openai/realtime/client.py +1828 -0
pygpt_net/provider/api/openai/realtime/realtime.py +193 -0
pygpt_net/provider/audio_input/google_genai.py +103 -0
pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
pygpt_net/provider/audio_output/google_tts.py +0 -12
pygpt_net/provider/audio_output/openai_tts.py +8 -5
pygpt_net/provider/core/config/patch.py +241 -178
pygpt_net/provider/core/model/patch.py +28 -2
pygpt_net/provider/llms/google.py +8 -9
pygpt_net/provider/web/duckduck_search.py +212 -0
pygpt_net/ui/layout/toolbox/audio.py +55 -0
pygpt_net/ui/layout/toolbox/footer.py +14 -42
pygpt_net/ui/layout/toolbox/image.py +7 -13
pygpt_net/ui/layout/toolbox/raw.py +52 -0
pygpt_net/ui/layout/toolbox/split.py +48 -0
pygpt_net/ui/layout/toolbox/toolbox.py +8 -8
pygpt_net/ui/layout/toolbox/video.py +49 -0
pygpt_net/ui/widget/option/combo.py +15 -1
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/METADATA +46 -22
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/RECORD +121 -73
pygpt_net/core/audio/backend/pyaudio.py +0 -554
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/LICENSE +0 -0
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/WHEEL +0 -0
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/entry_points.txt +0 -0

pygpt_net/provider/api/google/__init__.py CHANGED Viewed

@@ -6,13 +6,15 @@
 # GitHub:  https://github.com/szczyglis-dev/py-gpt   #
 # MIT License                                        #
 # Created By  : Marcin Szczygliński                  #
-# Updated Date: 2025.08.28 20:00:00                  #
+# Updated Date: 2025.09.01 23:00:00                  #
 # ================================================== #
+import os
 from typing import Optional, Dict, Any
 from google.genai import types as gtypes
 from google import genai
 from pygpt_net.core.types import (
     MODE_ASSISTANT,
     MODE_AUDIO,
@@ -29,7 +31,8 @@ from .vision import Vision
 from .tools import Tools
 from .audio import Audio
 from .image import Image
+from .realtime import Realtime
+from .video import Video
 class ApiGoogle:
     def __init__(self, window=None):
@@ -44,6 +47,8 @@ class ApiGoogle:
         self.tools = Tools(window)
         self.audio = Audio(window)
         self.image = Image(window)
+        self.realtime = Realtime(window)
+        self.video = Video(window)
         self.client: Optional[genai.Client] = None
         self.locked = False
         self.last_client_args: Optional[Dict[str, Any]] = None
@@ -64,20 +69,56 @@ class ApiGoogle:
             model = ModelItem()
             model.provider = "google"
         args = self.window.core.models.prepare_client_args(mode, model)
+        config = self.window.core.config
         filtered = {}
         if args.get("api_key"):
             filtered["api_key"] = args["api_key"]
+        # setup VertexAI
+        use_vertex = False
+        if config.get("api_native_google.use_vertex", False):
+            use_vertex = True
+            os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "1"
+            os.environ["GOOGLE_CLOUD_PROJECT"] = config.get("api_native_google.cloud_project", "")
+            os.environ["GOOGLE_CLOUD_LOCATION"] = config.get("api_native_google.cloud_location", "us-central1")
+            if config.get("api_native_google.app_credentials", ""):
+                os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = config.get("api_native_google.app_credentials", "")
+        else:
+            if os.environ.get("GOOGLE_GENAI_USE_VERTEXAI"):
+                del os.environ["GOOGLE_GENAI_USE_VERTEXAI"]
+            if os.environ.get("GOOGLE_CLOUD_PROJECT"):
+                del os.environ["GOOGLE_CLOUD_PROJECT"]
+            if os.environ.get("GOOGLE_CLOUD_LOCATION"):
+                del os.environ["GOOGLE_CLOUD_LOCATION"]
+            if os.environ.get("GOOGLE_APPLICATION_CREDENTIALS"):
+                del os.environ["GOOGLE_APPLICATION_CREDENTIALS"]
+        # append VertexAI params to client args
+        if use_vertex:
+            filtered["vertexai"] = True
+            filtered["project"] = os.environ.get("GOOGLE_CLOUD_PROJECT")
+            filtered["location"] = os.environ.get("GOOGLE_CLOUD_LOCATION", "us-central1")
+            # filtered["http_options"] = gtypes.HttpOptions(api_version="v1")
         if self.client is None or self.last_client_args != filtered:
             self.client = genai.Client(**filtered)
         self.last_client_args = filtered
         return self.client
-    def call(self, context: BridgeContext, extra: dict = None) -> bool:
+    def call(
+            self,
+            context: BridgeContext,
+            extra: dict = None,
+            rt_signals = None
+    ) -> bool:
         """
         Make an API call to Google GenAI
         :param context: BridgeContext
         :param extra: Extra parameters
+        :param rt_signals: Realtime signals for audio streaming
         :return: True if successful, False otherwise
         """
         mode = context.mode
@@ -94,13 +135,28 @@ class ApiGoogle:
         response = None
         if mode in [MODE_COMPLETION, MODE_CHAT, MODE_AUDIO, MODE_RESEARCH]:
+            # Live API for audio streaming
+            if mode == MODE_AUDIO and stream:
+                is_realtime = self.realtime.begin(
+                    context=context,
+                    model=model,
+                    extra=extra or {},
+                    rt_signals=rt_signals
+                )
+                if is_realtime:
+                    return True
             response = self.chat.send(context=context, extra=extra)
             used_tokens = self.chat.get_used_tokens()
             if ctx:
                 self.vision.append_images(ctx)
         elif mode == MODE_IMAGE:
-            return self.image.generate(context=context, extra=extra)
+            if context.model.is_video_output():
+                return self.video.generate(context=context, extra=extra)  # veo, etc.
+            else:
+                return self.image.generate(context=context, extra=extra) # imagen, etc.
         elif mode == MODE_ASSISTANT:
             return False  # not implemented for Google
@@ -135,7 +191,11 @@ class ApiGoogle:
                 pass
         return True
-    def quick_call(self, context: BridgeContext, extra: dict = None) -> str:
+    def quick_call(
+            self,
+            context: BridgeContext,
+            extra: dict = None
+    ) -> str:
         """
         Make a quick API call to Google GenAI and return the output text
@@ -206,9 +266,9 @@ class ApiGoogle:
     def build_remote_tools(self, model: ModelItem = None) -> list:
         """
         Build Google GenAI remote tools based on config flags.
-        - google_tool_search: enables grounding via Google Search (Gemini 2.x)
+        - remote_tools.google.web_search: enables grounding via Google Search (Gemini 2.x)
           or GoogleSearchRetrieval (Gemini 1.5 fallback).
-        - google_tool_code_execution: enables code execution tool.
+        - remote_tools.google.code_interpreter: enables code execution tool.
         Returns a list of gtypes.Tool objects (can be empty).
@@ -242,6 +302,15 @@ class ApiGoogle:
             except Exception as e:
                 self.window.core.debug.log(e)
+        # URL Context tool
+        if cfg.get("remote_tools.google.url_ctx") and "image" not in model.id:
+            try:
+                # Supported on Gemini 2.x+ models (not on 1.5)
+                if not model_id.startswith("gemini-1.5") and not model_id.startswith("models/gemini-1.5"):
+                    tools.append(gtypes.Tool(url_context=gtypes.UrlContext))
+            except Exception as e:
+                self.window.core.debug.log(e)
         return tools

pygpt_net/provider/api/google/audio.py CHANGED Viewed

@@ -24,6 +24,8 @@ class Audio:
         Audio helpers for Google GenAI.
         - Build audio input parts for requests
         - Convert Google PCM output to WAV (base64) for UI compatibility
+        :param window: Window instance
         """
         self.window = window
@@ -103,7 +105,12 @@ class Audio:
     @staticmethod
     def _ensure_bytes(data) -> Optional[bytes]:
-        """Return raw bytes from inline_data.data (bytes or base64 string)."""
+        """
+        Return raw bytes from inline_data.data (bytes or base64 string).
+        :param data: bytes or base64 string
+        :return: bytes or None
+        """
         try:
             if isinstance(data, (bytes, bytearray)):
                 return bytes(data)

pygpt_net/provider/api/google/chat.py CHANGED Viewed

@@ -29,9 +29,17 @@ class Chat:
         self.window = window
         self.input_tokens = 0
-    def send(self, context: BridgeContext, extra: Optional[Dict[str, Any]] = None):
+    def send(
+            self,
+            context: BridgeContext,
+            extra: Optional[Dict[str, Any]] = None
+    ):
         """
         Call Google GenAI for chat / multimodal / audio.
+        :param context: BridgeContext with prompt, model, history, mode, etc.
+        :param extra: Extra parameters (not used currently)
+        :return: Response object or generator (if streaming)
         """
         prompt = context.prompt
         stream = context.stream
@@ -110,9 +118,13 @@ class Chat:
         # Tools -> merge app-defined tools with remote tools
         base_tools = self.window.core.api.google.tools.prepare(model, functions)
         remote_tools = self.window.core.api.google.build_remote_tools(model)
+        # Check tools compatibility
         if base_tools:
-            remote_tools = [] # do not mix local and remote tools
+            remote_tools = [] # remote tools are not allowed if function calling is used
         tools = (base_tools or []) + (remote_tools or [])
+        if "-image" in model.id:
+            tools = None  # function calling is not supported for image models
         # Sampling
         temperature = self.window.core.config.get('temperature')
@@ -144,7 +156,7 @@ class Chat:
             # Voice selection (case-sensitive name)
             voice_name = "Kore"
             try:
-                tmp = self.window.core.plugins.get_option("audio_output", "google_voice_native")
+                tmp = self.window.core.plugins.get_option("audio_output", "google_genai_tts_voice")
                 if tmp:
                     name = str(tmp).strip()
                     mapping = {"kore": "Kore", "puck": "Puck", "charon": "Charon", "verse": "Verse", "legend": "Legend"}
@@ -169,9 +181,17 @@ class Chat:
         else:
             return client.models.generate_content(**params)
-    def unpack_response(self, mode: str, response, ctx: CtxItem):
+    def unpack_response(
+            self,
+            mode: str,
+            response, ctx: CtxItem
+    ):
         """
         Unpack non-streaming response from Google GenAI and set context.
+        :param mode: MODE_CHAT or MODE_AUDIO
+        :param response: Response object
+        :param ctx: CtxItem to set output, audio_output, tokens, tool_calls
         """
         if mode == MODE_AUDIO:
             # Prefer audio if present
@@ -229,6 +249,11 @@ class Chat:
     def extract_text(self, response) -> str:
         """
         Extract output text.
+        Prefer response.text (Python SDK), then fallback to parts[].text.
+        :param response: Response object
+        :return: Extracted text
         """
         txt = getattr(response, "text", None) or getattr(response, "output_text", None)
         if txt:
@@ -332,11 +357,17 @@ class Chat:
         return out
-    def _extract_inline_images_and_links(self, response, ctx: CtxItem) -> None:
+    def _extract_inline_images_and_links(
+            self,
+            response, ctx: CtxItem
+    ) -> None:
         """
         Extract inline image parts (Gemini image output) and file links.
         - Saves inline_data (image/*) bytes to files and appends paths to ctx.images.
         - Appends HTTP(S) image URIs from file_data to ctx.urls.
+        :param response: Response object
+        :param ctx: CtxItem to set images and urls
         """
         images: list[str] = []
         urls: list[str] = []
@@ -386,7 +417,12 @@ class Chat:
     @staticmethod
     def _ensure_bytes(data) -> bytes | None:
-        """Return raw bytes from SDK part.inline_data.data which can be bytes or base64 string."""
+        """
+        Return raw bytes from SDK part.inline_data.data which can be bytes or base64 string.
+        :param data: bytes or str
+        :return: bytes or None
+        """
         try:
             if isinstance(data, (bytes, bytearray)):
                 return bytes(data)
@@ -545,6 +581,9 @@ class Chat:
         Heuristic check if the model supports native TTS.
         - Official TTS models contain '-tts' in id (e.g. 'gemini-2.5-flash-preview-tts').
         - Future/preview names may contain 'native-audio'.
+        :param model_id: Model ID
+        :return: True if supports TTS, False otherwise
         """
         if not model_id:
             return False

pygpt-net 2.6.30__py3-none-any.whl → 2.6.32__py3-none-any.whl

pygpt-net 2.6.30__py3-none-any.whl → 2.6.32__py3-none-any.whl