PyPI - pygpt-net - Versions diffs - 2.6.30__py3-none-any.whl → 2.6.32__py3-none-any.whl - Mend

pygpt-net 2.6.30__py3-none-any.whl → 2.6.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (122) hide show

pygpt_net/CHANGELOG.txt +15 -0
pygpt_net/__init__.py +3 -3
pygpt_net/app.py +7 -1
pygpt_net/app_core.py +3 -1
pygpt_net/config.py +3 -1
pygpt_net/controller/__init__.py +9 -2
pygpt_net/controller/audio/audio.py +38 -1
pygpt_net/controller/audio/ui.py +2 -2
pygpt_net/controller/chat/audio.py +1 -8
pygpt_net/controller/chat/common.py +23 -62
pygpt_net/controller/chat/handler/__init__.py +0 -0
pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
pygpt_net/controller/chat/output.py +8 -3
pygpt_net/controller/chat/stream.py +3 -1071
pygpt_net/controller/chat/text.py +3 -2
pygpt_net/controller/kernel/kernel.py +11 -3
pygpt_net/controller/kernel/reply.py +5 -1
pygpt_net/controller/lang/custom.py +2 -2
pygpt_net/controller/media/__init__.py +12 -0
pygpt_net/controller/media/media.py +115 -0
pygpt_net/controller/realtime/__init__.py +12 -0
pygpt_net/controller/realtime/manager.py +53 -0
pygpt_net/controller/realtime/realtime.py +293 -0
pygpt_net/controller/ui/mode.py +23 -2
pygpt_net/controller/ui/ui.py +19 -1
pygpt_net/core/audio/audio.py +6 -1
pygpt_net/core/audio/backend/native/__init__.py +12 -0
pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
pygpt_net/core/audio/backend/native/player.py +139 -0
pygpt_net/core/audio/backend/native/realtime.py +250 -0
pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
pygpt_net/core/audio/backend/pyaudio/realtime.py +312 -0
pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
pygpt_net/core/audio/backend/shared/__init__.py +38 -0
pygpt_net/core/audio/backend/shared/conversions.py +211 -0
pygpt_net/core/audio/backend/shared/envelope.py +38 -0
pygpt_net/core/audio/backend/shared/player.py +137 -0
pygpt_net/core/audio/backend/shared/rt.py +52 -0
pygpt_net/core/audio/capture.py +5 -0
pygpt_net/core/audio/output.py +14 -2
pygpt_net/core/audio/whisper.py +6 -2
pygpt_net/core/bridge/bridge.py +2 -1
pygpt_net/core/bridge/worker.py +4 -1
pygpt_net/core/dispatcher/dispatcher.py +37 -1
pygpt_net/core/events/__init__.py +2 -1
pygpt_net/core/events/realtime.py +55 -0
pygpt_net/core/image/image.py +56 -5
pygpt_net/core/realtime/__init__.py +0 -0
pygpt_net/core/realtime/options.py +87 -0
pygpt_net/core/realtime/shared/__init__.py +0 -0
pygpt_net/core/realtime/shared/audio.py +213 -0
pygpt_net/core/realtime/shared/loop.py +64 -0
pygpt_net/core/realtime/shared/session.py +59 -0
pygpt_net/core/realtime/shared/text.py +37 -0
pygpt_net/core/realtime/shared/tools.py +276 -0
pygpt_net/core/realtime/shared/turn.py +38 -0
pygpt_net/core/realtime/shared/types.py +16 -0
pygpt_net/core/realtime/worker.py +160 -0
pygpt_net/core/render/web/body.py +24 -3
pygpt_net/core/text/utils.py +54 -2
pygpt_net/core/types/__init__.py +1 -0
pygpt_net/core/types/image.py +54 -0
pygpt_net/core/video/__init__.py +12 -0
pygpt_net/core/video/video.py +290 -0
pygpt_net/data/config/config.json +26 -5
pygpt_net/data/config/models.json +221 -103
pygpt_net/data/config/settings.json +244 -6
pygpt_net/data/css/web-blocks.css +6 -0
pygpt_net/data/css/web-chatgpt.css +6 -0
pygpt_net/data/css/web-chatgpt_wide.css +6 -0
pygpt_net/data/locale/locale.de.ini +35 -7
pygpt_net/data/locale/locale.en.ini +56 -17
pygpt_net/data/locale/locale.es.ini +35 -7
pygpt_net/data/locale/locale.fr.ini +35 -7
pygpt_net/data/locale/locale.it.ini +35 -7
pygpt_net/data/locale/locale.pl.ini +38 -7
pygpt_net/data/locale/locale.uk.ini +35 -7
pygpt_net/data/locale/locale.zh.ini +31 -3
pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
pygpt_net/data/locale/plugin.cmd_web.en.ini +8 -0
pygpt_net/item/model.py +22 -1
pygpt_net/plugin/audio_input/plugin.py +37 -4
pygpt_net/plugin/audio_input/simple.py +57 -8
pygpt_net/plugin/cmd_files/worker.py +3 -0
pygpt_net/provider/api/google/__init__.py +76 -7
pygpt_net/provider/api/google/audio.py +8 -1
pygpt_net/provider/api/google/chat.py +45 -6
pygpt_net/provider/api/google/image.py +226 -86
pygpt_net/provider/api/google/realtime/__init__.py +12 -0
pygpt_net/provider/api/google/realtime/client.py +1945 -0
pygpt_net/provider/api/google/realtime/realtime.py +186 -0
pygpt_net/provider/api/google/video.py +364 -0
pygpt_net/provider/api/openai/__init__.py +22 -2
pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
pygpt_net/provider/api/openai/realtime/client.py +1828 -0
pygpt_net/provider/api/openai/realtime/realtime.py +193 -0
pygpt_net/provider/audio_input/google_genai.py +103 -0
pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
pygpt_net/provider/audio_output/google_tts.py +0 -12
pygpt_net/provider/audio_output/openai_tts.py +8 -5
pygpt_net/provider/core/config/patch.py +241 -178
pygpt_net/provider/core/model/patch.py +28 -2
pygpt_net/provider/llms/google.py +8 -9
pygpt_net/provider/web/duckduck_search.py +212 -0
pygpt_net/ui/layout/toolbox/audio.py +55 -0
pygpt_net/ui/layout/toolbox/footer.py +14 -42
pygpt_net/ui/layout/toolbox/image.py +7 -13
pygpt_net/ui/layout/toolbox/raw.py +52 -0
pygpt_net/ui/layout/toolbox/split.py +48 -0
pygpt_net/ui/layout/toolbox/toolbox.py +8 -8
pygpt_net/ui/layout/toolbox/video.py +49 -0
pygpt_net/ui/widget/option/combo.py +15 -1
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/METADATA +46 -22
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/RECORD +121 -73
pygpt_net/core/audio/backend/pyaudio.py +0 -554
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/LICENSE +0 -0
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/WHEEL +0 -0
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/entry_points.txt +0 -0

pygpt_net/provider/api/google/image.py CHANGED Viewed

@@ -6,9 +6,10 @@
 # GitHub:  https://github.com/szczyglis-dev/py-gpt   #
 # MIT License                                        #
 # Created By  : Marcin Szczygliński                  #
-# Updated Date: 2025.08.28 20:00:00                  #
+# Updated Date: 2025.08.29 20:40:00                  #
 # ================================================== #
+import mimetypes
 from typing import Optional, Dict, Any, List
 from google import genai
 from google.genai import types as gtypes
@@ -37,12 +38,12 @@ class Image:
             sync: bool = True
     ) -> bool:
         """
-        Generate image(s) using Google GenAI API
+        Generate or edit image(s) using Google GenAI API (Developer API or Vertex AI).
-        :param context: BridgeContext
-        :param extra: Extra parameters (num, inline)
-        :param sync: Run synchronously if True
-        :return: bool
+        :param context: BridgeContext with prompt, model, attachments
+        :param extra: extra parameters (num, inline)
+        :param sync: run synchronously (blocking) if True
+        :return: True if started
         """
         extra = extra or {}
         ctx = context.ctx or CtxItem()
@@ -51,6 +52,14 @@ class Image:
         num = int(extra.get("num", 1))
         inline = bool(extra.get("inline", False))
+        # decide sub-mode based on attachments
+        sub_mode = self.MODE_GENERATE
+        attachments = context.attachments
+        if attachments and len(attachments) > 0:
+            pass # TODO: implement edit!
+            # sub_mode = self.MODE_EDIT
+        # model used to improve the prompt (not image model)
         prompt_model = self.window.core.models.from_defaults()
         tmp = self.window.core.config.get('img_prompt_model')
         if self.window.core.models.has(tmp):
@@ -60,9 +69,11 @@ class Image:
         worker.window = self.window
         worker.client = self.window.core.api.google.get_client()
         worker.ctx = ctx
-        worker.model = model.id
+        worker.mode = sub_mode
+        worker.attachments = attachments or {}
+        worker.model = model.id  # image model id
         worker.input_prompt = prompt
-        worker.model_prompt = prompt_model
+        worker.model_prompt = prompt_model  # LLM for prompt rewriting
         worker.system_prompt = self.window.core.prompt.get('img')
         worker.raw = self.window.core.config.get('img_raw')
         worker.num = num
@@ -87,10 +98,10 @@ class Image:
 class ImageSignals(QObject):
-    finished = Signal(object, list, str)  # ctx, paths, prompt
+    finished = Signal(object, list, str)         # ctx, paths, prompt
     finished_inline = Signal(object, list, str)  # ctx, paths, prompt
-    status = Signal(object) # message
-    error = Signal(object) # exception
+    status = Signal(object)                      # message
+    error = Signal(object)                       # exception
 class ImageWorker(QRunnable):
@@ -100,7 +111,11 @@ class ImageWorker(QRunnable):
         self.window = None
         self.client: Optional[genai.Client] = None
         self.ctx: Optional[CtxItem] = None
-        self.model = "imagen-4.0-generate-001"
+        # params
+        self.mode = Image.MODE_GENERATE
+        self.attachments: Dict[str, Any] = {}
+        self.model = "imagen-4.0-generate-preview-06-06"
         self.model_prompt = None
         self.input_prompt = ""
         self.system_prompt = ""
@@ -109,11 +124,17 @@ class ImageWorker(QRunnable):
         self.num = 1
         self.resolution = "1024x1024"  # used to derive aspect ratio for Imagen
+        # limits
+        self.imagen_max_num = 4  # Imagen returns up to 4 images
+        # fallbacks
+        self.DEFAULT_GEMINI_IMAGE_MODEL = "gemini-2.0-flash-preview-image-generation"
     @Slot()
     def run(self):
         try:
-            # Optional prompt enhancement
-            if not self.raw and not not self.inline:
+            # optional prompt enhancement
+            if not self.raw and not self.inline:
                 try:
                     self.signals.status.emit(trans('img.status.prompt.wait'))
                     bridge_context = BridgeContext(
@@ -135,68 +156,98 @@ class ImageWorker(QRunnable):
             self.signals.status.emit(trans('img.status.generating') + f": {self.input_prompt}...")
             paths: List[str] = []
-            if self._is_imagen(self.model):
-                # Imagen: generate_images
-                resp = self._imagen_generate(self.input_prompt, self.num, self.resolution)
-                imgs = getattr(resp, "generated_images", None) or []
-                for idx, gi in enumerate(imgs[: self.num]):
-                    data = self._extract_imagen_bytes(gi)
-                    p = self._save(idx, data)
-                    if p:
-                        paths.append(p)
+            if self.mode == Image.MODE_EDIT:
+                # EDIT
+                if self._using_vertex():
+                    # Vertex Imagen edit API (preferred)
+                    resp = self._imagen_edit(self.input_prompt, self.attachments, self.num)
+                    imgs = getattr(resp, "generated_images", None) or []
+                    for idx, gi in enumerate(imgs[: self.num]):
+                        data = self._extract_imagen_bytes(gi)
+                        p = self._save(idx, data)
+                        if p:
+                            paths.append(p)
+                else:
+                    # Developer API fallback via Gemini image model; force v1 to avoid 404
+                    resp = self._gemini_edit(self.input_prompt, self.attachments, self.num)
+                    saved = 0
+                    for cand in getattr(resp, "candidates", []) or []:
+                        parts = getattr(getattr(cand, "content", None), "parts", None) or []
+                        for part in parts:
+                            inline = getattr(part, "inline_data", None)
+                            if inline and getattr(inline, "data", None):
+                                p = self._save(saved, inline.data)
+                                if p:
+                                    paths.append(p)
+                                    saved += 1
+                                    if saved >= self.num:
+                                        break
+                        if saved >= self.num:
+                            break
             else:
-                # Gemini image preview: generate_content -> parts[].inline_data.data
-                resp = self.client.models.generate_content(
-                    model=self.model,
-                    contents=[self.input_prompt],
-                )
-                from PIL import Image as PILImage
-                from io import BytesIO
-                cands = getattr(resp, "candidates", None) or []
-                saved = 0
-                for cand in cands:
-                    parts = getattr(getattr(cand, "content", None), "parts", None) or []
-                    for part in parts:
-                        inline = getattr(part, "inline_data", None)
-                        if inline and getattr(inline, "data", None):
-                            data = inline.data
-                            p = self._save(saved, data)
-                            if p:
-                                paths.append(p)
-                                saved += 1
-                                if saved >= self.num:
-                                    break
-                    if saved >= self.num:
-                        break
+                # GENERATE
+                if self._is_imagen_generate(self.model) and self._using_vertex():
+                    num = min(self.num, self.imagen_max_num)
+                    resp = self._imagen_generate(self.input_prompt, num, self.resolution)
+                    imgs = getattr(resp, "generated_images", None) or []
+                    for idx, gi in enumerate(imgs[: num]):
+                        data = self._extract_imagen_bytes(gi)
+                        p = self._save(idx, data)
+                        if p:
+                            paths.append(p)
+                else:
+                    # Gemini Developer API image generation (needs response_modalities)
+                    resp = self.client.models.generate_content(
+                        model=self.model,
+                        contents=[self.input_prompt],
+                        config=gtypes.GenerateContentConfig(
+                            response_modalities=[gtypes.Modality.TEXT, gtypes.Modality.IMAGE],
+                        ),
+                    )
+                    saved = 0
+                    for cand in getattr(resp, "candidates", []) or []:
+                        parts = getattr(getattr(cand, "content", None), "parts", None) or []
+                        for part in parts:
+                            inline = getattr(part, "inline_data", None)
+                            if inline and getattr(inline, "data", None):
+                                p = self._save(saved, inline.data)
+                                if p:
+                                    paths.append(p)
+                                    saved += 1
+                                    if saved >= self.num:
+                                        break
+                        if saved >= self.num:
+                            break
             if self.inline:
                 self.signals.finished_inline.emit(self.ctx, paths, self.input_prompt)
             else:
                 self.signals.finished.emit(self.ctx, paths, self.input_prompt)
         except Exception as e:
             self.signals.error.emit(e)
         finally:
             self._cleanup()
-    def _is_imagen(self, model_id: str) -> bool:
-        """
-        Check if model_id is an Imagen model
+    # ---------- helpers ----------
-        :param model_id: Model ID
-        :return: True if Imagen model
+    def _using_vertex(self) -> bool:
         """
-        return "imagen" in str(model_id).lower()
-    def _imagen_generate(self, prompt: str, num: int, resolution: str):
+        Detect if Vertex AI is configured via env vars.
         """
-        Call Imagen generate_images with config (number_of_images, optional aspect_ratio).
+        val = os.getenv("GOOGLE_GENAI_USE_VERTEXAI") or ""
+        return str(val).lower() in ("1", "true", "yes", "y")
-        :param prompt: Prompt text
-        :param num: Number of images to generate
-        :param resolution: Resolution string, e.g. "1024x1024"
-        :return: GenerateImagesResponse
-        """
-        aspect = self._aspect_from_resolution(resolution)  # "1:1", "3:4", …
+    def _is_imagen_generate(self, model_id: str) -> bool:
+        """True for Imagen generate models."""
+        mid = str(model_id).lower()
+        return "imagen" in mid and "generate" in mid
+    def _imagen_generate(self, prompt: str, num: int, resolution: str):
+        """Imagen text-to-image."""
+        aspect = self._aspect_from_resolution(resolution)
         cfg = gtypes.GenerateImagesConfig(number_of_images=num)
         if aspect:
             cfg.aspect_ratio = aspect
@@ -206,32 +257,114 @@ class ImageWorker(QRunnable):
             config=cfg,
         )
-    def _aspect_from_resolution(self, resolution: str) -> Optional[str]:
+    def _imagen_edit(self, prompt: str, attachments: Dict[str, Any], num: int):
+        """
+        Imagen edit: requires Vertex AI and capability model (e.g. imagen-3.0-capability-001).
+        First attachment = base image, optional second = mask.
         """
-        Derive aspect ratio string from resolution.
+        paths = self._collect_attachment_paths(attachments)
+        if len(paths) == 0:
+            raise RuntimeError("No attachment provided for edit mode.")
+        base_img = gtypes.Image.from_file(location=paths[0])
+        raw_ref = gtypes.RawReferenceImage(reference_id=0, reference_image=base_img)
+        if len(paths) >= 2:
+            mask_img = gtypes.Image.from_file(location=paths[1])
+            mask_ref = gtypes.MaskReferenceImage(
+                reference_id=1,
+                reference_image=mask_img,
+                config=gtypes.MaskReferenceConfig(
+                    mask_mode="MASK_MODE_USER_PROVIDED",
+                    mask_dilation=0.0,
+                ),
+            )
+            edit_mode = "EDIT_MODE_INPAINT_INSERTION"
+        else:
+            mask_ref = gtypes.MaskReferenceImage(
+                reference_id=1,
+                reference_image=None,
+                config=gtypes.MaskReferenceConfig(
+                    mask_mode="MASK_MODE_BACKGROUND",
+                    mask_dilation=0.0,
+                ),
+            )
+            edit_mode = "EDIT_MODE_BGSWAP"
+        cfg = gtypes.EditImageConfig(
+            edit_mode=edit_mode,
+            number_of_images=min(num, self.imagen_max_num),
+            include_rai_reason=True,
+        )
+        # Ensure capability model for edit
+        model_id = "imagen-3.0-capability-001"
+        return self.client.models.edit_image(
+            model=model_id,
+            prompt=prompt,
+            reference_images=[raw_ref, mask_ref],
+            config=cfg,
+        )
-        :param resolution: Resolution string, e.g. "1024x1024"
-        :return: Aspect ratio string, e.g. "1:1", "3:4", or None if unknown
+    def _gemini_edit(self, prompt: str, attachments: Dict[str, Any], num: int):
         """
+        Gemini image-to-image editing via generate_content (Developer/Vertex depending on client).
+        The first attachment is used as the input image.
+        """
+        paths = self._collect_attachment_paths(attachments)
+        if len(paths) == 0:
+            raise RuntimeError("No attachment provided for edit mode.")
+        img_path = paths[0]
+        with open(img_path, "rb") as f:
+            img_bytes = f.read()
+        mime = self._guess_mime(img_path)
+        return self.client.models.generate_content(
+            model=self.model,
+            contents=[prompt, gtypes.Part.from_bytes(data=img_bytes, mime_type=mime)],
+        )
+    def _collect_attachment_paths(self, attachments: Dict[str, Any]) -> List[str]:
+        """Extract file paths from attachments dict."""
+        out: List[str] = []
+        for _, att in (attachments or {}).items():
+            try:
+                if getattr(att, "path", None) and os.path.exists(att.path):
+                    out.append(att.path)
+            except Exception:
+                continue
+        return out
+    def _aspect_from_resolution(self, resolution: str) -> Optional[str]:
+        """Derive aspect ratio for Imagen."""
         try:
-            w, h = [int(x) for x in resolution.lower().split("x")]
-            # Reduce to small set supported in docs
-            ratios = {(1, 1): "1:1", (3, 4): "3:4", (4, 3): "4:3", (9, 16): "9:16", (16, 9): "16:9"}
-            # Find nearest
             from math import gcd
+            tolerance = 0.08
+            w_str, h_str = resolution.lower().replace("×", "x").split("x")
+            w, h = int(w_str.strip()), int(h_str.strip())
+            if w <= 0 or h <= 0:
+                return None
+            supported = {
+                "1:1": 1 / 1,
+                "3:4": 3 / 4,
+                "4:3": 4 / 3,
+                "9:16": 9 / 16,
+                "16:9": 16 / 9,
+            }
             g = gcd(w, h)
-            key = (w // g, h // g)
-            return ratios.get(key)
+            key = f"{w // g}:{h // g}"
+            if key in supported:
+                return key
+            r = w / h
+            best = min(supported.keys(), key=lambda k: abs(r - supported[k]))
+            rel_err = abs(r - supported[best]) / supported[best]
+            return best if rel_err <= tolerance else None
         except Exception:
             return None
     def _extract_imagen_bytes(self, generated_image) -> Optional[bytes]:
-        """
-        Extract bytes from Imagen generated image object.
-        :param generated_image: GeneratedImage object
-        :return: Image bytes or None
-        """
+        """Extract bytes from Imagen GeneratedImage."""
         img = getattr(generated_image, "image", None)
         if not img:
             return None
@@ -243,7 +376,6 @@ class ImageWorker(QRunnable):
                 return base64.b64decode(data)
             except Exception:
                 return None
-        # fallback: url/uri if present
         url = getattr(img, "url", None) or getattr(img, "uri", None)
         if url:
             try:
@@ -255,13 +387,7 @@ class ImageWorker(QRunnable):
         return None
     def _save(self, idx: int, data: Optional[bytes]) -> Optional[str]:
-        """
-        Save image bytes to file and return path.
-        :param idx: Image index (for filename)
-        :param data: Image bytes
-        :return: Path string or None
-        """
+        """Save image bytes to file and return path."""
         if not data:
             return None
         name = (
@@ -276,8 +402,22 @@ class ImageWorker(QRunnable):
             return path
         return None
+    def _guess_mime(self, path: str) -> str:
+        """
+        Guess MIME type for a local image file.
+        """
+        mime, _ = mimetypes.guess_type(path)
+        if mime:
+            return mime
+        ext = os.path.splitext(path.lower())[1]
+        if ext in ('.jpg', '.jpeg'):
+            return 'image/jpeg'
+        if ext == '.webp':
+            return 'image/webp'
+        return 'image/png'
     def _cleanup(self):
-        """Cleanup resources"""
+        """Cleanup resources."""
         sig = self.signals
         self.signals = None
         if sig is not None:

pygpt_net/provider/api/google/realtime/__init__.py ADDED Viewed

@@ -0,0 +1,12 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ================================================== #
+# This file is a part of PYGPT package               #
+# Website: https://pygpt.net                         #
+# GitHub:  https://github.com/szczyglis-dev/py-gpt   #
+# MIT License                                        #
+# Created By  : Marcin Szczygliński                  #
+# Updated Date: 2025.08.31 23:00:00                  #
+# ================================================== #
+from .realtime import Realtime

pygpt-net 2.6.30__py3-none-any.whl → 2.6.32__py3-none-any.whl

pygpt-net 2.6.30__py3-none-any.whl → 2.6.32__py3-none-any.whl