PyPI - pygpt-net - Versions diffs - 2.7.2__py3-none-any.whl → 2.7.3__py3-none-any.whl - Mend

pygpt-net 2.7.2__py3-none-any.whl → 2.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

pygpt_net/CHANGELOG.txt +4 -0
pygpt_net/__init__.py +3 -3
pygpt_net/controller/chat/image.py +26 -3
pygpt_net/controller/media/media.py +70 -1
pygpt_net/data/config/config.json +5 -3
pygpt_net/data/config/models.json +3 -3
pygpt_net/data/locale/locale.de.ini +4 -0
pygpt_net/data/locale/locale.en.ini +4 -0
pygpt_net/data/locale/locale.es.ini +4 -0
pygpt_net/data/locale/locale.fr.ini +4 -0
pygpt_net/data/locale/locale.it.ini +4 -0
pygpt_net/data/locale/locale.pl.ini +5 -1
pygpt_net/data/locale/locale.uk.ini +4 -0
pygpt_net/data/locale/locale.zh.ini +4 -0
pygpt_net/provider/api/google/image.py +246 -7
pygpt_net/provider/api/google/video.py +152 -1
pygpt_net/provider/api/openai/image.py +163 -78
pygpt_net/provider/api/openai/video.py +73 -23
pygpt_net/provider/core/config/patch.py +10 -1
pygpt_net/ui/layout/chat/painter.py +0 -0
pygpt_net/ui/layout/toolbox/image.py +20 -10
pygpt_net/ui/layout/toolbox/raw.py +2 -2
pygpt_net/ui/layout/toolbox/video.py +21 -9
{pygpt_net-2.7.2.dist-info → pygpt_net-2.7.3.dist-info}/METADATA +12 -13
{pygpt_net-2.7.2.dist-info → pygpt_net-2.7.3.dist-info}/RECORD +27 -27
{pygpt_net-2.7.2.dist-info → pygpt_net-2.7.3.dist-info}/LICENSE +0 -0
{pygpt_net-2.7.2.dist-info → pygpt_net-2.7.3.dist-info}/WHEEL +0 -0
{pygpt_net-2.7.2.dist-info → pygpt_net-2.7.3.dist-info}/entry_points.txt +0 -0

pygpt_net/provider/api/google/image.py CHANGED Viewed

@@ -6,7 +6,7 @@
 # GitHub:  https://github.com/szczyglis-dev/py-gpt   #
 # MIT License                                        #
 # Created By  : Marcin Szczygliński                  #
-# Updated Date: 2025.12.25 20:00:00                  #
+# Updated Date: 2025.12.30 22:00:00                  #
 # ================================================== #
 import mimetypes
@@ -14,7 +14,7 @@ from typing import Optional, Dict, Any, List
 from google import genai
 from google.genai import types as gtypes
 from PySide6.QtCore import QObject, Signal, QRunnable, Slot
-import base64, datetime, os, requests
+import base64, datetime, os, requests, tempfile
 from pygpt_net.core.events import KernelEvent
 from pygpt_net.core.bridge.context import BridgeContext
@@ -80,6 +80,9 @@ class Image:
         worker.num = num
         worker.inline = inline
+        # remix: previous image reference (ID/URI/path) from extra
+        worker.image_id = extra.get("image_id")
         if attachments and len(attachments) > 0:
             mid = str(model.id).lower()
             if "imagen" in mid:
@@ -121,7 +124,7 @@ class ImageWorker(QRunnable):
         # params
         self.mode = Image.MODE_GENERATE
         self.attachments: Dict[str, Any] = {}
-        self.model = "imagen-4.0-generate-preview-06-06"
+        self.model = "imagen-4.0-generate-001"
         self.model_prompt = None
         self.input_prompt = ""
         self.system_prompt = ""
@@ -129,6 +132,7 @@ class ImageWorker(QRunnable):
         self.raw = False
         self.num = 1
         self.resolution = "1024x1024"  # used to derive aspect ratio or image_size
+        self.image_id: Optional[str] = None  # remix/extend previous image
         # limits
         self.imagen_max_num = 4  # Imagen returns up to 4 images
@@ -174,10 +178,89 @@ class ImageWorker(QRunnable):
                     self.signals.error.emit(e)
                     self.signals.status.emit(trans('img.status.prompt.error') + ": " + str(e))
-            self.signals.status.emit(trans('img.status.generating') + f": {self.input_prompt}...")
             paths: List[str] = []
+            # Remix path: if image_id provided, prefer image-to-image remix using the given identifier.
+            if self.image_id:
+                self.signals.status.emit(trans('img.status.generating') + " (remix): " + (self.input_prompt or "") + "...")
+                if self._using_vertex() and self._is_imagen_generate(self.model):
+                    # Vertex / Imagen edit flow with a single base image (no explicit mask).
+                    img_ref = self._imagen_image_from_identifier(self.image_id)
+                    if not img_ref:
+                        raise RuntimeError("Invalid image_id for remix. Provide a valid local path, Files API name, or gs:// URI.")
+                    raw_ref = gtypes.RawReferenceImage(reference_id=0, reference_image=img_ref)
+                    mask_ref = gtypes.MaskReferenceImage(
+                        reference_id=1,
+                        reference_image=None,
+                        config=gtypes.MaskReferenceConfig(
+                            mask_mode="MASK_MODE_BACKGROUND",
+                            mask_dilation=0.0,
+                        ),
+                    )
+                    cfg = gtypes.EditImageConfig(
+                        edit_mode="EDIT_MODE_DEFAULT",
+                        number_of_images=min(self.num, self.imagen_max_num),
+                        include_rai_reason=True,
+                    )
+                    resp = self.client.models.edit_image(
+                        model="imagen-3.0-capability-001",
+                        prompt=self.input_prompt or "",
+                        reference_images=[raw_ref, mask_ref],
+                        config=cfg,
+                    )
+                    imgs = getattr(resp, "generated_images", None) or []
+                    for idx, gi in enumerate(imgs[: min(self.num, self.imagen_max_num)]):
+                        data = self._extract_imagen_bytes(gi)
+                        p = self._save(idx, data)
+                        if p:
+                            paths.append(p)
+                    # store reference for future remix: prefer remote URI if available, otherwise saved path
+                    if paths:
+                        self._store_image_reference_imagen(imgs[0] if imgs else None, paths[0])
+                else:
+                    # Gemini Developer API remix via generate_content with prompt + reference image part.
+                    ref_part = self._image_part_from_identifier(self.image_id)
+                    if not ref_part:
+                        raise RuntimeError("Invalid image_id for remix. Provide a valid local path, Files API name, http(s) URL, or gs:// URI.")
+                    img_cfg = self._build_gemini_image_config(self.model, self.resolution)
+                    resp = self.client.models.generate_content(
+                        model=self.model or self.DEFAULT_GEMINI_IMAGE_MODEL,
+                        contents=[self.input_prompt or "", ref_part],
+                        config=gtypes.GenerateContentConfig(
+                            image_config=img_cfg,
+                        ),
+                    )
+                    saved = 0
+                    for cand in getattr(resp, "candidates", []) or []:
+                        parts = getattr(getattr(cand, "content", None), "parts", None) or []
+                        for part in parts:
+                            inline = getattr(part, "inline_data", None)
+                            if inline and getattr(inline, "data", None):
+                                p = self._save(saved, inline.data)
+                                if p:
+                                    paths.append(p)
+                                    saved += 1
+                                    if saved >= self.num:
+                                        break
+                        if saved >= self.num:
+                            break
+                    # store reference: saved local path is a reusable identifier for next remix
+                    if paths:
+                        self._store_image_id(paths[0])
+                if self.inline:
+                    self.signals.finished_inline.emit(self.ctx, paths, self.input_prompt)
+                else:
+                    self.signals.finished.emit(self.ctx, paths, self.input_prompt)
+                return  # remix path finished
+            # Normal paths
+            self.signals.status.emit(trans('img.status.generating') + f": {self.input_prompt}...")
             if self.mode == Image.MODE_EDIT:
                 # EDIT
                 if self._using_vertex():
@@ -189,6 +272,9 @@ class ImageWorker(QRunnable):
                         p = self._save(idx, data)
                         if p:
                             paths.append(p)
+                    # store reference
+                    if paths:
+                        self._store_image_reference_imagen(imgs[0] if imgs else None, paths[0])
                 else:
                     # Gemini Developer API via Gemini image models (Nano Banana / Nano Banana Pro)
                     resp = self._gemini_edit(self.input_prompt, self.attachments, self.num)
@@ -206,6 +292,9 @@ class ImageWorker(QRunnable):
                                         break
                         if saved >= self.num:
                             break
+                    # store reference
+                    if paths:
+                        self._store_image_id(paths[0])
             else:
                 # GENERATE
@@ -218,6 +307,9 @@ class ImageWorker(QRunnable):
                         p = self._save(idx, data)
                         if p:
                             paths.append(p)
+                    # store reference
+                    if paths:
+                        self._store_image_reference_imagen(imgs[0] if imgs else None, paths[0])
                 else:
                     # Gemini Developer API image generation (Nano Banana / Nano Banana Pro) with robust sizing + optional reference images
                     resp = self._gemini_generate_image(self.input_prompt, self.model, self.resolution)
@@ -235,6 +327,9 @@ class ImageWorker(QRunnable):
                                         break
                         if saved >= self.num:
                             break
+                    # store reference
+                    if paths:
+                        self._store_image_id(paths[0])
             if self.inline:
                 self.signals.finished_inline.emit(self.ctx, paths, self.input_prompt)
@@ -397,9 +492,7 @@ class ImageWorker(QRunnable):
         def _do_call(icfg: Optional[gtypes.ImageConfig]):
             contents: List[Any] = []
-            # Always include the textual prompt (can be empty string).
             contents.append(prompt or "")
-            # Append reference images, if any.
             if image_parts:
                 contents.extend(image_parts)
             return self.client.models.generate_content(
@@ -461,6 +554,117 @@ class ImageWorker(QRunnable):
                     return _do_call(cfg2)
             raise
+    def _image_part_from_identifier(self, identifier: str) -> Optional[gtypes.Part]:
+        """
+        Build a Gemini Part from a generic image identifier:
+        - Local path -> Part.from_bytes
+        - Files API name (files/...) -> resolve to URI + mime and use Part.from_uri
+        - gs:// URI -> Part.from_uri
+        - http(s) URL -> download bytes and use Part.from_bytes
+        - data: URI (base64) -> decode and use Part.from_bytes
+        """
+        if not identifier:
+            return None
+        ident = str(identifier).strip()
+        # Local file
+        if os.path.exists(ident):
+            mime = self._guess_mime(ident)
+            with open(ident, "rb") as f:
+                return gtypes.Part.from_bytes(data=f.read(), mime_type=mime)
+        # Files API
+        if ident.startswith("files/"):
+            try:
+                f = self.client.files.get(name=ident)
+                file_uri = getattr(f, "uri", None)
+                mime = getattr(f, "mime_type", None) or self._guess_mime_from_uri(file_uri)
+                if file_uri and mime:
+                    return gtypes.Part.from_uri(file_uri=file_uri, mime_type=mime)
+            except Exception:
+                pass
+        # gs://
+        if ident.startswith("gs://"):
+            mime = self._guess_mime_from_uri(ident) or "image/png"
+            return gtypes.Part.from_uri(file_uri=ident, mime_type=mime)
+        # http(s)
+        if ident.startswith("http://") or ident.startswith("https://"):
+            try:
+                r = requests.get(ident, timeout=60)
+                if r.status_code == 200:
+                    mime = r.headers.get("Content-Type") or self._guess_mime_from_uri(ident) or "image/png"
+                    return gtypes.Part.from_bytes(data=r.content, mime_type=mime)
+            except Exception:
+                return None
+        # data:
+        if ident.startswith("data:"):
+            try:
+                head, b64 = ident.split(",", 1)
+                mime = head.split(";")[0][5:] if ";" in head else "image/png"
+                return gtypes.Part.from_bytes(data=base64.b64decode(b64), mime_type=mime)
+            except Exception:
+                return None
+        return None
+    def _imagen_image_from_identifier(self, identifier: str) -> Optional[gtypes.Image]:
+        """
+        Build a gtypes.Image for Imagen edit:
+        - Local path -> Image.from_file
+        - Files API name -> resolve to URI; if gs:// use gcs_uri, otherwise download to temp and from_file
+        - gs:// -> Image(gcs_uri=...)
+        - http(s) -> download to temp file, then from_file
+        """
+        if not identifier:
+            return None
+        ident = str(identifier).strip()
+        if os.path.exists(ident):
+            return gtypes.Image.from_file(location=ident)
+        if ident.startswith("files/"):
+            try:
+                f = self.client.files.get(name=ident)
+                uri = getattr(f, "uri", None)
+                if uri and uri.startswith("gs://"):
+                    return gtypes.Image(gcs_uri=uri)
+                if uri and (uri.startswith("http://") or uri.startswith("https://")):
+                    tmp = self._download_to_temp(uri)
+                    return gtypes.Image.from_file(location=tmp) if tmp else None
+            except Exception:
+                return None
+        if ident.startswith("gs://"):
+            return gtypes.Image(gcs_uri=ident)
+        if ident.startswith("http://") or ident.startswith("https://"):
+            tmp = self._download_to_temp(ident)
+            return gtypes.Image.from_file(location=tmp) if tmp else None
+        return None
+    def _download_to_temp(self, url: str) -> Optional[str]:
+        """Download URL to a temporary file and return its path."""
+        try:
+            r = requests.get(url, timeout=60)
+            if r.status_code == 200:
+                ext = ".png"
+                ct = r.headers.get("Content-Type") or ""
+                if "jpeg" in ct:
+                    ext = ".jpg"
+                elif "webp" in ct:
+                    ext = ".webp"
+                fd, path = tempfile.mkstemp(suffix=ext)
+                with os.fdopen(fd, "wb") as f:
+                    f.write(r.content)
+                return path
+        except Exception:
+            return None
+        return None
     def _collect_attachment_paths(self, attachments: Dict[str, Any]) -> List[str]:
         """Extract file paths from attachments dict."""
         out: List[str] = []
@@ -527,6 +731,34 @@ class ImageWorker(QRunnable):
                 pass
         return None
+    def _store_image_reference_imagen(self, generated_image_item: Any, fallback_path: Optional[str]) -> None:
+        """
+        Persist a reusable image reference to ctx.extra['image_id'].
+        Prefer remote URI/name if provided by Imagen; fallback to the saved local path.
+        """
+        ref = None
+        try:
+            img = getattr(generated_image_item, "image", None) if generated_image_item else None
+            if img:
+                ref = getattr(img, "uri", None) or getattr(img, "url", None) or getattr(img, "name", None)
+        except Exception:
+            ref = None
+        self._store_image_id(ref or fallback_path)
+    def _store_image_id(self, value: Optional[str]) -> None:
+        """
+        Store image_id reference in ctx.extra and persist the context item.
+        """
+        if not value:
+            return
+        try:
+            if not isinstance(self.ctx.extra, dict):
+                self.ctx.extra = {}
+            self.ctx.extra["image_id"] = str(value)
+            self.window.core.ctx.update_item(self.ctx)
+        except Exception:
+            pass
     def _save(self, idx: int, data: Optional[bytes]) -> Optional[str]:
         """Save image bytes to file and return path."""
         if not data:
@@ -559,6 +791,13 @@ class ImageWorker(QRunnable):
             return 'image/heic'
         return 'image/png'
+    def _guess_mime_from_uri(self, uri: Optional[str]) -> Optional[str]:
+        """Best-effort MIME guess from URI or file extension."""
+        if not uri:
+            return None
+        mime, _ = mimetypes.guess_type(uri)
+        return mime or None
     def _cleanup(self):
         """Cleanup resources."""
         sig = self.signals

pygpt_net/provider/api/google/video.py CHANGED Viewed

@@ -6,7 +6,7 @@
 # GitHub:  https://github.com/szczyglis-dev/py-gpt   #
 # MIT License                                        #
 # Created By  : Marcin Szczygliński                  #
-# Updated Date: 2025.12.25 20:00:00                  #
+# Updated Date: 2025.12.30 22:00:00                  #
 # ================================================== #
 import base64, datetime, os, requests
@@ -54,6 +54,7 @@ class Video:
         prompt = context.prompt
         num = int(extra.get("num", 1))
         inline = bool(extra.get("inline", False))
+        video_id = extra.get("video_id")
         # decide sub-mode based on attachments (image-to-video when image is attached)
         sub_mode = self.MODE_GENERATE
@@ -80,6 +81,7 @@ class Video:
         worker.raw = self.window.core.config.get('img_raw')
         worker.num = num
         worker.inline = inline
+        worker.video_id = video_id
         # optional params
         worker.aspect_ratio = str(extra.get("aspect_ratio") or self.window.core.config.get('video.aspect_ratio') or "16:9")
@@ -141,6 +143,7 @@ class VideoWorker(QRunnable):
         self.input_prompt = ""
         self.system_prompt = ""
         self.inline = False
+        self.video_id = None
         self.raw = False
         self.num = 1
@@ -162,6 +165,7 @@ class VideoWorker(QRunnable):
     @Slot()
     def run(self):
         try:
+            kernel = self.window.controller.kernel
             # optional prompt enhancement
             if not self.raw and not self.inline and self.input_prompt:
                 try:
@@ -208,6 +212,70 @@ class VideoWorker(QRunnable):
             cfg_try = dict(cfg_kwargs)
             cfg_try["duration_seconds"] = int(self._duration_for_model(self.model, self.duration_seconds))
+            # remix / extension: if video_id provided, prefer video-to-video path
+            is_remix = bool(self.video_id)
+            if is_remix:
+                # Veo extension support varies by API and model; choose a compatible model if needed
+                model_for_ext = self._select_extension_model(self.model)
+                if model_for_ext != self.model:
+                    self.signals.status.emit(f"Please switch model for extension: {self.model} -> {model_for_ext}")
+                    # self.model = model_for_ext # <-- do not override user selection, just inform
+                # Build video input from identifier (URI, files/<id>, http(s), gs://, or local path)
+                video_input = self._video_from_identifier(self.video_id)
+                if not video_input:
+                    raise RuntimeError("Invalid video_id for remix/extension. Provide a valid URI, file name, or local path.")
+                # Minimal config for extension to avoid server-side rejections
+                ext_config = gtypes.GenerateVideosConfig(number_of_videos=1)
+                label = trans('vid.status.generating') + " (remix)"
+                self.signals.status.emit(label + f": {self.input_prompt or ''}...")
+                # Start operation: video extension, prompt optional
+                operation = self.client.models.generate_videos(
+                    model=self.model or self.DEFAULT_VEO_MODEL,
+                    prompt=self.input_prompt or "",
+                    video=video_input,
+                    config=ext_config,
+                )
+                # poll until done
+                while not getattr(operation, "done", False):
+                    if kernel.stopped():
+                        break
+                    time.sleep(10)
+                    if kernel.stopped():
+                        break
+                    operation = self.client.operations.get(operation)
+                # extract response payload
+                op_resp = getattr(operation, "response", None) or getattr(operation, "result", None)
+                if not op_resp:
+                    raise RuntimeError("Empty operation response.")
+                gen_list = getattr(op_resp, "generated_videos", None) or []
+                if not gen_list:
+                    raise RuntimeError("No videos generated.")
+                # store remote reference for next remix calls (URI/name) in ctx
+                self._store_video_reference(gen_list[0])
+                # download and save
+                paths: List[str] = []
+                for idx, gv in enumerate(gen_list[:1]):
+                    data = self._download_video_bytes(getattr(gv, "video", None))
+                    p = self._save(idx, data)
+                    if p:
+                        paths.append(p)
+                if self.inline:
+                    self.signals.finished_inline.emit(self.ctx, paths, self.input_prompt)
+                else:
+                    self.signals.finished.emit(self.ctx, paths, self.input_prompt)
+                return  # remix path completed
+            # normal generation path (text-to-video or image-to-video)
             self.signals.status.emit(trans('vid.status.generating') + f": {self.input_prompt}...")
             try:
@@ -235,7 +303,11 @@ class VideoWorker(QRunnable):
             # poll until done
             while not getattr(operation, "done", False):
+                if kernel.stopped():
+                    break
                 time.sleep(10)
+                if kernel.stopped():
+                    break
                 operation = self.client.operations.get(operation)
             # extract response payload
@@ -247,6 +319,9 @@ class VideoWorker(QRunnable):
             if not gen_list:
                 raise RuntimeError("No videos generated.")
+            # store remote reference for potential future remix/extension
+            self._store_video_reference(gen_list[0])
             # download and save all outputs up to num
             paths: List[str] = []
             for idx, gv in enumerate(gen_list[:num]):
@@ -329,6 +404,82 @@ class VideoWorker(QRunnable):
                 continue
         return None
+    def _video_from_identifier(self, identifier: str) -> Optional[gtypes.Video]:
+        """
+        Build a Video object from a generic identifier:
+        - Local file path -> upload via types.Video.from_file
+        - files/<id> -> resolve to URI using Files API
+        - http(s) or gs:// URI -> pass-through
+        """
+        try:
+            if not identifier:
+                return None
+            ident = str(identifier).strip()
+            # Local path
+            if os.path.exists(ident):
+                return gtypes.Video.from_file(ident)
+            # Files API name
+            if ident.startswith("files/"):
+                try:
+                    f = self.client.files.get(name=ident)
+                    uri = getattr(f, "uri", None)
+                    if uri:
+                        return gtypes.Video(uri=uri)
+                except Exception:
+                    pass
+            # Generic URI (Gemini accepts URIs, Vertex expects GCS; SDK honors both via uri field)
+            if ident.startswith("http://") or ident.startswith("https://") or ident.startswith("gs://"):
+                return gtypes.Video(uri=ident)
+        except Exception:
+            return None
+        return None
+    def _select_extension_model(self, model_id: str) -> str:
+        """
+        Choose a compatible model for video extension:
+        - Gemini API: Veo 3.1 only supports extension
+        - Vertex AI: extension supported on Veo 2.0
+        """
+        mid = str(model_id or "").lower()
+        use_vertex = bool(getattr(self.client, "vertexai", False))
+        # Gemini Developer API path
+        if not use_vertex:
+            if "veo-3.1" in mid:
+                return model_id
+            # Prefer 3.1 preview if user selected older Veo
+            return "veo-3.1-generate-preview"
+        # Vertex AI path
+        if "veo-2.0" in mid:
+            return model_id
+        return "veo-2.0-generate-001"
+    def _store_video_reference(self, generated_video_item: Any) -> None:
+        """
+        Persist a reusable video reference (URI or name) to ctx.extra['video_id'] for future remix/extension calls.
+        """
+        try:
+            vref = getattr(generated_video_item, "video", None)
+            if not vref:
+                return
+            # Prefer URI, fallback to name
+            uri = getattr(vref, "uri", None) or getattr(vref, "download_uri", None)
+            name = getattr(vref, "name", None)
+            ref = uri or name
+            if not ref:
+                return
+            if not isinstance(self.ctx.extra, dict):
+                self.ctx.extra = {}
+            self.ctx.extra["video_id"] = ref
+            self.window.core.ctx.update_item(self.ctx)
+        except Exception:
+            pass
     def _download_video_bytes(self, file_ref) -> Optional[bytes]:
         """
         Download video bytes using the Files service.

pygpt-net 2.7.2__py3-none-any.whl → 2.7.3__py3-none-any.whl

pygpt-net 2.7.2__py3-none-any.whl → 2.7.3__py3-none-any.whl