pygpt-net 2.7.2__py3-none-any.whl → 2.7.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pygpt_net/CHANGELOG.txt +4 -0
- pygpt_net/__init__.py +3 -3
- pygpt_net/controller/chat/image.py +26 -3
- pygpt_net/controller/media/media.py +70 -1
- pygpt_net/data/config/config.json +5 -3
- pygpt_net/data/config/models.json +3 -3
- pygpt_net/data/locale/locale.de.ini +4 -0
- pygpt_net/data/locale/locale.en.ini +4 -0
- pygpt_net/data/locale/locale.es.ini +4 -0
- pygpt_net/data/locale/locale.fr.ini +4 -0
- pygpt_net/data/locale/locale.it.ini +4 -0
- pygpt_net/data/locale/locale.pl.ini +5 -1
- pygpt_net/data/locale/locale.uk.ini +4 -0
- pygpt_net/data/locale/locale.zh.ini +4 -0
- pygpt_net/provider/api/google/image.py +246 -7
- pygpt_net/provider/api/google/video.py +152 -1
- pygpt_net/provider/api/openai/image.py +163 -78
- pygpt_net/provider/api/openai/video.py +73 -23
- pygpt_net/provider/core/config/patch.py +10 -1
- pygpt_net/ui/layout/chat/painter.py +0 -0
- pygpt_net/ui/layout/toolbox/image.py +20 -10
- pygpt_net/ui/layout/toolbox/raw.py +2 -2
- pygpt_net/ui/layout/toolbox/video.py +21 -9
- {pygpt_net-2.7.2.dist-info → pygpt_net-2.7.3.dist-info}/METADATA +12 -13
- {pygpt_net-2.7.2.dist-info → pygpt_net-2.7.3.dist-info}/RECORD +27 -27
- {pygpt_net-2.7.2.dist-info → pygpt_net-2.7.3.dist-info}/LICENSE +0 -0
- {pygpt_net-2.7.2.dist-info → pygpt_net-2.7.3.dist-info}/WHEEL +0 -0
- {pygpt_net-2.7.2.dist-info → pygpt_net-2.7.3.dist-info}/entry_points.txt +0 -0
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
|
7
7
|
# MIT License #
|
|
8
8
|
# Created By : Marcin Szczygliński #
|
|
9
|
-
# Updated Date: 2025.12.
|
|
9
|
+
# Updated Date: 2025.12.30 22:00:00 #
|
|
10
10
|
# ================================================== #
|
|
11
11
|
|
|
12
12
|
import mimetypes
|
|
@@ -14,7 +14,7 @@ from typing import Optional, Dict, Any, List
|
|
|
14
14
|
from google import genai
|
|
15
15
|
from google.genai import types as gtypes
|
|
16
16
|
from PySide6.QtCore import QObject, Signal, QRunnable, Slot
|
|
17
|
-
import base64, datetime, os, requests
|
|
17
|
+
import base64, datetime, os, requests, tempfile
|
|
18
18
|
|
|
19
19
|
from pygpt_net.core.events import KernelEvent
|
|
20
20
|
from pygpt_net.core.bridge.context import BridgeContext
|
|
@@ -80,6 +80,9 @@ class Image:
|
|
|
80
80
|
worker.num = num
|
|
81
81
|
worker.inline = inline
|
|
82
82
|
|
|
83
|
+
# remix: previous image reference (ID/URI/path) from extra
|
|
84
|
+
worker.image_id = extra.get("image_id")
|
|
85
|
+
|
|
83
86
|
if attachments and len(attachments) > 0:
|
|
84
87
|
mid = str(model.id).lower()
|
|
85
88
|
if "imagen" in mid:
|
|
@@ -121,7 +124,7 @@ class ImageWorker(QRunnable):
|
|
|
121
124
|
# params
|
|
122
125
|
self.mode = Image.MODE_GENERATE
|
|
123
126
|
self.attachments: Dict[str, Any] = {}
|
|
124
|
-
self.model = "imagen-4.0-generate-
|
|
127
|
+
self.model = "imagen-4.0-generate-001"
|
|
125
128
|
self.model_prompt = None
|
|
126
129
|
self.input_prompt = ""
|
|
127
130
|
self.system_prompt = ""
|
|
@@ -129,6 +132,7 @@ class ImageWorker(QRunnable):
|
|
|
129
132
|
self.raw = False
|
|
130
133
|
self.num = 1
|
|
131
134
|
self.resolution = "1024x1024" # used to derive aspect ratio or image_size
|
|
135
|
+
self.image_id: Optional[str] = None # remix/extend previous image
|
|
132
136
|
|
|
133
137
|
# limits
|
|
134
138
|
self.imagen_max_num = 4 # Imagen returns up to 4 images
|
|
@@ -174,10 +178,89 @@ class ImageWorker(QRunnable):
|
|
|
174
178
|
self.signals.error.emit(e)
|
|
175
179
|
self.signals.status.emit(trans('img.status.prompt.error') + ": " + str(e))
|
|
176
180
|
|
|
177
|
-
self.signals.status.emit(trans('img.status.generating') + f": {self.input_prompt}...")
|
|
178
|
-
|
|
179
181
|
paths: List[str] = []
|
|
180
182
|
|
|
183
|
+
# Remix path: if image_id provided, prefer image-to-image remix using the given identifier.
|
|
184
|
+
if self.image_id:
|
|
185
|
+
self.signals.status.emit(trans('img.status.generating') + " (remix): " + (self.input_prompt or "") + "...")
|
|
186
|
+
if self._using_vertex() and self._is_imagen_generate(self.model):
|
|
187
|
+
# Vertex / Imagen edit flow with a single base image (no explicit mask).
|
|
188
|
+
img_ref = self._imagen_image_from_identifier(self.image_id)
|
|
189
|
+
if not img_ref:
|
|
190
|
+
raise RuntimeError("Invalid image_id for remix. Provide a valid local path, Files API name, or gs:// URI.")
|
|
191
|
+
|
|
192
|
+
raw_ref = gtypes.RawReferenceImage(reference_id=0, reference_image=img_ref)
|
|
193
|
+
mask_ref = gtypes.MaskReferenceImage(
|
|
194
|
+
reference_id=1,
|
|
195
|
+
reference_image=None,
|
|
196
|
+
config=gtypes.MaskReferenceConfig(
|
|
197
|
+
mask_mode="MASK_MODE_BACKGROUND",
|
|
198
|
+
mask_dilation=0.0,
|
|
199
|
+
),
|
|
200
|
+
)
|
|
201
|
+
cfg = gtypes.EditImageConfig(
|
|
202
|
+
edit_mode="EDIT_MODE_DEFAULT",
|
|
203
|
+
number_of_images=min(self.num, self.imagen_max_num),
|
|
204
|
+
include_rai_reason=True,
|
|
205
|
+
)
|
|
206
|
+
resp = self.client.models.edit_image(
|
|
207
|
+
model="imagen-3.0-capability-001",
|
|
208
|
+
prompt=self.input_prompt or "",
|
|
209
|
+
reference_images=[raw_ref, mask_ref],
|
|
210
|
+
config=cfg,
|
|
211
|
+
)
|
|
212
|
+
imgs = getattr(resp, "generated_images", None) or []
|
|
213
|
+
for idx, gi in enumerate(imgs[: min(self.num, self.imagen_max_num)]):
|
|
214
|
+
data = self._extract_imagen_bytes(gi)
|
|
215
|
+
p = self._save(idx, data)
|
|
216
|
+
if p:
|
|
217
|
+
paths.append(p)
|
|
218
|
+
|
|
219
|
+
# store reference for future remix: prefer remote URI if available, otherwise saved path
|
|
220
|
+
if paths:
|
|
221
|
+
self._store_image_reference_imagen(imgs[0] if imgs else None, paths[0])
|
|
222
|
+
|
|
223
|
+
else:
|
|
224
|
+
# Gemini Developer API remix via generate_content with prompt + reference image part.
|
|
225
|
+
ref_part = self._image_part_from_identifier(self.image_id)
|
|
226
|
+
if not ref_part:
|
|
227
|
+
raise RuntimeError("Invalid image_id for remix. Provide a valid local path, Files API name, http(s) URL, or gs:// URI.")
|
|
228
|
+
img_cfg = self._build_gemini_image_config(self.model, self.resolution)
|
|
229
|
+
resp = self.client.models.generate_content(
|
|
230
|
+
model=self.model or self.DEFAULT_GEMINI_IMAGE_MODEL,
|
|
231
|
+
contents=[self.input_prompt or "", ref_part],
|
|
232
|
+
config=gtypes.GenerateContentConfig(
|
|
233
|
+
image_config=img_cfg,
|
|
234
|
+
),
|
|
235
|
+
)
|
|
236
|
+
saved = 0
|
|
237
|
+
for cand in getattr(resp, "candidates", []) or []:
|
|
238
|
+
parts = getattr(getattr(cand, "content", None), "parts", None) or []
|
|
239
|
+
for part in parts:
|
|
240
|
+
inline = getattr(part, "inline_data", None)
|
|
241
|
+
if inline and getattr(inline, "data", None):
|
|
242
|
+
p = self._save(saved, inline.data)
|
|
243
|
+
if p:
|
|
244
|
+
paths.append(p)
|
|
245
|
+
saved += 1
|
|
246
|
+
if saved >= self.num:
|
|
247
|
+
break
|
|
248
|
+
if saved >= self.num:
|
|
249
|
+
break
|
|
250
|
+
|
|
251
|
+
# store reference: saved local path is a reusable identifier for next remix
|
|
252
|
+
if paths:
|
|
253
|
+
self._store_image_id(paths[0])
|
|
254
|
+
|
|
255
|
+
if self.inline:
|
|
256
|
+
self.signals.finished_inline.emit(self.ctx, paths, self.input_prompt)
|
|
257
|
+
else:
|
|
258
|
+
self.signals.finished.emit(self.ctx, paths, self.input_prompt)
|
|
259
|
+
return # remix path finished
|
|
260
|
+
|
|
261
|
+
# Normal paths
|
|
262
|
+
self.signals.status.emit(trans('img.status.generating') + f": {self.input_prompt}...")
|
|
263
|
+
|
|
181
264
|
if self.mode == Image.MODE_EDIT:
|
|
182
265
|
# EDIT
|
|
183
266
|
if self._using_vertex():
|
|
@@ -189,6 +272,9 @@ class ImageWorker(QRunnable):
|
|
|
189
272
|
p = self._save(idx, data)
|
|
190
273
|
if p:
|
|
191
274
|
paths.append(p)
|
|
275
|
+
# store reference
|
|
276
|
+
if paths:
|
|
277
|
+
self._store_image_reference_imagen(imgs[0] if imgs else None, paths[0])
|
|
192
278
|
else:
|
|
193
279
|
# Gemini Developer API via Gemini image models (Nano Banana / Nano Banana Pro)
|
|
194
280
|
resp = self._gemini_edit(self.input_prompt, self.attachments, self.num)
|
|
@@ -206,6 +292,9 @@ class ImageWorker(QRunnable):
|
|
|
206
292
|
break
|
|
207
293
|
if saved >= self.num:
|
|
208
294
|
break
|
|
295
|
+
# store reference
|
|
296
|
+
if paths:
|
|
297
|
+
self._store_image_id(paths[0])
|
|
209
298
|
|
|
210
299
|
else:
|
|
211
300
|
# GENERATE
|
|
@@ -218,6 +307,9 @@ class ImageWorker(QRunnable):
|
|
|
218
307
|
p = self._save(idx, data)
|
|
219
308
|
if p:
|
|
220
309
|
paths.append(p)
|
|
310
|
+
# store reference
|
|
311
|
+
if paths:
|
|
312
|
+
self._store_image_reference_imagen(imgs[0] if imgs else None, paths[0])
|
|
221
313
|
else:
|
|
222
314
|
# Gemini Developer API image generation (Nano Banana / Nano Banana Pro) with robust sizing + optional reference images
|
|
223
315
|
resp = self._gemini_generate_image(self.input_prompt, self.model, self.resolution)
|
|
@@ -235,6 +327,9 @@ class ImageWorker(QRunnable):
|
|
|
235
327
|
break
|
|
236
328
|
if saved >= self.num:
|
|
237
329
|
break
|
|
330
|
+
# store reference
|
|
331
|
+
if paths:
|
|
332
|
+
self._store_image_id(paths[0])
|
|
238
333
|
|
|
239
334
|
if self.inline:
|
|
240
335
|
self.signals.finished_inline.emit(self.ctx, paths, self.input_prompt)
|
|
@@ -397,9 +492,7 @@ class ImageWorker(QRunnable):
|
|
|
397
492
|
|
|
398
493
|
def _do_call(icfg: Optional[gtypes.ImageConfig]):
|
|
399
494
|
contents: List[Any] = []
|
|
400
|
-
# Always include the textual prompt (can be empty string).
|
|
401
495
|
contents.append(prompt or "")
|
|
402
|
-
# Append reference images, if any.
|
|
403
496
|
if image_parts:
|
|
404
497
|
contents.extend(image_parts)
|
|
405
498
|
return self.client.models.generate_content(
|
|
@@ -461,6 +554,117 @@ class ImageWorker(QRunnable):
|
|
|
461
554
|
return _do_call(cfg2)
|
|
462
555
|
raise
|
|
463
556
|
|
|
557
|
+
def _image_part_from_identifier(self, identifier: str) -> Optional[gtypes.Part]:
    """
    Build a Gemini Part from a generic image identifier.

    Identifier forms, checked in this order:
    - Local file path -> Part.from_bytes
    - Files API name (files/...) -> resolve to URI + mime and use Part.from_uri
    - gs:// URI -> Part.from_uri
    - http(s) URL -> download bytes and use Part.from_bytes
    - data: URI (base64) -> decode and use Part.from_bytes

    :param identifier: image reference (path, Files API name, URI or data: URI)
    :return: Part, or None if the identifier is empty or cannot be resolved
    """
    if not identifier:
        return None
    ident = str(identifier).strip()

    # Local file (isfile, not exists: a directory path cannot be opened for reading)
    if os.path.isfile(ident):
        mime = self._guess_mime(ident)
        with open(ident, "rb") as f:
            return gtypes.Part.from_bytes(data=f.read(), mime_type=mime)

    # Files API
    if ident.startswith("files/"):
        try:
            f = self.client.files.get(name=ident)
            file_uri = getattr(f, "uri", None)
            mime = getattr(f, "mime_type", None) or self._guess_mime_from_uri(file_uri)
            if file_uri and mime:
                return gtypes.Part.from_uri(file_uri=file_uri, mime_type=mime)
        except Exception:
            # best-effort lookup: an unresolvable name is reported as "no part"
            pass
        # a files/ name matches none of the remaining forms
        return None

    # gs://
    if ident.startswith("gs://"):
        mime = self._guess_mime_from_uri(ident) or "image/png"
        return gtypes.Part.from_uri(file_uri=ident, mime_type=mime)

    # http(s)
    if ident.startswith(("http://", "https://")):
        try:
            r = requests.get(ident, timeout=60)
            if r.status_code == 200:
                # keep only the media type, drop parameters such as "; charset=..."
                header_mime = (r.headers.get("Content-Type") or "").split(";", 1)[0].strip()
                mime = header_mime or self._guess_mime_from_uri(ident) or "image/png"
                return gtypes.Part.from_bytes(data=r.content, mime_type=mime)
        except Exception:
            return None
        return None

    # data: (only base64 payloads are decoded here)
    if ident.startswith("data:"):
        try:
            head, b64 = ident.split(",", 1)
            # head is "data:<mediatype>[;param...]"; the media type may be absent
            mime = head[5:].split(";", 1)[0].strip() or "image/png"
            return gtypes.Part.from_bytes(data=base64.b64decode(b64), mime_type=mime)
        except Exception:
            return None

    return None
|
|
612
|
+
|
|
613
|
+
def _imagen_image_from_identifier(self, identifier: str) -> Optional[gtypes.Image]:
    """
    Build a gtypes.Image for Imagen edit.

    Identifier forms, checked in this order:
    - Local path -> Image.from_file
    - Files API name -> resolve to URI; if gs:// use gcs_uri, otherwise download to temp and from_file
    - gs:// -> Image(gcs_uri=...)
    - http(s) -> download to temp file, then from_file

    :param identifier: image reference (path, Files API name or URI)
    :return: Image, or None if the identifier is empty or cannot be resolved
    """
    if not identifier:
        return None
    ident = str(identifier).strip()

    def load_remote(url):
        """Download url to a temp file, load it as an Image, then delete the temp file."""
        tmp = self._download_to_temp(url)
        if not tmp:
            return None
        try:
            return gtypes.Image.from_file(location=tmp)
        finally:
            # NOTE(review): relies on Image.from_file reading the bytes eagerly
            # (google-genai SDK behaviour) - confirm before changing SDK version
            try:
                os.remove(tmp)
            except OSError:
                pass

    # isfile, not exists: a directory path is not a loadable image
    if os.path.isfile(ident):
        return gtypes.Image.from_file(location=ident)

    if ident.startswith("files/"):
        try:
            f = self.client.files.get(name=ident)
            uri = getattr(f, "uri", None)
            if uri and uri.startswith("gs://"):
                return gtypes.Image(gcs_uri=uri)
            if uri and uri.startswith(("http://", "https://")):
                return load_remote(uri)
        except Exception:
            return None
        # a files/ name matches none of the remaining forms
        return None

    if ident.startswith("gs://"):
        return gtypes.Image(gcs_uri=ident)

    if ident.startswith(("http://", "https://")):
        return load_remote(ident)

    return None
|
|
648
|
+
|
|
649
|
+
def _download_to_temp(self, url: str) -> Optional[str]:
    """
    Download URL to a temporary file and return its path.

    The file is created with tempfile.mkstemp and is not removed here:
    the caller owns the returned path and should delete it when done.

    :param url: http(s) URL to fetch
    :return: path to the temporary file, or None on any failure or non-200 response
    """
    try:
        r = requests.get(url, timeout=60)
        if r.status_code != 200:
            return None
        content = r.content
        # header values are case-insensitive, so normalize before matching
        ct = (r.headers.get("Content-Type") or "").lower()
    except Exception:
        return None

    ext = ".png"
    if "jpeg" in ct:
        ext = ".jpg"
    elif "webp" in ct:
        ext = ".webp"

    path = None
    try:
        fd, path = tempfile.mkstemp(suffix=ext)
        with os.fdopen(fd, "wb") as f:
            f.write(content)
        return path
    except Exception:
        # do not leave a partially written temp file behind
        if path:
            try:
                os.remove(path)
            except OSError:
                pass
        return None
|
|
667
|
+
|
|
464
668
|
def _collect_attachment_paths(self, attachments: Dict[str, Any]) -> List[str]:
|
|
465
669
|
"""Extract file paths from attachments dict."""
|
|
466
670
|
out: List[str] = []
|
|
@@ -527,6 +731,34 @@ class ImageWorker(QRunnable):
|
|
|
527
731
|
pass
|
|
528
732
|
return None
|
|
529
733
|
|
|
734
|
+
def _store_image_reference_imagen(self, generated_image_item: Any, fallback_path: Optional[str]) -> None:
    """
    Persist a reusable image reference to ctx.extra['image_id'].

    Prefer a remote reference exposed by the generated item's ``image``
    object; fall back to the saved local path.

    :param generated_image_item: item from the response's generated_images (may be None)
    :param fallback_path: local path of the saved image, used when no remote reference exists
    """
    ref = None
    try:
        img = getattr(generated_image_item, "image", None) if generated_image_item else None
        if img:
            # original probe order first; gcs_uri is the remote field of google-genai types.Image
            for attr in ("uri", "url", "name", "gcs_uri"):
                ref = getattr(img, attr, None)
                if ref:
                    break
    except Exception:
        ref = None
    self._store_image_id(ref or fallback_path)
|
|
747
|
+
|
|
748
|
+
def _store_image_id(self, value: Optional[str]) -> None:
    """
    Write the image reference to ctx.extra['image_id'] and persist the context item.

    Does nothing for an empty value. Any error while updating the context
    is ignored (best-effort).

    :param value: image reference to remember (stored as str)
    """
    if value:
        try:
            item = self.ctx
            # extra may be missing or not a dict yet
            if not isinstance(item.extra, dict):
                item.extra = {}
            item.extra["image_id"] = str(value)
            self.window.core.ctx.update_item(item)
        except Exception:
            pass
|
|
761
|
+
|
|
530
762
|
def _save(self, idx: int, data: Optional[bytes]) -> Optional[str]:
|
|
531
763
|
"""Save image bytes to file and return path."""
|
|
532
764
|
if not data:
|
|
@@ -559,6 +791,13 @@ class ImageWorker(QRunnable):
|
|
|
559
791
|
return 'image/heic'
|
|
560
792
|
return 'image/png'
|
|
561
793
|
|
|
794
|
+
def _guess_mime_from_uri(self, uri: Optional[str]) -> Optional[str]:
    """
    Guess the MIME type from a URI or file name via mimetypes.guess_type.

    :param uri: URI or path (may be None or empty)
    :return: MIME type, or None when uri is empty or the type is unknown
    """
    if uri:
        guessed = mimetypes.guess_type(uri)[0]
        if guessed:
            return guessed
    return None
|
|
800
|
+
|
|
562
801
|
def _cleanup(self):
|
|
563
802
|
"""Cleanup resources."""
|
|
564
803
|
sig = self.signals
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
|
7
7
|
# MIT License #
|
|
8
8
|
# Created By : Marcin Szczygliński #
|
|
9
|
-
# Updated Date: 2025.12.
|
|
9
|
+
# Updated Date: 2025.12.30 22:00:00 #
|
|
10
10
|
# ================================================== #
|
|
11
11
|
|
|
12
12
|
import base64, datetime, os, requests
|
|
@@ -54,6 +54,7 @@ class Video:
|
|
|
54
54
|
prompt = context.prompt
|
|
55
55
|
num = int(extra.get("num", 1))
|
|
56
56
|
inline = bool(extra.get("inline", False))
|
|
57
|
+
video_id = extra.get("video_id")
|
|
57
58
|
|
|
58
59
|
# decide sub-mode based on attachments (image-to-video when image is attached)
|
|
59
60
|
sub_mode = self.MODE_GENERATE
|
|
@@ -80,6 +81,7 @@ class Video:
|
|
|
80
81
|
worker.raw = self.window.core.config.get('img_raw')
|
|
81
82
|
worker.num = num
|
|
82
83
|
worker.inline = inline
|
|
84
|
+
worker.video_id = video_id
|
|
83
85
|
|
|
84
86
|
# optional params
|
|
85
87
|
worker.aspect_ratio = str(extra.get("aspect_ratio") or self.window.core.config.get('video.aspect_ratio') or "16:9")
|
|
@@ -141,6 +143,7 @@ class VideoWorker(QRunnable):
|
|
|
141
143
|
self.input_prompt = ""
|
|
142
144
|
self.system_prompt = ""
|
|
143
145
|
self.inline = False
|
|
146
|
+
self.video_id = None
|
|
144
147
|
self.raw = False
|
|
145
148
|
self.num = 1
|
|
146
149
|
|
|
@@ -162,6 +165,7 @@ class VideoWorker(QRunnable):
|
|
|
162
165
|
@Slot()
|
|
163
166
|
def run(self):
|
|
164
167
|
try:
|
|
168
|
+
kernel = self.window.controller.kernel
|
|
165
169
|
# optional prompt enhancement
|
|
166
170
|
if not self.raw and not self.inline and self.input_prompt:
|
|
167
171
|
try:
|
|
@@ -208,6 +212,70 @@ class VideoWorker(QRunnable):
|
|
|
208
212
|
cfg_try = dict(cfg_kwargs)
|
|
209
213
|
cfg_try["duration_seconds"] = int(self._duration_for_model(self.model, self.duration_seconds))
|
|
210
214
|
|
|
215
|
+
# remix / extension: if video_id provided, prefer video-to-video path
|
|
216
|
+
is_remix = bool(self.video_id)
|
|
217
|
+
if is_remix:
|
|
218
|
+
# Veo extension support varies by API and model; choose a compatible model if needed
|
|
219
|
+
model_for_ext = self._select_extension_model(self.model)
|
|
220
|
+
if model_for_ext != self.model:
|
|
221
|
+
self.signals.status.emit(f"Please switch model for extension: {self.model} -> {model_for_ext}")
|
|
222
|
+
# self.model = model_for_ext # <-- do not override user selection, just inform
|
|
223
|
+
|
|
224
|
+
# Build video input from identifier (URI, files/<id>, http(s), gs://, or local path)
|
|
225
|
+
video_input = self._video_from_identifier(self.video_id)
|
|
226
|
+
if not video_input:
|
|
227
|
+
raise RuntimeError("Invalid video_id for remix/extension. Provide a valid URI, file name, or local path.")
|
|
228
|
+
|
|
229
|
+
# Minimal config for extension to avoid server-side rejections
|
|
230
|
+
ext_config = gtypes.GenerateVideosConfig(number_of_videos=1)
|
|
231
|
+
|
|
232
|
+
label = trans('vid.status.generating') + " (remix)"
|
|
233
|
+
self.signals.status.emit(label + f": {self.input_prompt or ''}...")
|
|
234
|
+
|
|
235
|
+
# Start operation: video extension, prompt optional
|
|
236
|
+
operation = self.client.models.generate_videos(
|
|
237
|
+
model=self.model or self.DEFAULT_VEO_MODEL,
|
|
238
|
+
prompt=self.input_prompt or "",
|
|
239
|
+
video=video_input,
|
|
240
|
+
config=ext_config,
|
|
241
|
+
)
|
|
242
|
+
|
|
243
|
+
# poll until done
|
|
244
|
+
while not getattr(operation, "done", False):
|
|
245
|
+
if kernel.stopped():
|
|
246
|
+
break
|
|
247
|
+
time.sleep(10)
|
|
248
|
+
if kernel.stopped():
|
|
249
|
+
break
|
|
250
|
+
operation = self.client.operations.get(operation)
|
|
251
|
+
|
|
252
|
+
# extract response payload
|
|
253
|
+
op_resp = getattr(operation, "response", None) or getattr(operation, "result", None)
|
|
254
|
+
if not op_resp:
|
|
255
|
+
raise RuntimeError("Empty operation response.")
|
|
256
|
+
|
|
257
|
+
gen_list = getattr(op_resp, "generated_videos", None) or []
|
|
258
|
+
if not gen_list:
|
|
259
|
+
raise RuntimeError("No videos generated.")
|
|
260
|
+
|
|
261
|
+
# store remote reference for next remix calls (URI/name) in ctx
|
|
262
|
+
self._store_video_reference(gen_list[0])
|
|
263
|
+
|
|
264
|
+
# download and save
|
|
265
|
+
paths: List[str] = []
|
|
266
|
+
for idx, gv in enumerate(gen_list[:1]):
|
|
267
|
+
data = self._download_video_bytes(getattr(gv, "video", None))
|
|
268
|
+
p = self._save(idx, data)
|
|
269
|
+
if p:
|
|
270
|
+
paths.append(p)
|
|
271
|
+
|
|
272
|
+
if self.inline:
|
|
273
|
+
self.signals.finished_inline.emit(self.ctx, paths, self.input_prompt)
|
|
274
|
+
else:
|
|
275
|
+
self.signals.finished.emit(self.ctx, paths, self.input_prompt)
|
|
276
|
+
return # remix path completed
|
|
277
|
+
|
|
278
|
+
# normal generation path (text-to-video or image-to-video)
|
|
211
279
|
self.signals.status.emit(trans('vid.status.generating') + f": {self.input_prompt}...")
|
|
212
280
|
|
|
213
281
|
try:
|
|
@@ -235,7 +303,11 @@ class VideoWorker(QRunnable):
|
|
|
235
303
|
|
|
236
304
|
# poll until done
|
|
237
305
|
while not getattr(operation, "done", False):
|
|
306
|
+
if kernel.stopped():
|
|
307
|
+
break
|
|
238
308
|
time.sleep(10)
|
|
309
|
+
if kernel.stopped():
|
|
310
|
+
break
|
|
239
311
|
operation = self.client.operations.get(operation)
|
|
240
312
|
|
|
241
313
|
# extract response payload
|
|
@@ -247,6 +319,9 @@ class VideoWorker(QRunnable):
|
|
|
247
319
|
if not gen_list:
|
|
248
320
|
raise RuntimeError("No videos generated.")
|
|
249
321
|
|
|
322
|
+
# store remote reference for potential future remix/extension
|
|
323
|
+
self._store_video_reference(gen_list[0])
|
|
324
|
+
|
|
250
325
|
# download and save all outputs up to num
|
|
251
326
|
paths: List[str] = []
|
|
252
327
|
for idx, gv in enumerate(gen_list[:num]):
|
|
@@ -329,6 +404,82 @@ class VideoWorker(QRunnable):
|
|
|
329
404
|
continue
|
|
330
405
|
return None
|
|
331
406
|
|
|
407
|
+
def _video_from_identifier(self, identifier: str) -> Optional[gtypes.Video]:
    """
    Build a Video object from a generic identifier.

    Identifier forms, checked in this order:
    - Local file path -> types.Video.from_file
    - files/<id> -> resolve to URI using Files API
    - http(s) or gs:// URI -> pass-through

    :param identifier: video reference (path, Files API name or URI)
    :return: Video, or None if the identifier is empty, unknown, or any step fails
    """
    try:
        if not identifier:
            return None
        ident = str(identifier).strip()

        # Local path; pass the path by keyword, matching Image.from_file(location=...)
        if os.path.isfile(ident):
            return gtypes.Video.from_file(location=ident)

        # Files API name
        if ident.startswith("files/"):
            try:
                f = self.client.files.get(name=ident)
                uri = getattr(f, "uri", None)
                if uri:
                    return gtypes.Video(uri=uri)
            except Exception:
                # best-effort lookup: an unresolvable name is reported as "no video"
                pass

        # Generic URI (Gemini accepts URIs, Vertex expects GCS; SDK honors both via uri field)
        if ident.startswith(("http://", "https://", "gs://")):
            return gtypes.Video(uri=ident)
    except Exception:
        return None
    return None
|
|
439
|
+
|
|
440
|
+
def _select_extension_model(self, model_id: str) -> str:
    """
    Pick a model usable for video extension on the active backend.

    - Gemini Developer API: keep the model if its id contains "veo-3.1",
      otherwise suggest "veo-3.1-generate-preview"
    - Vertex AI (client.vertexai is truthy): keep the model if its id contains
      "veo-2.0", otherwise suggest "veo-2.0-generate-001"

    :param model_id: currently selected model id (matched case-insensitively)
    :return: model_id unchanged when compatible, otherwise the suggested model id
    """
    normalized = str(model_id or "").lower()
    on_vertex = bool(getattr(self.client, "vertexai", False))

    if on_vertex:
        family, suggestion = "veo-2.0", "veo-2.0-generate-001"
    else:
        family, suggestion = "veo-3.1", "veo-3.1-generate-preview"

    return model_id if family in normalized else suggestion
|
|
460
|
+
|
|
461
|
+
def _store_video_reference(self, generated_video_item: Any) -> None:
    """
    Remember a reusable video reference in ctx.extra['video_id'] and persist the context item.

    The reference is taken from the item's ``video`` object: ``uri`` first,
    then ``download_uri``, then ``name``. Nothing is stored when none is set.
    Any error is ignored (best-effort).

    :param generated_video_item: item from the response's generated_videos
    """
    try:
        video = getattr(generated_video_item, "video", None)
        if video:
            location = getattr(video, "uri", None) or getattr(video, "download_uri", None)
            label = getattr(video, "name", None)
            reference = location if location else label
            if reference:
                item = self.ctx
                # extra may be missing or not a dict yet
                if not isinstance(item.extra, dict):
                    item.extra = {}
                item.extra["video_id"] = reference
                self.window.core.ctx.update_item(item)
    except Exception:
        pass
|
|
482
|
+
|
|
332
483
|
def _download_video_bytes(self, file_ref) -> Optional[bytes]:
|
|
333
484
|
"""
|
|
334
485
|
Download video bytes using the Files service.
|