pygpt-net 2.7.2__py3-none-any.whl → 2.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. pygpt_net/CHANGELOG.txt +12 -0
  2. pygpt_net/__init__.py +3 -3
  3. pygpt_net/app.py +382 -350
  4. pygpt_net/controller/chat/attachment.py +5 -1
  5. pygpt_net/controller/chat/image.py +40 -5
  6. pygpt_net/controller/files/files.py +3 -1
  7. pygpt_net/controller/layout/layout.py +2 -2
  8. pygpt_net/controller/media/media.py +70 -1
  9. pygpt_net/controller/theme/nodes.py +2 -1
  10. pygpt_net/controller/ui/mode.py +5 -1
  11. pygpt_net/controller/ui/ui.py +17 -2
  12. pygpt_net/core/filesystem/url.py +4 -1
  13. pygpt_net/core/render/web/helpers.py +5 -0
  14. pygpt_net/data/config/config.json +5 -4
  15. pygpt_net/data/config/models.json +3 -3
  16. pygpt_net/data/config/settings.json +0 -14
  17. pygpt_net/data/css/web-blocks.css +3 -0
  18. pygpt_net/data/css/web-chatgpt.css +3 -0
  19. pygpt_net/data/locale/locale.de.ini +6 -0
  20. pygpt_net/data/locale/locale.en.ini +7 -1
  21. pygpt_net/data/locale/locale.es.ini +6 -0
  22. pygpt_net/data/locale/locale.fr.ini +6 -0
  23. pygpt_net/data/locale/locale.it.ini +6 -0
  24. pygpt_net/data/locale/locale.pl.ini +7 -1
  25. pygpt_net/data/locale/locale.uk.ini +6 -0
  26. pygpt_net/data/locale/locale.zh.ini +6 -0
  27. pygpt_net/launcher.py +115 -55
  28. pygpt_net/preload.py +243 -0
  29. pygpt_net/provider/api/google/image.py +317 -10
  30. pygpt_net/provider/api/google/video.py +160 -4
  31. pygpt_net/provider/api/openai/image.py +201 -93
  32. pygpt_net/provider/api/openai/video.py +99 -24
  33. pygpt_net/provider/api/x_ai/image.py +25 -2
  34. pygpt_net/provider/core/config/patch.py +17 -1
  35. pygpt_net/ui/layout/chat/input.py +20 -2
  36. pygpt_net/ui/layout/chat/painter.py +6 -4
  37. pygpt_net/ui/layout/toolbox/image.py +21 -11
  38. pygpt_net/ui/layout/toolbox/raw.py +2 -2
  39. pygpt_net/ui/layout/toolbox/video.py +22 -9
  40. pygpt_net/ui/main.py +84 -3
  41. pygpt_net/ui/widget/dialog/base.py +3 -10
  42. pygpt_net/ui/widget/option/combo.py +119 -1
  43. pygpt_net/ui/widget/textarea/input_extra.py +664 -0
  44. {pygpt_net-2.7.2.dist-info → pygpt_net-2.7.4.dist-info}/METADATA +27 -20
  45. {pygpt_net-2.7.2.dist-info → pygpt_net-2.7.4.dist-info}/RECORD +48 -46
  46. {pygpt_net-2.7.2.dist-info → pygpt_net-2.7.4.dist-info}/LICENSE +0 -0
  47. {pygpt_net-2.7.2.dist-info → pygpt_net-2.7.4.dist-info}/WHEEL +0 -0
  48. {pygpt_net-2.7.2.dist-info → pygpt_net-2.7.4.dist-info}/entry_points.txt +0 -0
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.12.25 20:00:00 #
9
+ # Updated Date: 2025.12.31 16:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import mimetypes
@@ -14,7 +14,7 @@ from typing import Optional, Dict, Any, List
14
14
  from google import genai
15
15
  from google.genai import types as gtypes
16
16
  from PySide6.QtCore import QObject, Signal, QRunnable, Slot
17
- import base64, datetime, os, requests
17
+ import base64, datetime, os, requests, tempfile
18
18
 
19
19
  from pygpt_net.core.events import KernelEvent
20
20
  from pygpt_net.core.bridge.context import BridgeContext
@@ -55,6 +55,7 @@ class Image:
55
55
  prompt = context.prompt
56
56
  num = int(extra.get("num", 1))
57
57
  inline = bool(extra.get("inline", False))
58
+ extra_prompt = extra.get("extra_prompt", "")
58
59
 
59
60
  # decide sub-mode based on attachments
60
61
  sub_mode = self.MODE_GENERATE
@@ -79,6 +80,10 @@ class Image:
79
80
  worker.raw = self.window.core.config.get('img_raw')
80
81
  worker.num = num
81
82
  worker.inline = inline
83
+ worker.extra_prompt = extra_prompt
84
+
85
+ # remix: previous image reference (ID/URI/path) from extra
86
+ worker.image_id = extra.get("image_id")
82
87
 
83
88
  if attachments and len(attachments) > 0:
84
89
  mid = str(model.id).lower()
@@ -121,14 +126,16 @@ class ImageWorker(QRunnable):
121
126
  # params
122
127
  self.mode = Image.MODE_GENERATE
123
128
  self.attachments: Dict[str, Any] = {}
124
- self.model = "imagen-4.0-generate-preview-06-06"
129
+ self.model = "imagen-4.0-generate-001"
125
130
  self.model_prompt = None
126
131
  self.input_prompt = ""
127
132
  self.system_prompt = ""
128
133
  self.inline = False
134
+ self.extra_prompt: Optional[str] = None
129
135
  self.raw = False
130
136
  self.num = 1
131
137
  self.resolution = "1024x1024" # used to derive aspect ratio or image_size
138
+ self.image_id: Optional[str] = None # remix/extend previous image
132
139
 
133
140
  # limits
134
141
  self.imagen_max_num = 4 # Imagen returns up to 4 images
@@ -174,10 +181,111 @@ class ImageWorker(QRunnable):
174
181
  self.signals.error.emit(e)
175
182
  self.signals.status.emit(trans('img.status.prompt.error') + ": " + str(e))
176
183
 
177
- self.signals.status.emit(trans('img.status.generating') + f": {self.input_prompt}...")
184
+ # Decide how to apply negative prompt: native param on Vertex Imagen 3.0 (-001) or inline fallback.
185
+ use_param = (
186
+ bool(self.extra_prompt and str(self.extra_prompt).strip())
187
+ and self._using_vertex()
188
+ and self._imagen_supports_negative_prompt(self.model)
189
+ )
190
+ if (self.extra_prompt and str(self.extra_prompt).strip()) and not use_param:
191
+ try:
192
+ self.input_prompt = self._merge_negative_prompt(self.input_prompt or "", self.extra_prompt)
193
+ except Exception:
194
+ pass
178
195
 
179
196
  paths: List[str] = []
180
197
 
198
+ # Remix path: if image_id provided, prefer image-to-image remix using the given identifier.
199
+ if self.image_id:
200
+ self.signals.status.emit(trans('img.status.generating') + " (remix): " + (self.input_prompt or "") + "...")
201
+ if self._using_vertex() and self._is_imagen_generate(self.model):
202
+ # Vertex / Imagen edit flow with a single base image (no explicit mask).
203
+ img_ref = self._imagen_image_from_identifier(self.image_id)
204
+ if not img_ref:
205
+ raise RuntimeError("Invalid image_id for remix. Provide a valid local path, Files API name, or gs:// URI.")
206
+
207
+ raw_ref = gtypes.RawReferenceImage(reference_id=0, reference_image=img_ref)
208
+ mask_ref = gtypes.MaskReferenceImage(
209
+ reference_id=1,
210
+ reference_image=None,
211
+ config=gtypes.MaskReferenceConfig(
212
+ mask_mode="MASK_MODE_BACKGROUND",
213
+ mask_dilation=0.0,
214
+ ),
215
+ )
216
+ # Prepare edit config with optional negative prompt when supported
217
+ cfg_kwargs = dict(
218
+ edit_mode="EDIT_MODE_DEFAULT",
219
+ number_of_images=min(self.num, self.imagen_max_num),
220
+ include_rai_reason=True,
221
+ )
222
+ if self.extra_prompt and self._imagen_supports_negative_prompt(self.model):
223
+ cfg_kwargs["negative_prompt"] = self.extra_prompt
224
+ try:
225
+ cfg = gtypes.EditImageConfig(**cfg_kwargs)
226
+ except Exception:
227
+ # Fallback without negative_prompt if SDK doesn't recognize it
228
+ cfg_kwargs.pop("negative_prompt", None)
229
+ cfg = gtypes.EditImageConfig(**cfg_kwargs)
230
+
231
+ resp = self.client.models.edit_image(
232
+ model="imagen-3.0-capability-001",
233
+ prompt=self.input_prompt or "",
234
+ reference_images=[raw_ref, mask_ref],
235
+ config=cfg,
236
+ )
237
+ imgs = getattr(resp, "generated_images", None) or []
238
+ for idx, gi in enumerate(imgs[: min(self.num, self.imagen_max_num)]):
239
+ data = self._extract_imagen_bytes(gi)
240
+ p = self._save(idx, data)
241
+ if p:
242
+ paths.append(p)
243
+
244
+ # store reference for future remix: prefer remote URI if available, otherwise saved path
245
+ if paths:
246
+ self._store_image_reference_imagen(imgs[0] if imgs else None, paths[0])
247
+
248
+ else:
249
+ # Gemini Developer API remix via generate_content with prompt + reference image part.
250
+ ref_part = self._image_part_from_identifier(self.image_id)
251
+ if not ref_part:
252
+ raise RuntimeError("Invalid image_id for remix. Provide a valid local path, Files API name, http(s) URL, or gs:// URI.")
253
+ img_cfg = self._build_gemini_image_config(self.model, self.resolution)
254
+ resp = self.client.models.generate_content(
255
+ model=self.model or self.DEFAULT_GEMINI_IMAGE_MODEL,
256
+ contents=[self.input_prompt or "", ref_part],
257
+ config=gtypes.GenerateContentConfig(
258
+ image_config=img_cfg,
259
+ ),
260
+ )
261
+ saved = 0
262
+ for cand in getattr(resp, "candidates", []) or []:
263
+ parts = getattr(getattr(cand, "content", None), "parts", None) or []
264
+ for part in parts:
265
+ inline = getattr(part, "inline_data", None)
266
+ if inline and getattr(inline, "data", None):
267
+ p = self._save(saved, inline.data)
268
+ if p:
269
+ paths.append(p)
270
+ saved += 1
271
+ if saved >= self.num:
272
+ break
273
+ if saved >= self.num:
274
+ break
275
+
276
+ # store reference: saved local path is a reusable identifier for next remix
277
+ if paths:
278
+ self._store_image_id(paths[0])
279
+
280
+ if self.inline:
281
+ self.signals.finished_inline.emit(self.ctx, paths, self.input_prompt)
282
+ else:
283
+ self.signals.finished.emit(self.ctx, paths, self.input_prompt)
284
+ return # remix path finished
285
+
286
+ # Normal paths
287
+ self.signals.status.emit(trans('img.status.generating') + f": {self.input_prompt}...")
288
+
181
289
  if self.mode == Image.MODE_EDIT:
182
290
  # EDIT
183
291
  if self._using_vertex():
@@ -189,6 +297,9 @@ class ImageWorker(QRunnable):
189
297
  p = self._save(idx, data)
190
298
  if p:
191
299
  paths.append(p)
300
+ # store reference
301
+ if paths:
302
+ self._store_image_reference_imagen(imgs[0] if imgs else None, paths[0])
192
303
  else:
193
304
  # Gemini Developer API via Gemini image models (Nano Banana / Nano Banana Pro)
194
305
  resp = self._gemini_edit(self.input_prompt, self.attachments, self.num)
@@ -206,6 +317,9 @@ class ImageWorker(QRunnable):
206
317
  break
207
318
  if saved >= self.num:
208
319
  break
320
+ # store reference
321
+ if paths:
322
+ self._store_image_id(paths[0])
209
323
 
210
324
  else:
211
325
  # GENERATE
@@ -218,6 +332,9 @@ class ImageWorker(QRunnable):
218
332
  p = self._save(idx, data)
219
333
  if p:
220
334
  paths.append(p)
335
+ # store reference
336
+ if paths:
337
+ self._store_image_reference_imagen(imgs[0] if imgs else None, paths[0])
221
338
  else:
222
339
  # Gemini Developer API image generation (Nano Banana / Nano Banana Pro) with robust sizing + optional reference images
223
340
  resp = self._gemini_generate_image(self.input_prompt, self.model, self.resolution)
@@ -235,6 +352,9 @@ class ImageWorker(QRunnable):
235
352
  break
236
353
  if saved >= self.num:
237
354
  break
355
+ # store reference
356
+ if paths:
357
+ self._store_image_id(paths[0])
238
358
 
239
359
  if self.inline:
240
360
  self.signals.finished_inline.emit(self.ctx, paths, self.input_prompt)
@@ -260,12 +380,34 @@ class ImageWorker(QRunnable):
260
380
  mid = str(model_id).lower()
261
381
  return "imagen" in mid and "generate" in mid
262
382
 
383
+ def _imagen_supports_negative_prompt(self, model_id: str) -> bool:
384
+ """
385
+ Return True if the Imagen model supports native negative_prompt.
386
+ Supported: imagen-3.0-generate-001, imagen-3.0-fast-generate-001, imagen-3.0-capability-001.
387
+ """
388
+ mid = str(model_id or "").lower()
389
+ return any(x in mid for x in (
390
+ "imagen-3.0-generate-001",
391
+ "imagen-3.0-fast-generate-001",
392
+ "imagen-3.0-capability-001",
393
+ ))
394
+
263
395
  def _imagen_generate(self, prompt: str, num: int, resolution: str):
264
396
  """Imagen text-to-image."""
265
397
  aspect = self._aspect_from_resolution(resolution)
266
- cfg = gtypes.GenerateImagesConfig(number_of_images=num)
398
+ # Build config with optional negative_prompt when supported by model and provided.
399
+ cfg_kwargs: Dict[str, Any] = {"number_of_images": num}
267
400
  if aspect:
268
- cfg.aspect_ratio = aspect
401
+ cfg_kwargs["aspect_ratio"] = aspect
402
+ if self.extra_prompt and self._imagen_supports_negative_prompt(self.model):
403
+ cfg_kwargs["negative_prompt"] = self.extra_prompt
404
+ try:
405
+ cfg = gtypes.GenerateImagesConfig(**cfg_kwargs)
406
+ except Exception:
407
+ # Fallback without negative_prompt if SDK doesn't recognize it
408
+ cfg_kwargs.pop("negative_prompt", None)
409
+ cfg = gtypes.GenerateImagesConfig(**cfg_kwargs)
410
+
269
411
  return self.client.models.generate_images(
270
412
  model=self.model,
271
413
  prompt=prompt,
@@ -306,11 +448,19 @@ class ImageWorker(QRunnable):
306
448
  )
307
449
  edit_mode = "EDIT_MODE_BGSWAP"
308
450
 
309
- cfg = gtypes.EditImageConfig(
451
+ # Build edit config with optional negative_prompt
452
+ cfg_kwargs = dict(
310
453
  edit_mode=edit_mode,
311
454
  number_of_images=min(num, self.imagen_max_num),
312
455
  include_rai_reason=True,
313
456
  )
457
+ if self.extra_prompt and self._imagen_supports_negative_prompt(self.model):
458
+ cfg_kwargs["negative_prompt"] = self.extra_prompt
459
+ try:
460
+ cfg = gtypes.EditImageConfig(**cfg_kwargs)
461
+ except Exception:
462
+ cfg_kwargs.pop("negative_prompt", None)
463
+ cfg = gtypes.EditImageConfig(**cfg_kwargs)
314
464
 
315
465
  # Ensure capability model for edit
316
466
  model_id = "imagen-3.0-capability-001"
@@ -397,9 +547,7 @@ class ImageWorker(QRunnable):
397
547
 
398
548
  def _do_call(icfg: Optional[gtypes.ImageConfig]):
399
549
  contents: List[Any] = []
400
- # Always include the textual prompt (can be empty string).
401
550
  contents.append(prompt or "")
402
- # Append reference images, if any.
403
551
  if image_parts:
404
552
  contents.extend(image_parts)
405
553
  return self.client.models.generate_content(
@@ -461,6 +609,117 @@ class ImageWorker(QRunnable):
461
609
  return _do_call(cfg2)
462
610
  raise
463
611
 
612
def _image_part_from_identifier(self, identifier: str) -> Optional[gtypes.Part]:
    """
    Build a Gemini Part from a generic image identifier.

    Identifier forms are tried in this order:
    - Local file path -> bytes read from disk, Part.from_bytes
    - Files API name (files/...) -> resolved via client.files.get, Part.from_uri
    - gs:// URI -> Part.from_uri
    - http(s) URL -> downloaded with requests, Part.from_bytes
    - data: URI (base64) -> decoded, Part.from_bytes

    :param identifier: image reference (path, Files API name, URI or data: URI)
    :return: Part for the image, or None when the identifier is empty,
             not recognized, or could not be resolved
    """
    if not identifier:
        return None
    ident = str(identifier).strip()

    # Local file. isfile() instead of exists(): a directory path also "exists",
    # but open() on it raises, so it must not be treated as an image.
    if os.path.isfile(ident):
        mime = self._guess_mime(ident)
        with open(ident, "rb") as f:
            return gtypes.Part.from_bytes(data=f.read(), mime_type=mime)

    # Files API
    if ident.startswith("files/"):
        try:
            f = self.client.files.get(name=ident)
            file_uri = getattr(f, "uri", None)
            mime = getattr(f, "mime_type", None) or self._guess_mime_from_uri(file_uri)
            if file_uri and mime:
                return gtypes.Part.from_uri(file_uri=file_uri, mime_type=mime)
        except Exception:
            # best-effort lookup: no later branch matches "files/", so an
            # unresolved name ends up as None at the bottom
            pass

    # gs://
    if ident.startswith("gs://"):
        mime = self._guess_mime_from_uri(ident) or "image/png"
        return gtypes.Part.from_uri(file_uri=ident, mime_type=mime)

    # http(s)
    if ident.startswith(("http://", "https://")):
        try:
            r = requests.get(ident, timeout=60)
            if r.status_code == 200:
                # Content-Type may carry parameters ("image/png; charset=binary");
                # only the bare media type is a valid mime_type
                header = (r.headers.get("Content-Type") or "").split(";", 1)[0].strip().lower()
                mime = header or self._guess_mime_from_uri(ident) or "image/png"
                return gtypes.Part.from_bytes(data=r.content, mime_type=mime)
        except Exception:
            return None

    # data:
    if ident.startswith("data:"):
        try:
            head, b64 = ident.split(",", 1)
            # media type sits between "data:" and the first ";" (if any);
            # default to PNG when the URI does not declare one
            mime = head[5:].split(";", 1)[0].strip() or "image/png"
            return gtypes.Part.from_bytes(data=base64.b64decode(b64), mime_type=mime)
        except Exception:
            return None

    return None
667
+
668
def _imagen_image_from_identifier(self, identifier: str) -> Optional[gtypes.Image]:
    """
    Build a gtypes.Image for Imagen edit from a generic identifier.

    - Local path -> Image.from_file
    - Files API name -> resolve to URI; if gs:// use gcs_uri, otherwise download to temp and from_file
    - gs:// -> Image(gcs_uri=...)
    - http(s) -> download to temp file, then from_file

    :param identifier: image reference (path, Files API name or URI)
    :return: Image, or None when the identifier is empty, not recognized,
             or could not be resolved/downloaded
    """
    if not identifier:
        return None
    ident = str(identifier).strip()

    # isfile() instead of exists(): a directory is not a loadable image
    if os.path.isfile(ident):
        return gtypes.Image.from_file(location=ident)

    if ident.startswith("files/"):
        try:
            f = self.client.files.get(name=ident)
            uri = getattr(f, "uri", None)
            if uri and uri.startswith("gs://"):
                return gtypes.Image(gcs_uri=uri)
            if uri and uri.startswith(("http://", "https://")):
                return self._imagen_image_from_url(uri)
        except Exception:
            return None

    if ident.startswith("gs://"):
        return gtypes.Image(gcs_uri=ident)

    if ident.startswith(("http://", "https://")):
        return self._imagen_image_from_url(ident)

    return None

def _imagen_image_from_url(self, url: str) -> Optional[gtypes.Image]:
    """
    Download an image over http(s) and load it as gtypes.Image.

    The temporary file produced by _download_to_temp is removed once the image
    has been loaded, so repeated remixes do not pile up files in the temp dir.

    :param url: http(s) URL of the image
    :return: Image, or None when the download failed
    """
    tmp = self._download_to_temp(url)
    if not tmp:
        return None
    try:
        # NOTE(review): relies on Image.from_file reading the bytes into the
        # returned object (google-genai SDK) - confirm before upgrading the SDK
        return gtypes.Image.from_file(location=tmp)
    finally:
        try:
            os.remove(tmp)
        except OSError:
            pass
703
+
704
def _download_to_temp(self, url: str) -> Optional[str]:
    """
    Download URL to a temporary file and return its path.

    The file suffix is derived from the response Content-Type (.jpg, .webp,
    otherwise .png). The caller owns the returned file and is responsible for
    deleting it.

    :param url: http(s) URL to download
    :return: path of the temporary file, or None on a non-200 response or any error
    """
    path = None
    try:
        r = requests.get(url, timeout=60)
        if r.status_code != 200:
            return None

        # header values are case-insensitive; "image/jpg" is non-standard but seen in the wild
        ct = (r.headers.get("Content-Type") or "").lower()
        if "jpeg" in ct or "jpg" in ct:
            ext = ".jpg"
        elif "webp" in ct:
            ext = ".webp"
        else:
            ext = ".png"

        fd, path = tempfile.mkstemp(suffix=ext)
        with os.fdopen(fd, "wb") as f:
            f.write(r.content)
        return path
    except Exception:
        # best-effort download: report failure as None, but do not leave a
        # partially written temp file behind that nobody holds a path to
        if path is not None:
            try:
                os.remove(path)
            except OSError:
                pass
        return None
722
+
464
723
  def _collect_attachment_paths(self, attachments: Dict[str, Any]) -> List[str]:
465
724
  """Extract file paths from attachments dict."""
466
725
  out: List[str] = []
@@ -527,6 +786,34 @@ class ImageWorker(QRunnable):
527
786
  pass
528
787
  return None
529
788
 
789
+ def _store_image_reference_imagen(self, generated_image_item: Any, fallback_path: Optional[str]) -> None:
790
+ """
791
+ Persist a reusable image reference to ctx.extra['image_id'].
792
+ Prefer remote URI/name if provided by Imagen; fallback to the saved local path.
793
+ """
794
+ ref = None
795
+ try:
796
+ img = getattr(generated_image_item, "image", None) if generated_image_item else None
797
+ if img:
798
+ ref = getattr(img, "uri", None) or getattr(img, "url", None) or getattr(img, "name", None)
799
+ except Exception:
800
+ ref = None
801
+ self._store_image_id(ref or fallback_path)
802
+
803
+ def _store_image_id(self, value: Optional[str]) -> None:
804
+ """
805
+ Store image_id reference in ctx.extra and persist the context item.
806
+ """
807
+ if not value:
808
+ return
809
+ try:
810
+ if not isinstance(self.ctx.extra, dict):
811
+ self.ctx.extra = {}
812
+ self.ctx.extra["image_id"] = str(value)
813
+ self.window.core.ctx.update_item(self.ctx)
814
+ except Exception:
815
+ pass
816
+
530
817
  def _save(self, idx: int, data: Optional[bytes]) -> Optional[str]:
531
818
  """Save image bytes to file and return path."""
532
819
  if not data:
@@ -559,6 +846,13 @@ class ImageWorker(QRunnable):
559
846
  return 'image/heic'
560
847
  return 'image/png'
561
848
 
849
+ def _guess_mime_from_uri(self, uri: Optional[str]) -> Optional[str]:
850
+ """Best-effort MIME guess from URI or file extension."""
851
+ if not uri:
852
+ return None
853
+ mime, _ = mimetypes.guess_type(uri)
854
+ return mime or None
855
+
562
856
  def _cleanup(self):
563
857
  """Cleanup resources."""
564
858
  sig = self.signals
@@ -567,4 +861,17 @@ class ImageWorker(QRunnable):
567
861
  try:
568
862
  sig.deleteLater()
569
863
  except RuntimeError:
570
- pass
864
+ pass
865
+
866
+ # ---------- prompt utilities ----------
867
+
868
+ @staticmethod
869
+ def _merge_negative_prompt(prompt: str, negative: Optional[str]) -> str:
870
+ """
871
+ Append a negative prompt to the main text prompt when the provider has no native negative_prompt field.
872
+ """
873
+ base = (prompt or "").strip()
874
+ neg = (negative or "").strip()
875
+ if not neg:
876
+ return base
877
+ return (base + ("\n" if base else "") + f"Negative prompt: {neg}").strip()
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.12.25 20:00:00 #
9
+ # Updated Date: 2025.12.31 16:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import base64, datetime, os, requests
@@ -54,6 +54,8 @@ class Video:
54
54
  prompt = context.prompt
55
55
  num = int(extra.get("num", 1))
56
56
  inline = bool(extra.get("inline", False))
57
+ video_id = extra.get("video_id")
58
+ extra_prompt = extra.get("extra_prompt", "")
57
59
 
58
60
  # decide sub-mode based on attachments (image-to-video when image is attached)
59
61
  sub_mode = self.MODE_GENERATE
@@ -80,6 +82,8 @@ class Video:
80
82
  worker.raw = self.window.core.config.get('img_raw')
81
83
  worker.num = num
82
84
  worker.inline = inline
85
+ worker.extra_prompt = extra_prompt
86
+ worker.video_id = video_id
83
87
 
84
88
  # optional params
85
89
  worker.aspect_ratio = str(extra.get("aspect_ratio") or self.window.core.config.get('video.aspect_ratio') or "16:9")
@@ -141,6 +145,8 @@ class VideoWorker(QRunnable):
141
145
  self.input_prompt = ""
142
146
  self.system_prompt = ""
143
147
  self.inline = False
148
+ self.extra_prompt: Optional[str] = None
149
+ self.video_id = None
144
150
  self.raw = False
145
151
  self.num = 1
146
152
 
@@ -149,7 +155,6 @@ class VideoWorker(QRunnable):
149
155
  self.duration_seconds = 8
150
156
  self.fps = 24
151
157
  self.seed: Optional[int] = None
152
- self.negative_prompt: Optional[str] = None
153
158
  self.generate_audio: bool = False # generation includes audio by default on Veo 3.x
154
159
  self.resolution: str = "720p" # Veo supports 720p/1080p depending on variant
155
160
 
@@ -162,6 +167,7 @@ class VideoWorker(QRunnable):
162
167
  @Slot()
163
168
  def run(self):
164
169
  try:
170
+ kernel = self.window.controller.kernel
165
171
  # optional prompt enhancement
166
172
  if not self.raw and not self.inline and self.input_prompt:
167
173
  try:
@@ -201,13 +207,80 @@ class VideoWorker(QRunnable):
201
207
  # set optional controls
202
208
  if self.seed is not None:
203
209
  cfg_kwargs["seed"] = int(self.seed)
204
- if self.negative_prompt:
205
- cfg_kwargs["negative_prompt"] = self.negative_prompt
210
+ if self.extra_prompt:
211
+ cfg_kwargs["negative_prompt"] = self.extra_prompt
206
212
 
207
213
  # set durationSeconds when supported; fall back gracefully if rejected by model
208
214
  cfg_try = dict(cfg_kwargs)
209
215
  cfg_try["duration_seconds"] = int(self._duration_for_model(self.model, self.duration_seconds))
210
216
 
217
+ # remix / extension: if video_id provided, prefer video-to-video path
218
+ is_remix = bool(self.video_id)
219
+ if is_remix:
220
+ # Veo extension support varies by API and model; choose a compatible model if needed
221
+ model_for_ext = self._select_extension_model(self.model)
222
+ if model_for_ext != self.model:
223
+ self.signals.status.emit(f"Please switch model for extension: {self.model} -> {model_for_ext}")
224
+ # self.model = model_for_ext # <-- do not override user selection, just inform
225
+
226
+ # Build video input from identifier (URI, files/<id>, http(s), gs://, or local path)
227
+ video_input = self._video_from_identifier(self.video_id)
228
+ if not video_input:
229
+ raise RuntimeError("Invalid video_id for remix/extension. Provide a valid URI, file name, or local path.")
230
+
231
+ # Minimal config for extension to avoid server-side rejections
232
+ ext_config = gtypes.GenerateVideosConfig(number_of_videos=1)
233
+ # Pass negative prompt to extension when provided
234
+ if self.extra_prompt:
235
+ ext_config.negative_prompt = self.extra_prompt # supported in python-genai
236
+
237
+ label = trans('vid.status.generating') + " (remix)"
238
+ self.signals.status.emit(label + f": {self.input_prompt or ''}...")
239
+
240
+ # Start operation: video extension, prompt optional
241
+ operation = self.client.models.generate_videos(
242
+ model=self.model or self.DEFAULT_VEO_MODEL,
243
+ prompt=self.input_prompt or "",
244
+ video=video_input,
245
+ config=ext_config,
246
+ )
247
+
248
+ # poll until done
249
+ while not getattr(operation, "done", False):
250
+ if kernel.stopped():
251
+ break
252
+ time.sleep(10)
253
+ if kernel.stopped():
254
+ break
255
+ operation = self.client.operations.get(operation)
256
+
257
+ # extract response payload
258
+ op_resp = getattr(operation, "response", None) or getattr(operation, "result", None)
259
+ if not op_resp:
260
+ raise RuntimeError("Empty operation response.")
261
+
262
+ gen_list = getattr(op_resp, "generated_videos", None) or []
263
+ if not gen_list:
264
+ raise RuntimeError("No videos generated.")
265
+
266
+ # store remote reference for next remix calls (URI/name) in ctx
267
+ self._store_video_reference(gen_list[0])
268
+
269
+ # download and save
270
+ paths: List[str] = []
271
+ for idx, gv in enumerate(gen_list[:1]):
272
+ data = self._download_video_bytes(getattr(gv, "video", None))
273
+ p = self._save(idx, data)
274
+ if p:
275
+ paths.append(p)
276
+
277
+ if self.inline:
278
+ self.signals.finished_inline.emit(self.ctx, paths, self.input_prompt)
279
+ else:
280
+ self.signals.finished.emit(self.ctx, paths, self.input_prompt)
281
+ return # remix path completed
282
+
283
+ # normal generation path (text-to-video or image-to-video)
211
284
  self.signals.status.emit(trans('vid.status.generating') + f": {self.input_prompt}...")
212
285
 
213
286
  try:
@@ -235,7 +308,11 @@ class VideoWorker(QRunnable):
235
308
 
236
309
  # poll until done
237
310
  while not getattr(operation, "done", False):
311
+ if kernel.stopped():
312
+ break
238
313
  time.sleep(10)
314
+ if kernel.stopped():
315
+ break
239
316
  operation = self.client.operations.get(operation)
240
317
 
241
318
  # extract response payload
@@ -247,6 +324,9 @@ class VideoWorker(QRunnable):
247
324
  if not gen_list:
248
325
  raise RuntimeError("No videos generated.")
249
326
 
327
+ # store remote reference for potential future remix/extension
328
+ self._store_video_reference(gen_list[0])
329
+
250
330
  # download and save all outputs up to num
251
331
  paths: List[str] = []
252
332
  for idx, gv in enumerate(gen_list[:num]):
@@ -329,6 +409,82 @@ class VideoWorker(QRunnable):
329
409
  continue
330
410
  return None
331
411
 
412
def _video_from_identifier(self, identifier: str) -> Optional[gtypes.Video]:
    """
    Build a Video object from a generic identifier.

    - Local file path -> types.Video.from_file
    - files/<id> -> resolve to URI using Files API
    - http(s) or gs:// URI -> pass-through

    Any error while building the object is reported as None.

    :param identifier: video reference (path, Files API name or URI)
    :return: Video, or None when the identifier is empty, not recognized,
             or could not be resolved
    """
    try:
        if not identifier:
            return None
        ident = str(identifier).strip()

        # Local path. `location` is passed by keyword, matching the
        # Image.from_file(location=...) calls in the image provider; a positional
        # call would raise TypeError if the parameter is keyword-only, and that
        # error would be swallowed below, turning every local path into None.
        if os.path.exists(ident):
            return gtypes.Video.from_file(location=ident)

        # Files API name
        if ident.startswith("files/"):
            try:
                f = self.client.files.get(name=ident)
                uri = getattr(f, "uri", None)
                if uri:
                    return gtypes.Video(uri=uri)
            except Exception:
                # best-effort lookup: fall through, ends up as None
                pass

        # Generic URI (Gemini accepts URIs, Vertex expects GCS; SDK honors both via uri field)
        if ident.startswith(("http://", "https://", "gs://")):
            return gtypes.Video(uri=ident)
    except Exception:
        return None
    return None
444
+
445
+ def _select_extension_model(self, model_id: str) -> str:
446
+ """
447
+ Choose a compatible model for video extension:
448
+ - Gemini API: Veo 3.1 only supports extension
449
+ - Vertex AI: extension supported on Veo 2.0
450
+ """
451
+ mid = str(model_id or "").lower()
452
+ use_vertex = bool(getattr(self.client, "vertexai", False))
453
+
454
+ # Gemini Developer API path
455
+ if not use_vertex:
456
+ if "veo-3.1" in mid:
457
+ return model_id
458
+ # Prefer 3.1 preview if user selected older Veo
459
+ return "veo-3.1-generate-preview"
460
+
461
+ # Vertex AI path
462
+ if "veo-2.0" in mid:
463
+ return model_id
464
+ return "veo-2.0-generate-001"
465
+
466
+ def _store_video_reference(self, generated_video_item: Any) -> None:
467
+ """
468
+ Persist a reusable video reference (URI or name) to ctx.extra['video_id'] for future remix/extension calls.
469
+ """
470
+ try:
471
+ vref = getattr(generated_video_item, "video", None)
472
+ if not vref:
473
+ return
474
+ # Prefer URI, fallback to name
475
+ uri = getattr(vref, "uri", None) or getattr(vref, "download_uri", None)
476
+ name = getattr(vref, "name", None)
477
+ ref = uri or name
478
+ if not ref:
479
+ return
480
+
481
+ if not isinstance(self.ctx.extra, dict):
482
+ self.ctx.extra = {}
483
+ self.ctx.extra["video_id"] = ref
484
+ self.window.core.ctx.update_item(self.ctx)
485
+ except Exception:
486
+ pass
487
+
332
488
  def _download_video_bytes(self, file_ref) -> Optional[bytes]:
333
489
  """
334
490
  Download video bytes using the Files service.