pygpt-net 2.6.64__py3-none-any.whl → 2.6.66__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. pygpt_net/CHANGELOG.txt +21 -0
  2. pygpt_net/__init__.py +3 -3
  3. pygpt_net/app.py +5 -1
  4. pygpt_net/controller/chat/chat.py +0 -0
  5. pygpt_net/controller/chat/handler/openai_stream.py +137 -7
  6. pygpt_net/controller/chat/render.py +0 -0
  7. pygpt_net/controller/config/field/checkbox_list.py +34 -1
  8. pygpt_net/controller/files/files.py +71 -2
  9. pygpt_net/controller/media/media.py +20 -1
  10. pygpt_net/controller/presets/editor.py +137 -22
  11. pygpt_net/controller/presets/presets.py +4 -1
  12. pygpt_net/controller/ui/mode.py +14 -10
  13. pygpt_net/controller/ui/ui.py +18 -1
  14. pygpt_net/core/agents/custom/__init__.py +18 -2
  15. pygpt_net/core/agents/custom/runner.py +2 -2
  16. pygpt_net/core/attachments/clipboard.py +146 -0
  17. pygpt_net/core/image/image.py +34 -1
  18. pygpt_net/core/render/web/renderer.py +33 -11
  19. pygpt_net/core/tabs/tabs.py +0 -0
  20. pygpt_net/core/types/image.py +61 -3
  21. pygpt_net/data/config/config.json +4 -3
  22. pygpt_net/data/config/models.json +629 -41
  23. pygpt_net/data/css/style.dark.css +12 -0
  24. pygpt_net/data/css/style.light.css +12 -0
  25. pygpt_net/data/icons/pin2.svg +1 -0
  26. pygpt_net/data/icons/pin3.svg +3 -0
  27. pygpt_net/data/icons/point.svg +1 -0
  28. pygpt_net/data/icons/target.svg +1 -0
  29. pygpt_net/data/js/app/ui.js +19 -2
  30. pygpt_net/data/js/app/user.js +22 -54
  31. pygpt_net/data/js/app.min.js +7 -9
  32. pygpt_net/data/locale/locale.de.ini +4 -0
  33. pygpt_net/data/locale/locale.en.ini +8 -0
  34. pygpt_net/data/locale/locale.es.ini +4 -0
  35. pygpt_net/data/locale/locale.fr.ini +4 -0
  36. pygpt_net/data/locale/locale.it.ini +4 -0
  37. pygpt_net/data/locale/locale.pl.ini +4 -0
  38. pygpt_net/data/locale/locale.uk.ini +4 -0
  39. pygpt_net/data/locale/locale.zh.ini +4 -0
  40. pygpt_net/icons.qrc +4 -0
  41. pygpt_net/icons_rc.py +274 -137
  42. pygpt_net/item/model.py +15 -19
  43. pygpt_net/js_rc.py +2038 -2075
  44. pygpt_net/provider/agents/openai/agent.py +0 -0
  45. pygpt_net/provider/api/google/__init__.py +20 -9
  46. pygpt_net/provider/api/google/image.py +161 -28
  47. pygpt_net/provider/api/google/video.py +73 -36
  48. pygpt_net/provider/api/openai/__init__.py +21 -11
  49. pygpt_net/provider/api/openai/agents/client.py +0 -0
  50. pygpt_net/provider/api/openai/video.py +562 -0
  51. pygpt_net/provider/core/config/patch.py +15 -0
  52. pygpt_net/provider/core/model/patch.py +29 -3
  53. pygpt_net/provider/vector_stores/qdrant.py +117 -0
  54. pygpt_net/ui/__init__.py +6 -1
  55. pygpt_net/ui/dialog/preset.py +9 -4
  56. pygpt_net/ui/layout/chat/attachments.py +18 -1
  57. pygpt_net/ui/layout/status.py +3 -3
  58. pygpt_net/ui/layout/toolbox/raw.py +7 -1
  59. pygpt_net/ui/widget/element/status.py +55 -0
  60. pygpt_net/ui/widget/filesystem/explorer.py +116 -2
  61. pygpt_net/ui/widget/lists/context.py +26 -16
  62. pygpt_net/ui/widget/option/checkbox_list.py +14 -2
  63. pygpt_net/ui/widget/textarea/input.py +71 -17
  64. {pygpt_net-2.6.64.dist-info → pygpt_net-2.6.66.dist-info}/METADATA +76 -25
  65. {pygpt_net-2.6.64.dist-info → pygpt_net-2.6.66.dist-info}/RECORD +63 -55
  66. {pygpt_net-2.6.64.dist-info → pygpt_net-2.6.66.dist-info}/LICENSE +0 -0
  67. {pygpt_net-2.6.64.dist-info → pygpt_net-2.6.66.dist-info}/WHEEL +0 -0
  68. {pygpt_net-2.6.64.dist-info → pygpt_net-2.6.66.dist-info}/entry_points.txt +0 -0
File without changes
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.09.14 00:00:00 #
9
+ # Updated Date: 2025.12.25 20:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import os
@@ -89,7 +89,14 @@ class ApiGoogle:
89
89
  filtered["location"] = os.environ.get("GOOGLE_CLOUD_LOCATION", "us-central1")
90
90
  # filtered["http_options"] = gtypes.HttpOptions(api_version="v1")
91
91
 
92
- return genai.Client(**filtered)
92
+ # use previous client if args are the same
93
+ if self.client and self.last_client_args == filtered:
94
+ return self.client
95
+
96
+ self.last_client_args = filtered
97
+ self.client = genai.Client(**filtered)
98
+
99
+ return self.client
93
100
 
94
101
  def call(
95
102
  self,
@@ -138,13 +145,17 @@ class ApiGoogle:
138
145
 
139
146
  elif mode == MODE_IMAGE:
140
147
  # Route to video / music / image based on selected model.
141
- if context.model.is_video_output():
142
- return self.video.generate(context=context, extra=extra) # veo, etc.
143
- # Lyria / music models
144
- if self.music.is_music_model(model.id if model else ""):
145
- return self.music.generate(context=context, extra=extra) # lyria, etc.
146
- # Default: image
147
- return self.image.generate(context=context, extra=extra) # imagen, etc.
148
+ media_mode = self.window.controller.media.get_mode()
149
+ if media_mode == "video":
150
+ if context.model.is_video_output():
151
+ return self.video.generate(context=context, extra=extra) # veo, etc.
152
+ elif media_mode == "music":
153
+ # Lyria / music models
154
+ if self.music.is_music_model(model.id if model else ""):
155
+ return self.music.generate(context=context, extra=extra) # lyria, etc.
156
+ elif media_mode == "image":
157
+ # Default: image
158
+ return self.image.generate(context=context, extra=extra) # imagen, etc.
148
159
 
149
160
  elif mode == MODE_ASSISTANT:
150
161
  return False # not implemented for Google
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.09.14 00:00:00 #
9
+ # Updated Date: 2025.12.25 20:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import mimetypes
@@ -39,14 +39,8 @@ class Image:
39
39
  ) -> bool:
40
40
  """
41
41
  Generate or edit image(s) using Google GenAI API (Developer API or Vertex AI).
42
-
43
- :param context: BridgeContext with prompt, model, attachments
44
- :param extra: extra parameters (num, inline)
45
- :param sync: run synchronously (blocking) if True
46
- :return: True if started
47
42
  """
48
43
  # Music fast-path: delegate to Music flow if a music model is selected (e.g., Lyria).
49
- # This keeps image flow unchanged while enabling music in the same "image" mode.
50
44
  try:
51
45
  model_id = (context.model.id if context and context.model else "") or ""
52
46
  if self.window and hasattr(self.window.core.api.google, "music"):
@@ -65,9 +59,6 @@ class Image:
65
59
  # decide sub-mode based on attachments
66
60
  sub_mode = self.MODE_GENERATE
67
61
  attachments = context.attachments
68
- if attachments and len(attachments) > 0:
69
- pass # TODO: implement edit!
70
- # sub_mode = self.MODE_EDIT
71
62
 
72
63
  # model used to improve the prompt (not image model)
73
64
  prompt_model = self.window.core.models.from_defaults()
@@ -89,6 +80,11 @@ class Image:
89
80
  worker.num = num
90
81
  worker.inline = inline
91
82
 
83
+ if attachments and len(attachments) > 0:
84
+ mid = str(model.id).lower()
85
+ if "imagen" in mid:
86
+ worker.mode = self.MODE_EDIT
87
+
92
88
  if self.window.core.config.has('img_resolution'):
93
89
  worker.resolution = self.window.core.config.get('img_resolution') or "1024x1024"
94
90
 
@@ -132,13 +128,28 @@ class ImageWorker(QRunnable):
132
128
  self.inline = False
133
129
  self.raw = False
134
130
  self.num = 1
135
- self.resolution = "1024x1024" # used to derive aspect ratio for Imagen
131
+ self.resolution = "1024x1024" # used to derive aspect ratio or image_size
136
132
 
137
133
  # limits
138
134
  self.imagen_max_num = 4 # Imagen returns up to 4 images
139
135
 
140
136
  # fallbacks
141
- self.DEFAULT_GEMINI_IMAGE_MODEL = "gemini-2.0-flash-preview-image-generation"
137
+ self.DEFAULT_GEMINI_IMAGE_MODEL = "gemini-2.5-flash-image"
138
+
139
+ # Canonical 1K dimensions for Nano Banana Pro (Gemini 3 Pro Image Preview).
140
+ # Used to infer 2K/4K by 2x/4x multiples and to normalize UI inputs.
141
+ self._NB_PRO_1K = {
142
+ "1024x1024", # 1:1
143
+ "848x1264", # 2:3
144
+ "1264x848", # 3:2
145
+ "896x1200", # 3:4
146
+ "1200x896", # 4:3
147
+ "928x1152", # 4:5
148
+ "1152x928", # 5:4
149
+ "768x1376", # 9:16
150
+ "1376x768", # 16:9
151
+ "1584x672", # 21:9
152
+ }
142
153
 
143
154
  @Slot()
144
155
  def run(self):
@@ -179,7 +190,7 @@ class ImageWorker(QRunnable):
179
190
  if p:
180
191
  paths.append(p)
181
192
  else:
182
- # Developer API fallback via Gemini image model; force v1 to avoid 404
193
+ # Gemini Developer API via Gemini image models (Nano Banana / Nano Banana Pro)
183
194
  resp = self._gemini_edit(self.input_prompt, self.attachments, self.num)
184
195
  saved = 0
185
196
  for cand in getattr(resp, "candidates", []) or []:
@@ -208,14 +219,8 @@ class ImageWorker(QRunnable):
208
219
  if p:
209
220
  paths.append(p)
210
221
  else:
211
- # Gemini Developer API image generation (needs response_modalities)
212
- resp = self.client.models.generate_content(
213
- model=self.model,
214
- contents=[self.input_prompt],
215
- config=gtypes.GenerateContentConfig(
216
- response_modalities=[gtypes.Modality.TEXT, gtypes.Modality.IMAGE],
217
- ),
218
- )
222
+ # Gemini Developer API image generation (Nano Banana / Nano Banana Pro) with robust sizing + optional reference images
223
+ resp = self._gemini_generate_image(self.input_prompt, self.model, self.resolution)
219
224
  saved = 0
220
225
  for cand in getattr(resp, "candidates", []) or []:
221
226
  parts = getattr(getattr(cand, "content", None), "parts", None) or []
@@ -316,10 +321,114 @@ class ImageWorker(QRunnable):
316
321
  config=cfg,
317
322
  )
318
323
 
324
+ def _is_gemini_pro_image_model(self, model_id: str) -> bool:
325
+ """
326
+ Detect Gemini 3 Pro Image (Nano Banana Pro) by id or UI alias.
327
+ """
328
+ mid = (model_id or "").lower()
329
+ return mid.startswith("gemini-") or mid.startswith("nano-banana") or mid.startswith("nb-")
330
+
331
+ def _infer_nb_pro_size_for_dims(self, w: int, h: int) -> Optional[str]:
332
+ """
333
+ Infer '1K' | '2K' | '4K' for Nano Banana Pro from WxH.
334
+ """
335
+ dims = f"{w}x{h}"
336
+ if dims in self._NB_PRO_1K:
337
+ return "1K"
338
+ if (w % 2 == 0) and (h % 2 == 0):
339
+ if f"{w // 2}x{h // 2}" in self._NB_PRO_1K:
340
+ return "2K"
341
+ if (w % 4 == 0) and (h % 4 == 0):
342
+ if f"{w // 4}x{h // 4}" in self._NB_PRO_1K:
343
+ return "4K"
344
+ mx = max(w, h)
345
+ if mx >= 4000:
346
+ return "4K"
347
+ if mx >= 2000:
348
+ return "2K"
349
+ return "1K"
350
+
351
+ def _build_gemini_image_config(self, model_id: str, resolution: str) -> Optional[gtypes.ImageConfig]:
352
+ """
353
+ Build ImageConfig for Gemini image models.
354
+ """
355
+ try:
356
+ aspect = self._aspect_from_resolution(resolution)
357
+ cfg = gtypes.ImageConfig()
358
+ if aspect:
359
+ cfg.aspect_ratio = aspect
360
+
361
+ # Only Pro supports image_size; detect by id/alias and set 1K/2K/4K from WxH.
362
+ if self._is_gemini_pro_image_model(model_id):
363
+ w_str, h_str = resolution.lower().replace("×", "x").split("x")
364
+ w, h = int(w_str.strip()), int(h_str.strip())
365
+ k = self._infer_nb_pro_size_for_dims(w, h)
366
+ if k:
367
+ cfg.image_size = k
368
+ return cfg
369
+ except Exception:
370
+ return None
371
+
372
+ def _attachment_image_parts(self) -> List[gtypes.Part]:
373
+ """
374
+ Build image Parts from current attachments for Gemini models.
375
+ """
376
+ parts: List[gtypes.Part] = []
377
+ paths = self._collect_attachment_paths(self.attachments)
378
+ for p in paths:
379
+ try:
380
+ mime = self._guess_mime(p)
381
+ if not mime or not mime.startswith("image/"):
382
+ continue
383
+ with open(p, "rb") as f:
384
+ data = f.read()
385
+ parts.append(gtypes.Part.from_bytes(data=data, mime_type=mime))
386
+ except Exception:
387
+ continue
388
+ return parts
389
+
390
+ def _gemini_generate_image(self, prompt: str, model_id: str, resolution: str):
391
+ """
392
+ Call Gemini generate_content with robust fallback for image_size.
393
+ Supports optional reference images uploaded as attachments.
394
+ """
395
+ cfg = self._build_gemini_image_config(model_id, resolution)
396
+ image_parts = self._attachment_image_parts()
397
+
398
+ def _do_call(icfg: Optional[gtypes.ImageConfig]):
399
+ contents: List[Any] = []
400
+ # Always include the textual prompt (can be empty string).
401
+ contents.append(prompt or "")
402
+ # Append reference images, if any.
403
+ if image_parts:
404
+ contents.extend(image_parts)
405
+ return self.client.models.generate_content(
406
+ model=model_id or self.DEFAULT_GEMINI_IMAGE_MODEL,
407
+ contents=contents,
408
+ config=gtypes.GenerateContentConfig(
409
+ response_modalities=[gtypes.Modality.TEXT, gtypes.Modality.IMAGE],
410
+ image_config=icfg,
411
+ ),
412
+ )
413
+
414
+ try:
415
+ return _do_call(cfg)
416
+ except Exception as e:
417
+ msg = str(e)
418
+ if "imageSize" in msg or "image_size" in msg or "Unrecognized" in msg or "unsupported" in msg:
419
+ try:
420
+ if cfg and getattr(cfg, "image_size", None):
421
+ cfg2 = gtypes.ImageConfig()
422
+ cfg2.aspect_ratio = getattr(cfg, "aspect_ratio", None)
423
+ return _do_call(cfg2)
424
+ except Exception:
425
+ pass
426
+ raise
427
+
319
428
  def _gemini_edit(self, prompt: str, attachments: Dict[str, Any], num: int):
320
429
  """
321
- Gemini image-to-image editing via generate_content (Developer/Vertex depending on client).
322
- The first attachment is used as the input image.
430
+ Gemini image-to-image editing via generate_content.
431
+ The first attachment is used as the input image. Honors aspect_ratio and (for Pro) image_size.
323
432
  """
324
433
  paths = self._collect_attachment_paths(attachments)
325
434
  if len(paths) == 0:
@@ -330,10 +439,27 @@ class ImageWorker(QRunnable):
330
439
  img_bytes = f.read()
331
440
  mime = self._guess_mime(img_path)
332
441
 
333
- return self.client.models.generate_content(
334
- model=self.model,
335
- contents=[prompt, gtypes.Part.from_bytes(data=img_bytes, mime_type=mime)],
336
- )
442
+ cfg = self._build_gemini_image_config(self.model, self.resolution)
443
+
444
+ def _do_call(icfg: Optional[gtypes.ImageConfig]):
445
+ return self.client.models.generate_content(
446
+ model=self.model or self.DEFAULT_GEMINI_IMAGE_MODEL,
447
+ contents=[prompt, gtypes.Part.from_bytes(data=img_bytes, mime_type=mime)],
448
+ config=gtypes.GenerateContentConfig(
449
+ image_config=icfg,
450
+ ),
451
+ )
452
+
453
+ try:
454
+ return _do_call(cfg)
455
+ except Exception as e:
456
+ msg = str(e)
457
+ if "imageSize" in msg or "image_size" in msg or "Unrecognized" in msg or "unsupported" in msg:
458
+ if cfg and getattr(cfg, "image_size", None):
459
+ cfg2 = gtypes.ImageConfig()
460
+ cfg2.aspect_ratio = getattr(cfg, "aspect_ratio", None)
461
+ return _do_call(cfg2)
462
+ raise
337
463
 
338
464
  def _collect_attachment_paths(self, attachments: Dict[str, Any]) -> List[str]:
339
465
  """Extract file paths from attachments dict."""
@@ -347,7 +473,7 @@ class ImageWorker(QRunnable):
347
473
  return out
348
474
 
349
475
  def _aspect_from_resolution(self, resolution: str) -> Optional[str]:
350
- """Derive aspect ratio for Imagen."""
476
+ """Derive aspect ratio from WxH across supported set."""
351
477
  try:
352
478
  from math import gcd
353
479
  tolerance = 0.08
@@ -357,10 +483,15 @@ class ImageWorker(QRunnable):
357
483
  return None
358
484
  supported = {
359
485
  "1:1": 1 / 1,
486
+ "2:3": 2 / 3,
487
+ "3:2": 3 / 2,
360
488
  "3:4": 3 / 4,
361
489
  "4:3": 4 / 3,
490
+ "4:5": 4 / 5,
491
+ "5:4": 5 / 4,
362
492
  "9:16": 9 / 16,
363
493
  "16:9": 16 / 9,
494
+ "21:9": 21 / 9,
364
495
  }
365
496
  g = gcd(w, h)
366
497
  key = f"{w // g}:{h // g}"
@@ -424,6 +555,8 @@ class ImageWorker(QRunnable):
424
555
  return 'image/jpeg'
425
556
  if ext == '.webp':
426
557
  return 'image/webp'
558
+ if ext in ('.heic', '.heif'):
559
+ return 'image/heic'
427
560
  return 'image/png'
428
561
 
429
562
  def _cleanup(self):
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.09.01 23:00:00 #
9
+ # Updated Date: 2025.12.25 20:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import base64, datetime, os, requests
@@ -150,11 +150,10 @@ class VideoWorker(QRunnable):
150
150
  self.fps = 24
151
151
  self.seed: Optional[int] = None
152
152
  self.negative_prompt: Optional[str] = None
153
- self.generate_audio: bool = False # Veo 3 only
154
- self.resolution: str = "720p" # Veo 3 supports 720p/1080p
153
+ self.generate_audio: bool = False # generation includes audio by default on Veo 3.x
154
+ self.resolution: str = "720p" # Veo supports 720p/1080p depending on variant
155
155
 
156
156
  # limits / capabilities
157
- # self.veo_max_num = 4 # Veo returns up to 4 videos
158
157
  self.veo_max_num = 1 # limit to 1 in Gemini API
159
158
 
160
159
  # fallbacks
@@ -187,42 +186,52 @@ class VideoWorker(QRunnable):
187
186
  num = min(self.num, self.veo_max_num)
188
187
  cfg_kwargs = {
189
188
  "number_of_videos": num,
190
- #"duration_seconds": self._duration_for_model(self.model, self.duration_seconds),
191
189
  }
192
- if self.aspect_ratio:
193
- cfg_kwargs["aspect_ratio"] = self.aspect_ratio
190
+
191
+ # normalize and set aspect ratio
192
+ ar = self._normalize_aspect_ratio(self.aspect_ratio)
193
+ if ar:
194
+ cfg_kwargs["aspect_ratio"] = ar
195
+
196
+ # normalize and set resolution if supported
197
+ res = self._normalize_resolution(self.resolution)
198
+ if res:
199
+ cfg_kwargs["resolution"] = res
200
+
201
+ # set optional controls
194
202
  if self.seed is not None:
195
203
  cfg_kwargs["seed"] = int(self.seed)
196
204
  if self.negative_prompt:
197
205
  cfg_kwargs["negative_prompt"] = self.negative_prompt
198
- if self._is_veo3(self.model):
199
- # Veo 3 supports audio and resolution
200
- # WARN: but not Gemini API:
201
- pass
202
- """
203
- cfg_kwargs["generate_audio"] = bool(self.generate_audio)
204
- if self.resolution:
205
- cfg_kwargs["resolution"] = self.resolution
206
- """
207
-
208
- config = gtypes.GenerateVideosConfig(**cfg_kwargs)
209
-
210
- # build request
211
- req_kwargs = {
212
- "model": self.model or self.DEFAULT_VEO_MODEL,
213
- "prompt": self.input_prompt or "",
214
- "config": config,
215
- }
216
206
 
217
- # image-to-video if an image attachment is present and supported
218
- base_img = self._first_image_attachment(self.attachments)
219
- if self.mode == Video.MODE_IMAGE_TO_VIDEO and base_img is not None and self._supports_image_to_video(self.model):
220
- req_kwargs["image"] = gtypes.Image.from_file(location=base_img)
207
+ # set durationSeconds when supported; fall back gracefully if rejected by model
208
+ cfg_try = dict(cfg_kwargs)
209
+ cfg_try["duration_seconds"] = int(self._duration_for_model(self.model, self.duration_seconds))
221
210
 
222
211
  self.signals.status.emit(trans('vid.status.generating') + f": {self.input_prompt}...")
223
212
 
224
- # start long-running operation
225
- operation = self.client.models.generate_videos(**req_kwargs)
213
+ try:
214
+ config = gtypes.GenerateVideosConfig(**cfg_try)
215
+ operation = self.client.models.generate_videos(
216
+ model=self.model or self.DEFAULT_VEO_MODEL,
217
+ prompt=self.input_prompt or "",
218
+ config=config,
219
+ image=self._image_part_if_needed(),
220
+ video=None,
221
+ )
222
+ except Exception as e:
223
+ if "durationSeconds isn't supported" in str(e) or "Unrecognized" in str(e):
224
+ # retry without duration_seconds
225
+ config = gtypes.GenerateVideosConfig(**cfg_kwargs)
226
+ operation = self.client.models.generate_videos(
227
+ model=self.model or self.DEFAULT_VEO_MODEL,
228
+ prompt=self.input_prompt or "",
229
+ config=config,
230
+ image=self._image_part_if_needed(),
231
+ video=None,
232
+ )
233
+ else:
234
+ raise
226
235
 
227
236
  # poll until done
228
237
  while not getattr(operation, "done", False):
@@ -258,6 +267,22 @@ class VideoWorker(QRunnable):
258
267
 
259
268
  # ---------- helpers ----------
260
269
 
270
+ def _normalize_aspect_ratio(self, ar: str) -> str:
271
+ """Normalize aspect ratio to Veo-supported values."""
272
+ val = (ar or "").strip()
273
+ return val if val in ("16:9", "9:16") else "16:9"
274
+
275
+ def _normalize_resolution(self, res: str) -> Optional[str]:
276
+ """Normalize resolution to '720p' or '1080p'."""
277
+ val = (res or "").lower().replace(" ", "")
278
+ if val in ("720p", "1080p"):
279
+ return val
280
+ if val in ("1280x720", "720x1280"):
281
+ return "720p"
282
+ if val in ("1920x1080", "1080x1920"):
283
+ return "1080p"
284
+ return None
285
+
261
286
  def _is_veo3(self, model_id: str) -> bool:
262
287
  mid = str(model_id or "").lower()
263
288
  return mid.startswith("veo-3.")
@@ -265,20 +290,32 @@ class VideoWorker(QRunnable):
265
290
  def _supports_image_to_video(self, model_id: str) -> bool:
266
291
  """Return True if the model supports image->video."""
267
292
  mid = str(model_id or "").lower()
268
- # Official support for image-to-video on veo-2 and veo-3 preview; keep extendable.
269
- return ("veo-2.0" in mid) or ("veo-3.0-generate-preview" in mid) or ("veo-3.0-fast-generate-preview" in mid)
293
+ return any(p in mid for p in (
294
+ "veo-2.0",
295
+ "veo-3.0-generate",
296
+ "veo-3.0-fast-generate",
297
+ "veo-3.1-generate",
298
+ "veo-3.1-fast-generate",
299
+ ))
270
300
 
271
301
  def _duration_for_model(self, model_id: str, requested: int) -> int:
272
302
  """Adjust duration constraints to model-specific limits."""
273
303
  mid = str(model_id or "").lower()
274
304
  if "veo-2.0" in mid:
275
- # Veo 2 supports 5–8s, default 8s.
276
305
  return max(5, min(8, int(requested or 8)))
306
+ if "veo-3.1" in mid:
307
+ return max(4, min(8, int(requested or 8)))
277
308
  if "veo-3.0" in mid:
278
- # Veo 3 commonly uses 8s clips; honor request if provided, otherwise 8s.
279
- return int(requested or 8)
309
+ return max(4, min(8, int(requested or 8)))
280
310
  return int(requested or 8)
281
311
 
312
+ def _image_part_if_needed(self) -> Optional[gtypes.Image]:
313
+ """Return Image part when in image-to-video mode and supported."""
314
+ if self.mode != Video.MODE_IMAGE_TO_VIDEO:
315
+ return None
316
+ base_img = self._first_image_attachment(self.attachments)
317
+ return gtypes.Image.from_file(location=base_img) if base_img else None
318
+
282
319
  def _first_image_attachment(self, attachments: Dict[str, Any]) -> Optional[str]:
283
320
  """Return path of the first image attachment, if any."""
284
321
  for _, att in (attachments or {}).items():
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.09.12 20:00:00 #
9
+ # Updated Date: 2025.12.25 20:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  from openai import OpenAI
@@ -38,6 +38,7 @@ from .store import Store
38
38
  from .summarizer import Summarizer
39
39
  from .tools import Tools
40
40
  from .vision import Vision
41
+ from .video import Video
41
42
 
42
43
 
43
44
  class ApiOpenAI:
@@ -63,6 +64,7 @@ class ApiOpenAI:
63
64
  self.summarizer = Summarizer(window)
64
65
  self.tools = Tools(window)
65
66
  self.vision = Vision(window)
67
+ self.video = Video(window)
66
68
  self.client = None
67
69
  self.locked = False
68
70
  self.last_client_args = None # last client args used, for debug purposes
@@ -87,7 +89,7 @@ class ApiOpenAI:
87
89
  self,
88
90
  context: BridgeContext,
89
91
  extra: dict = None,
90
- rt_signals = None
92
+ rt_signals=None
91
93
  ) -> bool:
92
94
  """
93
95
  Call OpenAI API
@@ -157,7 +159,7 @@ class ApiOpenAI:
157
159
  if is_realtime:
158
160
  return True
159
161
 
160
- if fixtures.is_enabled("stream"): # fake stream for testing
162
+ if fixtures.is_enabled("stream"): # fake stream for testing
161
163
  use_responses_api = False
162
164
  response = fixtures.get_stream_generator(ctx)
163
165
  else:
@@ -181,12 +183,20 @@ class ApiOpenAI:
181
183
 
182
184
  self.vision.append_images(ctx) # append images to ctx if provided
183
185
 
184
- # image
186
+ # image / video
185
187
  elif mode == MODE_IMAGE:
186
- return self.image.generate(
187
- context=context,
188
- extra=extra,
189
- ) # return here, async handled
188
+ media_mode = self.window.controller.media.get_mode()
189
+ if media_mode == "video":
190
+ if context.model and context.model.is_video_output():
191
+ return self.video.generate(
192
+ context=context,
193
+ extra=extra,
194
+ ) # async handled if allowed
195
+ elif media_mode == "image":
196
+ return self.image.generate(
197
+ context=context,
198
+ extra=extra,
199
+ )
190
200
 
191
201
  # vision
192
202
  elif mode == MODE_VISION:
@@ -294,13 +304,13 @@ class ApiOpenAI:
294
304
  messages.append({"role": "user", "content": prompt})
295
305
  additional_kwargs = {}
296
306
  # if max_tokens > 0:
297
- # additional_kwargs["max_tokens"] = max_tokens
307
+ # additional_kwargs["max_tokens"] = max_tokens
298
308
 
299
309
  # tools / functions
300
310
  tools = self.window.core.api.openai.tools.prepare(model, functions)
301
311
  if len(tools) > 0 and "disable_tools" not in extra:
302
312
  additional_kwargs["tools"] = tools
303
-
313
+
304
314
  try:
305
315
  response = client.chat.completions.create(
306
316
  messages=messages,
@@ -349,4 +359,4 @@ class ApiOpenAI:
349
359
  self.client = None
350
360
  except Exception as e:
351
361
  self.window.core.debug.log(e)
352
- print("Error closing client:", e)
362
+ print("Error closing client:", e)
File without changes