pygpt-net 2.6.64__py3-none-any.whl → 2.6.66__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pygpt_net/CHANGELOG.txt +21 -0
- pygpt_net/__init__.py +3 -3
- pygpt_net/app.py +5 -1
- pygpt_net/controller/chat/chat.py +0 -0
- pygpt_net/controller/chat/handler/openai_stream.py +137 -7
- pygpt_net/controller/chat/render.py +0 -0
- pygpt_net/controller/config/field/checkbox_list.py +34 -1
- pygpt_net/controller/files/files.py +71 -2
- pygpt_net/controller/media/media.py +20 -1
- pygpt_net/controller/presets/editor.py +137 -22
- pygpt_net/controller/presets/presets.py +4 -1
- pygpt_net/controller/ui/mode.py +14 -10
- pygpt_net/controller/ui/ui.py +18 -1
- pygpt_net/core/agents/custom/__init__.py +18 -2
- pygpt_net/core/agents/custom/runner.py +2 -2
- pygpt_net/core/attachments/clipboard.py +146 -0
- pygpt_net/core/image/image.py +34 -1
- pygpt_net/core/render/web/renderer.py +33 -11
- pygpt_net/core/tabs/tabs.py +0 -0
- pygpt_net/core/types/image.py +61 -3
- pygpt_net/data/config/config.json +4 -3
- pygpt_net/data/config/models.json +629 -41
- pygpt_net/data/css/style.dark.css +12 -0
- pygpt_net/data/css/style.light.css +12 -0
- pygpt_net/data/icons/pin2.svg +1 -0
- pygpt_net/data/icons/pin3.svg +3 -0
- pygpt_net/data/icons/point.svg +1 -0
- pygpt_net/data/icons/target.svg +1 -0
- pygpt_net/data/js/app/ui.js +19 -2
- pygpt_net/data/js/app/user.js +22 -54
- pygpt_net/data/js/app.min.js +7 -9
- pygpt_net/data/locale/locale.de.ini +4 -0
- pygpt_net/data/locale/locale.en.ini +8 -0
- pygpt_net/data/locale/locale.es.ini +4 -0
- pygpt_net/data/locale/locale.fr.ini +4 -0
- pygpt_net/data/locale/locale.it.ini +4 -0
- pygpt_net/data/locale/locale.pl.ini +4 -0
- pygpt_net/data/locale/locale.uk.ini +4 -0
- pygpt_net/data/locale/locale.zh.ini +4 -0
- pygpt_net/icons.qrc +4 -0
- pygpt_net/icons_rc.py +274 -137
- pygpt_net/item/model.py +15 -19
- pygpt_net/js_rc.py +2038 -2075
- pygpt_net/provider/agents/openai/agent.py +0 -0
- pygpt_net/provider/api/google/__init__.py +20 -9
- pygpt_net/provider/api/google/image.py +161 -28
- pygpt_net/provider/api/google/video.py +73 -36
- pygpt_net/provider/api/openai/__init__.py +21 -11
- pygpt_net/provider/api/openai/agents/client.py +0 -0
- pygpt_net/provider/api/openai/video.py +562 -0
- pygpt_net/provider/core/config/patch.py +15 -0
- pygpt_net/provider/core/model/patch.py +29 -3
- pygpt_net/provider/vector_stores/qdrant.py +117 -0
- pygpt_net/ui/__init__.py +6 -1
- pygpt_net/ui/dialog/preset.py +9 -4
- pygpt_net/ui/layout/chat/attachments.py +18 -1
- pygpt_net/ui/layout/status.py +3 -3
- pygpt_net/ui/layout/toolbox/raw.py +7 -1
- pygpt_net/ui/widget/element/status.py +55 -0
- pygpt_net/ui/widget/filesystem/explorer.py +116 -2
- pygpt_net/ui/widget/lists/context.py +26 -16
- pygpt_net/ui/widget/option/checkbox_list.py +14 -2
- pygpt_net/ui/widget/textarea/input.py +71 -17
- {pygpt_net-2.6.64.dist-info → pygpt_net-2.6.66.dist-info}/METADATA +76 -25
- {pygpt_net-2.6.64.dist-info → pygpt_net-2.6.66.dist-info}/RECORD +63 -55
- {pygpt_net-2.6.64.dist-info → pygpt_net-2.6.66.dist-info}/LICENSE +0 -0
- {pygpt_net-2.6.64.dist-info → pygpt_net-2.6.66.dist-info}/WHEEL +0 -0
- {pygpt_net-2.6.64.dist-info → pygpt_net-2.6.66.dist-info}/entry_points.txt +0 -0
pygpt_net/provider/api/google/__init__.py

@@ -6,7 +6,7 @@
 # GitHub: https://github.com/szczyglis-dev/py-gpt #
 # MIT License #
 # Created By : Marcin Szczygliński #
-# Updated Date: 2025.
+# Updated Date: 2025.12.25 20:00:00 #
 # ================================================== #
 
 import os
@@ -89,7 +89,14 @@ class ApiGoogle:
             filtered["location"] = os.environ.get("GOOGLE_CLOUD_LOCATION", "us-central1")
             # filtered["http_options"] = gtypes.HttpOptions(api_version="v1")
 
-
+        # use previous client if args are the same
+        if self.client and self.last_client_args == filtered:
+            return self.client
+
+        self.last_client_args = filtered
+        self.client = genai.Client(**filtered)
+
+        return self.client
 
     def call(
         self,
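Note: the hunk above memoizes the genai.Client on its construction arguments, so repeated calls reuse one client until the filtered kwargs change. A minimal standalone sketch of the same pattern, assuming nothing about pygpt-net's internals (ClientCache and factory are illustrative names, not the package's API):

from typing import Any, Callable, Dict, Optional

class ClientCache:
    """Reuse an expensive client object while its constructor kwargs are unchanged."""

    def __init__(self) -> None:
        self.client: Optional[Any] = None
        self.last_args: Optional[Dict[str, Any]] = None

    def get(self, factory: Callable[..., Any], **kwargs: Any) -> Any:
        # Dict equality compares keys and values, so any changed kwarg rebuilds the client.
        if self.client is not None and self.last_args == kwargs:
            return self.client
        self.last_args = dict(kwargs)  # copy, in case the caller mutates its dict later
        self.client = factory(**kwargs)
        return self.client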
@@ -138,13 +145,17 @@ class ApiGoogle:
 
         elif mode == MODE_IMAGE:
             # Route to video / music / image based on selected model.
-
-
-
-
-
-
-
+            media_mode = self.window.controller.media.get_mode()
+            if media_mode == "video":
+                if context.model.is_video_output():
+                    return self.video.generate(context=context, extra=extra)  # veo, etc.
+            elif media_mode == "music":
+                # Lyria / music models
+                if self.music.is_music_model(model.id if model else ""):
+                    return self.music.generate(context=context, extra=extra)  # lyria, etc.
+            elif media_mode == "image":
+                # Default: image
+                return self.image.generate(context=context, extra=extra)  # imagen, etc.
 
         elif mode == MODE_ASSISTANT:
             return False  # not implemented for Google
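Note: in MODE_IMAGE the provider now branches on the UI's media mode (video / music / image) instead of taking a single image path. The if/elif chain above is equivalent to a small dispatch table; a hypothetical sketch, with lambdas standing in for the generate() calls:

from typing import Callable, Dict, Optional

def route_media(mode: str, handlers: Dict[str, Callable[[], bool]]) -> Optional[bool]:
    """Return the handler's result for a known media mode, None for unknown modes."""
    handler = handlers.get(mode)
    return handler() if handler is not None else None

# Stand-ins for self.video / self.music / self.image generate() calls.
handlers = {
    "video": lambda: True,   # veo, etc.
    "music": lambda: True,   # lyria, etc.
    "image": lambda: True,   # imagen, etc.
}
assert route_media("music", handlers) is True
assert route_media("text", handlers) is None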
pygpt_net/provider/api/google/image.py

@@ -6,7 +6,7 @@
 # GitHub: https://github.com/szczyglis-dev/py-gpt #
 # MIT License #
 # Created By : Marcin Szczygliński #
-# Updated Date: 2025.
+# Updated Date: 2025.12.25 20:00:00 #
 # ================================================== #
 
 import mimetypes
@@ -39,14 +39,8 @@ class Image:
     ) -> bool:
         """
        Generate or edit image(s) using Google GenAI API (Developer API or Vertex AI).
-
-        :param context: BridgeContext with prompt, model, attachments
-        :param extra: extra parameters (num, inline)
-        :param sync: run synchronously (blocking) if True
-        :return: True if started
         """
         # Music fast-path: delegate to Music flow if a music model is selected (e.g., Lyria).
-        # This keeps image flow unchanged while enabling music in the same "image" mode.
         try:
             model_id = (context.model.id if context and context.model else "") or ""
             if self.window and hasattr(self.window.core.api.google, "music"):
@@ -65,9 +59,6 @@ class Image:
         # decide sub-mode based on attachments
         sub_mode = self.MODE_GENERATE
         attachments = context.attachments
-        if attachments and len(attachments) > 0:
-            pass  # TODO: implement edit!
-            # sub_mode = self.MODE_EDIT
 
         # model used to improve the prompt (not image model)
         prompt_model = self.window.core.models.from_defaults()
@@ -89,6 +80,11 @@ class Image:
         worker.num = num
         worker.inline = inline
 
+        if attachments and len(attachments) > 0:
+            mid = str(model.id).lower()
+            if "imagen" in mid:
+                worker.mode = self.MODE_EDIT
+
         if self.window.core.config.has('img_resolution'):
             worker.resolution = self.window.core.config.get('img_resolution') or "1024x1024"
 
@@ -132,13 +128,28 @@ class ImageWorker(QRunnable):
         self.inline = False
         self.raw = False
         self.num = 1
-        self.resolution = "1024x1024"  # used to derive aspect ratio
+        self.resolution = "1024x1024"  # used to derive aspect ratio or image_size
 
         # limits
         self.imagen_max_num = 4  # Imagen returns up to 4 images
 
         # fallbacks
-        self.DEFAULT_GEMINI_IMAGE_MODEL = "gemini-2.
+        self.DEFAULT_GEMINI_IMAGE_MODEL = "gemini-2.5-flash-image"
+
+        # Canonical 1K dimensions for Nano Banana Pro (Gemini 3 Pro Image Preview).
+        # Used to infer 2K/4K by 2x/4x multiples and to normalize UI inputs.
+        self._NB_PRO_1K = {
+            "1024x1024",  # 1:1
+            "848x1264",   # 2:3
+            "1264x848",   # 3:2
+            "896x1200",   # 3:4
+            "1200x896",   # 4:3
+            "928x1152",   # 4:5
+            "1152x928",   # 5:4
+            "768x1376",   # 9:16
+            "1376x768",   # 16:9
+            "1584x672",   # 21:9
+        }
 
     @Slot()
     def run(self):
@@ -179,7 +190,7 @@ class ImageWorker(QRunnable):
                     if p:
                         paths.append(p)
             else:
-                # Developer API
+                # Gemini Developer API via Gemini image models (Nano Banana / Nano Banana Pro)
                 resp = self._gemini_edit(self.input_prompt, self.attachments, self.num)
                 saved = 0
                 for cand in getattr(resp, "candidates", []) or []:
@@ -208,14 +219,8 @@ class ImageWorker(QRunnable):
                     if p:
                         paths.append(p)
             else:
-                # Gemini Developer API image generation (
-                resp = self.
-                    model=self.model,
-                    contents=[self.input_prompt],
-                    config=gtypes.GenerateContentConfig(
-                        response_modalities=[gtypes.Modality.TEXT, gtypes.Modality.IMAGE],
-                    ),
-                )
+                # Gemini Developer API image generation (Nano Banana / Nano Banana Pro) with robust sizing + optional reference images
+                resp = self._gemini_generate_image(self.input_prompt, self.model, self.resolution)
                 saved = 0
                 for cand in getattr(resp, "candidates", []) or []:
                     parts = getattr(getattr(cand, "content", None), "parts", None) or []
@@ -316,10 +321,114 @@ class ImageWorker(QRunnable):
             config=cfg,
         )
 
+    def _is_gemini_pro_image_model(self, model_id: str) -> bool:
+        """
+        Detect Gemini 3 Pro Image (Nano Banana Pro) by id or UI alias.
+        """
+        mid = (model_id or "").lower()
+        return mid.startswith("gemini-") or mid.startswith("nano-banana") or mid.startswith("nb-")
+
+    def _infer_nb_pro_size_for_dims(self, w: int, h: int) -> Optional[str]:
+        """
+        Infer '1K' | '2K' | '4K' for Nano Banana Pro from WxH.
+        """
+        dims = f"{w}x{h}"
+        if dims in self._NB_PRO_1K:
+            return "1K"
+        if (w % 2 == 0) and (h % 2 == 0):
+            if f"{w // 2}x{h // 2}" in self._NB_PRO_1K:
+                return "2K"
+        if (w % 4 == 0) and (h % 4 == 0):
+            if f"{w // 4}x{h // 4}" in self._NB_PRO_1K:
+                return "4K"
+        mx = max(w, h)
+        if mx >= 4000:
+            return "4K"
+        if mx >= 2000:
+            return "2K"
+        return "1K"
+
+    def _build_gemini_image_config(self, model_id: str, resolution: str) -> Optional[gtypes.ImageConfig]:
+        """
+        Build ImageConfig for Gemini image models.
+        """
+        try:
+            aspect = self._aspect_from_resolution(resolution)
+            cfg = gtypes.ImageConfig()
+            if aspect:
+                cfg.aspect_ratio = aspect
+
+            # Only Pro supports image_size; detect by id/alias and set 1K/2K/4K from WxH.
+            if self._is_gemini_pro_image_model(model_id):
+                w_str, h_str = resolution.lower().replace("×", "x").split("x")
+                w, h = int(w_str.strip()), int(h_str.strip())
+                k = self._infer_nb_pro_size_for_dims(w, h)
+                if k:
+                    cfg.image_size = k
+            return cfg
+        except Exception:
+            return None
+
+    def _attachment_image_parts(self) -> List[gtypes.Part]:
+        """
+        Build image Parts from current attachments for Gemini models.
+        """
+        parts: List[gtypes.Part] = []
+        paths = self._collect_attachment_paths(self.attachments)
+        for p in paths:
+            try:
+                mime = self._guess_mime(p)
+                if not mime or not mime.startswith("image/"):
+                    continue
+                with open(p, "rb") as f:
+                    data = f.read()
+                parts.append(gtypes.Part.from_bytes(data=data, mime_type=mime))
+            except Exception:
+                continue
+        return parts
+
+    def _gemini_generate_image(self, prompt: str, model_id: str, resolution: str):
+        """
+        Call Gemini generate_content with robust fallback for image_size.
+        Supports optional reference images uploaded as attachments.
+        """
+        cfg = self._build_gemini_image_config(model_id, resolution)
+        image_parts = self._attachment_image_parts()
+
+        def _do_call(icfg: Optional[gtypes.ImageConfig]):
+            contents: List[Any] = []
+            # Always include the textual prompt (can be empty string).
+            contents.append(prompt or "")
+            # Append reference images, if any.
+            if image_parts:
+                contents.extend(image_parts)
+            return self.client.models.generate_content(
+                model=model_id or self.DEFAULT_GEMINI_IMAGE_MODEL,
+                contents=contents,
+                config=gtypes.GenerateContentConfig(
+                    response_modalities=[gtypes.Modality.TEXT, gtypes.Modality.IMAGE],
+                    image_config=icfg,
+                ),
+            )
+
+        try:
+            return _do_call(cfg)
+        except Exception as e:
+            msg = str(e)
+            if "imageSize" in msg or "image_size" in msg or "Unrecognized" in msg or "unsupported" in msg:
+                try:
+                    if cfg and getattr(cfg, "image_size", None):
+                        cfg2 = gtypes.ImageConfig()
+                        cfg2.aspect_ratio = getattr(cfg, "aspect_ratio", None)
+                        return _do_call(cfg2)
+                except Exception:
+                    pass
+            raise
+
     def _gemini_edit(self, prompt: str, attachments: Dict[str, Any], num: int):
         """
-        Gemini image-to-image editing via generate_content
-        The first attachment is used as the input image.
+        Gemini image-to-image editing via generate_content.
+        The first attachment is used as the input image. Honors aspect_ratio and (for Pro) image_size.
         """
         paths = self._collect_attachment_paths(attachments)
         if len(paths) == 0:
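Note: a worked example of the 1K/2K/4K inference performed by the new _infer_nb_pro_size_for_dims above: exact membership in the 1K table first, then exact 2x/4x multiples, then a longest-edge fallback. This is a standalone mirror of the logic shown in this diff, runnable outside the worker class:

from typing import Optional

# The canonical 1K sizes from the hunk above; 2K/4K are exact 2x/4x multiples.
NB_PRO_1K = {
    "1024x1024", "848x1264", "1264x848", "896x1200", "1200x896",
    "928x1152", "1152x928", "768x1376", "1376x768", "1584x672",
}

def infer_size_tier(w: int, h: int) -> Optional[str]:
    """Map raw WxH to a '1K' / '2K' / '4K' tier, as the worker does."""
    if f"{w}x{h}" in NB_PRO_1K:
        return "1K"
    if w % 2 == 0 and h % 2 == 0 and f"{w // 2}x{h // 2}" in NB_PRO_1K:
        return "2K"
    if w % 4 == 0 and h % 4 == 0 and f"{w // 4}x{h // 4}" in NB_PRO_1K:
        return "4K"
    # Fall back to the longest edge when the dimensions are off-grid.
    longest = max(w, h)
    return "4K" if longest >= 4000 else ("2K" if longest >= 2000 else "1K")

assert infer_size_tier(1024, 1024) == "1K"
assert infer_size_tier(2048, 2048) == "2K"   # 2x of 1024x1024
assert infer_size_tier(5504, 3072) == "4K"   # 4x of 1376x768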
@@ -330,10 +439,27 @@ class ImageWorker(QRunnable):
             img_bytes = f.read()
         mime = self._guess_mime(img_path)
 
-
-
-
-
+        cfg = self._build_gemini_image_config(self.model, self.resolution)
+
+        def _do_call(icfg: Optional[gtypes.ImageConfig]):
+            return self.client.models.generate_content(
+                model=self.model or self.DEFAULT_GEMINI_IMAGE_MODEL,
+                contents=[prompt, gtypes.Part.from_bytes(data=img_bytes, mime_type=mime)],
+                config=gtypes.GenerateContentConfig(
+                    image_config=icfg,
+                ),
+            )
+
+        try:
+            return _do_call(cfg)
+        except Exception as e:
+            msg = str(e)
+            if "imageSize" in msg or "image_size" in msg or "Unrecognized" in msg or "unsupported" in msg:
+                if cfg and getattr(cfg, "image_size", None):
+                    cfg2 = gtypes.ImageConfig()
+                    cfg2.aspect_ratio = getattr(cfg, "aspect_ratio", None)
+                    return _do_call(cfg2)
+            raise
 
     def _collect_attachment_paths(self, attachments: Dict[str, Any]) -> List[str]:
         """Extract file paths from attachments dict."""
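Note: both _gemini_generate_image and _gemini_edit use the same degrade-and-retry shape: attempt the call with the full config, and if the backend rejects image_size, retry once with only the aspect ratio. A generic sketch of the pattern, with a plain dict standing in for gtypes.ImageConfig (call and strip_key are illustrative):

from typing import Any, Callable, Optional

def call_with_config_fallback(
    call: Callable[[Optional[dict]], Any],
    cfg: Optional[dict],
    strip_key: str = "image_size",
) -> Any:
    """Try the full config; on an unsupported-field error, retry once without strip_key."""
    try:
        return call(cfg)
    except Exception as e:
        msg = str(e)
        retriable = "Unrecognized" in msg or "unsupported" in msg or strip_key in msg
        if retriable and cfg and strip_key in cfg:
            reduced = {k: v for k, v in cfg.items() if k != strip_key}
            return call(reduced)  # degraded call keeps the other keys, drops the rejected one
        raise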
@@ -347,7 +473,7 @@ class ImageWorker(QRunnable):
         return out
 
     def _aspect_from_resolution(self, resolution: str) -> Optional[str]:
-        """Derive aspect ratio
+        """Derive aspect ratio from WxH across supported set."""
         try:
             from math import gcd
             tolerance = 0.08
@@ -357,10 +483,15 @@ class ImageWorker(QRunnable):
                 return None
             supported = {
                 "1:1": 1 / 1,
+                "2:3": 2 / 3,
+                "3:2": 3 / 2,
                 "3:4": 3 / 4,
                 "4:3": 4 / 3,
+                "4:5": 4 / 5,
+                "5:4": 5 / 4,
                 "9:16": 9 / 16,
                 "16:9": 16 / 9,
+                "21:9": 21 / 9,
             }
             g = gcd(w, h)
             key = f"{w // g}:{h // g}"
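Note: the helper reduces WxH by the gcd for an exact ratio match, with tolerance = 0.08 available for near-misses. The matching step after the key computation is outside this diff, so the nearest-ratio fallback below is an assumption; the gcd reduction and the expanded supported set mirror the hunk above:

from math import gcd
from typing import Optional

SUPPORTED = {
    "1:1": 1.0, "2:3": 2 / 3, "3:2": 3 / 2, "3:4": 3 / 4, "4:3": 4 / 3,
    "4:5": 4 / 5, "5:4": 5 / 4, "9:16": 9 / 16, "16:9": 16 / 9, "21:9": 21 / 9,
}

def aspect_from_resolution(resolution: str, tolerance: float = 0.08) -> Optional[str]:
    """Exact match via gcd reduction first, then nearest supported ratio within tolerance."""
    try:
        w, h = (int(v) for v in resolution.lower().replace("×", "x").split("x"))
    except ValueError:
        return None
    g = gcd(w, h)
    key = f"{w // g}:{h // g}"
    if key in SUPPORTED:
        return key
    ratio = w / h
    best = min(SUPPORTED, key=lambda k: abs(SUPPORTED[k] - ratio))
    return best if abs(SUPPORTED[best] - ratio) <= tolerance * ratio else None

assert aspect_from_resolution("1920x1080") == "16:9"
assert aspect_from_resolution("1280x768") == "16:9"  # 1.667 is within 8% of 1.778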
@@ -424,6 +555,8 @@ class ImageWorker(QRunnable):
             return 'image/jpeg'
         if ext == '.webp':
             return 'image/webp'
+        if ext in ('.heic', '.heif'):
+            return 'image/heic'
         return 'image/png'
 
     def _cleanup(self):
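Note: the extension table gains HEIC/HEIF support here. For comparison, the stdlib covers most of this; a sketch of an alternative (not how the worker does it — a hand-rolled table avoids platform-dependent mimetypes registries):

import mimetypes

def guess_image_mime(path: str) -> str:
    """stdlib-based guess with the same image/png default as the worker's table."""
    mime, _ = mimetypes.guess_type(path)
    # .heic/.heif may be missing from older registries, so an explicit table can be safer.
    return mime if mime and mime.startswith("image/") else "image/png"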
pygpt_net/provider/api/google/video.py

@@ -6,7 +6,7 @@
 # GitHub: https://github.com/szczyglis-dev/py-gpt #
 # MIT License #
 # Created By : Marcin Szczygliński #
-# Updated Date: 2025.
+# Updated Date: 2025.12.25 20:00:00 #
 # ================================================== #
 
 import base64, datetime, os, requests
@@ -150,11 +150,10 @@ class VideoWorker(QRunnable):
         self.fps = 24
         self.seed: Optional[int] = None
         self.negative_prompt: Optional[str] = None
-        self.generate_audio: bool = False  # Veo 3
-        self.resolution: str = "720p"  # Veo
+        self.generate_audio: bool = False  # generation includes audio by default on Veo 3.x
+        self.resolution: str = "720p"  # Veo supports 720p/1080p depending on variant
 
         # limits / capabilities
-        # self.veo_max_num = 4  # Veo returns up to 4 videos
         self.veo_max_num = 1  # limit to 1 in Gemini API
 
         # fallbacks
@@ -187,42 +186,52 @@ class VideoWorker(QRunnable):
         num = min(self.num, self.veo_max_num)
         cfg_kwargs = {
             "number_of_videos": num,
-            #"duration_seconds": self._duration_for_model(self.model, self.duration_seconds),
         }
-
-
+
+        # normalize and set aspect ratio
+        ar = self._normalize_aspect_ratio(self.aspect_ratio)
+        if ar:
+            cfg_kwargs["aspect_ratio"] = ar
+
+        # normalize and set resolution if supported
+        res = self._normalize_resolution(self.resolution)
+        if res:
+            cfg_kwargs["resolution"] = res
+
+        # set optional controls
         if self.seed is not None:
             cfg_kwargs["seed"] = int(self.seed)
         if self.negative_prompt:
             cfg_kwargs["negative_prompt"] = self.negative_prompt
-        if self._is_veo3(self.model):
-            # Veo 3 supports audio and resolution
-            # WARN: but not Gemini API:
-            pass
-            """
-            cfg_kwargs["generate_audio"] = bool(self.generate_audio)
-            if self.resolution:
-                cfg_kwargs["resolution"] = self.resolution
-            """
-
-        config = gtypes.GenerateVideosConfig(**cfg_kwargs)
-
-        # build request
-        req_kwargs = {
-            "model": self.model or self.DEFAULT_VEO_MODEL,
-            "prompt": self.input_prompt or "",
-            "config": config,
-        }
 
-        #
-
-
-            req_kwargs["image"] = gtypes.Image.from_file(location=base_img)
+        # set durationSeconds when supported; fall back gracefully if rejected by model
+        cfg_try = dict(cfg_kwargs)
+        cfg_try["duration_seconds"] = int(self._duration_for_model(self.model, self.duration_seconds))
 
         self.signals.status.emit(trans('vid.status.generating') + f": {self.input_prompt}...")
 
-
-
+        try:
+            config = gtypes.GenerateVideosConfig(**cfg_try)
+            operation = self.client.models.generate_videos(
+                model=self.model or self.DEFAULT_VEO_MODEL,
+                prompt=self.input_prompt or "",
+                config=config,
+                image=self._image_part_if_needed(),
+                video=None,
+            )
+        except Exception as e:
+            if "durationSeconds isn't supported" in str(e) or "Unrecognized" in str(e):
+                # retry without duration_seconds
+                config = gtypes.GenerateVideosConfig(**cfg_kwargs)
+                operation = self.client.models.generate_videos(
+                    model=self.model or self.DEFAULT_VEO_MODEL,
+                    prompt=self.input_prompt or "",
+                    config=config,
+                    image=self._image_part_if_needed(),
+                    video=None,
+                )
+            else:
+                raise
 
         # poll until done
         while not getattr(operation, "done", False):
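Note: generate_videos returns a long-running operation that the worker polls via its done flag. A generic polling sketch with a timeout (refresh stands in for whatever re-fetches operation state; the interval and timeout values are illustrative):

import time
from typing import Any, Callable

def poll_until_done(
    operation: Any,
    refresh: Callable[[Any], Any],
    interval: float = 10.0,
    timeout: float = 600.0,
) -> Any:
    """Re-fetch a long-running operation until done=True or the deadline passes."""
    deadline = time.monotonic() + timeout
    while not getattr(operation, "done", False):
        if time.monotonic() >= deadline:
            raise TimeoutError("video generation did not finish in time")
        time.sleep(interval)
        operation = refresh(operation)
    return operation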
@@ -258,6 +267,22 @@ class VideoWorker(QRunnable):
 
     # ---------- helpers ----------
 
+    def _normalize_aspect_ratio(self, ar: str) -> str:
+        """Normalize aspect ratio to Veo-supported values."""
+        val = (ar or "").strip()
+        return val if val in ("16:9", "9:16") else "16:9"
+
+    def _normalize_resolution(self, res: str) -> Optional[str]:
+        """Normalize resolution to '720p' or '1080p'."""
+        val = (res or "").lower().replace(" ", "")
+        if val in ("720p", "1080p"):
+            return val
+        if val in ("1280x720", "720x1280"):
+            return "720p"
+        if val in ("1920x1080", "1080x1920"):
+            return "1080p"
+        return None
+
     def _is_veo3(self, model_id: str) -> bool:
         mid = str(model_id or "").lower()
         return mid.startswith("veo-3.")
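Note: behavior of the new resolution normalizer, shown as a standalone mirror of the helper above with quick checks:

from typing import Optional

def normalize_resolution(res: str) -> Optional[str]:
    """Map labels or WxH strings to '720p'/'1080p'; unknown values become None."""
    val = (res or "").lower().replace(" ", "")
    if val in ("720p", "1080p"):
        return val
    if val in ("1280x720", "720x1280"):
        return "720p"
    if val in ("1920x1080", "1080x1920"):
        return "1080p"
    return None

assert normalize_resolution("1920 x 1080") == "1080p"
assert normalize_resolution("4k") is None  # unsupported values are dropped, not guessed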
@@ -265,20 +290,32 @@ class VideoWorker(QRunnable):
     def _supports_image_to_video(self, model_id: str) -> bool:
         """Return True if the model supports image->video."""
         mid = str(model_id or "").lower()
-
-
+        return any(p in mid for p in (
+            "veo-2.0",
+            "veo-3.0-generate",
+            "veo-3.0-fast-generate",
+            "veo-3.1-generate",
+            "veo-3.1-fast-generate",
+        ))
 
     def _duration_for_model(self, model_id: str, requested: int) -> int:
         """Adjust duration constraints to model-specific limits."""
         mid = str(model_id or "").lower()
         if "veo-2.0" in mid:
-            # Veo 2 supports 5–8s, default 8s.
             return max(5, min(8, int(requested or 8)))
+        if "veo-3.1" in mid:
+            return max(4, min(8, int(requested or 8)))
         if "veo-3.0" in mid:
-
-            return int(requested or 8)
+            return max(4, min(8, int(requested or 8)))
         return int(requested or 8)
 
+    def _image_part_if_needed(self) -> Optional[gtypes.Image]:
+        """Return Image part when in image-to-video mode and supported."""
+        if self.mode != Video.MODE_IMAGE_TO_VIDEO:
+            return None
+        base_img = self._first_image_attachment(self.attachments)
+        return gtypes.Image.from_file(location=base_img) if base_img else None
+
     def _first_image_attachment(self, attachments: Dict[str, Any]) -> Optional[str]:
         """Return path of the first image attachment, if any."""
         for _, att in (attachments or {}).items():
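Note: the duration logic now clamps per Veo family (Veo 2: 5-8s; Veo 3.0/3.1: 4-8s; everything else passes through). A standalone mirror of _duration_for_model with quick checks:

def clamp_duration(model_id: str, requested: int) -> int:
    """Clamp requested seconds to the model family's supported range; default 8s."""
    mid = (model_id or "").lower()
    if "veo-2.0" in mid:
        return max(5, min(8, int(requested or 8)))
    if "veo-3.1" in mid or "veo-3.0" in mid:
        return max(4, min(8, int(requested or 8)))
    return int(requested or 8)

assert clamp_duration("veo-2.0-generate-001", 3) == 5        # below Veo 2 minimum
assert clamp_duration("veo-3.1-generate-preview", 20) == 8   # above maximum
assert clamp_duration("some-other-model", 15) == 15          # unclamped passthrough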
pygpt_net/provider/api/openai/__init__.py

@@ -6,7 +6,7 @@
 # GitHub: https://github.com/szczyglis-dev/py-gpt #
 # MIT License #
 # Created By : Marcin Szczygliński #
-# Updated Date: 2025.
+# Updated Date: 2025.12.25 20:00:00 #
 # ================================================== #
 
 from openai import OpenAI
@@ -38,6 +38,7 @@ from .store import Store
 from .summarizer import Summarizer
 from .tools import Tools
 from .vision import Vision
+from .video import Video
 
 
 class ApiOpenAI:
@@ -63,6 +64,7 @@ class ApiOpenAI:
         self.summarizer = Summarizer(window)
         self.tools = Tools(window)
         self.vision = Vision(window)
+        self.video = Video(window)
         self.client = None
         self.locked = False
         self.last_client_args = None  # last client args used, for debug purposes
@@ -87,7 +89,7 @@ class ApiOpenAI:
         self,
         context: BridgeContext,
         extra: dict = None,
-        rt_signals
+        rt_signals=None
     ) -> bool:
         """
         Call OpenAI API
@@ -157,7 +159,7 @@ class ApiOpenAI:
         if is_realtime:
             return True
 
-        if fixtures.is_enabled("stream"):
+        if fixtures.is_enabled("stream"):  # fake stream for testing
             use_responses_api = False
             response = fixtures.get_stream_generator(ctx)
         else:
@@ -181,12 +183,20 @@ class ApiOpenAI:
 
             self.vision.append_images(ctx)  # append images to ctx if provided
 
-        # image
+        # image / video
         elif mode == MODE_IMAGE:
-
-
-
-
+            media_mode = self.window.controller.media.get_mode()
+            if media_mode == "video":
+                if context.model and context.model.is_video_output():
+                    return self.video.generate(
+                        context=context,
+                        extra=extra,
+                    )  # async handled if allowed
+            elif media_mode == "image":
+                return self.image.generate(
+                    context=context,
+                    extra=extra,
+                )
 
         # vision
         elif mode == MODE_VISION:
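Note: as in the Google provider, video generation is gated on the model's declared output capability via context.model.is_video_output(). A minimal stand-in for that check, assuming a model item that lists its output modalities (the output field here is illustrative, not necessarily the package's attribute):

from typing import List

class ModelStub:
    """Illustrative stand-in for the app's model item with output modalities."""

    def __init__(self, output: List[str]):
        self.output = output

    def is_video_output(self) -> bool:
        return "video" in self.output

assert ModelStub(["text", "video"]).is_video_output()
assert not ModelStub(["text", "image"]).is_video_output()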
@@ -294,13 +304,13 @@ class ApiOpenAI:
         messages.append({"role": "user", "content": prompt})
         additional_kwargs = {}
         # if max_tokens > 0:
-
+        #    additional_kwargs["max_tokens"] = max_tokens
 
         # tools / functions
         tools = self.window.core.api.openai.tools.prepare(model, functions)
         if len(tools) > 0 and "disable_tools" not in extra:
             additional_kwargs["tools"] = tools
-
+
         try:
             response = client.chat.completions.create(
                 messages=messages,
@@ -349,4 +359,4 @@ class ApiOpenAI:
             self.client = None
         except Exception as e:
             self.window.core.debug.log(e)
-            print("Error closing client:", e)
+            print("Error closing client:", e)