pygpt-net 2.6.31__py3-none-any.whl → 2.6.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. pygpt_net/CHANGELOG.txt +7 -0
  2. pygpt_net/__init__.py +3 -3
  3. pygpt_net/app.py +3 -1
  4. pygpt_net/app_core.py +3 -1
  5. pygpt_net/config.py +3 -1
  6. pygpt_net/controller/__init__.py +5 -1
  7. pygpt_net/controller/audio/audio.py +13 -0
  8. pygpt_net/controller/chat/common.py +18 -83
  9. pygpt_net/controller/lang/custom.py +2 -2
  10. pygpt_net/controller/media/__init__.py +12 -0
  11. pygpt_net/controller/media/media.py +115 -0
  12. pygpt_net/controller/realtime/realtime.py +27 -2
  13. pygpt_net/controller/ui/mode.py +16 -2
  14. pygpt_net/core/audio/backend/pyaudio/realtime.py +51 -14
  15. pygpt_net/core/audio/output.py +3 -2
  16. pygpt_net/core/image/image.py +6 -5
  17. pygpt_net/core/realtime/worker.py +1 -5
  18. pygpt_net/core/render/web/body.py +24 -3
  19. pygpt_net/core/text/utils.py +54 -2
  20. pygpt_net/core/types/image.py +7 -1
  21. pygpt_net/core/video/__init__.py +12 -0
  22. pygpt_net/core/video/video.py +290 -0
  23. pygpt_net/data/config/config.json +19 -4
  24. pygpt_net/data/config/models.json +75 -3
  25. pygpt_net/data/config/settings.json +194 -6
  26. pygpt_net/data/css/web-blocks.css +6 -0
  27. pygpt_net/data/css/web-chatgpt.css +6 -0
  28. pygpt_net/data/css/web-chatgpt_wide.css +6 -0
  29. pygpt_net/data/locale/locale.de.ini +30 -2
  30. pygpt_net/data/locale/locale.en.ini +40 -7
  31. pygpt_net/data/locale/locale.es.ini +30 -2
  32. pygpt_net/data/locale/locale.fr.ini +30 -2
  33. pygpt_net/data/locale/locale.it.ini +30 -2
  34. pygpt_net/data/locale/locale.pl.ini +33 -2
  35. pygpt_net/data/locale/locale.uk.ini +30 -2
  36. pygpt_net/data/locale/locale.zh.ini +30 -2
  37. pygpt_net/data/locale/plugin.cmd_web.en.ini +8 -0
  38. pygpt_net/item/model.py +22 -1
  39. pygpt_net/provider/api/google/__init__.py +38 -2
  40. pygpt_net/provider/api/google/video.py +364 -0
  41. pygpt_net/provider/api/openai/realtime/realtime.py +1 -2
  42. pygpt_net/provider/core/config/patch.py +226 -178
  43. pygpt_net/provider/core/model/patch.py +17 -2
  44. pygpt_net/provider/web/duckduck_search.py +212 -0
  45. pygpt_net/ui/layout/toolbox/audio.py +55 -0
  46. pygpt_net/ui/layout/toolbox/footer.py +14 -58
  47. pygpt_net/ui/layout/toolbox/image.py +3 -14
  48. pygpt_net/ui/layout/toolbox/raw.py +52 -0
  49. pygpt_net/ui/layout/toolbox/split.py +48 -0
  50. pygpt_net/ui/layout/toolbox/toolbox.py +8 -8
  51. pygpt_net/ui/layout/toolbox/video.py +49 -0
  52. {pygpt_net-2.6.31.dist-info → pygpt_net-2.6.32.dist-info}/METADATA +23 -11
  53. {pygpt_net-2.6.31.dist-info → pygpt_net-2.6.32.dist-info}/RECORD +56 -46
  54. {pygpt_net-2.6.31.dist-info → pygpt_net-2.6.32.dist-info}/LICENSE +0 -0
  55. {pygpt_net-2.6.31.dist-info → pygpt_net-2.6.32.dist-info}/WHEEL +0 -0
  56. {pygpt_net-2.6.31.dist-info → pygpt_net-2.6.32.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,364 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # ================================================== #
4
+ # This file is a part of PYGPT package #
5
+ # Website: https://pygpt.net #
6
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
+ # MIT License #
8
+ # Created By : Marcin Szczygliński #
9
+ # Updated Date: 2025.09.01 23:00:00 #
10
+ # ================================================== #
11
+
12
+ import base64, datetime, os, requests
13
+ import mimetypes
14
+ import time
15
+
16
+ from typing import Optional, Dict, Any, List
17
+ from google import genai
18
+ from google.genai import types as gtypes
19
+
20
+ from PySide6.QtCore import QObject, Signal, QRunnable, Slot
21
+
22
+ from pygpt_net.core.events import KernelEvent
23
+ from pygpt_net.core.bridge.context import BridgeContext
24
+ from pygpt_net.item.ctx import CtxItem
25
+ from pygpt_net.utils import trans
26
+
27
+
28
class Video:
    """Entry point for Google GenAI Veo video generation (text-to-video and image-to-video)."""

    MODE_GENERATE = "generate"
    MODE_IMAGE_TO_VIDEO = "image2video"

    def __init__(self, window=None):
        """
        :param window: main application window
        """
        self.window = window
        self.worker = None

    def generate(
            self,
            context: BridgeContext,
            extra: Optional[Dict[str, Any]] = None,
            sync: bool = True
    ) -> bool:
        """
        Generate video(s) using Google GenAI Veo.

        :param context: BridgeContext with prompt, model, attachments
        :param extra: extra parameters (num, inline, duration, aspect_ratio)
        :param sync: run synchronously (blocking) if True
        :return: True if started
        """
        options = extra or {}
        config = self.window.core.config
        ctx = context.ctx or CtxItem()
        attachments = context.attachments or {}

        # switch to image-to-video sub-mode when an image attachment is present
        mode = (
            self.MODE_IMAGE_TO_VIDEO
            if self._has_image_attachment(attachments)
            else self.MODE_GENERATE
        )

        # LLM used to improve the prompt (not the Veo video model)
        prompt_model = self.window.core.models.from_defaults()
        enhancer_id = config.get('video.prompt_model')
        if self.window.core.models.has(enhancer_id):
            prompt_model = self.window.core.models.get(enhancer_id)

        worker = VideoWorker()
        worker.window = self.window
        worker.client = self.window.core.api.google.get_client()
        worker.ctx = ctx
        worker.mode = mode
        worker.attachments = attachments
        worker.model = context.model.id  # Veo model id
        worker.input_prompt = context.prompt
        worker.model_prompt = prompt_model  # LLM for prompt rewriting
        worker.system_prompt = self.window.core.prompt.get('video')
        worker.raw = config.get('img_raw')
        worker.num = int(options.get("num", 1))
        worker.inline = bool(options.get("inline", False))

        # optional params: explicit value wins, then config, then hard default
        worker.aspect_ratio = str(options.get("aspect_ratio") or config.get('video.aspect_ratio') or "16:9")
        worker.duration_seconds = int(options.get("duration") or config.get('video.duration') or 8)
        worker.fps = int(options.get("fps") or config.get('video.fps') or 24)
        worker.seed = options.get("seed") or config.get('video.seed') or None
        worker.negative_prompt = options.get("negative_prompt") or config.get('video.negative_prompt') or None
        worker.generate_audio = bool(options.get("generate_audio", config.get('video.generate_audio') or False))
        worker.resolution = (options.get("resolution") or config.get('video.resolution') or "720p")

        self.worker = worker
        signals = self.worker.signals
        signals.finished.connect(self.window.core.video.handle_finished)
        signals.finished_inline.connect(self.window.core.video.handle_finished_inline)
        signals.status.connect(self.window.core.video.handle_status)
        signals.error.connect(self.window.core.video.handle_error)

        # blocking run when requested explicitly or when async execution is not allowed
        if sync or not self.window.controller.kernel.async_allowed(ctx):
            self.worker.run()
            return True

        self.window.dispatch(KernelEvent(KernelEvent.STATE_BUSY, {"id": "video"}))
        self.window.threadpool.start(self.worker)
        return True

    def _has_image_attachment(self, attachments: Dict[str, Any]) -> bool:
        """Check if at least one image attachment is present."""
        for att in (attachments or {}).values():
            try:
                path = getattr(att, "path", None)
                if not path or not os.path.exists(path):
                    continue
                mime, _ = mimetypes.guess_type(path)
                if mime and mime.startswith("image/"):
                    return True
            except Exception:
                continue
        return False
119
+
120
+
121
class VideoSignals(QObject):
    """Qt signals emitted by VideoWorker to report video generation progress and results."""
    finished = Signal(object, list, str)  # ctx, paths, prompt -- normal (non-inline) completion
    finished_inline = Signal(object, list, str)  # ctx, paths, prompt -- inline-mode completion
    status = Signal(object)  # message -- human-readable status string
    error = Signal(object)  # exception -- raised error forwarded to the UI
126
+
127
+
128
class VideoWorker(QRunnable):
    """
    Background worker that drives a Google GenAI Veo video generation job.

    Flow in run(): optional LLM prompt enhancement (via kernel CALL event),
    build a GenerateVideosConfig, start the long-running operation, poll it
    to completion, download and save the resulting videos, then emit
    finished/finished_inline with the saved paths.
    """
    def __init__(self, *args, **kwargs):
        """Initialize worker state; all parameters are assigned by Video.generate() after construction."""
        super().__init__()
        self.signals = VideoSignals()
        self.window = None  # main application window, required by run()
        self.client: Optional[genai.Client] = None  # Google GenAI client
        self.ctx: Optional[CtxItem] = None  # context item to attach results to

        # params
        self.mode = Video.MODE_GENERATE  # generate | image2video
        self.attachments: Dict[str, Any] = {}  # attachment items; only image paths are used
        self.model = "veo-3.0-generate-001"  # Veo model id
        self.model_prompt = None  # LLM model used only for prompt rewriting
        self.input_prompt = ""
        self.system_prompt = ""
        self.inline = False  # emit finished_inline instead of finished
        self.raw = False  # when True, skip LLM prompt enhancement
        self.num = 1  # requested number of videos (clamped by veo_max_num)

        # video generation params
        self.aspect_ratio = "16:9"
        self.duration_seconds = 8
        self.fps = 24
        self.seed: Optional[int] = None
        self.negative_prompt: Optional[str] = None
        self.generate_audio: bool = False  # Veo 3 only
        self.resolution: str = "720p"  # Veo 3 supports 720p/1080p

        # limits / capabilities
        # self.veo_max_num = 4 # Veo returns up to 4 videos
        self.veo_max_num = 1  # limit to 1 in Gemini API

        # fallbacks
        self.DEFAULT_VEO_MODEL = "veo-3.0-generate-001"

    @Slot()
    def run(self):
        """Execute the full generation job; emits error on failure and always cleans up signals."""
        try:
            # optional prompt enhancement: rewrite the user prompt with an LLM
            # unless raw mode or inline mode is active
            if not self.raw and not self.inline and self.input_prompt:
                try:
                    self.signals.status.emit(trans('vid.status.prompt.wait'))
                    bridge_context = BridgeContext(
                        prompt=self.input_prompt,
                        system_prompt=self.system_prompt,
                        model=self.model_prompt,
                        max_tokens=200,
                        temperature=1.0,
                    )
                    ev = KernelEvent(KernelEvent.CALL, {'context': bridge_context, 'extra': {}})
                    self.window.dispatch(ev)
                    resp = ev.data.get('response')
                    if resp:
                        self.input_prompt = resp
                except Exception as e:
                    # enhancement is best-effort: report but continue with the original prompt
                    self.signals.error.emit(e)
                    self.signals.status.emit(trans('vid.status.prompt.error') + ": " + str(e))

            # prepare config
            num = min(self.num, self.veo_max_num)
            cfg_kwargs = {
                "number_of_videos": num,
                #"duration_seconds": self._duration_for_model(self.model, self.duration_seconds),
            }
            if self.aspect_ratio:
                cfg_kwargs["aspect_ratio"] = self.aspect_ratio
            if self.seed is not None:
                cfg_kwargs["seed"] = int(self.seed)
            if self.negative_prompt:
                cfg_kwargs["negative_prompt"] = self.negative_prompt
            if self._is_veo3(self.model):
                # Veo 3 supports audio and resolution
                # WARN: but not Gemini API:
                pass
                # disabled: kept for when the Gemini API accepts these fields
                """
                cfg_kwargs["generate_audio"] = bool(self.generate_audio)
                if self.resolution:
                    cfg_kwargs["resolution"] = self.resolution
                """

            config = gtypes.GenerateVideosConfig(**cfg_kwargs)

            # build request
            req_kwargs = {
                "model": self.model or self.DEFAULT_VEO_MODEL,
                "prompt": self.input_prompt or "",
                "config": config,
            }

            # image-to-video if an image attachment is present and supported
            base_img = self._first_image_attachment(self.attachments)
            if self.mode == Video.MODE_IMAGE_TO_VIDEO and base_img is not None and self._supports_image_to_video(self.model):
                req_kwargs["image"] = gtypes.Image.from_file(location=base_img)

            self.signals.status.emit(trans('vid.status.generating') + f": {self.input_prompt}...")

            # start long-running operation
            operation = self.client.models.generate_videos(**req_kwargs)

            # poll until done (every 10s; NOTE(review): no timeout, relies on the
            # operation eventually reporting done -- confirm against SDK behavior)
            while not getattr(operation, "done", False):
                time.sleep(10)
                operation = self.client.operations.get(operation)

            # extract response payload (SDK may expose it as .response or .result)
            op_resp = getattr(operation, "response", None) or getattr(operation, "result", None)
            if not op_resp:
                raise RuntimeError("Empty operation response.")

            gen_list = getattr(op_resp, "generated_videos", None) or []
            if not gen_list:
                raise RuntimeError("No videos generated.")

            # download and save all outputs up to num
            paths: List[str] = []
            for idx, gv in enumerate(gen_list[:num]):
                data = self._download_video_bytes(getattr(gv, "video", None))
                p = self._save(idx, data)
                if p:
                    paths.append(p)

            if self.inline:
                self.signals.finished_inline.emit(self.ctx, paths, self.input_prompt)
            else:
                self.signals.finished.emit(self.ctx, paths, self.input_prompt)

        except Exception as e:
            self.signals.error.emit(e)
        finally:
            self._cleanup()

    # ---------- helpers ----------

    def _is_veo3(self, model_id: str) -> bool:
        """Return True if the model id denotes a Veo 3.x model."""
        mid = str(model_id or "").lower()
        return mid.startswith("veo-3.")

    def _supports_image_to_video(self, model_id: str) -> bool:
        """Return True if the model supports image->video."""
        mid = str(model_id or "").lower()
        # Official support for image-to-video on veo-2 and veo-3 preview; keep extendable.
        return ("veo-2.0" in mid) or ("veo-3.0-generate-preview" in mid) or ("veo-3.0-fast-generate-preview" in mid)

    def _duration_for_model(self, model_id: str, requested: int) -> int:
        """Adjust duration constraints to model-specific limits.

        Currently unused: the call site in run() is commented out.
        """
        mid = str(model_id or "").lower()
        if "veo-2.0" in mid:
            # Veo 2 supports 5-8s, default 8s.
            return max(5, min(8, int(requested or 8)))
        if "veo-3.0" in mid:
            # Veo 3 commonly uses 8s clips; honor request if provided, otherwise 8s.
            return int(requested or 8)
        return int(requested or 8)

    def _first_image_attachment(self, attachments: Dict[str, Any]) -> Optional[str]:
        """Return path of the first image attachment, if any (by MIME type of an existing file)."""
        for _, att in (attachments or {}).items():
            try:
                p = getattr(att, "path", None)
                if p and os.path.exists(p):
                    mt, _ = mimetypes.guess_type(p)
                    if mt and mt.startswith("image/"):
                        return p
            except Exception:
                continue
        return None

    def _download_video_bytes(self, file_ref) -> Optional[bytes]:
        """
        Download video bytes using the Files service.
        Falls back to direct URL download if necessary.

        :param file_ref: SDK video/file reference object (or None)
        :return: raw video bytes, or None when nothing could be retrieved
        """
        if not file_ref:
            return None

        # Preferred: SDK-managed download (handles URIs and sets video_bytes).
        try:
            data = self.client.files.download(file=file_ref)
            if isinstance(data, (bytes, bytearray)):
                return bytes(data)
        except Exception:
            pass

        # Fallback: try to fetch by uri or url.
        uri = getattr(file_ref, "uri", None) or getattr(file_ref, "url", None) or getattr(file_ref, "download_uri", None)
        if uri:
            try:
                r = requests.get(uri, timeout=120)
                if r.status_code == 200:
                    return r.content
            except Exception:
                pass

        # Last resort: try inline/base64 if present.
        # NOTE(review): video_bytes may already be raw bytes or a base64 string -- both handled.
        b64 = getattr(file_ref, "video_bytes", None)
        if isinstance(b64, (bytes, bytearray)):
            return bytes(b64)
        if isinstance(b64, str):
            try:
                return base64.b64decode(b64)
            except Exception:
                return None
        return None

    def _save(self, idx: int, data: Optional[bytes]) -> Optional[str]:
        """Save video bytes to file and return path.

        File name: YYYY-MM-DD_HH-MM-SS-<safe prompt>-<n>.mp4 under the user "video" dir.
        Tries core.video.save_video first, then a plain binary write as fallback.
        """
        if not data:
            return None
        name = (
            datetime.date.today().strftime("%Y-%m-%d") + "_" +
            datetime.datetime.now().strftime("%H-%M-%S") + "-" +
            self.window.core.video.make_safe_filename(self.input_prompt) + "-" +
            str(idx + 1) + ".mp4"
        )
        path = os.path.join(self.window.core.config.get_user_dir("video"), name)
        self.signals.status.emit(trans('vid.status.downloading') + f" ({idx + 1} / {self.num}) -> {path}")

        if self.window.core.video.save_video(path, data):
            return str(path)

        # fallback: direct write when the core helper reports failure
        try:
            os.makedirs(os.path.dirname(path), exist_ok=True)
            with open(path, "wb") as f:
                f.write(data)
            return str(path)
        except Exception:
            return None

    def _cleanup(self):
        """Cleanup resources: detach and schedule deletion of the signals object (safe if already gone)."""
        sig = self.signals
        self.signals = None
        if sig is not None:
            try:
                sig.deleteLater()
            except RuntimeError:
                # underlying C++ object may already be deleted
                pass
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.08.31 23:00:00 #
9
+ # Updated Date: 2025.09.01 23:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import json
@@ -107,7 +107,6 @@ class Realtime:
107
107
  and (auto_turn != self.prev_auto_turn
108
108
  or opt_vad_silence != self.prev_vad_silence
109
109
  or opt_vad_prefix != self.prev_vad_prefix)):
110
- print("updating")
111
110
  self.handler.update_session_autoturn_sync(auto_turn, opt_vad_silence, opt_vad_prefix)
112
111
 
113
112
  # if auto-turn is enabled and prompt is empty, update session and context only