pygpt-net 2.6.29__py3-none-any.whl → 2.6.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182)
  1. pygpt_net/CHANGELOG.txt +15 -0
  2. pygpt_net/__init__.py +3 -3
  3. pygpt_net/app.py +4 -0
  4. pygpt_net/{container.py → app_core.py} +5 -6
  5. pygpt_net/controller/__init__.py +5 -2
  6. pygpt_net/controller/access/control.py +1 -9
  7. pygpt_net/controller/assistant/assistant.py +4 -4
  8. pygpt_net/controller/assistant/batch.py +7 -7
  9. pygpt_net/controller/assistant/files.py +4 -4
  10. pygpt_net/controller/assistant/threads.py +3 -3
  11. pygpt_net/controller/attachment/attachment.py +4 -7
  12. pygpt_net/controller/audio/audio.py +25 -1
  13. pygpt_net/controller/audio/ui.py +2 -2
  14. pygpt_net/controller/chat/audio.py +1 -8
  15. pygpt_net/controller/chat/common.py +30 -4
  16. pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
  17. pygpt_net/controller/chat/output.py +8 -3
  18. pygpt_net/controller/chat/stream.py +4 -405
  19. pygpt_net/controller/chat/text.py +3 -2
  20. pygpt_net/controller/chat/vision.py +11 -19
  21. pygpt_net/controller/config/placeholder.py +1 -1
  22. pygpt_net/controller/ctx/ctx.py +1 -1
  23. pygpt_net/controller/ctx/summarizer.py +1 -1
  24. pygpt_net/controller/kernel/kernel.py +11 -3
  25. pygpt_net/controller/kernel/reply.py +5 -1
  26. pygpt_net/controller/mode/mode.py +21 -12
  27. pygpt_net/controller/plugins/settings.py +3 -2
  28. pygpt_net/controller/presets/editor.py +112 -99
  29. pygpt_net/controller/realtime/__init__.py +12 -0
  30. pygpt_net/controller/realtime/manager.py +53 -0
  31. pygpt_net/controller/realtime/realtime.py +268 -0
  32. pygpt_net/controller/theme/theme.py +3 -2
  33. pygpt_net/controller/ui/mode.py +7 -0
  34. pygpt_net/controller/ui/ui.py +19 -1
  35. pygpt_net/controller/ui/vision.py +4 -4
  36. pygpt_net/core/agents/legacy.py +2 -2
  37. pygpt_net/core/agents/runners/openai_workflow.py +2 -2
  38. pygpt_net/core/assistants/files.py +5 -5
  39. pygpt_net/core/assistants/store.py +4 -4
  40. pygpt_net/core/audio/audio.py +6 -1
  41. pygpt_net/core/audio/backend/native/__init__.py +12 -0
  42. pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
  43. pygpt_net/core/audio/backend/native/player.py +139 -0
  44. pygpt_net/core/audio/backend/native/realtime.py +250 -0
  45. pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
  46. pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
  47. pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
  48. pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
  49. pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
  50. pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
  51. pygpt_net/core/audio/backend/shared/__init__.py +38 -0
  52. pygpt_net/core/audio/backend/shared/conversions.py +211 -0
  53. pygpt_net/core/audio/backend/shared/envelope.py +38 -0
  54. pygpt_net/core/audio/backend/shared/player.py +137 -0
  55. pygpt_net/core/audio/backend/shared/rt.py +52 -0
  56. pygpt_net/core/audio/capture.py +5 -0
  57. pygpt_net/core/audio/output.py +13 -2
  58. pygpt_net/core/audio/whisper.py +6 -2
  59. pygpt_net/core/bridge/bridge.py +4 -3
  60. pygpt_net/core/bridge/worker.py +31 -9
  61. pygpt_net/core/debug/console/console.py +2 -2
  62. pygpt_net/core/debug/presets.py +2 -2
  63. pygpt_net/core/dispatcher/dispatcher.py +37 -1
  64. pygpt_net/core/events/__init__.py +2 -1
  65. pygpt_net/core/events/realtime.py +55 -0
  66. pygpt_net/core/experts/experts.py +2 -2
  67. pygpt_net/core/image/image.py +51 -1
  68. pygpt_net/core/modes/modes.py +2 -2
  69. pygpt_net/core/presets/presets.py +3 -3
  70. pygpt_net/core/realtime/options.py +87 -0
  71. pygpt_net/core/realtime/shared/__init__.py +0 -0
  72. pygpt_net/core/realtime/shared/audio.py +213 -0
  73. pygpt_net/core/realtime/shared/loop.py +64 -0
  74. pygpt_net/core/realtime/shared/session.py +59 -0
  75. pygpt_net/core/realtime/shared/text.py +37 -0
  76. pygpt_net/core/realtime/shared/tools.py +276 -0
  77. pygpt_net/core/realtime/shared/turn.py +38 -0
  78. pygpt_net/core/realtime/shared/types.py +16 -0
  79. pygpt_net/core/realtime/worker.py +164 -0
  80. pygpt_net/core/tokens/tokens.py +4 -4
  81. pygpt_net/core/types/__init__.py +1 -0
  82. pygpt_net/core/types/image.py +48 -0
  83. pygpt_net/core/types/mode.py +5 -2
  84. pygpt_net/core/vision/analyzer.py +1 -1
  85. pygpt_net/data/config/config.json +13 -4
  86. pygpt_net/data/config/models.json +219 -101
  87. pygpt_net/data/config/modes.json +3 -9
  88. pygpt_net/data/config/settings.json +135 -27
  89. pygpt_net/data/config/settings_section.json +2 -2
  90. pygpt_net/data/locale/locale.de.ini +7 -7
  91. pygpt_net/data/locale/locale.en.ini +25 -12
  92. pygpt_net/data/locale/locale.es.ini +7 -7
  93. pygpt_net/data/locale/locale.fr.ini +7 -7
  94. pygpt_net/data/locale/locale.it.ini +7 -7
  95. pygpt_net/data/locale/locale.pl.ini +8 -8
  96. pygpt_net/data/locale/locale.uk.ini +7 -7
  97. pygpt_net/data/locale/locale.zh.ini +3 -3
  98. pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
  99. pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
  100. pygpt_net/item/model.py +23 -3
  101. pygpt_net/plugin/audio_input/plugin.py +37 -4
  102. pygpt_net/plugin/audio_input/simple.py +57 -8
  103. pygpt_net/plugin/cmd_files/worker.py +3 -0
  104. pygpt_net/plugin/openai_dalle/plugin.py +4 -4
  105. pygpt_net/plugin/openai_vision/plugin.py +12 -13
  106. pygpt_net/provider/agents/openai/agent.py +5 -5
  107. pygpt_net/provider/agents/openai/agent_b2b.py +5 -5
  108. pygpt_net/provider/agents/openai/agent_planner.py +5 -6
  109. pygpt_net/provider/agents/openai/agent_with_experts.py +5 -5
  110. pygpt_net/provider/agents/openai/agent_with_experts_feedback.py +4 -4
  111. pygpt_net/provider/agents/openai/agent_with_feedback.py +4 -4
  112. pygpt_net/provider/agents/openai/bot_researcher.py +2 -2
  113. pygpt_net/provider/agents/openai/bots/research_bot/agents/planner_agent.py +1 -1
  114. pygpt_net/provider/agents/openai/bots/research_bot/agents/search_agent.py +1 -1
  115. pygpt_net/provider/agents/openai/bots/research_bot/agents/writer_agent.py +1 -1
  116. pygpt_net/provider/agents/openai/evolve.py +5 -5
  117. pygpt_net/provider/agents/openai/supervisor.py +4 -4
  118. pygpt_net/provider/api/__init__.py +27 -0
  119. pygpt_net/provider/api/anthropic/__init__.py +68 -0
  120. pygpt_net/provider/api/google/__init__.py +295 -0
  121. pygpt_net/provider/api/google/audio.py +121 -0
  122. pygpt_net/provider/api/google/chat.py +591 -0
  123. pygpt_net/provider/api/google/image.py +427 -0
  124. pygpt_net/provider/api/google/realtime/__init__.py +12 -0
  125. pygpt_net/provider/api/google/realtime/client.py +1945 -0
  126. pygpt_net/provider/api/google/realtime/realtime.py +186 -0
  127. pygpt_net/provider/api/google/tools.py +222 -0
  128. pygpt_net/provider/api/google/vision.py +129 -0
  129. pygpt_net/provider/{gpt → api/openai}/__init__.py +24 -4
  130. pygpt_net/provider/api/openai/agents/__init__.py +0 -0
  131. pygpt_net/provider/{gpt → api/openai}/agents/computer.py +1 -1
  132. pygpt_net/provider/{gpt → api/openai}/agents/experts.py +1 -1
  133. pygpt_net/provider/{gpt → api/openai}/agents/response.py +1 -1
  134. pygpt_net/provider/{gpt → api/openai}/assistants.py +1 -1
  135. pygpt_net/provider/{gpt → api/openai}/chat.py +15 -8
  136. pygpt_net/provider/{gpt → api/openai}/completion.py +1 -1
  137. pygpt_net/provider/{gpt → api/openai}/image.py +1 -1
  138. pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
  139. pygpt_net/provider/api/openai/realtime/client.py +1828 -0
  140. pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
  141. pygpt_net/provider/{gpt → api/openai}/remote_tools.py +1 -1
  142. pygpt_net/provider/{gpt → api/openai}/responses.py +34 -20
  143. pygpt_net/provider/{gpt → api/openai}/store.py +2 -2
  144. pygpt_net/provider/{gpt → api/openai}/vision.py +1 -1
  145. pygpt_net/provider/api/openai/worker/__init__.py +0 -0
  146. pygpt_net/provider/{gpt → api/openai}/worker/assistants.py +4 -4
  147. pygpt_net/provider/{gpt → api/openai}/worker/importer.py +10 -10
  148. pygpt_net/provider/audio_input/google_genai.py +103 -0
  149. pygpt_net/provider/audio_input/openai_whisper.py +1 -1
  150. pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
  151. pygpt_net/provider/audio_output/openai_tts.py +9 -6
  152. pygpt_net/provider/core/config/patch.py +26 -0
  153. pygpt_net/provider/core/model/patch.py +20 -0
  154. pygpt_net/provider/core/preset/json_file.py +2 -4
  155. pygpt_net/provider/llms/anthropic.py +2 -5
  156. pygpt_net/provider/llms/base.py +4 -3
  157. pygpt_net/provider/llms/google.py +8 -9
  158. pygpt_net/provider/llms/openai.py +1 -1
  159. pygpt_net/provider/loaders/hub/image_vision/base.py +1 -1
  160. pygpt_net/ui/dialog/preset.py +71 -55
  161. pygpt_net/ui/layout/toolbox/footer.py +16 -0
  162. pygpt_net/ui/layout/toolbox/image.py +5 -0
  163. pygpt_net/ui/main.py +6 -4
  164. pygpt_net/ui/widget/option/combo.py +15 -1
  165. pygpt_net/utils.py +9 -0
  166. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +55 -55
  167. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +181 -135
  168. pygpt_net/core/audio/backend/pyaudio.py +0 -554
  169. /pygpt_net/{provider/gpt/agents → controller/chat/handler}/__init__.py +0 -0
  170. /pygpt_net/{provider/gpt/worker → core/realtime}/__init__.py +0 -0
  171. /pygpt_net/provider/{gpt → api/openai}/agents/client.py +0 -0
  172. /pygpt_net/provider/{gpt → api/openai}/agents/remote_tools.py +0 -0
  173. /pygpt_net/provider/{gpt → api/openai}/agents/utils.py +0 -0
  174. /pygpt_net/provider/{gpt → api/openai}/audio.py +0 -0
  175. /pygpt_net/provider/{gpt → api/openai}/computer.py +0 -0
  176. /pygpt_net/provider/{gpt → api/openai}/container.py +0 -0
  177. /pygpt_net/provider/{gpt → api/openai}/summarizer.py +0 -0
  178. /pygpt_net/provider/{gpt → api/openai}/tools.py +0 -0
  179. /pygpt_net/provider/{gpt → api/openai}/utils.py +0 -0
  180. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
  181. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
  182. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0
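
Most of the rename entries in this listing move the OpenAI-specific provider code from pygpt_net/provider/gpt to pygpt_net/provider/api/openai, alongside the new pygpt_net/provider/api/google and pygpt_net/provider/api/anthropic packages. For code that imported these modules by their old paths, the change implied by the listing looks roughly like the sketch below; the module names are taken from the paths above, and whether 2.6.31 keeps the old paths importable as aliases is not visible in this diff.

    # Import-path change implied by the rename entries above (illustrative only;
    # module names come from the listed paths, not verified against 2.6.31).
    try:
        from pygpt_net.provider.api.openai import chat as openai_chat   # 2.6.31 location (entry 135)
    except ImportError:
        from pygpt_net.provider.gpt import chat as openai_chat          # 2.6.29 location

    from pygpt_net.provider.api.google import image as google_image     # new in 2.6.31 (entry 123)
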
@@ -0,0 +1,427 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ # ================================================== #
+ # This file is a part of PYGPT package #
+ # Website: https://pygpt.net #
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
+ # MIT License #
+ # Created By : Marcin Szczygliński #
+ # Updated Date: 2025.08.29 20:40:00 #
+ # ================================================== #
+
+ import mimetypes
+ from typing import Optional, Dict, Any, List
+ from google import genai
+ from google.genai import types as gtypes
+ from PySide6.QtCore import QObject, Signal, QRunnable, Slot
+ import base64, datetime, os, requests
+
+ from pygpt_net.core.events import KernelEvent
+ from pygpt_net.core.bridge.context import BridgeContext
+ from pygpt_net.item.ctx import CtxItem
+ from pygpt_net.utils import trans
+
+
+ class Image:
+
+     MODE_GENERATE = "generate"
+     MODE_EDIT = "edit"
+
+     def __init__(self, window=None):
+         self.window = window
+         self.worker = None
+
+     def generate(
+             self,
+             context: BridgeContext,
+             extra: Optional[Dict[str, Any]] = None,
+             sync: bool = True
+     ) -> bool:
+         """
+         Generate or edit image(s) using Google GenAI API (Developer API or Vertex AI).
+
+         :param context: BridgeContext with prompt, model, attachments
+         :param extra: extra parameters (num, inline)
+         :param sync: run synchronously (blocking) if True
+         :return: True if started
+         """
+         extra = extra or {}
+         ctx = context.ctx or CtxItem()
+         model = context.model
+         prompt = context.prompt
+         num = int(extra.get("num", 1))
+         inline = bool(extra.get("inline", False))
+
+         # decide sub-mode based on attachments
+         sub_mode = self.MODE_GENERATE
+         attachments = context.attachments
+         if attachments and len(attachments) > 0:
+             pass  # TODO: implement edit!
+             # sub_mode = self.MODE_EDIT
+
+         # model used to improve the prompt (not image model)
+         prompt_model = self.window.core.models.from_defaults()
+         tmp = self.window.core.config.get('img_prompt_model')
+         if self.window.core.models.has(tmp):
+             prompt_model = self.window.core.models.get(tmp)
+
+         worker = ImageWorker()
+         worker.window = self.window
+         worker.client = self.window.core.api.google.get_client()
+         worker.ctx = ctx
+         worker.mode = sub_mode
+         worker.attachments = attachments or {}
+         worker.model = model.id  # image model id
+         worker.input_prompt = prompt
+         worker.model_prompt = prompt_model  # LLM for prompt rewriting
+         worker.system_prompt = self.window.core.prompt.get('img')
+         worker.raw = self.window.core.config.get('img_raw')
+         worker.num = num
+         worker.inline = inline
+
+         if self.window.core.config.has('img_resolution'):
+             worker.resolution = self.window.core.config.get('img_resolution') or "1024x1024"
+
+         self.worker = worker
+         self.worker.signals.finished.connect(self.window.core.image.handle_finished)
+         self.worker.signals.finished_inline.connect(self.window.core.image.handle_finished_inline)
+         self.worker.signals.status.connect(self.window.core.image.handle_status)
+         self.worker.signals.error.connect(self.window.core.image.handle_error)
+
+         if sync or not self.window.controller.kernel.async_allowed(ctx):
+             self.worker.run()
+             return True
+
+         self.window.dispatch(KernelEvent(KernelEvent.STATE_BUSY, {"id": "img"}))
+         self.window.threadpool.start(self.worker)
+         return True
+
+
+ class ImageSignals(QObject):
+     finished = Signal(object, list, str)  # ctx, paths, prompt
+     finished_inline = Signal(object, list, str)  # ctx, paths, prompt
+     status = Signal(object)  # message
+     error = Signal(object)  # exception
+
+
+ class ImageWorker(QRunnable):
+     def __init__(self, *args, **kwargs):
+         super().__init__()
+         self.signals = ImageSignals()
+         self.window = None
+         self.client: Optional[genai.Client] = None
+         self.ctx: Optional[CtxItem] = None
+
+         # params
+         self.mode = Image.MODE_GENERATE
+         self.attachments: Dict[str, Any] = {}
+         self.model = "imagen-4.0-generate-preview-06-06"
+         self.model_prompt = None
+         self.input_prompt = ""
+         self.system_prompt = ""
+         self.inline = False
+         self.raw = False
+         self.num = 1
+         self.resolution = "1024x1024"  # used to derive aspect ratio for Imagen
+
+         # limits
+         self.imagen_max_num = 4  # Imagen returns up to 4 images
+
+         # fallbacks
+         self.DEFAULT_GEMINI_IMAGE_MODEL = "gemini-2.0-flash-preview-image-generation"
+
+     @Slot()
+     def run(self):
+         try:
+             # optional prompt enhancement
+             if not self.raw and not self.inline:
+                 try:
+                     self.signals.status.emit(trans('img.status.prompt.wait'))
+                     bridge_context = BridgeContext(
+                         prompt=self.input_prompt,
+                         system_prompt=self.system_prompt,
+                         model=self.model_prompt,
+                         max_tokens=200,
+                         temperature=1.0,
+                     )
+                     ev = KernelEvent(KernelEvent.CALL, {'context': bridge_context, 'extra': {}})
+                     self.window.dispatch(ev)
+                     resp = ev.data.get('response')
+                     if resp:
+                         self.input_prompt = resp
+                 except Exception as e:
+                     self.signals.error.emit(e)
+                     self.signals.status.emit(trans('img.status.prompt.error') + ": " + str(e))
+
+             self.signals.status.emit(trans('img.status.generating') + f": {self.input_prompt}...")
+
+             paths: List[str] = []
+
+             if self.mode == Image.MODE_EDIT:
+                 # EDIT
+                 if self._using_vertex():
+                     # Vertex Imagen edit API (preferred)
+                     resp = self._imagen_edit(self.input_prompt, self.attachments, self.num)
+                     imgs = getattr(resp, "generated_images", None) or []
+                     for idx, gi in enumerate(imgs[: self.num]):
+                         data = self._extract_imagen_bytes(gi)
+                         p = self._save(idx, data)
+                         if p:
+                             paths.append(p)
+                 else:
+                     # Developer API fallback via Gemini image model; force v1 to avoid 404
+                     resp = self._gemini_edit(self.input_prompt, self.attachments, self.num)
+                     saved = 0
+                     for cand in getattr(resp, "candidates", []) or []:
+                         parts = getattr(getattr(cand, "content", None), "parts", None) or []
+                         for part in parts:
+                             inline = getattr(part, "inline_data", None)
+                             if inline and getattr(inline, "data", None):
+                                 p = self._save(saved, inline.data)
+                                 if p:
+                                     paths.append(p)
+                                     saved += 1
+                             if saved >= self.num:
+                                 break
+                         if saved >= self.num:
+                             break
+
+             else:
+                 # GENERATE
+                 if self._is_imagen_generate(self.model) and self._using_vertex():
+                     num = min(self.num, self.imagen_max_num)
+                     resp = self._imagen_generate(self.input_prompt, num, self.resolution)
+                     imgs = getattr(resp, "generated_images", None) or []
+                     for idx, gi in enumerate(imgs[: num]):
+                         data = self._extract_imagen_bytes(gi)
+                         p = self._save(idx, data)
+                         if p:
+                             paths.append(p)
+                 else:
+                     # Gemini Developer API image generation (needs response_modalities)
+                     resp = self.client.models.generate_content(
+                         model=self.model,
+                         contents=[self.input_prompt],
+                         config=gtypes.GenerateContentConfig(
+                             response_modalities=[gtypes.Modality.TEXT, gtypes.Modality.IMAGE],
+                         ),
+                     )
+                     saved = 0
+                     for cand in getattr(resp, "candidates", []) or []:
+                         parts = getattr(getattr(cand, "content", None), "parts", None) or []
+                         for part in parts:
+                             inline = getattr(part, "inline_data", None)
+                             if inline and getattr(inline, "data", None):
+                                 p = self._save(saved, inline.data)
+                                 if p:
+                                     paths.append(p)
+                                     saved += 1
+                             if saved >= self.num:
+                                 break
+                         if saved >= self.num:
+                             break
+
+             if self.inline:
+                 self.signals.finished_inline.emit(self.ctx, paths, self.input_prompt)
+             else:
+                 self.signals.finished.emit(self.ctx, paths, self.input_prompt)
+
+         except Exception as e:
+             self.signals.error.emit(e)
+         finally:
+             self._cleanup()
+
+     # ---------- helpers ----------
+
+     def _using_vertex(self) -> bool:
+         """
+         Detect if Vertex AI is configured via env vars.
+         """
+         val = os.getenv("GOOGLE_GENAI_USE_VERTEXAI") or ""
+         return str(val).lower() in ("1", "true", "yes", "y")
+
+     def _is_imagen_generate(self, model_id: str) -> bool:
+         """True for Imagen generate models."""
+         mid = str(model_id).lower()
+         return "imagen" in mid and "generate" in mid
+
+     def _imagen_generate(self, prompt: str, num: int, resolution: str):
+         """Imagen text-to-image."""
+         aspect = self._aspect_from_resolution(resolution)
+         cfg = gtypes.GenerateImagesConfig(number_of_images=num)
+         if aspect:
+             cfg.aspect_ratio = aspect
+         return self.client.models.generate_images(
+             model=self.model,
+             prompt=prompt,
+             config=cfg,
+         )
+
+     def _imagen_edit(self, prompt: str, attachments: Dict[str, Any], num: int):
+         """
+         Imagen edit: requires Vertex AI and capability model (e.g. imagen-3.0-capability-001).
+         First attachment = base image, optional second = mask.
+         """
+         paths = self._collect_attachment_paths(attachments)
+         if len(paths) == 0:
+             raise RuntimeError("No attachment provided for edit mode.")
+
+         base_img = gtypes.Image.from_file(location=paths[0])
+         raw_ref = gtypes.RawReferenceImage(reference_id=0, reference_image=base_img)
+
+         if len(paths) >= 2:
+             mask_img = gtypes.Image.from_file(location=paths[1])
+             mask_ref = gtypes.MaskReferenceImage(
+                 reference_id=1,
+                 reference_image=mask_img,
+                 config=gtypes.MaskReferenceConfig(
+                     mask_mode="MASK_MODE_USER_PROVIDED",
+                     mask_dilation=0.0,
+                 ),
+             )
+             edit_mode = "EDIT_MODE_INPAINT_INSERTION"
+         else:
+             mask_ref = gtypes.MaskReferenceImage(
+                 reference_id=1,
+                 reference_image=None,
+                 config=gtypes.MaskReferenceConfig(
+                     mask_mode="MASK_MODE_BACKGROUND",
+                     mask_dilation=0.0,
+                 ),
+             )
+             edit_mode = "EDIT_MODE_BGSWAP"
+
+         cfg = gtypes.EditImageConfig(
+             edit_mode=edit_mode,
+             number_of_images=min(num, self.imagen_max_num),
+             include_rai_reason=True,
+         )
+
+         # Ensure capability model for edit
+         model_id = "imagen-3.0-capability-001"
+         return self.client.models.edit_image(
+             model=model_id,
+             prompt=prompt,
+             reference_images=[raw_ref, mask_ref],
+             config=cfg,
+         )
+
+     def _gemini_edit(self, prompt: str, attachments: Dict[str, Any], num: int):
+         """
+         Gemini image-to-image editing via generate_content (Developer/Vertex depending on client).
+         The first attachment is used as the input image.
+         """
+         paths = self._collect_attachment_paths(attachments)
+         if len(paths) == 0:
+             raise RuntimeError("No attachment provided for edit mode.")
+
+         img_path = paths[0]
+         with open(img_path, "rb") as f:
+             img_bytes = f.read()
+         mime = self._guess_mime(img_path)
+
+         return self.client.models.generate_content(
+             model=self.model,
+             contents=[prompt, gtypes.Part.from_bytes(data=img_bytes, mime_type=mime)],
+         )
+
+     def _collect_attachment_paths(self, attachments: Dict[str, Any]) -> List[str]:
+         """Extract file paths from attachments dict."""
+         out: List[str] = []
+         for _, att in (attachments or {}).items():
+             try:
+                 if getattr(att, "path", None) and os.path.exists(att.path):
+                     out.append(att.path)
+             except Exception:
+                 continue
+         return out
+
+     def _aspect_from_resolution(self, resolution: str) -> Optional[str]:
+         """Derive aspect ratio for Imagen."""
+         try:
+             from math import gcd
+             tolerance = 0.08
+             w_str, h_str = resolution.lower().replace("×", "x").split("x")
+             w, h = int(w_str.strip()), int(h_str.strip())
+             if w <= 0 or h <= 0:
+                 return None
+             supported = {
+                 "1:1": 1 / 1,
+                 "3:4": 3 / 4,
+                 "4:3": 4 / 3,
+                 "9:16": 9 / 16,
+                 "16:9": 16 / 9,
+             }
+             g = gcd(w, h)
+             key = f"{w // g}:{h // g}"
+             if key in supported:
+                 return key
+             r = w / h
+             best = min(supported.keys(), key=lambda k: abs(r - supported[k]))
+             rel_err = abs(r - supported[best]) / supported[best]
+             return best if rel_err <= tolerance else None
+         except Exception:
+             return None
+
+     def _extract_imagen_bytes(self, generated_image) -> Optional[bytes]:
+         """Extract bytes from Imagen GeneratedImage."""
+         img = getattr(generated_image, "image", None)
+         if not img:
+             return None
+         data = getattr(img, "image_bytes", None)
+         if isinstance(data, (bytes, bytearray)):
+             return bytes(data)
+         if isinstance(data, str):
+             try:
+                 return base64.b64decode(data)
+             except Exception:
+                 return None
+         url = getattr(img, "url", None) or getattr(img, "uri", None)
+         if url:
+             try:
+                 r = requests.get(url, timeout=30)
+                 if r.status_code == 200:
+                     return r.content
+             except Exception:
+                 pass
+         return None
+
+     def _save(self, idx: int, data: Optional[bytes]) -> Optional[str]:
+         """Save image bytes to file and return path."""
+         if not data:
+             return None
+         name = (
+             datetime.date.today().strftime("%Y-%m-%d") + "_" +
+             datetime.datetime.now().strftime("%H-%M-%S") + "-" +
+             self.window.core.image.make_safe_filename(self.input_prompt) + "-" +
+             str(idx + 1) + ".png"
+         )
+         path = os.path.join(self.window.core.config.get_user_dir("img"), name)
+         self.signals.status.emit(trans('img.status.downloading') + f" ({idx + 1} / {self.num}) -> {path}")
+         if self.window.core.image.save_image(path, data):
+             return path
+         return None
+
+     def _guess_mime(self, path: str) -> str:
+         """
+         Guess MIME type for a local image file.
+         """
+         mime, _ = mimetypes.guess_type(path)
+         if mime:
+             return mime
+         ext = os.path.splitext(path.lower())[1]
+         if ext in ('.jpg', '.jpeg'):
+             return 'image/jpeg'
+         if ext == '.webp':
+             return 'image/webp'
+         return 'image/png'
+
+     def _cleanup(self):
+         """Cleanup resources."""
+         sig = self.signals
+         self.signals = None
+         if sig is not None:
+             try:
+                 sig.deleteLater()
+             except RuntimeError:
+                 pass
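
Judging by the google.genai imports and the +427 line count, this hunk is the new Google GenAI image provider listed above as pygpt_net/provider/api/google/image.py (entry 123). A minimal sketch of how its generate() entry point could be driven follows; it assumes an initialised PyGPT window object, and the window.core.api.google.image accessor used below is an assumption rather than something shown in this hunk.

    # Minimal usage sketch (assumptions: `window` is an initialised PyGPT window
    # and exposes the Image class above as window.core.api.google.image).
    from pygpt_net.core.bridge.context import BridgeContext

    def generate_google_image(window, prompt: str, num: int = 1) -> bool:
        image = window.core.api.google.image                                  # assumed accessor for Image above
        model = window.core.models.get("imagen-4.0-generate-preview-06-06")   # default id used by ImageWorker
        context = BridgeContext(prompt=prompt, model=model)                   # generate() reads prompt/model from here
        # `extra` drives ImageWorker.num / ImageWorker.inline; sync=True runs the worker in the caller's thread
        return image.generate(context, extra={"num": num, "inline": False}, sync=True)

With sync=False, and when the kernel allows async execution, generate() instead dispatches a KernelEvent.STATE_BUSY event and schedules the worker on window.threadpool; results come back through the finished/finished_inline signals connected in generate().
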
@@ -0,0 +1,12 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ # ================================================== #
+ # This file is a part of PYGPT package #
+ # Website: https://pygpt.net #
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
+ # MIT License #
+ # Created By : Marcin Szczygliński #
+ # Updated Date: 2025.08.31 23:00:00 #
+ # ================================================== #
+
+ from .realtime import Realtime