pygpt-net 2.6.65__py3-none-any.whl → 2.6.67__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. pygpt_net/CHANGELOG.txt +17 -0
  2. pygpt_net/__init__.py +3 -3
  3. pygpt_net/app.py +2 -0
  4. pygpt_net/controller/chat/chat.py +0 -0
  5. pygpt_net/controller/chat/handler/openai_stream.py +137 -7
  6. pygpt_net/controller/chat/render.py +0 -0
  7. pygpt_net/controller/config/field/checkbox_list.py +34 -1
  8. pygpt_net/controller/config/field/textarea.py +2 -2
  9. pygpt_net/controller/dialogs/info.py +2 -2
  10. pygpt_net/controller/media/media.py +48 -1
  11. pygpt_net/controller/model/editor.py +74 -9
  12. pygpt_net/controller/presets/presets.py +4 -1
  13. pygpt_net/controller/settings/editor.py +25 -1
  14. pygpt_net/controller/ui/mode.py +14 -10
  15. pygpt_net/controller/ui/ui.py +18 -1
  16. pygpt_net/core/image/image.py +34 -1
  17. pygpt_net/core/tabs/tabs.py +0 -0
  18. pygpt_net/core/types/image.py +70 -3
  19. pygpt_net/core/video/video.py +43 -3
  20. pygpt_net/data/config/config.json +4 -3
  21. pygpt_net/data/config/models.json +637 -38
  22. pygpt_net/data/locale/locale.de.ini +5 -0
  23. pygpt_net/data/locale/locale.en.ini +5 -0
  24. pygpt_net/data/locale/locale.es.ini +5 -0
  25. pygpt_net/data/locale/locale.fr.ini +5 -0
  26. pygpt_net/data/locale/locale.it.ini +5 -0
  27. pygpt_net/data/locale/locale.pl.ini +5 -0
  28. pygpt_net/data/locale/locale.uk.ini +5 -0
  29. pygpt_net/data/locale/locale.zh.ini +5 -0
  30. pygpt_net/item/model.py +15 -19
  31. pygpt_net/provider/agents/openai/agent.py +0 -0
  32. pygpt_net/provider/api/google/__init__.py +20 -9
  33. pygpt_net/provider/api/google/image.py +161 -28
  34. pygpt_net/provider/api/google/video.py +73 -36
  35. pygpt_net/provider/api/openai/__init__.py +21 -11
  36. pygpt_net/provider/api/openai/agents/client.py +0 -0
  37. pygpt_net/provider/api/openai/video.py +562 -0
  38. pygpt_net/provider/core/config/patch.py +7 -0
  39. pygpt_net/provider/core/model/patch.py +54 -3
  40. pygpt_net/provider/vector_stores/qdrant.py +117 -0
  41. pygpt_net/ui/dialog/models.py +10 -1
  42. pygpt_net/ui/layout/toolbox/raw.py +7 -1
  43. pygpt_net/ui/layout/toolbox/video.py +14 -6
  44. pygpt_net/ui/widget/option/checkbox_list.py +14 -2
  45. pygpt_net/ui/widget/option/input.py +3 -1
  46. {pygpt_net-2.6.65.dist-info → pygpt_net-2.6.67.dist-info}/METADATA +72 -25
  47. {pygpt_net-2.6.65.dist-info → pygpt_net-2.6.67.dist-info}/RECORD +45 -43
  48. {pygpt_net-2.6.65.dist-info → pygpt_net-2.6.67.dist-info}/LICENSE +0 -0
  49. {pygpt_net-2.6.65.dist-info → pygpt_net-2.6.67.dist-info}/WHEEL +0 -0
  50. {pygpt_net-2.6.65.dist-info → pygpt_net-2.6.67.dist-info}/entry_points.txt +0 -0
@@ -51,6 +51,7 @@ action.save = Speichern
51
51
  action.save_as = Speichern unter...
52
52
  action.save_selection_as = Auswahl speichern unter...
53
53
  action.select_all = Alle auswählen
54
+ action.select_unselect_all = Alle auswählen/abwählen
54
55
  action.tab.add.chat: Neuen Chat hinzufügen
55
56
  action.tab.add.chat.tooltip: Neuen Chat hinzufügen (RMT für mehr Optionen...)
56
57
  action.tab.add.notepad = Neuen Notizblock hinzufügen
@@ -788,6 +789,7 @@ ipython.docker.install = Für die Ausführung von IPython muss Docker installier
788
789
  ipython.docker.install.snap = \n\nSNAP VERSION: \nSie sind nicht mit dem eingebauten Docker-Daemon verbunden. Bitte verbinden Sie die Slots mit:\n\nsudo snap connect pygpt:docker-executables docker:docker-executables\n\nsudo snap connect pygpt:docker docker:docker-daemon\n\n...und starten Sie die Anwendung neu.
789
790
  ipython.image.build = Das Docker-Image für IPython wurde noch nicht erstellt. Dies wird jetzt geschehen und es kann eine Weile dauern (aber es ist ein einmaliger Vorgang). Die Ausführung des Befehls wurde pausiert. Sobald das Image erstellt ist, führen Sie den Befehl bitte erneut aus.
790
791
  layout.split = Bildschirm teilen
792
+ list.all = --- ALLE ---
791
793
  menu.audio = Audio / Stimme
792
794
  menu.audio.cache.clear = Audio-Cache löschen...
793
795
  menu.audio.control.global = Sprachsteuerung (global)
@@ -902,7 +904,10 @@ mode.computer.tooltip = Computerbenutzung (Maus, Tastatur, Navigation)
902
904
  mode.expert = Experten (Kooperation)
903
905
  mode.expert.tooltip = Experten zum Hintergrund rufen
904
906
  mode.img = Bild und Video
907
+ mode.img.image = Bild
908
+ mode.img.music = Musik
905
909
  mode.img.tooltip = Bildgenerierung mit DALL-E
910
+ mode.img.video = Video
906
911
  mode.langchain = Langchain
907
912
  mode.langchain.tooltip = Chat mit Modellen von Langchain
908
913
  model.ctx = Kontext-Token
@@ -51,6 +51,7 @@ action.save = Save
51
51
  action.save_as = Save as...
52
52
  action.save_selection_as = Save selection as...
53
53
  action.select_all = Select all
54
+ action.select_unselect_all = Select/Unselect All
54
55
  action.tab.add.chat = Add a new chat
55
56
  action.tab.add.chat.tooltip = Add a new chat (RMB click for more options...)
56
57
  action.tab.add.notepad = Add a new notepad
@@ -802,6 +803,7 @@ ipython.docker.install = Running IPython requires Docker to be installed. Docker
802
803
  ipython.docker.install.snap = \n\nSNAP VERSION: \nYou are not connected to built-in Docker daemon. Please connect the slots with:\n\nsudo snap connect pygpt:docker-executables docker:docker-executables\n\nsudo snap connect pygpt:docker docker:docker-daemon\n\n...and restart the application.
803
804
  ipython.image.build = The Docker image for IPython has not been built yet. This will happen now, and it may take a while (but it's a one-time procedure). The execution of the command has been paused. Once the image is built, please execute the command again.
804
805
  layout.split = Split screen
806
+ list.all = --- ALL ---
805
807
  menu.audio = Audio / Voice
806
808
  menu.audio.cache.clear = Clear audio cache...
807
809
  menu.audio.control.global = Voice control (global)
@@ -917,7 +919,10 @@ mode.computer.tooltip = Computer use (mouse, keyboard, navigation)
917
919
  mode.expert = Experts (Co-op)
918
920
  mode.expert.tooltip = Experts to call in the background
919
921
  mode.img = Image and video
922
+ mode.img.image = Image
923
+ mode.img.music = Music
920
924
  mode.img.tooltip = Image generation
925
+ mode.img.video = Video
921
926
  mode.langchain = Langchain
922
927
  mode.langchain.tooltip = Chat with models provided by Langchain
923
928
  model.ctx = Context tokens
@@ -51,6 +51,7 @@ action.save = Guardar
51
51
  action.save_as = Guardar como...
52
52
  action.save_selection_as = Guardar selección como...
53
53
  action.select_all = Seleccionar todo
54
+ action.select_unselect_all = Seleccionar/Deseleccionar todo
54
55
  action.tab.add.chat: Añadir un nuevo chat
55
56
  action.tab.add.chat.tooltip: Añadir un nuevo chat (clic derecho para más opciones...)
56
57
  action.tab.add.notepad = Agregar un nuevo bloc de notas
@@ -789,6 +790,7 @@ ipython.docker.install = Ejecutar IPython requiere que Docker esté instalado. D
789
790
  ipython.docker.install.snap = \n\nVERSIÓN SNAP: \nNo está conectado al demonio Docker incorporado. Por favor conecte los espacios con:\n\nsudo snap connect pygpt:docker-executables docker:docker-executables\n\nsudo snap connect pygpt:docker docker:docker-daemon\n\n...y reinicie la aplicación.
790
791
  ipython.image.build = La imagen de Docker para IPython aún no ha sido construida. Esto sucederá ahora, y puede tardar un tiempo (pero es un procedimiento único). La ejecución del comando ha sido pausada. Una vez que la imagen esté construida, por favor ejecute el comando nuevamente.
791
792
  layout.split = Pantalla dividida
793
+ list.all = --- TODOS ---
792
794
  menu.audio = Audio / Voz
793
795
  menu.audio.cache.clear = Limpiar caché de audio...
794
796
  menu.audio.control.global = Control de voz (global)
@@ -903,7 +905,10 @@ mode.computer.tooltip = Uso de la computadora (ratón, teclado, navegación)
903
905
  mode.expert = Expertos (cooperación)
904
906
  mode.expert.tooltip = Llamar a expertos en segundo plano
905
907
  mode.img = Imagen y video
908
+ mode.img.image = Imagen
909
+ mode.img.music = Música
906
910
  mode.img.tooltip = Generar imagen usando DALL-E
911
+ mode.img.video = Vídeo
907
912
  mode.langchain = Langchain
908
913
  mode.langchain.tooltip = Chatear con modelos proporcionados por Langchain
909
914
  model.ctx = Tokens de contexto
@@ -51,6 +51,7 @@ action.save = Enregistrer
51
51
  action.save_as = Enregistrer sous...
52
52
  action.save_selection_as = Enregistrer la sélection sous...
53
53
  action.select_all = Sélectionner tout
54
+ action.select_unselect_all = Sélectionner/Désélectionner tout
54
55
  action.tab.add.chat: Ajouter une nouvelle discussion
55
56
  action.tab.add.chat.tooltip: Ajouter une nouvelle discussion (Clic Droit pour plus d'options...)
56
57
  action.tab.add.notepad = Ajouter un nouveau bloc-notes
@@ -788,6 +789,7 @@ ipython.docker.install = L'exécution d'IPython nécessite l'installation de Doc
788
789
  ipython.docker.install.snap = \n\nVERSION SNAP : \nVous n'êtes pas connecté au démon Docker intégré. Veuillez connecter les slots avec :\n\nsudo snap connect pygpt:docker-executables docker:docker-executables\n\nsudo snap connect pygpt:docker docker:docker-daemon\n\n...et redémarrez l'application.
789
790
  ipython.image.build = L'image Docker pour IPython n'a pas encore été construite. Cela va se produire maintenant, et cela peut prendre un certain temps (mais c'est une procédure unique). L'exécution de la commande a été mise en pause. Une fois l'image construite, veuillez exécuter à nouveau la commande.
790
791
  layout.split = Écran scindé
792
+ list.all = --- TOUS ---
791
793
  menu.audio = Audio / Voix
792
794
  menu.audio.cache.clear = Effacer le cache audio...
793
795
  menu.audio.control.global = Contrôle vocal (global)
@@ -902,7 +904,10 @@ mode.computer.tooltip = Utilisation de l'ordinateur (souris, clavier, navigation
902
904
  mode.expert = Experts (co-opération)
903
905
  mode.expert.tooltip = Appeler des experts en arrière-plan
904
906
  mode.img = Image et vidéo
907
+ mode.img.image = Image
908
+ mode.img.music = Musique
905
909
  mode.img.tooltip = Génération d'image avec DALL-E
910
+ mode.img.video = Vidéo
906
911
  mode.langchain = Langchain
907
912
  mode.langchain.tooltip = Discussion avec les modèles fournis par Langchain
908
913
  model.ctx = Jetons de contexte
@@ -51,6 +51,7 @@ action.save = Salva
51
51
  action.save_as = Salva con nome...
52
52
  action.save_selection_as = Salva selezione come...
53
53
  action.select_all = Seleziona tutto
54
+ action.select_unselect_all = Seleziona/Deseleziona tutto
54
55
  action.tab.add.chat: Aggiungi una nuova chat
55
56
  action.tab.add.chat.tooltip: Aggiungi una nuova chat (Clic Destro per maggiori opzioni...)
56
57
  action.tab.add.notepad = Aggiungi un nuovo blocco note
@@ -788,6 +789,7 @@ ipython.docker.install = L'esecuzione di IPython richiede l'installazione di Doc
788
789
  ipython.docker.install.snap = \n\nVERSIONE SNAP: \nNon sei connesso al demone Docker integrato. Connetti gli slot con:\n\nsudo snap connect pygpt:docker-executables docker:docker-executables\n\nsudo snap connect pygpt:docker docker:docker-daemon\n\n...e riavvia l'applicazione.
789
790
  ipython.image.build = L'immagine Docker per IPython non è stata ancora costruita. Questo avverrà ora e potrebbe richiedere del tempo (ma è una procedura una tantum). L'esecuzione del comando è stata messa in pausa. Una volta costruita l'immagine, esegui nuovamente il comando.
790
791
  layout.split = Schermo diviso
792
+ list.all = --- TUTTI ---
791
793
  menu.audio = Audio / Voce
792
794
  menu.audio.cache.clear = Cancella cache audio...
793
795
  menu.audio.control.global = Controllo vocale (globale)
@@ -902,7 +904,10 @@ mode.computer.tooltip = Uso del computer (mouse, tastiera, navigazione)
902
904
  mode.expert = Esperti (cooperazione)
903
905
  mode.expert.tooltip = Chiamare esperti in background
904
906
  mode.img = Immagine e video
907
+ mode.img.image = Immagine
908
+ mode.img.music = Musica
905
909
  mode.img.tooltip = Generazione immagini con DALL-E
910
+ mode.img.video = Video
906
911
  mode.langchain = Langchain
907
912
  mode.langchain.tooltip = Chattare con modelli forniti da Langchain
908
913
  model.ctx = Token di contesto
@@ -51,6 +51,7 @@ action.save = Zapisz
51
51
  action.save_as = Zapisz jako...
52
52
  action.save_selection_as = Zapisz zaznaczenie jako...
53
53
  action.select_all = Wybierz wszystko
54
+ action.select_unselect_all = Zaznacz/Odznacz wszystko
54
55
  action.tab.add.chat: Dodaj nowy czat
55
56
  action.tab.add.chat.tooltip: Dodaj nowy czat (PPM dla więcej opcji...)
56
57
  action.tab.add.notepad = Dodaj nowy notatnik
@@ -789,6 +790,7 @@ ipython.docker.install = Uruchomienie IPython wymaga zainstalowania Dockera. Doc
789
790
  ipython.docker.install.snap = \n\nWERSJA SNAP: \nNie jesteś połączony z wbudowanym daemonem Docker. Połącz sloty za pomocą:\n\nsudo snap connect pygpt:docker-executables docker:docker-executables\n\nsudo snap connect pygpt:docker docker:docker-daemon\n\n...i uruchom ponownie aplikację.
790
791
  ipython.image.build = Obraz Dockera dla IPython nie został jeszcze zbudowany. Nastąpi to teraz i może to potrwać chwilę (ale to jednorazowa procedura). Wykonywanie polecenia zostało wstrzymane. Po zbudowaniu obrazu możesz ponownie poprosić o wykonanie polecenia.
791
792
  layout.split = Podziel ekran
793
+ list.all = --- WSZYSTKIE ---
792
794
  menu.audio = Audio / Mowa
793
795
  menu.audio.cache.clear = Wyczyść pamięć podręczną audio...
794
796
  menu.audio.control.global = Kontrola głosowa (globalna)
@@ -903,7 +905,10 @@ mode.computer.tooltip = Kontrola komputera (mysz, klawiatura)
903
905
  mode.expert = Eksperci (współpraca)
904
906
  mode.expert.tooltip = Eksperci do wezwania w tle
905
907
  mode.img = Obraz i wideo
908
+ mode.img.image = Obraz
909
+ mode.img.music = Muzyka
906
910
  mode.img.tooltip = Generowanie obrazów przy użyciu DALL-E
911
+ mode.img.video = Wideo
907
912
  mode.langchain = Langchain
908
913
  mode.langchain.tooltip = Czat z modelami dostarczonymi przez Langchain
909
914
  model.ctx = Tokeny kontekstu
@@ -51,6 +51,7 @@ action.save = Зберегти
51
51
  action.save_as = Зберегти як...
52
52
  action.save_selection_as = Зберегти вибір як...
53
53
  action.select_all = Вибрати все
54
+ action.select_unselect_all = Вибрати/Скасувати вибір всіх
54
55
  action.tab.add.chat: Додати новий чат
55
56
  action.tab.add.chat.tooltip: Додати новий чат (ПКМ для більше опцій...)
56
57
  action.tab.add.notepad = Додати новий блокнот
@@ -788,6 +789,7 @@ ipython.docker.install = Для запуску IPython необхідно вст
788
789
  ipython.docker.install.snap = \n\nВЕРСІЯ SNAP: \nВи не підключені до вбудованого демона Docker. Будь ласка, підключіть слоти з:\n\nsudo snap connect pygpt:docker-executables docker:docker-executables\n\nsudo snap connect pygpt:docker docker:docker-daemon\n\n...та перезапустіть програму.
789
790
  ipython.image.build = Docker-образ для IPython ще не був створений. Це відбудеться зараз, і це може зайняти деякий час (але це одноразова процедура). Виконання команди було призупинено. Після створення образу, будь ласка, виконайте команду знову.
790
791
  layout.split = Розділити екран
792
+ list.all = --- ВСІ ---
791
793
  menu.audio = Аудіо / Голос
792
794
  menu.audio.cache.clear = Очистити кеш аудіо...
793
795
  menu.audio.control.global = Контроль голосу (глобальний)
@@ -902,7 +904,10 @@ mode.computer.tooltip = Використання комп'ютера (миша,
902
904
  mode.expert = Експерти (співпраця)
903
905
  mode.expert.tooltip = Виклик експертів на задній план
904
906
  mode.img = Зображення та відео
907
+ mode.img.image = Зображення
908
+ mode.img.music = Музика
905
909
  mode.img.tooltip = Генерація зображень за допомогою DALL-E
910
+ mode.img.video = Відео
906
911
  mode.langchain = Langchain
907
912
  mode.langchain.tooltip = Чат з моделями, наданими Langchain
908
913
  model.ctx = Токени контексту
@@ -51,6 +51,7 @@ action.save = 保存
51
51
  action.save_as = 另存为...
52
52
  action.save_selection_as = 将选择保存为...
53
53
  action.select_all = 选择全部
54
+ action.select_unselect_all = 全选/取消全选
54
55
  action.tab.add.chat: 添加新聊天
55
56
  action.tab.add.chat.tooltip: 添加新聊天(右键单击获取更多选项...)
56
57
  action.tab.add.notepad = 添加新记事本
@@ -788,6 +789,7 @@ ipython.docker.install = 运行 IPython 需要安装 Docker。系统上未检测
788
789
  ipython.docker.install.snap = \n\nSNAP 版本:\n您未连接到内置的 Docker 守护进程。请连接插槽:\n\nsudo snap connect pygpt:docker-executables docker:docker-executables\n\nsudo snap connect pygpt:docker docker:docker-daemon\n\n...并重启应用程序。
789
790
  ipython.image.build = IPython 的 Docker 镜像尚未构建。这将立即发生,可能需要一段时间(但这是一次性过程)。命令的执行已暂停。镜像构建完成后,请再次执行命令。
790
791
  layout.split = 分屏
792
+ list.all = --- 所有 ---
791
793
  menu.audio = 音频/语音
792
794
  menu.audio.cache.clear = 清除音频缓存...
793
795
  menu.audio.control.global = 语音控制(全局)
@@ -902,7 +904,10 @@ mode.computer.tooltip = 使用计算机(鼠标、键盘、导航)
902
904
  mode.expert = 专家 (合作)
903
905
  mode.expert.tooltip = 背景中呼叫专家
904
906
  mode.img = 图像和视频
907
+ mode.img.image = 图片
908
+ mode.img.music = 音乐
905
909
  mode.img.tooltip = 使用DALL-E生成图像
910
+ mode.img.video = 视频
906
911
  mode.langchain = Langchain模式
907
912
  mode.langchain.tooltip = 使用Langchain提供的模型进行聊天
908
913
  model.ctx = 上下文令牌
pygpt_net/item/model.py CHANGED
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.09.15 01:00:00 #
9
+ # Updated Date: 2025.12.25 20:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import json
@@ -274,9 +274,7 @@ class ModelItem:
274
274
 
275
275
  :return: True if supports image input
276
276
  """
277
- if MULTIMODAL_IMAGE in self.input:
278
- return True
279
- return False
277
+ return MULTIMODAL_IMAGE in self.input
280
278
 
281
279
  def is_image_output(self) -> bool:
282
280
  """
@@ -284,9 +282,7 @@ class ModelItem:
284
282
 
285
283
  :return: True if supports image output
286
284
  """
287
- if "image" in self.output or MODE_VISION in self.mode:
288
- return True
289
- return False
285
+ return MULTIMODAL_IMAGE in self.output or MODE_VISION in self.mode
290
286
 
291
287
  def is_audio_input(self) -> bool:
292
288
  """
@@ -294,9 +290,7 @@ class ModelItem:
294
290
 
295
291
  :return: True if supports audio input
296
292
  """
297
- if MULTIMODAL_AUDIO in self.input:
298
- return True
299
- return False
293
+ return MULTIMODAL_AUDIO in self.input
300
294
 
301
295
  def is_audio_output(self) -> bool:
302
296
  """
@@ -304,9 +298,7 @@ class ModelItem:
304
298
 
305
299
  :return: True if supports audio output
306
300
  """
307
- if MULTIMODAL_AUDIO in self.output:
308
- return True
309
- return False
301
+ return MULTIMODAL_AUDIO in self.output
310
302
 
311
303
  def is_video_input(self) -> bool:
312
304
  """
@@ -314,9 +306,7 @@ class ModelItem:
314
306
 
315
307
  :return: True if supports video input
316
308
  """
317
- if MULTIMODAL_VIDEO in self.input:
318
- return True
319
- return False
309
+ return MULTIMODAL_VIDEO in self.input
320
310
 
321
311
  def is_video_output(self) -> bool:
322
312
  """
@@ -324,9 +314,15 @@ class ModelItem:
324
314
 
325
315
  :return: True if supports video output
326
316
  """
327
- if MULTIMODAL_VIDEO in self.output:
328
- return True
329
- return False
317
+ return MULTIMODAL_VIDEO in self.output
318
+
319
+ def is_music_output(self) -> bool:
320
+ """
321
+ Check if model supports music output
322
+
323
+ :return: True if supports music output
324
+ """
325
+ return MULTIMODAL_AUDIO in self.output
330
326
 
331
327
  def dump(self) -> str:
332
328
  """
File without changes
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.09.14 00:00:00 #
9
+ # Updated Date: 2025.12.25 20:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import os
@@ -89,7 +89,14 @@ class ApiGoogle:
89
89
  filtered["location"] = os.environ.get("GOOGLE_CLOUD_LOCATION", "us-central1")
90
90
  # filtered["http_options"] = gtypes.HttpOptions(api_version="v1")
91
91
 
92
- return genai.Client(**filtered)
92
+ # use previous client if args are the same
93
+ if self.client and self.last_client_args == filtered:
94
+ return self.client
95
+
96
+ self.last_client_args = filtered
97
+ self.client = genai.Client(**filtered)
98
+
99
+ return self.client
93
100
 
94
101
  def call(
95
102
  self,
@@ -138,13 +145,17 @@ class ApiGoogle:
138
145
 
139
146
  elif mode == MODE_IMAGE:
140
147
  # Route to video / music / image based on selected model.
141
- if context.model.is_video_output():
142
- return self.video.generate(context=context, extra=extra) # veo, etc.
143
- # Lyria / music models
144
- if self.music.is_music_model(model.id if model else ""):
145
- return self.music.generate(context=context, extra=extra) # lyria, etc.
146
- # Default: image
147
- return self.image.generate(context=context, extra=extra) # imagen, etc.
148
+ media_mode = self.window.controller.media.get_mode()
149
+ if media_mode == "video":
150
+ if context.model.is_video_output():
151
+ return self.video.generate(context=context, extra=extra) # veo, etc.
152
+ elif media_mode == "music":
153
+ # Lyria / music models
154
+ if self.music.is_music_model(model.id if model else ""):
155
+ return self.music.generate(context=context, extra=extra) # lyria, etc.
156
+ elif media_mode == "image":
157
+ # Default: image
158
+ return self.image.generate(context=context, extra=extra) # imagen, etc.
148
159
 
149
160
  elif mode == MODE_ASSISTANT:
150
161
  return False # not implemented for Google
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.09.14 00:00:00 #
9
+ # Updated Date: 2025.12.25 20:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import mimetypes
@@ -39,14 +39,8 @@ class Image:
39
39
  ) -> bool:
40
40
  """
41
41
  Generate or edit image(s) using Google GenAI API (Developer API or Vertex AI).
42
-
43
- :param context: BridgeContext with prompt, model, attachments
44
- :param extra: extra parameters (num, inline)
45
- :param sync: run synchronously (blocking) if True
46
- :return: True if started
47
42
  """
48
43
  # Music fast-path: delegate to Music flow if a music model is selected (e.g., Lyria).
49
- # This keeps image flow unchanged while enabling music in the same "image" mode.
50
44
  try:
51
45
  model_id = (context.model.id if context and context.model else "") or ""
52
46
  if self.window and hasattr(self.window.core.api.google, "music"):
@@ -65,9 +59,6 @@ class Image:
65
59
  # decide sub-mode based on attachments
66
60
  sub_mode = self.MODE_GENERATE
67
61
  attachments = context.attachments
68
- if attachments and len(attachments) > 0:
69
- pass # TODO: implement edit!
70
- # sub_mode = self.MODE_EDIT
71
62
 
72
63
  # model used to improve the prompt (not image model)
73
64
  prompt_model = self.window.core.models.from_defaults()
@@ -89,6 +80,11 @@ class Image:
89
80
  worker.num = num
90
81
  worker.inline = inline
91
82
 
83
+ if attachments and len(attachments) > 0:
84
+ mid = str(model.id).lower()
85
+ if "imagen" in mid:
86
+ worker.mode = self.MODE_EDIT
87
+
92
88
  if self.window.core.config.has('img_resolution'):
93
89
  worker.resolution = self.window.core.config.get('img_resolution') or "1024x1024"
94
90
 
@@ -132,13 +128,28 @@ class ImageWorker(QRunnable):
132
128
  self.inline = False
133
129
  self.raw = False
134
130
  self.num = 1
135
- self.resolution = "1024x1024" # used to derive aspect ratio for Imagen
131
+ self.resolution = "1024x1024" # used to derive aspect ratio or image_size
136
132
 
137
133
  # limits
138
134
  self.imagen_max_num = 4 # Imagen returns up to 4 images
139
135
 
140
136
  # fallbacks
141
- self.DEFAULT_GEMINI_IMAGE_MODEL = "gemini-2.0-flash-preview-image-generation"
137
+ self.DEFAULT_GEMINI_IMAGE_MODEL = "gemini-2.5-flash-image"
138
+
139
+ # Canonical 1K dimensions for Nano Banana Pro (Gemini 3 Pro Image Preview).
140
+ # Used to infer 2K/4K by 2x/4x multiples and to normalize UI inputs.
141
+ self._NB_PRO_1K = {
142
+ "1024x1024", # 1:1
143
+ "848x1264", # 2:3
144
+ "1264x848", # 3:2
145
+ "896x1200", # 3:4
146
+ "1200x896", # 4:3
147
+ "928x1152", # 4:5
148
+ "1152x928", # 5:4
149
+ "768x1376", # 9:16
150
+ "1376x768", # 16:9
151
+ "1584x672", # 21:9
152
+ }
142
153
 
143
154
  @Slot()
144
155
  def run(self):
@@ -179,7 +190,7 @@ class ImageWorker(QRunnable):
179
190
  if p:
180
191
  paths.append(p)
181
192
  else:
182
- # Developer API fallback via Gemini image model; force v1 to avoid 404
193
+ # Gemini Developer API via Gemini image models (Nano Banana / Nano Banana Pro)
183
194
  resp = self._gemini_edit(self.input_prompt, self.attachments, self.num)
184
195
  saved = 0
185
196
  for cand in getattr(resp, "candidates", []) or []:
@@ -208,14 +219,8 @@ class ImageWorker(QRunnable):
208
219
  if p:
209
220
  paths.append(p)
210
221
  else:
211
- # Gemini Developer API image generation (needs response_modalities)
212
- resp = self.client.models.generate_content(
213
- model=self.model,
214
- contents=[self.input_prompt],
215
- config=gtypes.GenerateContentConfig(
216
- response_modalities=[gtypes.Modality.TEXT, gtypes.Modality.IMAGE],
217
- ),
218
- )
222
+ # Gemini Developer API image generation (Nano Banana / Nano Banana Pro) with robust sizing + optional reference images
223
+ resp = self._gemini_generate_image(self.input_prompt, self.model, self.resolution)
219
224
  saved = 0
220
225
  for cand in getattr(resp, "candidates", []) or []:
221
226
  parts = getattr(getattr(cand, "content", None), "parts", None) or []
@@ -316,10 +321,114 @@ class ImageWorker(QRunnable):
316
321
  config=cfg,
317
322
  )
318
323
 
324
+ def _is_gemini_pro_image_model(self, model_id: str) -> bool:
325
+ """
326
+ Detect Gemini 3 Pro Image (Nano Banana Pro) by id or UI alias.
327
+ """
328
+ mid = (model_id or "").lower()
329
+ return mid.startswith("gemini-") or mid.startswith("nano-banana") or mid.startswith("nb-")
330
+
331
+ def _infer_nb_pro_size_for_dims(self, w: int, h: int) -> Optional[str]:
332
+ """
333
+ Infer '1K' | '2K' | '4K' for Nano Banana Pro from WxH.
334
+ """
335
+ dims = f"{w}x{h}"
336
+ if dims in self._NB_PRO_1K:
337
+ return "1K"
338
+ if (w % 2 == 0) and (h % 2 == 0):
339
+ if f"{w // 2}x{h // 2}" in self._NB_PRO_1K:
340
+ return "2K"
341
+ if (w % 4 == 0) and (h % 4 == 0):
342
+ if f"{w // 4}x{h // 4}" in self._NB_PRO_1K:
343
+ return "4K"
344
+ mx = max(w, h)
345
+ if mx >= 4000:
346
+ return "4K"
347
+ if mx >= 2000:
348
+ return "2K"
349
+ return "1K"
350
+
351
+ def _build_gemini_image_config(self, model_id: str, resolution: str) -> Optional[gtypes.ImageConfig]:
352
+ """
353
+ Build ImageConfig for Gemini image models.
354
+ """
355
+ try:
356
+ aspect = self._aspect_from_resolution(resolution)
357
+ cfg = gtypes.ImageConfig()
358
+ if aspect:
359
+ cfg.aspect_ratio = aspect
360
+
361
+ # Only Pro supports image_size; detect by id/alias and set 1K/2K/4K from WxH.
362
+ if self._is_gemini_pro_image_model(model_id):
363
+ w_str, h_str = resolution.lower().replace("×", "x").split("x")
364
+ w, h = int(w_str.strip()), int(h_str.strip())
365
+ k = self._infer_nb_pro_size_for_dims(w, h)
366
+ if k:
367
+ cfg.image_size = k
368
+ return cfg
369
+ except Exception:
370
+ return None
371
+
372
+ def _attachment_image_parts(self) -> List[gtypes.Part]:
373
+ """
374
+ Build image Parts from current attachments for Gemini models.
375
+ """
376
+ parts: List[gtypes.Part] = []
377
+ paths = self._collect_attachment_paths(self.attachments)
378
+ for p in paths:
379
+ try:
380
+ mime = self._guess_mime(p)
381
+ if not mime or not mime.startswith("image/"):
382
+ continue
383
+ with open(p, "rb") as f:
384
+ data = f.read()
385
+ parts.append(gtypes.Part.from_bytes(data=data, mime_type=mime))
386
+ except Exception:
387
+ continue
388
+ return parts
389
+
390
+ def _gemini_generate_image(self, prompt: str, model_id: str, resolution: str):
391
+ """
392
+ Call Gemini generate_content with robust fallback for image_size.
393
+ Supports optional reference images uploaded as attachments.
394
+ """
395
+ cfg = self._build_gemini_image_config(model_id, resolution)
396
+ image_parts = self._attachment_image_parts()
397
+
398
+ def _do_call(icfg: Optional[gtypes.ImageConfig]):
399
+ contents: List[Any] = []
400
+ # Always include the textual prompt (can be empty string).
401
+ contents.append(prompt or "")
402
+ # Append reference images, if any.
403
+ if image_parts:
404
+ contents.extend(image_parts)
405
+ return self.client.models.generate_content(
406
+ model=model_id or self.DEFAULT_GEMINI_IMAGE_MODEL,
407
+ contents=contents,
408
+ config=gtypes.GenerateContentConfig(
409
+ response_modalities=[gtypes.Modality.TEXT, gtypes.Modality.IMAGE],
410
+ image_config=icfg,
411
+ ),
412
+ )
413
+
414
+ try:
415
+ return _do_call(cfg)
416
+ except Exception as e:
417
+ msg = str(e)
418
+ if "imageSize" in msg or "image_size" in msg or "Unrecognized" in msg or "unsupported" in msg:
419
+ try:
420
+ if cfg and getattr(cfg, "image_size", None):
421
+ cfg2 = gtypes.ImageConfig()
422
+ cfg2.aspect_ratio = getattr(cfg, "aspect_ratio", None)
423
+ return _do_call(cfg2)
424
+ except Exception:
425
+ pass
426
+ raise
427
+
319
428
  def _gemini_edit(self, prompt: str, attachments: Dict[str, Any], num: int):
320
429
  """
321
- Gemini image-to-image editing via generate_content (Developer/Vertex depending on client).
322
- The first attachment is used as the input image.
430
+ Gemini image-to-image editing via generate_content.
431
+ The first attachment is used as the input image. Honors aspect_ratio and (for Pro) image_size.
323
432
  """
324
433
  paths = self._collect_attachment_paths(attachments)
325
434
  if len(paths) == 0:
@@ -330,10 +439,27 @@ class ImageWorker(QRunnable):
330
439
  img_bytes = f.read()
331
440
  mime = self._guess_mime(img_path)
332
441
 
333
- return self.client.models.generate_content(
334
- model=self.model,
335
- contents=[prompt, gtypes.Part.from_bytes(data=img_bytes, mime_type=mime)],
336
- )
442
+ cfg = self._build_gemini_image_config(self.model, self.resolution)
443
+
444
+ def _do_call(icfg: Optional[gtypes.ImageConfig]):
445
+ return self.client.models.generate_content(
446
+ model=self.model or self.DEFAULT_GEMINI_IMAGE_MODEL,
447
+ contents=[prompt, gtypes.Part.from_bytes(data=img_bytes, mime_type=mime)],
448
+ config=gtypes.GenerateContentConfig(
449
+ image_config=icfg,
450
+ ),
451
+ )
452
+
453
+ try:
454
+ return _do_call(cfg)
455
+ except Exception as e:
456
+ msg = str(e)
457
+ if "imageSize" in msg or "image_size" in msg or "Unrecognized" in msg or "unsupported" in msg:
458
+ if cfg and getattr(cfg, "image_size", None):
459
+ cfg2 = gtypes.ImageConfig()
460
+ cfg2.aspect_ratio = getattr(cfg, "aspect_ratio", None)
461
+ return _do_call(cfg2)
462
+ raise
337
463
 
338
464
  def _collect_attachment_paths(self, attachments: Dict[str, Any]) -> List[str]:
339
465
  """Extract file paths from attachments dict."""
@@ -347,7 +473,7 @@ class ImageWorker(QRunnable):
347
473
  return out
348
474
 
349
475
  def _aspect_from_resolution(self, resolution: str) -> Optional[str]:
350
- """Derive aspect ratio for Imagen."""
476
+ """Derive aspect ratio from WxH across supported set."""
351
477
  try:
352
478
  from math import gcd
353
479
  tolerance = 0.08
@@ -357,10 +483,15 @@ class ImageWorker(QRunnable):
357
483
  return None
358
484
  supported = {
359
485
  "1:1": 1 / 1,
486
+ "2:3": 2 / 3,
487
+ "3:2": 3 / 2,
360
488
  "3:4": 3 / 4,
361
489
  "4:3": 4 / 3,
490
+ "4:5": 4 / 5,
491
+ "5:4": 5 / 4,
362
492
  "9:16": 9 / 16,
363
493
  "16:9": 16 / 9,
494
+ "21:9": 21 / 9,
364
495
  }
365
496
  g = gcd(w, h)
366
497
  key = f"{w // g}:{h // g}"
@@ -424,6 +555,8 @@ class ImageWorker(QRunnable):
424
555
  return 'image/jpeg'
425
556
  if ext == '.webp':
426
557
  return 'image/webp'
558
+ if ext in ('.heic', '.heif'):
559
+ return 'image/heic'
427
560
  return 'image/png'
428
561
 
429
562
  def _cleanup(self):