pygpt-net 2.6.29__py3-none-any.whl → 2.6.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182)
  1. pygpt_net/CHANGELOG.txt +15 -0
  2. pygpt_net/__init__.py +3 -3
  3. pygpt_net/app.py +4 -0
  4. pygpt_net/{container.py → app_core.py} +5 -6
  5. pygpt_net/controller/__init__.py +5 -2
  6. pygpt_net/controller/access/control.py +1 -9
  7. pygpt_net/controller/assistant/assistant.py +4 -4
  8. pygpt_net/controller/assistant/batch.py +7 -7
  9. pygpt_net/controller/assistant/files.py +4 -4
  10. pygpt_net/controller/assistant/threads.py +3 -3
  11. pygpt_net/controller/attachment/attachment.py +4 -7
  12. pygpt_net/controller/audio/audio.py +25 -1
  13. pygpt_net/controller/audio/ui.py +2 -2
  14. pygpt_net/controller/chat/audio.py +1 -8
  15. pygpt_net/controller/chat/common.py +30 -4
  16. pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
  17. pygpt_net/controller/chat/output.py +8 -3
  18. pygpt_net/controller/chat/stream.py +4 -405
  19. pygpt_net/controller/chat/text.py +3 -2
  20. pygpt_net/controller/chat/vision.py +11 -19
  21. pygpt_net/controller/config/placeholder.py +1 -1
  22. pygpt_net/controller/ctx/ctx.py +1 -1
  23. pygpt_net/controller/ctx/summarizer.py +1 -1
  24. pygpt_net/controller/kernel/kernel.py +11 -3
  25. pygpt_net/controller/kernel/reply.py +5 -1
  26. pygpt_net/controller/mode/mode.py +21 -12
  27. pygpt_net/controller/plugins/settings.py +3 -2
  28. pygpt_net/controller/presets/editor.py +112 -99
  29. pygpt_net/controller/realtime/__init__.py +12 -0
  30. pygpt_net/controller/realtime/manager.py +53 -0
  31. pygpt_net/controller/realtime/realtime.py +268 -0
  32. pygpt_net/controller/theme/theme.py +3 -2
  33. pygpt_net/controller/ui/mode.py +7 -0
  34. pygpt_net/controller/ui/ui.py +19 -1
  35. pygpt_net/controller/ui/vision.py +4 -4
  36. pygpt_net/core/agents/legacy.py +2 -2
  37. pygpt_net/core/agents/runners/openai_workflow.py +2 -2
  38. pygpt_net/core/assistants/files.py +5 -5
  39. pygpt_net/core/assistants/store.py +4 -4
  40. pygpt_net/core/audio/audio.py +6 -1
  41. pygpt_net/core/audio/backend/native/__init__.py +12 -0
  42. pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
  43. pygpt_net/core/audio/backend/native/player.py +139 -0
  44. pygpt_net/core/audio/backend/native/realtime.py +250 -0
  45. pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
  46. pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
  47. pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
  48. pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
  49. pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
  50. pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
  51. pygpt_net/core/audio/backend/shared/__init__.py +38 -0
  52. pygpt_net/core/audio/backend/shared/conversions.py +211 -0
  53. pygpt_net/core/audio/backend/shared/envelope.py +38 -0
  54. pygpt_net/core/audio/backend/shared/player.py +137 -0
  55. pygpt_net/core/audio/backend/shared/rt.py +52 -0
  56. pygpt_net/core/audio/capture.py +5 -0
  57. pygpt_net/core/audio/output.py +13 -2
  58. pygpt_net/core/audio/whisper.py +6 -2
  59. pygpt_net/core/bridge/bridge.py +4 -3
  60. pygpt_net/core/bridge/worker.py +31 -9
  61. pygpt_net/core/debug/console/console.py +2 -2
  62. pygpt_net/core/debug/presets.py +2 -2
  63. pygpt_net/core/dispatcher/dispatcher.py +37 -1
  64. pygpt_net/core/events/__init__.py +2 -1
  65. pygpt_net/core/events/realtime.py +55 -0
  66. pygpt_net/core/experts/experts.py +2 -2
  67. pygpt_net/core/image/image.py +51 -1
  68. pygpt_net/core/modes/modes.py +2 -2
  69. pygpt_net/core/presets/presets.py +3 -3
  70. pygpt_net/core/realtime/options.py +87 -0
  71. pygpt_net/core/realtime/shared/__init__.py +0 -0
  72. pygpt_net/core/realtime/shared/audio.py +213 -0
  73. pygpt_net/core/realtime/shared/loop.py +64 -0
  74. pygpt_net/core/realtime/shared/session.py +59 -0
  75. pygpt_net/core/realtime/shared/text.py +37 -0
  76. pygpt_net/core/realtime/shared/tools.py +276 -0
  77. pygpt_net/core/realtime/shared/turn.py +38 -0
  78. pygpt_net/core/realtime/shared/types.py +16 -0
  79. pygpt_net/core/realtime/worker.py +164 -0
  80. pygpt_net/core/tokens/tokens.py +4 -4
  81. pygpt_net/core/types/__init__.py +1 -0
  82. pygpt_net/core/types/image.py +48 -0
  83. pygpt_net/core/types/mode.py +5 -2
  84. pygpt_net/core/vision/analyzer.py +1 -1
  85. pygpt_net/data/config/config.json +13 -4
  86. pygpt_net/data/config/models.json +219 -101
  87. pygpt_net/data/config/modes.json +3 -9
  88. pygpt_net/data/config/settings.json +135 -27
  89. pygpt_net/data/config/settings_section.json +2 -2
  90. pygpt_net/data/locale/locale.de.ini +7 -7
  91. pygpt_net/data/locale/locale.en.ini +25 -12
  92. pygpt_net/data/locale/locale.es.ini +7 -7
  93. pygpt_net/data/locale/locale.fr.ini +7 -7
  94. pygpt_net/data/locale/locale.it.ini +7 -7
  95. pygpt_net/data/locale/locale.pl.ini +8 -8
  96. pygpt_net/data/locale/locale.uk.ini +7 -7
  97. pygpt_net/data/locale/locale.zh.ini +3 -3
  98. pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
  99. pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
  100. pygpt_net/item/model.py +23 -3
  101. pygpt_net/plugin/audio_input/plugin.py +37 -4
  102. pygpt_net/plugin/audio_input/simple.py +57 -8
  103. pygpt_net/plugin/cmd_files/worker.py +3 -0
  104. pygpt_net/plugin/openai_dalle/plugin.py +4 -4
  105. pygpt_net/plugin/openai_vision/plugin.py +12 -13
  106. pygpt_net/provider/agents/openai/agent.py +5 -5
  107. pygpt_net/provider/agents/openai/agent_b2b.py +5 -5
  108. pygpt_net/provider/agents/openai/agent_planner.py +5 -6
  109. pygpt_net/provider/agents/openai/agent_with_experts.py +5 -5
  110. pygpt_net/provider/agents/openai/agent_with_experts_feedback.py +4 -4
  111. pygpt_net/provider/agents/openai/agent_with_feedback.py +4 -4
  112. pygpt_net/provider/agents/openai/bot_researcher.py +2 -2
  113. pygpt_net/provider/agents/openai/bots/research_bot/agents/planner_agent.py +1 -1
  114. pygpt_net/provider/agents/openai/bots/research_bot/agents/search_agent.py +1 -1
  115. pygpt_net/provider/agents/openai/bots/research_bot/agents/writer_agent.py +1 -1
  116. pygpt_net/provider/agents/openai/evolve.py +5 -5
  117. pygpt_net/provider/agents/openai/supervisor.py +4 -4
  118. pygpt_net/provider/api/__init__.py +27 -0
  119. pygpt_net/provider/api/anthropic/__init__.py +68 -0
  120. pygpt_net/provider/api/google/__init__.py +295 -0
  121. pygpt_net/provider/api/google/audio.py +121 -0
  122. pygpt_net/provider/api/google/chat.py +591 -0
  123. pygpt_net/provider/api/google/image.py +427 -0
  124. pygpt_net/provider/api/google/realtime/__init__.py +12 -0
  125. pygpt_net/provider/api/google/realtime/client.py +1945 -0
  126. pygpt_net/provider/api/google/realtime/realtime.py +186 -0
  127. pygpt_net/provider/api/google/tools.py +222 -0
  128. pygpt_net/provider/api/google/vision.py +129 -0
  129. pygpt_net/provider/{gpt → api/openai}/__init__.py +24 -4
  130. pygpt_net/provider/api/openai/agents/__init__.py +0 -0
  131. pygpt_net/provider/{gpt → api/openai}/agents/computer.py +1 -1
  132. pygpt_net/provider/{gpt → api/openai}/agents/experts.py +1 -1
  133. pygpt_net/provider/{gpt → api/openai}/agents/response.py +1 -1
  134. pygpt_net/provider/{gpt → api/openai}/assistants.py +1 -1
  135. pygpt_net/provider/{gpt → api/openai}/chat.py +15 -8
  136. pygpt_net/provider/{gpt → api/openai}/completion.py +1 -1
  137. pygpt_net/provider/{gpt → api/openai}/image.py +1 -1
  138. pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
  139. pygpt_net/provider/api/openai/realtime/client.py +1828 -0
  140. pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
  141. pygpt_net/provider/{gpt → api/openai}/remote_tools.py +1 -1
  142. pygpt_net/provider/{gpt → api/openai}/responses.py +34 -20
  143. pygpt_net/provider/{gpt → api/openai}/store.py +2 -2
  144. pygpt_net/provider/{gpt → api/openai}/vision.py +1 -1
  145. pygpt_net/provider/api/openai/worker/__init__.py +0 -0
  146. pygpt_net/provider/{gpt → api/openai}/worker/assistants.py +4 -4
  147. pygpt_net/provider/{gpt → api/openai}/worker/importer.py +10 -10
  148. pygpt_net/provider/audio_input/google_genai.py +103 -0
  149. pygpt_net/provider/audio_input/openai_whisper.py +1 -1
  150. pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
  151. pygpt_net/provider/audio_output/openai_tts.py +9 -6
  152. pygpt_net/provider/core/config/patch.py +26 -0
  153. pygpt_net/provider/core/model/patch.py +20 -0
  154. pygpt_net/provider/core/preset/json_file.py +2 -4
  155. pygpt_net/provider/llms/anthropic.py +2 -5
  156. pygpt_net/provider/llms/base.py +4 -3
  157. pygpt_net/provider/llms/google.py +8 -9
  158. pygpt_net/provider/llms/openai.py +1 -1
  159. pygpt_net/provider/loaders/hub/image_vision/base.py +1 -1
  160. pygpt_net/ui/dialog/preset.py +71 -55
  161. pygpt_net/ui/layout/toolbox/footer.py +16 -0
  162. pygpt_net/ui/layout/toolbox/image.py +5 -0
  163. pygpt_net/ui/main.py +6 -4
  164. pygpt_net/ui/widget/option/combo.py +15 -1
  165. pygpt_net/utils.py +9 -0
  166. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +55 -55
  167. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +181 -135
  168. pygpt_net/core/audio/backend/pyaudio.py +0 -554
  169. /pygpt_net/{provider/gpt/agents → controller/chat/handler}/__init__.py +0 -0
  170. /pygpt_net/{provider/gpt/worker → core/realtime}/__init__.py +0 -0
  171. /pygpt_net/provider/{gpt → api/openai}/agents/client.py +0 -0
  172. /pygpt_net/provider/{gpt → api/openai}/agents/remote_tools.py +0 -0
  173. /pygpt_net/provider/{gpt → api/openai}/agents/utils.py +0 -0
  174. /pygpt_net/provider/{gpt → api/openai}/audio.py +0 -0
  175. /pygpt_net/provider/{gpt → api/openai}/computer.py +0 -0
  176. /pygpt_net/provider/{gpt → api/openai}/container.py +0 -0
  177. /pygpt_net/provider/{gpt → api/openai}/summarizer.py +0 -0
  178. /pygpt_net/provider/{gpt → api/openai}/tools.py +0 -0
  179. /pygpt_net/provider/{gpt → api/openai}/utils.py +0 -0
  180. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
  181. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
  182. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: pygpt-net
- Version: 2.6.29
+ Version: 2.6.31
  Summary: Desktop AI Assistant powered by: OpenAI GPT-5, GPT-4, o1, o3, Gemini, Claude, Grok, DeepSeek, and other models supported by Llama Index, and Ollama. Chatbot, agents, completion, image generation, vision analysis, speech-to-text, plugins, internet access, file handling, command execution and more.
  License: MIT
  Keywords: ai,api,api key,app,assistant,bielik,chat,chatbot,chatgpt,claude,dall-e,deepseek,desktop,gemini,gpt,gpt-3.5,gpt-4,gpt-4-vision,gpt-4o,gpt-5,gpt-oss,gpt3.5,gpt4,grok,langchain,llama-index,llama3,mistral,o1,o3,ollama,openai,presets,py-gpt,py_gpt,pygpt,pyside,qt,text completion,tts,ui,vision,whisper
@@ -117,7 +117,7 @@ Description-Content-Type: text/markdown

  [![pygpt](https://snapcraft.io/pygpt/badge.svg)](https://snapcraft.io/pygpt)

- Release: **2.6.29** | build: **2025-08-27** | Python: **>=3.10, <3.14**
+ Release: **2.6.31** | build: **2025-09-01** | Python: **>=3.10, <3.14**

  > Official website: https://pygpt.net | Documentation: https://pygpt.readthedocs.io
  >
@@ -157,7 +157,7 @@ You can download compiled 64-bit versions for Windows and Linux here: https://py

  - Desktop AI Assistant for `Linux`, `Windows` and `Mac`, written in Python.
  - Works similarly to `ChatGPT`, but locally (on a desktop computer).
- - 12 modes of operation: Chat, Chat with Files, Chat with Audio, Research (Perplexity), Completion, Image generation, Vision, Assistants, Experts, Computer use, Agents and Autonomous Mode.
+ - 11 modes of operation: Chat, Chat with Files, Realtime + audio, Research (Perplexity), Completion, Image generation, Assistants, Experts, Computer use, Agents and Autonomous Mode.
  - Supports multiple models like `OpenAI GPT-5`, `GPT-4`, `o1`, `o3`, `o4`, `Google Gemini`, `Anthropic Claude`, `xAI Grok`, `DeepSeek V3/R1`, `Perplexity / Sonar`, and any model accessible through `LlamaIndex` and `Ollama` such as `DeepSeek`, `gpt-oss`, `Llama 3`, `Mistral`, `Bielik`, etc.
  - Chat with your own Files: integrated `LlamaIndex` support: chat with data such as: `txt`, `pdf`, `csv`, `html`, `md`, `docx`, `json`, `epub`, `xlsx`, `xml`, webpages, `Google`, `GitHub`, video/audio, images and other data types, or use conversation history as additional context provided to the model.
  - Built-in vector databases support and automated files and data embedding.
@@ -181,7 +181,7 @@ You can download compiled 64-bit versions for Windows and Linux here: https://py
  - Includes simple painter / drawing tool.
  - Supports multiple languages.
  - Requires no previous knowledge of using AI models.
- - Simplifies image generation using `DALL-E`.
+ - Simplifies image generation using image models like `DALL-E` and `Imagen`.
  - Fully configurable.
  - Themes support.
  - Real-time code syntax highlighting.
@@ -439,9 +439,9 @@ Alternatively, you can try removing snap and reinstalling it:
  `sudo snap install pygpt`


- **Access to microphone and audio in Windows version:**
+ **Access to a microphone and audio in Windows version:**

- If you have a problems with audio or microphone in the non-binary PIP/Python version on Windows, check to see if FFmpeg is installed. If it's not, install it and add it to the PATH. You can find a tutorial on how to do this here: https://phoenixnap.com/kb/ffmpeg-windows. The binary version already includes FFmpeg.
+ If you have a problems with audio or a microphone in the non-binary PIP/Python version on Windows, check to see if FFmpeg is installed. If it's not, install it and add it to the PATH. You can find a tutorial on how to do this here: https://phoenixnap.com/kb/ffmpeg-windows. The binary version already includes FFmpeg.

  **Windows and VC++ Redistributable**

@@ -519,9 +519,16 @@ Here, you can add or manage API keys for any supported provider.

  **+ Inline Vision and Image generation**

- This mode in **PyGPT** mirrors `ChatGPT`, allowing you to chat with models such as `GPT-5`, `GPT-4`, `o1`, `o3`, and`Claude`, `Gemini`, `Grok`, `Perplexity (sonar)`, `Deepseek`, and others. It works by using the `Responses` and `ChatCompletions` OpenAI API (or compatible). You can select the API endpoint to use in: `Config -> Settings -> API Keys`.
+ In **PyGPT**, this mode mirrors `ChatGPT`, allowing you to chat with models like `GPT-5`, `GPT-4`, `o1`, `o3`, `Claude`, `Gemini`, `Grok`, `Perplexity (Sonar)`, `Deepseek`, and more. It works using the OpenAI API `Responses` and `ChatCompletions`, or the `Google GenAI SDK` if the Google native client is enabled. You can choose the API endpoint for `ChatCompletions` in `Config -> Settings -> API Keys`.

- **Tip: This mode directly uses the OpenAI SDK. Other models, such as Gemini, Claude, Grok, Sonar, or Llama3, are supported in Chat mode via LlamaIndex or OpenAI API compatible endpoints (if available), which the application switches to in the background when working with models other than OpenAI.**
+ **Tip:** This mode uses the provider SDK directly. If there's no native client built into the app, models like Gemini, Claude, Grok, Sonar, or Llama3 are supported in Chat mode via LlamaIndex or compatible OpenAI API endpoints. The app automatically switches to these endpoints when using non-OpenAI models.
+
+ Currently built-in native clients:
+
+ - OpenAI SDK
+ - Google GenAI SDK
+
+ Support for Anthropic and xAI native clients is coming soon.

  The main part of the interface is a chat window where you see your conversations. Below it is a message box for typing. On the right side, you can set up or change the model and system prompt. You can also save these settings as presets to easily switch between models or tasks.

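For orientation, the three client call styles referenced in the hunk above look roughly like this when used directly. This is a minimal sketch against the public OpenAI and Google GenAI SDKs, not code taken from the package; model names are placeholders.

```python
# Minimal sketch of the client APIs mentioned above (OpenAI Responses,
# OpenAI Chat Completions, Google GenAI SDK); model names are placeholders.
from openai import OpenAI
from google import genai

oai = OpenAI()  # reads OPENAI_API_KEY from the environment

# Responses API
r = oai.responses.create(model="gpt-4o", input="Hello")
print(r.output_text)

# Chat Completions API
c = oai.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello"}],
)
print(c.choices[0].message.content)

# Google GenAI SDK (the path used when the Google native client is enabled)
gg = genai.Client()  # reads GEMINI_API_KEY / GOOGLE_API_KEY from the environment
g = gg.models.generate_content(model="gemini-2.5-flash", contents="Hello")
print(g.text)
```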
@@ -655,11 +662,11 @@ In the `Settings -> LlamaIndex -> Data loaders` section you can define the addit

  ## Chat with Audio

- This mode works like the Chat mode but with native support for audio input and output using a multimodal model - `gpt-4o-audio`. In this mode, audio input and output are directed to and from the model directly, without the use of external plugins. This enables faster and better audio communication.
+ This mode works like the Chat mode but with native support for audio input and output using a Realtime and Live APIs. In this mode, audio input and output are directed to and from the model directly, without the use of external plugins. This enables faster and better audio communication.

- More info: https://platform.openai.com/docs/guides/audio/quickstart
+ Currently, in beta.

- Currently, in beta. Tool and function calls are not enabled in this mode.
+ At this moment, only OpenAI real-time models (via the Realtime API) and Google Gemini real-time models (via the Live API) are supported.

  ## Research

@@ -683,17 +690,16 @@ From version `2.0.107` the `davinci` models are deprecated and has been replaced with

  ## Image generation

- ### DALL-E 3
+ ### OpenAI DALL-E 3 / Google Imagen 3 and 4

- **PyGPT** enables quick and easy image creation with `DALL-E 3` or `gpt-image-1`.
- The older model version, `DALL-E 2`, is also accessible. Generating images is akin to a chat conversation - a user's prompt triggers the generation, followed by downloading, saving to the computer,
- and displaying the image onscreen. You can send raw prompt to `DALL-E` in `Image generation` mode or ask the model for the best prompt.
+ **PyGPT** enables quick and easy image creation with image models like `DALL-E 3`, `gpt-image-1` or `Google Imagen`.
+ Generating images is akin to a chat conversation - a user's prompt triggers the generation, followed by downloading, saving to the computer, and displaying the image onscreen. You can send raw prompt to the model in `Image generation` mode or ask the model for the best prompt.

  ![v3_img](https://github.com/szczyglis-dev/py-gpt/raw/master/docs/source/images/v3_img.png)

- Image generation using DALL-E is available in every mode via plugin `Image Generation (inline)`. Just ask any model, in any mode, like e.g. GPT-4 to generate an image and it will do it inline, without need to mode change.
+ Image generation using image models is also available in every mode via plugin `Image Generation (inline)`. Just ask any model, in any mode, like e.g. GPT or Gemini to generate an image and it will do it inline, without need to mode change.

- If you want to generate images (using DALL-E) directly in chat you must enable plugin **Image generation (inline)** in the Plugins menu.
+ If you want to generate images directly in chat you must enable plugin **Image generation (inline)** in the Plugins menu.
  Plugin allows you to generate images in Chat mode:

  ![v3_img_chat](https://github.com/szczyglis-dev/py-gpt/raw/master/docs/source/images/v3_img_chat.png)
@@ -708,7 +714,7 @@ the bottom of the screen. This replaces the conversation temperature slider when

  There is an option for switching prompt generation mode.

- If **Raw Mode** is enabled, DALL-E will receive the prompt exactly as you have provided it.
+ If **Raw Mode** is enabled, a model will receive the prompt exactly as you have provided it.
  If **Raw Mode** is disabled, a model will generate the best prompt for you based on your instructions.

  ### Image storage
@@ -724,31 +730,6 @@ prompts for creating new images.

  Images are stored in ``img`` directory in **PyGPT** user data folder.

-
- ## Vision
-
- This mode enables image analysis using the `GPT-5`, `GPT-4o` and other vision (multimodal) models. Functioning much like the chat mode,
- it also allows you to upload images or provide URLs to images. The vision feature can analyze both local
- images and those found online.
-
- Vision is also integrated into any chat mode via plugin `Vision (inline)`. Just enable the plugin and use Vision in other work modes, such as Chat or Chat with Files.
-
- Vision mode also includes real-time video capture from camera. To capture image from camera and append it to chat just click on video at left side. You can also enable `Auto capture` - image will be captured and appended to chat message every time you send message.
-
- **1) Video camera real-time image capture**
-
- ![v3_vision_chat](https://github.com/szczyglis-dev/py-gpt/raw/master/docs/source/images/v3_vision_chat.png)
-
- **2) you can also provide an image URL**
-
- ![v2_mode_vision](https://github.com/szczyglis-dev/py-gpt/raw/master/docs/source/images/v2_mode_vision.png)
-
- **3) or you can just upload your local images or use the inline Vision in the standard chat mode:**
-
- ![v2_mode_vision_upload](https://github.com/szczyglis-dev/py-gpt/raw/master/docs/source/images/v2_mode_vision_upload.png)
-
- **Tip:** When using `Vision (inline)` by utilizing a plugin in standard mode, such as `Chat` (not `Vision` mode), the `+ Vision` label will appear at the bottom of the Chat window.
-

  ## Assistants
  This mode uses the OpenAI's **Assistants API**.
@@ -1167,7 +1148,7 @@ The name of the currently active profile is shown as (Profile Name) in the windo

  ## Built-in models

- PyGPT has a preconfigured list of models (as of 2025-07-26):
+ PyGPT has a preconfigured list of models (as of 2025-08-31):

  - `bielik-11b-v2.3-instruct:Q4_K_M` (Ollama)
  - `chatgpt-4o-latest` (OpenAI)
@@ -1191,6 +1172,7 @@ PyGPT has a preconfigured list of models (as of 2025-07-26):
  - `gemini-1.5-pro` (Google)
  - `gemini-2.0-flash-exp` (Google)
  - `gemini-2.5-flash` (Google)
+ - `gemini-2.5-flash-preview-native-audio-dialog` (Google, real-time)
  - `gemini-2.5-pro` (Google)
  - `gpt-3.5-turbo` (OpenAI)
  - `gpt-3.5-turbo-16k` (OpenAI)
@@ -1203,7 +1185,7 @@ PyGPT has a preconfigured list of models (as of 2025-07-26):
  - `gpt-4.1-mini` (OpenAI)
  - `gpt-4.1-nano` (OpenAI)
  - `gpt-4o` (OpenAI)
- - `gpt-4o-audio-preview` (OpenAI)
+ - `gpt-4o-realtime-preview` (OpenAI, real-time)
  - `gpt-4o-mini` (OpenAI)
  - `gpt-5` (OpenAI)
  - `gpt-5-mini` (OpenAI)
@@ -1211,6 +1193,7 @@ PyGPT has a preconfigured list of models (as of 2025-07-26):
  - `gpt-image-1` (OpenAI)
  - `gpt-oss:20b` (OpenAI - via Ollama and HuggingFace Router)
  - `gpt-oss:120b` (OpenAI - via Ollama and HuggingFace Router)
+ - `gpt-realtime` (OpenAI, real-time)
  - `grok-2-vision` (xAI)
  - `grok-3` (xAI)
  - `grok-3-fast` (xAI)
@@ -2455,17 +2438,15 @@ Enable/disable remote tools, like Web Search or Image generation to use in OpenA

  - `Experts: Master prompt`: Prompt to instruct how to handle experts.

- - `DALL-E: image generate`: Prompt for generating prompts for DALL-E (if raw-mode is disabled).
+ - `Image generate`: Prompt for generating prompts for image generation (if raw-mode is disabled).

  **Images**

- - `DALL-E Image size`: The resolution of the generated images (DALL-E). Default: 1792x1024.
+ - `Image size`: The resolution of the generated images (DALL-E). Default: 1024x1024.

- - `DALL-E Image quality`: The image quality of the generated images (DALL-E). Default: standard.
+ - `Image quality`: The image quality of the generated images (DALL-E). Default: standard.

- - `Open image dialog after generate`: Enable the image dialog to open after an image is generated in Image mode.
-
- - `DALL-E: prompt generation model`: Model used for generating prompts for DALL-E (if raw-mode is disabled).
+ - `Prompt generation model`: Model used for generating prompts for image generation (if raw-mode is disabled).

  **Vision**

@@ -2499,6 +2480,10 @@ Enable/disable remote tools, like Web Search or Image generation to use in OpenA

  - `Continuous Audio Recording (Chunks)`: Enable recording in chunks for long audio recordings in notepad (voice notes).

+ - `VAD prefix padding (in ms)`: VAD prefix padding in ms, default: 300ms (Realtime audio mode)
+
+ - `VAD end silence (in ms)`: VAD end silence in ms, default: 2000ms (Realtime audio mode)
+
  **Indexes / LlamaIndex**

  **General**
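The two VAD settings added in the hunk above configure voice-activity turn detection for the Realtime audio mode. As a rough orientation, they line up with the server-side VAD parameters of a realtime session config; the sketch below uses OpenAI Realtime API field names, and the exact mapping inside PyGPT's own config is an assumption, not taken from the package.

```python
# Hedged sketch: how the two VAD settings above could map onto server-side
# turn detection in a realtime session config (OpenAI Realtime API field
# names; whether PyGPT forwards the values exactly like this is an assumption).
session_config = {
    "turn_detection": {
        "type": "server_vad",
        "prefix_padding_ms": 300,     # "VAD prefix padding (in ms)"
        "silence_duration_ms": 2000,  # "VAD end silence (in ms)"
    },
}
```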
@@ -2637,10 +2622,12 @@ Enable/disable remote tools, like Web Search or Image generation to use in OpenA

  - `Check for updates in background`: Enables checking for updates in background (checking every 5 minutes). Default: True.

- **Developer**
+ **Debug**

  - `Show debug menu`: Enables debug (developer) menu.

+ - `Log level`: toggle log level (ERROR|WARNING|INFO|DEBUG)
+
  - `Log and debug context`: Enables logging of context input/output.

  - `Log and debug events`: Enables logging of event dispatch.
@@ -2657,8 +2644,6 @@ Enable/disable remote tools, like Web Search or Image generation to use in OpenA

  - `Log Assistants usage to console`: Enables logging of Assistants API usage to console.

- - `Log level`: toggle log level (ERROR|WARNING|INFO|DEBUG)
-

  ## JSON files

@@ -3363,7 +3348,7 @@ These wrappers are loaded into the application during startup using `launcher.ad
  ```python
  # app.py

- from pygpt_net.provider.llms.openai import OpenAILLM
+ from pygpt_net.provider.api.openai import OpenAILLM
  from pygpt_net.provider.llms.azure_openai import AzureOpenAILLM
  from pygpt_net.provider.llms.anthropic import AnthropicLLM
  from pygpt_net.provider.llms.hugging_face import HuggingFaceLLM
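The hunk above only moves an import: the OpenAI LLM wrapper now lives under `pygpt_net.provider.api.openai`. A hedged sketch of the registration pattern the surrounding README text describes (wrappers added once at startup via `launcher.add_llm()`); the `register_llms` helper is illustrative and not part of the package, only the import paths come from the diff.

```python
# Hedged sketch of the startup registration described above; only the import
# paths are taken from the diff, the helper function itself is illustrative.
from pygpt_net.provider.api.openai import OpenAILLM         # new location per this diff
from pygpt_net.provider.llms.anthropic import AnthropicLLM  # unchanged location

def register_llms(launcher):
    """Add each LLM wrapper once during application startup."""
    launcher.add_llm(OpenAILLM())
    launcher.add_llm(AnthropicLLM())
```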
@@ -3575,6 +3560,21 @@ may consume additional tokens that are not displayed in the main window.

  ## Recent changes:

+ **2.6.31 (2025-09-01)**
+
+ - Chat with Audio mode renamed to Realtime + audio.
+ - Added support for real-time audio models from OpenAI (Realtime API) and Google (Live API), featuring real-time audio integration (beta).
+ - Introduced new predefined models: gpt-realtime, gpt-4o-realtime-preview, and gemini-2.5-flash-preview-native-audio-dialog.
+ - Included Google Gen AI audio input and output providers in the Audio Input/Output plugins.
+ - Added URL Context remote tool support in Google Gen AI.
+
+ **2.6.30 (2025-08-29)**
+
+ - Added native Google GenAI API support (beta); live audio is not supported yet (#132).
+ - Added new predefined models for image generation: Google Imagen3 and Imagen4.
+ - Optimized token usage in the Responses API.
+ - Removed Vision mode (it is now integrated into Chat).
+
  **2.6.29 (2025-08-28)**

  - Verbose options have been moved to the Developer section in settings.