pygpt-net 2.6.30__py3-none-any.whl → 2.6.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. pygpt_net/CHANGELOG.txt +15 -0
  2. pygpt_net/__init__.py +3 -3
  3. pygpt_net/app.py +7 -1
  4. pygpt_net/app_core.py +3 -1
  5. pygpt_net/config.py +3 -1
  6. pygpt_net/controller/__init__.py +9 -2
  7. pygpt_net/controller/audio/audio.py +38 -1
  8. pygpt_net/controller/audio/ui.py +2 -2
  9. pygpt_net/controller/chat/audio.py +1 -8
  10. pygpt_net/controller/chat/common.py +23 -62
  11. pygpt_net/controller/chat/handler/__init__.py +0 -0
  12. pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
  13. pygpt_net/controller/chat/output.py +8 -3
  14. pygpt_net/controller/chat/stream.py +3 -1071
  15. pygpt_net/controller/chat/text.py +3 -2
  16. pygpt_net/controller/kernel/kernel.py +11 -3
  17. pygpt_net/controller/kernel/reply.py +5 -1
  18. pygpt_net/controller/lang/custom.py +2 -2
  19. pygpt_net/controller/media/__init__.py +12 -0
  20. pygpt_net/controller/media/media.py +115 -0
  21. pygpt_net/controller/realtime/__init__.py +12 -0
  22. pygpt_net/controller/realtime/manager.py +53 -0
  23. pygpt_net/controller/realtime/realtime.py +293 -0
  24. pygpt_net/controller/ui/mode.py +23 -2
  25. pygpt_net/controller/ui/ui.py +19 -1
  26. pygpt_net/core/audio/audio.py +6 -1
  27. pygpt_net/core/audio/backend/native/__init__.py +12 -0
  28. pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
  29. pygpt_net/core/audio/backend/native/player.py +139 -0
  30. pygpt_net/core/audio/backend/native/realtime.py +250 -0
  31. pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
  32. pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
  33. pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
  34. pygpt_net/core/audio/backend/pyaudio/realtime.py +312 -0
  35. pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
  36. pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
  37. pygpt_net/core/audio/backend/shared/__init__.py +38 -0
  38. pygpt_net/core/audio/backend/shared/conversions.py +211 -0
  39. pygpt_net/core/audio/backend/shared/envelope.py +38 -0
  40. pygpt_net/core/audio/backend/shared/player.py +137 -0
  41. pygpt_net/core/audio/backend/shared/rt.py +52 -0
  42. pygpt_net/core/audio/capture.py +5 -0
  43. pygpt_net/core/audio/output.py +14 -2
  44. pygpt_net/core/audio/whisper.py +6 -2
  45. pygpt_net/core/bridge/bridge.py +2 -1
  46. pygpt_net/core/bridge/worker.py +4 -1
  47. pygpt_net/core/dispatcher/dispatcher.py +37 -1
  48. pygpt_net/core/events/__init__.py +2 -1
  49. pygpt_net/core/events/realtime.py +55 -0
  50. pygpt_net/core/image/image.py +56 -5
  51. pygpt_net/core/realtime/__init__.py +0 -0
  52. pygpt_net/core/realtime/options.py +87 -0
  53. pygpt_net/core/realtime/shared/__init__.py +0 -0
  54. pygpt_net/core/realtime/shared/audio.py +213 -0
  55. pygpt_net/core/realtime/shared/loop.py +64 -0
  56. pygpt_net/core/realtime/shared/session.py +59 -0
  57. pygpt_net/core/realtime/shared/text.py +37 -0
  58. pygpt_net/core/realtime/shared/tools.py +276 -0
  59. pygpt_net/core/realtime/shared/turn.py +38 -0
  60. pygpt_net/core/realtime/shared/types.py +16 -0
  61. pygpt_net/core/realtime/worker.py +160 -0
  62. pygpt_net/core/render/web/body.py +24 -3
  63. pygpt_net/core/text/utils.py +54 -2
  64. pygpt_net/core/types/__init__.py +1 -0
  65. pygpt_net/core/types/image.py +54 -0
  66. pygpt_net/core/video/__init__.py +12 -0
  67. pygpt_net/core/video/video.py +290 -0
  68. pygpt_net/data/config/config.json +26 -5
  69. pygpt_net/data/config/models.json +221 -103
  70. pygpt_net/data/config/settings.json +244 -6
  71. pygpt_net/data/css/web-blocks.css +6 -0
  72. pygpt_net/data/css/web-chatgpt.css +6 -0
  73. pygpt_net/data/css/web-chatgpt_wide.css +6 -0
  74. pygpt_net/data/locale/locale.de.ini +35 -7
  75. pygpt_net/data/locale/locale.en.ini +56 -17
  76. pygpt_net/data/locale/locale.es.ini +35 -7
  77. pygpt_net/data/locale/locale.fr.ini +35 -7
  78. pygpt_net/data/locale/locale.it.ini +35 -7
  79. pygpt_net/data/locale/locale.pl.ini +38 -7
  80. pygpt_net/data/locale/locale.uk.ini +35 -7
  81. pygpt_net/data/locale/locale.zh.ini +31 -3
  82. pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
  83. pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
  84. pygpt_net/data/locale/plugin.cmd_web.en.ini +8 -0
  85. pygpt_net/item/model.py +22 -1
  86. pygpt_net/plugin/audio_input/plugin.py +37 -4
  87. pygpt_net/plugin/audio_input/simple.py +57 -8
  88. pygpt_net/plugin/cmd_files/worker.py +3 -0
  89. pygpt_net/provider/api/google/__init__.py +76 -7
  90. pygpt_net/provider/api/google/audio.py +8 -1
  91. pygpt_net/provider/api/google/chat.py +45 -6
  92. pygpt_net/provider/api/google/image.py +226 -86
  93. pygpt_net/provider/api/google/realtime/__init__.py +12 -0
  94. pygpt_net/provider/api/google/realtime/client.py +1945 -0
  95. pygpt_net/provider/api/google/realtime/realtime.py +186 -0
  96. pygpt_net/provider/api/google/video.py +364 -0
  97. pygpt_net/provider/api/openai/__init__.py +22 -2
  98. pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
  99. pygpt_net/provider/api/openai/realtime/client.py +1828 -0
  100. pygpt_net/provider/api/openai/realtime/realtime.py +193 -0
  101. pygpt_net/provider/audio_input/google_genai.py +103 -0
  102. pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
  103. pygpt_net/provider/audio_output/google_tts.py +0 -12
  104. pygpt_net/provider/audio_output/openai_tts.py +8 -5
  105. pygpt_net/provider/core/config/patch.py +241 -178
  106. pygpt_net/provider/core/model/patch.py +28 -2
  107. pygpt_net/provider/llms/google.py +8 -9
  108. pygpt_net/provider/web/duckduck_search.py +212 -0
  109. pygpt_net/ui/layout/toolbox/audio.py +55 -0
  110. pygpt_net/ui/layout/toolbox/footer.py +14 -42
  111. pygpt_net/ui/layout/toolbox/image.py +7 -13
  112. pygpt_net/ui/layout/toolbox/raw.py +52 -0
  113. pygpt_net/ui/layout/toolbox/split.py +48 -0
  114. pygpt_net/ui/layout/toolbox/toolbox.py +8 -8
  115. pygpt_net/ui/layout/toolbox/video.py +49 -0
  116. pygpt_net/ui/widget/option/combo.py +15 -1
  117. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/METADATA +46 -22
  118. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/RECORD +121 -73
  119. pygpt_net/core/audio/backend/pyaudio.py +0 -554
  120. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/LICENSE +0 -0
  121. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/WHEEL +0 -0
  122. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/entry_points.txt +0 -0
@@ -79,6 +79,7 @@ class OptionCombo(QWidget):
79
79
  self.combo = NoScrollCombo()
80
80
  self.combo.currentIndexChanged.connect(self.on_combo_change)
81
81
  self.current_id = None
82
+ self.locked = False
82
83
 
83
84
  # add items
84
85
  self.update()
@@ -114,6 +115,12 @@ class OptionCombo(QWidget):
114
115
  self.combo.addItem(value, key)
115
116
  else:
116
117
  self.combo.addItem(item, item)
118
+ elif type(self.keys) is dict:
119
+ for key, value in self.keys.items():
120
+ if key.startswith("separator::"):
121
+ self.combo.addSeparator(value)
122
+ else:
123
+ self.combo.addItem(value, key)
117
124
 
118
125
  def set_value(self, value):
119
126
  """
@@ -135,16 +142,21 @@ class OptionCombo(QWidget):
135
142
  """
136
143
  return self.current_id
137
144
 
138
- def set_keys(self, keys):
145
+ def set_keys(self, keys, lock: bool = False):
139
146
  """
140
147
  Set keys
141
148
 
142
149
  :param keys: keys
150
+ :param lock: lock current value if True
143
151
  """
152
+ if lock:
153
+ self.locked = True # lock on_change
144
154
  self.keys = keys
145
155
  self.option["keys"] = keys
146
156
  self.combo.clear()
147
157
  self.update()
158
+ if lock:
159
+ self.locked = False
148
160
 
149
161
  def on_combo_change(self, index):
150
162
  """
@@ -153,6 +165,8 @@ class OptionCombo(QWidget):
153
165
  :param index: combo index
154
166
  :return:
155
167
  """
168
+ if self.locked:
169
+ return
156
170
  self.current_id = self.combo.itemData(index)
157
171
  self.window.controller.config.combo.on_update(self.parent_id, self.id, self.option, self.current_id)
158
172
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: pygpt-net
3
- Version: 2.6.30
3
+ Version: 2.6.32
4
4
  Summary: Desktop AI Assistant powered by: OpenAI GPT-5, GPT-4, o1, o3, Gemini, Claude, Grok, DeepSeek, and other models supported by Llama Index, and Ollama. Chatbot, agents, completion, image generation, vision analysis, speech-to-text, plugins, internet access, file handling, command execution and more.
5
5
  License: MIT
6
6
  Keywords: ai,api,api key,app,assistant,bielik,chat,chatbot,chatgpt,claude,dall-e,deepseek,desktop,gemini,gpt,gpt-3.5,gpt-4,gpt-4-vision,gpt-4o,gpt-5,gpt-oss,gpt3.5,gpt4,grok,langchain,llama-index,llama3,mistral,o1,o3,ollama,openai,presets,py-gpt,py_gpt,pygpt,pyside,qt,text completion,tts,ui,vision,whisper
@@ -30,6 +30,7 @@ Requires-Dist: beautifulsoup4 (>=4.13.5,<5.0.0)
30
30
  Requires-Dist: boto3 (>=1.40.17,<2.0.0)
31
31
  Requires-Dist: chromadb (>=0.5.20,<0.6.0)
32
32
  Requires-Dist: croniter (>=2.0.7,<3.0.0)
33
+ Requires-Dist: ddgs (>=9.5.5,<10.0.0)
33
34
  Requires-Dist: docker (>=7.1.0,<8.0.0)
34
35
  Requires-Dist: docx2txt (>=0.8,<0.9)
35
36
  Requires-Dist: gkeepapi (>=0.15.1,<0.16.0)
@@ -117,7 +118,7 @@ Description-Content-Type: text/markdown
117
118
 
118
119
  [![pygpt](https://snapcraft.io/pygpt/badge.svg)](https://snapcraft.io/pygpt)
119
120
 
120
- Release: **2.6.30** | build: **2025-08-29** | Python: **>=3.10, <3.14**
121
+ Release: **2.6.32** | build: **2025-09-02** | Python: **>=3.10, <3.14**
121
122
 
122
123
  > Official website: https://pygpt.net | Documentation: https://pygpt.readthedocs.io
123
124
  >
@@ -129,9 +130,9 @@ Release: **2.6.30** | build: **2025-08-29** | Python: **>=3.10, <3.14**
129
130
 
130
131
  ## Overview
131
132
 
132
- **PyGPT** is **all-in-one** Desktop AI Assistant that provides direct interaction with OpenAI language models, including `GPT-5`, `GPT-4`, `o1`, `o3` and more, through the `OpenAI API`. By utilizing `LlamaIndex`, the application also supports alternative LLMs, like those available on `HuggingFace`, locally available models via `Ollama` (like `gpt-oss`, `Llama 3`,`Mistral`, `DeepSeek V3/R1` or `Bielik`), and other models like `Google Gemini`, `Anthropic Claude`, `Perplexity / Sonar`, and `xAI Grok`.
133
+ **PyGPT** is an **all-in-one** Desktop AI Assistant that provides direct interaction with OpenAI language models, including `GPT-5`, `GPT-4`, `o1`, `o3` and more, through the `OpenAI API`. By utilizing other SDKs and `LlamaIndex`, the application also supports alternative LLMs, like those available on `HuggingFace`, locally available models via `Ollama` (like `gpt-oss`, `Llama 3`,`Mistral`, `DeepSeek V3/R1` or `Bielik`), and other models like `Google Gemini`, `Anthropic Claude`, `Perplexity / Sonar`, and `xAI Grok`.
133
134
 
134
- This assistant offers multiple modes of operation such as chat, assistants, agents, completions, and image-related tasks like image generation and image analysis. **PyGPT** has filesystem capabilities for file I/O, can generate and run Python code, execute system commands, execute custom commands and manage file transfers. It also allows models to perform web searches with the `Google` and `Microsoft Bing`.
135
+ This assistant offers multiple modes of operation such as chat, assistants, agents, completions, and image-related tasks like image generation and image analysis. **PyGPT** has filesystem capabilities for file I/O, can generate and run Python code, execute system commands, execute custom commands and manage file transfers. It also allows models to perform web searches via `DuckDuckGo`, `Google` and `Microsoft Bing`.
135
136
 
136
137
  For audio interactions, **PyGPT** includes speech synthesis using the `Microsoft Azure`, `Google`, `Eleven Labs` and `OpenAI` Text-To-Speech services. Additionally, it features speech recognition capabilities provided by `OpenAI Whisper`, `Google` and `Bing` enabling the application to understand spoken commands and transcribe audio inputs into text. It features context memory with save and load functionality, enabling users to resume interactions from predefined points in the conversation. Prompt creation and management are streamlined through an intuitive preset system.
137
138
 
@@ -157,13 +158,13 @@ You can download compiled 64-bit versions for Windows and Linux here: https://py
157
158
 
158
159
  - Desktop AI Assistant for `Linux`, `Windows` and `Mac`, written in Python.
159
160
  - Works similarly to `ChatGPT`, but locally (on a desktop computer).
160
- - 11 modes of operation: Chat, Chat with Files, Chat with Audio, Research (Perplexity), Completion, Image generation, Assistants, Experts, Computer use, Agents and Autonomous Mode.
161
+ - 11 modes of operation: Chat, Chat with Files, Realtime + audio, Research (Perplexity), Completion, Image and video generation, Assistants, Experts, Computer use, Agents and Autonomous Mode.
161
162
  - Supports multiple models like `OpenAI GPT-5`, `GPT-4`, `o1`, `o3`, `o4`, `Google Gemini`, `Anthropic Claude`, `xAI Grok`, `DeepSeek V3/R1`, `Perplexity / Sonar`, and any model accessible through `LlamaIndex` and `Ollama` such as `DeepSeek`, `gpt-oss`, `Llama 3`, `Mistral`, `Bielik`, etc.
162
163
  - Chat with your own Files: integrated `LlamaIndex` support: chat with data such as: `txt`, `pdf`, `csv`, `html`, `md`, `docx`, `json`, `epub`, `xlsx`, `xml`, webpages, `Google`, `GitHub`, video/audio, images and other data types, or use conversation history as additional context provided to the model.
163
164
  - Built-in vector databases support and automated files and data embedding.
164
165
  - Included support features for individuals with disabilities: customizable keyboard shortcuts, voice control, and translation of on-screen actions into audio via speech synthesis.
165
166
  - Handles and stores the full context of conversations (short and long-term memory).
166
- - Internet access via `Google` and `Microsoft Bing`.
167
+ - Internet access via `DuckDuckGo`, `Google` and `Microsoft Bing`.
167
168
  - Speech synthesis via `Microsoft Azure`, `Google`, `Eleven Labs` and `OpenAI` Text-To-Speech services.
168
169
  - Speech recognition via `OpenAI Whisper`, `Google` and `Microsoft Speech Recognition`.
169
170
  - Real-time video camera capture in Vision mode.
@@ -544,7 +545,7 @@ With this plugin, you can capture an image with your camera or attach an image a
544
545
 
545
546
  ![v3_vision_chat](https://github.com/szczyglis-dev/py-gpt/raw/master/docs/source/images/v3_vision_chat.png)
546
547
 
547
- **Image generation:** If you want to generate images (using DALL-E) directly in chat you must enable plugin `Image generation (inline)` in the Plugins menu.
548
+ **Image generation:** If you want to generate images directly in chat you must enable plugin `Image generation (inline)` in the Plugins menu.
548
549
  Plugin allows you to generate images in Chat mode:
549
550
 
550
551
  ![v3_img_chat](https://github.com/szczyglis-dev/py-gpt/raw/master/docs/source/images/v3_img_chat.png)
@@ -662,11 +663,11 @@ In the `Settings -> LlamaIndex -> Data loaders` section you can define the addit
662
663
 
663
664
  ## Chat with Audio
664
665
 
665
- This mode works like the Chat mode but with native support for audio input and output using a multimodal model - `gpt-4o-audio`. In this mode, audio input and output are directed to and from the model directly, without the use of external plugins. This enables faster and better audio communication.
666
+ This mode works like the Chat mode but with native support for audio input and output using the Realtime and Live APIs. In this mode, audio input and output are directed to and from the model directly, without the use of external plugins. This enables faster and better audio communication.
666
667
 
667
- More info: https://platform.openai.com/docs/guides/audio/quickstart
668
+ Currently, in beta.
668
669
 
669
- Currently, in beta. Tool and function calls are not enabled in this mode.
670
+ At this moment, only OpenAI real-time models (via the Realtime API) and Google Gemini real-time models (via the Live API) are supported.
670
671
 
671
672
  ## Research
672
673
 
@@ -688,7 +689,7 @@ Additionally, this mode offers options for labeling the AI and the user, making
688
689
 
689
690
From version `2.0.107` the `davinci` models are deprecated and have been replaced with the `gpt-3.5-turbo-instruct` model in Completion mode.
690
691
 
691
- ## Image generation
692
+ ## Image and video generation
692
693
 
693
694
  ### OpenAI DALL-E 3 / Google Imagen 3 and 4
694
695
 
@@ -704,6 +705,8 @@ Plugin allows you to generate images in Chat mode:
704
705
 
705
706
  ![v3_img_chat](https://github.com/szczyglis-dev/py-gpt/raw/master/docs/source/images/v3_img_chat.png)
706
707
 
708
+ **Video generation**: From version `2.6.32`, video generation (using `Google Veo 3`) is also available.
709
+
707
710
  ### Multiple variants
708
711
 
709
712
  You can generate up to **4 different variants** (DALL-E 2) for a given prompt in one session. DALL-E 3 allows one image.
@@ -1148,7 +1151,7 @@ The name of the currently active profile is shown as (Profile Name) in the windo
1148
1151
 
1149
1152
  ## Built-in models
1150
1153
 
1151
- PyGPT has a preconfigured list of models (as of 2025-07-26):
1154
+ PyGPT has a preconfigured list of models (as of 2025-08-31):
1152
1155
 
1153
1156
  - `bielik-11b-v2.3-instruct:Q4_K_M` (Ollama)
1154
1157
  - `chatgpt-4o-latest` (OpenAI)
@@ -1172,6 +1175,7 @@ PyGPT has a preconfigured list of models (as of 2025-07-26):
1172
1175
  - `gemini-1.5-pro` (Google)
1173
1176
  - `gemini-2.0-flash-exp` (Google)
1174
1177
  - `gemini-2.5-flash` (Google)
1178
+ - `gemini-2.5-flash-preview-native-audio-dialog` (Google, real-time)
1175
1179
  - `gemini-2.5-pro` (Google)
1176
1180
  - `gpt-3.5-turbo` (OpenAI)
1177
1181
  - `gpt-3.5-turbo-16k` (OpenAI)
@@ -1184,7 +1188,7 @@ PyGPT has a preconfigured list of models (as of 2025-07-26):
1184
1188
  - `gpt-4.1-mini` (OpenAI)
1185
1189
  - `gpt-4.1-nano` (OpenAI)
1186
1190
  - `gpt-4o` (OpenAI)
1187
- - `gpt-4o-audio-preview` (OpenAI)
1191
+ - `gpt-4o-realtime-preview` (OpenAI, real-time)
1188
1192
  - `gpt-4o-mini` (OpenAI)
1189
1193
  - `gpt-5` (OpenAI)
1190
1194
  - `gpt-5-mini` (OpenAI)
@@ -1192,6 +1196,7 @@ PyGPT has a preconfigured list of models (as of 2025-07-26):
1192
1196
  - `gpt-image-1` (OpenAI)
1193
1197
  - `gpt-oss:20b` (OpenAI - via Ollama and HuggingFace Router)
1194
1198
  - `gpt-oss:120b` (OpenAI - via Ollama and HuggingFace Router)
1199
+ - `gpt-realtime` (OpenAI, real-time)
1195
1200
  - `grok-2-vision` (xAI)
1196
1201
  - `grok-3` (xAI)
1197
1202
  - `grok-3-fast` (xAI)
@@ -1223,6 +1228,8 @@ PyGPT has a preconfigured list of models (as of 2025-07-26):
1223
1228
  - `sonar-pro` (Perplexity)
1224
1229
  - `sonar-reasoning` (Perplexity)
1225
1230
  - `sonar-reasoning-pro` (Perplexity)
1231
+ - `veo-3.0-generate-preview` (Google)
1232
+ - `veo-3.0-fast-generate-preview` (Google)
1226
1233
 
1227
1234
  All models are specified in the configuration file `models.json`, which you can customize.
1228
1235
  This file is located in your working directory. You can add new models provided directly by `OpenAI API` (or compatible) and those supported by `LlamaIndex` or `Ollama` to this file. Configuration for LlamaIndex is placed in `llama_index` key.
@@ -1240,20 +1247,20 @@ There is built-in support for those LLM providers:
1240
1247
  - `Anthropic`
1241
1248
  - `Azure OpenAI`
1242
1249
  - `Deepseek API`
1243
- - `Google`
1250
+ - `Google` (native SDK)
1244
1251
  - `HuggingFace API`
1245
1252
  - `HuggingFace Router` (wrapper for OpenAI compatible ChatCompletions)
1246
1253
  - `Local models` (OpenAI API compatible)
1247
1254
  - `Mistral AI`
1248
1255
  - `Ollama`
1249
- - `OpenAI`
1256
+ - `OpenAI` (native SDK)
1250
1257
  - `OpenRouter`
1251
1258
  - `Perplexity`
1252
1259
  - `xAI`
1253
1260
 
1254
1261
  ## How to use local or non-GPT models
1255
1262
 
1256
- ### Llama 3, Mistral, DeepSeek, and other local models
1263
+ ### Llama 3, Mistral, DeepSeek, Qwen, gpt-oss, and other local models
1257
1264
 
1258
1265
  How to use locally installed Llama 3, DeepSeek, Mistral, etc. models:
1259
1266
 
@@ -2436,17 +2443,15 @@ Enable/disable remote tools, like Web Search or Image generation to use in OpenA
2436
2443
 
2437
2444
  - `Experts: Master prompt`: Prompt to instruct how to handle experts.
2438
2445
 
2439
- - `DALL-E: image generate`: Prompt for generating prompts for DALL-E (if raw-mode is disabled).
2446
+ - `Image generate`: Prompt for generating prompts for image generation (if raw-mode is disabled).
2440
2447
 
2441
2448
  **Images**
2442
2449
 
2443
- - `DALL-E Image size`: The resolution of the generated images (DALL-E). Default: 1792x1024.
2444
-
2445
- - `DALL-E Image quality`: The image quality of the generated images (DALL-E). Default: standard.
2450
+ - `Image size`: The resolution of the generated images (DALL-E). Default: 1024x1024.
2446
2451
 
2447
- - `Open image dialog after generate`: Enable the image dialog to open after an image is generated in Image mode.
2452
+ - `Image quality`: The image quality of the generated images (DALL-E). Default: standard.
2448
2453
 
2449
- - `DALL-E: prompt generation model`: Model used for generating prompts for DALL-E (if raw-mode is disabled).
2454
+ - `Prompt generation model`: Model used for generating prompts for image generation (if raw-mode is disabled).
2450
2455
 
2451
2456
  **Vision**
2452
2457
 
@@ -2480,6 +2485,10 @@ Enable/disable remote tools, like Web Search or Image generation to use in OpenA
2480
2485
 
2481
2486
  - `Continuous Audio Recording (Chunks)`: Enable recording in chunks for long audio recordings in notepad (voice notes).
2482
2487
 
2488
+ - `VAD prefix padding (in ms)`: VAD prefix padding in ms, default: 300ms (Realtime audio mode)
2489
+
2490
+ - `VAD end silence (in ms)`: VAD end silence in ms, default: 2000ms (Realtime audio mode)
2491
+
2483
2492
  **Indexes / LlamaIndex**
2484
2493
 
2485
2494
  **General**
@@ -3556,6 +3565,21 @@ may consume additional tokens that are not displayed in the main window.
3556
3565
 
3557
3566
  ## Recent changes:
3558
3567
 
3568
+ **2.6.32 (2025-09-02)**
3569
+
3570
+ - Added video generation and support for Google Veo 3 models.
3571
+ - Introduced new predefined models: veo-3.0-generate-preview and veo-3.0-fast-generate-preview.
3572
+ - Integrated DuckDuckGo as a search provider in the WebSearch plugin.
3573
+ - Added "Loop" mode to Realtime + audio mode for automatic turn handling and continuous conversation without manually enabling the microphone.
3574
+
3575
+ **2.6.31 (2025-09-01)**
3576
+
3577
+ - Chat with Audio mode renamed to Realtime + audio.
3578
+ - Added support for real-time audio models from OpenAI (Realtime API) and Google (Live API), featuring real-time audio integration (beta).
3579
+ - Introduced new predefined models: gpt-realtime, gpt-4o-realtime-preview, and gemini-2.5-flash-preview-native-audio-dialog.
3580
+ - Included Google Gen AI audio input and output providers in the Audio Input/Output plugins.
3581
+ - Added URL Context remote tool support in Google Gen AI.
3582
+
3559
3583
  **2.6.30 (2025-08-29)**
3560
3584
 
3561
3585
  - Added native Google GenAI API support (beta); live audio is not supported yet (#132).