pygpt-net 2.6.30__py3-none-any.whl → 2.6.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pygpt_net/CHANGELOG.txt +15 -0
- pygpt_net/__init__.py +3 -3
- pygpt_net/app.py +7 -1
- pygpt_net/app_core.py +3 -1
- pygpt_net/config.py +3 -1
- pygpt_net/controller/__init__.py +9 -2
- pygpt_net/controller/audio/audio.py +38 -1
- pygpt_net/controller/audio/ui.py +2 -2
- pygpt_net/controller/chat/audio.py +1 -8
- pygpt_net/controller/chat/common.py +23 -62
- pygpt_net/controller/chat/handler/__init__.py +0 -0
- pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
- pygpt_net/controller/chat/output.py +8 -3
- pygpt_net/controller/chat/stream.py +3 -1071
- pygpt_net/controller/chat/text.py +3 -2
- pygpt_net/controller/kernel/kernel.py +11 -3
- pygpt_net/controller/kernel/reply.py +5 -1
- pygpt_net/controller/lang/custom.py +2 -2
- pygpt_net/controller/media/__init__.py +12 -0
- pygpt_net/controller/media/media.py +115 -0
- pygpt_net/controller/realtime/__init__.py +12 -0
- pygpt_net/controller/realtime/manager.py +53 -0
- pygpt_net/controller/realtime/realtime.py +293 -0
- pygpt_net/controller/ui/mode.py +23 -2
- pygpt_net/controller/ui/ui.py +19 -1
- pygpt_net/core/audio/audio.py +6 -1
- pygpt_net/core/audio/backend/native/__init__.py +12 -0
- pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
- pygpt_net/core/audio/backend/native/player.py +139 -0
- pygpt_net/core/audio/backend/native/realtime.py +250 -0
- pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
- pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
- pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
- pygpt_net/core/audio/backend/pyaudio/realtime.py +312 -0
- pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
- pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
- pygpt_net/core/audio/backend/shared/__init__.py +38 -0
- pygpt_net/core/audio/backend/shared/conversions.py +211 -0
- pygpt_net/core/audio/backend/shared/envelope.py +38 -0
- pygpt_net/core/audio/backend/shared/player.py +137 -0
- pygpt_net/core/audio/backend/shared/rt.py +52 -0
- pygpt_net/core/audio/capture.py +5 -0
- pygpt_net/core/audio/output.py +14 -2
- pygpt_net/core/audio/whisper.py +6 -2
- pygpt_net/core/bridge/bridge.py +2 -1
- pygpt_net/core/bridge/worker.py +4 -1
- pygpt_net/core/dispatcher/dispatcher.py +37 -1
- pygpt_net/core/events/__init__.py +2 -1
- pygpt_net/core/events/realtime.py +55 -0
- pygpt_net/core/image/image.py +56 -5
- pygpt_net/core/realtime/__init__.py +0 -0
- pygpt_net/core/realtime/options.py +87 -0
- pygpt_net/core/realtime/shared/__init__.py +0 -0
- pygpt_net/core/realtime/shared/audio.py +213 -0
- pygpt_net/core/realtime/shared/loop.py +64 -0
- pygpt_net/core/realtime/shared/session.py +59 -0
- pygpt_net/core/realtime/shared/text.py +37 -0
- pygpt_net/core/realtime/shared/tools.py +276 -0
- pygpt_net/core/realtime/shared/turn.py +38 -0
- pygpt_net/core/realtime/shared/types.py +16 -0
- pygpt_net/core/realtime/worker.py +160 -0
- pygpt_net/core/render/web/body.py +24 -3
- pygpt_net/core/text/utils.py +54 -2
- pygpt_net/core/types/__init__.py +1 -0
- pygpt_net/core/types/image.py +54 -0
- pygpt_net/core/video/__init__.py +12 -0
- pygpt_net/core/video/video.py +290 -0
- pygpt_net/data/config/config.json +26 -5
- pygpt_net/data/config/models.json +221 -103
- pygpt_net/data/config/settings.json +244 -6
- pygpt_net/data/css/web-blocks.css +6 -0
- pygpt_net/data/css/web-chatgpt.css +6 -0
- pygpt_net/data/css/web-chatgpt_wide.css +6 -0
- pygpt_net/data/locale/locale.de.ini +35 -7
- pygpt_net/data/locale/locale.en.ini +56 -17
- pygpt_net/data/locale/locale.es.ini +35 -7
- pygpt_net/data/locale/locale.fr.ini +35 -7
- pygpt_net/data/locale/locale.it.ini +35 -7
- pygpt_net/data/locale/locale.pl.ini +38 -7
- pygpt_net/data/locale/locale.uk.ini +35 -7
- pygpt_net/data/locale/locale.zh.ini +31 -3
- pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
- pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
- pygpt_net/data/locale/plugin.cmd_web.en.ini +8 -0
- pygpt_net/item/model.py +22 -1
- pygpt_net/plugin/audio_input/plugin.py +37 -4
- pygpt_net/plugin/audio_input/simple.py +57 -8
- pygpt_net/plugin/cmd_files/worker.py +3 -0
- pygpt_net/provider/api/google/__init__.py +76 -7
- pygpt_net/provider/api/google/audio.py +8 -1
- pygpt_net/provider/api/google/chat.py +45 -6
- pygpt_net/provider/api/google/image.py +226 -86
- pygpt_net/provider/api/google/realtime/__init__.py +12 -0
- pygpt_net/provider/api/google/realtime/client.py +1945 -0
- pygpt_net/provider/api/google/realtime/realtime.py +186 -0
- pygpt_net/provider/api/google/video.py +364 -0
- pygpt_net/provider/api/openai/__init__.py +22 -2
- pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
- pygpt_net/provider/api/openai/realtime/client.py +1828 -0
- pygpt_net/provider/api/openai/realtime/realtime.py +193 -0
- pygpt_net/provider/audio_input/google_genai.py +103 -0
- pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
- pygpt_net/provider/audio_output/google_tts.py +0 -12
- pygpt_net/provider/audio_output/openai_tts.py +8 -5
- pygpt_net/provider/core/config/patch.py +241 -178
- pygpt_net/provider/core/model/patch.py +28 -2
- pygpt_net/provider/llms/google.py +8 -9
- pygpt_net/provider/web/duckduck_search.py +212 -0
- pygpt_net/ui/layout/toolbox/audio.py +55 -0
- pygpt_net/ui/layout/toolbox/footer.py +14 -42
- pygpt_net/ui/layout/toolbox/image.py +7 -13
- pygpt_net/ui/layout/toolbox/raw.py +52 -0
- pygpt_net/ui/layout/toolbox/split.py +48 -0
- pygpt_net/ui/layout/toolbox/toolbox.py +8 -8
- pygpt_net/ui/layout/toolbox/video.py +49 -0
- pygpt_net/ui/widget/option/combo.py +15 -1
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/METADATA +46 -22
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/RECORD +121 -73
- pygpt_net/core/audio/backend/pyaudio.py +0 -554
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/LICENSE +0 -0
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/WHEEL +0 -0
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/entry_points.txt +0 -0
|
@@ -79,6 +79,7 @@ class OptionCombo(QWidget):
|
|
|
79
79
|
self.combo = NoScrollCombo()
|
|
80
80
|
self.combo.currentIndexChanged.connect(self.on_combo_change)
|
|
81
81
|
self.current_id = None
|
|
82
|
+
self.locked = False
|
|
82
83
|
|
|
83
84
|
# add items
|
|
84
85
|
self.update()
|
|
@@ -114,6 +115,12 @@ class OptionCombo(QWidget):
|
|
|
114
115
|
self.combo.addItem(value, key)
|
|
115
116
|
else:
|
|
116
117
|
self.combo.addItem(item, item)
|
|
118
|
+
elif type(self.keys) is dict:
|
|
119
|
+
for key, value in self.keys.items():
|
|
120
|
+
if key.startswith("separator::"):
|
|
121
|
+
self.combo.addSeparator(value)
|
|
122
|
+
else:
|
|
123
|
+
self.combo.addItem(value, key)
|
|
117
124
|
|
|
118
125
|
def set_value(self, value):
|
|
119
126
|
"""
|
|
@@ -135,16 +142,21 @@ class OptionCombo(QWidget):
|
|
|
135
142
|
"""
|
|
136
143
|
return self.current_id
|
|
137
144
|
|
|
138
|
-
def set_keys(self, keys):
|
|
145
|
+
def set_keys(self, keys, lock: bool = False):
|
|
139
146
|
"""
|
|
140
147
|
Set keys
|
|
141
148
|
|
|
142
149
|
:param keys: keys
|
|
150
|
+
:param lock: lock current value if True
|
|
143
151
|
"""
|
|
152
|
+
if lock:
|
|
153
|
+
self.locked = True # lock on_change
|
|
144
154
|
self.keys = keys
|
|
145
155
|
self.option["keys"] = keys
|
|
146
156
|
self.combo.clear()
|
|
147
157
|
self.update()
|
|
158
|
+
if lock:
|
|
159
|
+
self.locked = False
|
|
148
160
|
|
|
149
161
|
def on_combo_change(self, index):
|
|
150
162
|
"""
|
|
@@ -153,6 +165,8 @@ class OptionCombo(QWidget):
|
|
|
153
165
|
:param index: combo index
|
|
154
166
|
:return:
|
|
155
167
|
"""
|
|
168
|
+
if self.locked:
|
|
169
|
+
return
|
|
156
170
|
self.current_id = self.combo.itemData(index)
|
|
157
171
|
self.window.controller.config.combo.on_update(self.parent_id, self.id, self.option, self.current_id)
|
|
158
172
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: pygpt-net
|
|
3
|
-
Version: 2.6.30
|
|
3
|
+
Version: 2.6.32
|
|
4
4
|
Summary: Desktop AI Assistant powered by: OpenAI GPT-5, GPT-4, o1, o3, Gemini, Claude, Grok, DeepSeek, and other models supported by Llama Index, and Ollama. Chatbot, agents, completion, image generation, vision analysis, speech-to-text, plugins, internet access, file handling, command execution and more.
|
|
5
5
|
License: MIT
|
|
6
6
|
Keywords: ai,api,api key,app,assistant,bielik,chat,chatbot,chatgpt,claude,dall-e,deepseek,desktop,gemini,gpt,gpt-3.5,gpt-4,gpt-4-vision,gpt-4o,gpt-5,gpt-oss,gpt3.5,gpt4,grok,langchain,llama-index,llama3,mistral,o1,o3,ollama,openai,presets,py-gpt,py_gpt,pygpt,pyside,qt,text completion,tts,ui,vision,whisper
|
|
@@ -30,6 +30,7 @@ Requires-Dist: beautifulsoup4 (>=4.13.5,<5.0.0)
|
|
|
30
30
|
Requires-Dist: boto3 (>=1.40.17,<2.0.0)
|
|
31
31
|
Requires-Dist: chromadb (>=0.5.20,<0.6.0)
|
|
32
32
|
Requires-Dist: croniter (>=2.0.7,<3.0.0)
|
|
33
|
+
Requires-Dist: ddgs (>=9.5.5,<10.0.0)
|
|
33
34
|
Requires-Dist: docker (>=7.1.0,<8.0.0)
|
|
34
35
|
Requires-Dist: docx2txt (>=0.8,<0.9)
|
|
35
36
|
Requires-Dist: gkeepapi (>=0.15.1,<0.16.0)
|
|
@@ -117,7 +118,7 @@ Description-Content-Type: text/markdown
|
|
|
117
118
|
|
|
118
119
|
[](https://snapcraft.io/pygpt)
|
|
119
120
|
|
|
120
|
-
Release: **2.6.30** | build: **2025-08-29** | Python: **>=3.10, <3.14**
|
|
121
|
+
Release: **2.6.32** | build: **2025-09-02** | Python: **>=3.10, <3.14**
|
|
121
122
|
|
|
122
123
|
> Official website: https://pygpt.net | Documentation: https://pygpt.readthedocs.io
|
|
123
124
|
>
|
|
@@ -129,9 +130,9 @@ Release: **2.6.30** | build: **2025-08-29** | Python: **>=3.10, <3.14**
|
|
|
129
130
|
|
|
130
131
|
## Overview
|
|
131
132
|
|
|
132
|
-
**PyGPT** is **all-in-one** Desktop AI Assistant that provides direct interaction with OpenAI language models, including `GPT-5`, `GPT-4`, `o1`, `o3` and more, through the `OpenAI API`. By utilizing `LlamaIndex`, the application also supports alternative LLMs, like those available on `HuggingFace`, locally available models via `Ollama` (like `gpt-oss`, `Llama 3`,`Mistral`, `DeepSeek V3/R1` or `Bielik`), and other models like `Google Gemini`, `Anthropic Claude`, `Perplexity / Sonar`, and `xAI Grok`.
|
|
133
|
+
**PyGPT** is an **all-in-one** Desktop AI Assistant that provides direct interaction with OpenAI language models, including `GPT-5`, `GPT-4`, `o1`, `o3` and more, through the `OpenAI API`. By utilizing other SDKs and `LlamaIndex`, the application also supports alternative LLMs, like those available on `HuggingFace`, locally available models via `Ollama` (like `gpt-oss`, `Llama 3`, `Mistral`, `DeepSeek V3/R1` or `Bielik`), and other models like `Google Gemini`, `Anthropic Claude`, `Perplexity / Sonar`, and `xAI Grok`.
|
|
133
134
|
|
|
134
|
-
This assistant offers multiple modes of operation such as chat, assistants, agents, completions, and image-related tasks like image generation and image analysis. **PyGPT** has filesystem capabilities for file I/O, can generate and run Python code, execute system commands, execute custom commands and manage file transfers. It also allows models to perform web searches with the `Google` and `Microsoft Bing`.
|
|
135
|
+
This assistant offers multiple modes of operation such as chat, assistants, agents, completions, and image-related tasks like image generation and image analysis. **PyGPT** has filesystem capabilities for file I/O, can generate and run Python code, execute system commands, execute custom commands and manage file transfers. It also allows models to perform web searches with `DuckDuckGo`, `Google` and `Microsoft Bing`.
|
|
135
136
|
|
|
136
137
|
For audio interactions, **PyGPT** includes speech synthesis using the `Microsoft Azure`, `Google`, `Eleven Labs` and `OpenAI` Text-To-Speech services. Additionally, it features speech recognition capabilities provided by `OpenAI Whisper`, `Google` and `Bing` enabling the application to understand spoken commands and transcribe audio inputs into text. It features context memory with save and load functionality, enabling users to resume interactions from predefined points in the conversation. Prompt creation and management are streamlined through an intuitive preset system.
|
|
137
138
|
|
|
@@ -157,13 +158,13 @@ You can download compiled 64-bit versions for Windows and Linux here: https://py
|
|
|
157
158
|
|
|
158
159
|
- Desktop AI Assistant for `Linux`, `Windows` and `Mac`, written in Python.
|
|
159
160
|
- Works similarly to `ChatGPT`, but locally (on a desktop computer).
|
|
160
|
-
- 11 modes of operation: Chat, Chat with Files,
|
|
161
|
+
- 11 modes of operation: Chat, Chat with Files, Realtime + audio, Research (Perplexity), Completion, Image and video generation, Assistants, Experts, Computer use, Agents and Autonomous Mode.
|
|
161
162
|
- Supports multiple models like `OpenAI GPT-5`, `GPT-4`, `o1`, `o3`, `o4`, `Google Gemini`, `Anthropic Claude`, `xAI Grok`, `DeepSeek V3/R1`, `Perplexity / Sonar`, and any model accessible through `LlamaIndex` and `Ollama` such as `DeepSeek`, `gpt-oss`, `Llama 3`, `Mistral`, `Bielik`, etc.
|
|
162
163
|
- Chat with your own Files: integrated `LlamaIndex` support: chat with data such as: `txt`, `pdf`, `csv`, `html`, `md`, `docx`, `json`, `epub`, `xlsx`, `xml`, webpages, `Google`, `GitHub`, video/audio, images and other data types, or use conversation history as additional context provided to the model.
|
|
163
164
|
- Built-in vector databases support and automated files and data embedding.
|
|
164
165
|
- Included support features for individuals with disabilities: customizable keyboard shortcuts, voice control, and translation of on-screen actions into audio via speech synthesis.
|
|
165
166
|
- Handles and stores the full context of conversations (short and long-term memory).
|
|
166
|
-
- Internet access via `Google` and `Microsoft Bing`.
|
|
167
|
+
- Internet access via `DuckDuckGo`, `Google` and `Microsoft Bing`.
|
|
167
168
|
- Speech synthesis via `Microsoft Azure`, `Google`, `Eleven Labs` and `OpenAI` Text-To-Speech services.
|
|
168
169
|
- Speech recognition via `OpenAI Whisper`, `Google` and `Microsoft Speech Recognition`.
|
|
169
170
|
- Real-time video camera capture in Vision mode.
|
|
@@ -544,7 +545,7 @@ With this plugin, you can capture an image with your camera or attach an image a
|
|
|
544
545
|
|
|
545
546
|

|
|
546
547
|
|
|
547
|
-
**Image generation:** If you want to generate images
|
|
548
|
+
**Image generation:** If you want to generate images directly in chat, you must enable the `Image generation (inline)` plugin in the Plugins menu.
|
|
548
549
|
Plugin allows you to generate images in Chat mode:
|
|
549
550
|
|
|
550
551
|

|
|
@@ -662,11 +663,11 @@ In the `Settings -> LlamaIndex -> Data loaders` section you can define the addit
|
|
|
662
663
|
|
|
663
664
|
## Chat with Audio
|
|
664
665
|
|
|
665
|
-
This mode works like the Chat mode but with native support for audio input and output using a
|
|
666
|
+
This mode works like the Chat mode but with native support for audio input and output using the Realtime and Live APIs. In this mode, audio input and output are directed to and from the model directly, without the use of external plugins. This enables faster and better audio communication.
|
|
666
667
|
|
|
667
|
-
|
|
668
|
+
This mode is currently in beta.
|
|
668
669
|
|
|
669
|
-
|
|
670
|
+
At this moment, only OpenAI real-time models (via the Realtime API) and Google Gemini real-time models (via the Live API) are supported.
|
|
670
671
|
|
|
671
672
|
## Research
|
|
672
673
|
|
|
@@ -688,7 +689,7 @@ Additionally, this mode offers options for labeling the AI and the user, making
|
|
|
688
689
|
|
|
689
690
|
From version `2.0.107` the `davinci` models are deprecated and have been replaced with the `gpt-3.5-turbo-instruct` model in Completion mode.
|
|
690
691
|
|
|
691
|
-
## Image generation
|
|
692
|
+
## Image and video generation
|
|
692
693
|
|
|
693
694
|
### OpenAI DALL-E 3 / Google Imagen 3 and 4
|
|
694
695
|
|
|
@@ -704,6 +705,8 @@ Plugin allows you to generate images in Chat mode:
|
|
|
704
705
|
|
|
705
706
|

|
|
706
707
|
|
|
708
|
+
**Video generation**: From version `2.6.32`, video generation (using `Google Veo 3`) is also available.
|
|
709
|
+
|
|
707
710
|
### Multiple variants
|
|
708
711
|
|
|
709
712
|
You can generate up to **4 different variants** (DALL-E 2) for a given prompt in one session. DALL-E 3 allows one image.
|
|
@@ -1148,7 +1151,7 @@ The name of the currently active profile is shown as (Profile Name) in the windo
|
|
|
1148
1151
|
|
|
1149
1152
|
## Built-in models
|
|
1150
1153
|
|
|
1151
|
-
PyGPT has a preconfigured list of models (as of 2025-07-26):
|
|
1154
|
+
PyGPT has a preconfigured list of models (as of 2025-08-31):
|
|
1152
1155
|
|
|
1153
1156
|
- `bielik-11b-v2.3-instruct:Q4_K_M` (Ollama)
|
|
1154
1157
|
- `chatgpt-4o-latest` (OpenAI)
|
|
@@ -1172,6 +1175,7 @@ PyGPT has a preconfigured list of models (as of 2025-07-26):
|
|
|
1172
1175
|
- `gemini-1.5-pro` (Google)
|
|
1173
1176
|
- `gemini-2.0-flash-exp` (Google)
|
|
1174
1177
|
- `gemini-2.5-flash` (Google)
|
|
1178
|
+
- `gemini-2.5-flash-preview-native-audio-dialog` (Google, real-time)
|
|
1175
1179
|
- `gemini-2.5-pro` (Google)
|
|
1176
1180
|
- `gpt-3.5-turbo` (OpenAI)
|
|
1177
1181
|
- `gpt-3.5-turbo-16k` (OpenAI)
|
|
@@ -1184,7 +1188,7 @@ PyGPT has a preconfigured list of models (as of 2025-07-26):
|
|
|
1184
1188
|
- `gpt-4.1-mini` (OpenAI)
|
|
1185
1189
|
- `gpt-4.1-nano` (OpenAI)
|
|
1186
1190
|
- `gpt-4o` (OpenAI)
|
|
1187
|
-
- `gpt-4o-
|
|
1191
|
+
- `gpt-4o-realtime-preview` (OpenAI, real-time)
|
|
1188
1192
|
- `gpt-4o-mini` (OpenAI)
|
|
1189
1193
|
- `gpt-5` (OpenAI)
|
|
1190
1194
|
- `gpt-5-mini` (OpenAI)
|
|
@@ -1192,6 +1196,7 @@ PyGPT has a preconfigured list of models (as of 2025-07-26):
|
|
|
1192
1196
|
- `gpt-image-1` (OpenAI)
|
|
1193
1197
|
- `gpt-oss:20b` (OpenAI - via Ollama and HuggingFace Router)
|
|
1194
1198
|
- `gpt-oss:120b` (OpenAI - via Ollama and HuggingFace Router)
|
|
1199
|
+
- `gpt-realtime` (OpenAI, real-time)
|
|
1195
1200
|
- `grok-2-vision` (xAI)
|
|
1196
1201
|
- `grok-3` (xAI)
|
|
1197
1202
|
- `grok-3-fast` (xAI)
|
|
@@ -1223,6 +1228,8 @@ PyGPT has a preconfigured list of models (as of 2025-07-26):
|
|
|
1223
1228
|
- `sonar-pro` (Perplexity)
|
|
1224
1229
|
- `sonar-reasoning` (Perplexity)
|
|
1225
1230
|
- `sonar-reasoning-pro` (Perplexity)
|
|
1231
|
+
- `veo-3.0-generate-preview` (Google)
|
|
1232
|
+
- `veo-3.0-fast-generate-preview` (Google)
|
|
1226
1233
|
|
|
1227
1234
|
All models are specified in the configuration file `models.json`, which you can customize.
|
|
1228
1235
|
This file is located in your working directory. You can add new models provided directly by `OpenAI API` (or compatible) and those supported by `LlamaIndex` or `Ollama` to this file. Configuration for LlamaIndex is placed in `llama_index` key.
|
|
@@ -1240,20 +1247,20 @@ There is built-in support for those LLM providers:
|
|
|
1240
1247
|
- `Anthropic`
|
|
1241
1248
|
- `Azure OpenAI`
|
|
1242
1249
|
- `Deepseek API`
|
|
1243
|
-
- `Google`
|
|
1250
|
+
- `Google` (native SDK)
|
|
1244
1251
|
- `HuggingFace API`
|
|
1245
1252
|
- `HuggingFace Router` (wrapper for OpenAI compatible ChatCompletions)
|
|
1246
1253
|
- `Local models` (OpenAI API compatible)
|
|
1247
1254
|
- `Mistral AI`
|
|
1248
1255
|
- `Ollama`
|
|
1249
|
-
- `OpenAI`
|
|
1256
|
+
- `OpenAI` (native SDK)
|
|
1250
1257
|
- `OpenRouter`
|
|
1251
1258
|
- `Perplexity`
|
|
1252
1259
|
- `xAI`
|
|
1253
1260
|
|
|
1254
1261
|
## How to use local or non-GPT models
|
|
1255
1262
|
|
|
1256
|
-
### Llama 3, Mistral, DeepSeek, and other local models
|
|
1263
|
+
### Llama 3, Mistral, DeepSeek, Qwen, gpt-oss, and other local models
|
|
1257
1264
|
|
|
1258
1265
|
How to use locally installed Llama 3, DeepSeek, Mistral, etc. models:
|
|
1259
1266
|
|
|
@@ -2436,17 +2443,15 @@ Enable/disable remote tools, like Web Search or Image generation to use in OpenA
|
|
|
2436
2443
|
|
|
2437
2444
|
- `Experts: Master prompt`: Prompt to instruct how to handle experts.
|
|
2438
2445
|
|
|
2439
|
-
- `
|
|
2446
|
+
- `Image generate`: Prompt for generating prompts for image generation (if raw-mode is disabled).
|
|
2440
2447
|
|
|
2441
2448
|
**Images**
|
|
2442
2449
|
|
|
2443
|
-
- `
|
|
2444
|
-
|
|
2445
|
-
- `DALL-E Image quality`: The image quality of the generated images (DALL-E). Default: standard.
|
|
2450
|
+
- `Image size`: The resolution of the generated images (DALL-E). Default: 1024x1024.
|
|
2446
2451
|
|
|
2447
|
-
- `
|
|
2452
|
+
- `Image quality`: The image quality of the generated images (DALL-E). Default: standard.
|
|
2448
2453
|
|
|
2449
|
-
- `
|
|
2454
|
+
- `Prompt generation model`: Model used for generating prompts for image generation (if raw-mode is disabled).
|
|
2450
2455
|
|
|
2451
2456
|
**Vision**
|
|
2452
2457
|
|
|
@@ -2480,6 +2485,10 @@ Enable/disable remote tools, like Web Search or Image generation to use in OpenA
|
|
|
2480
2485
|
|
|
2481
2486
|
- `Continuous Audio Recording (Chunks)`: Enable recording in chunks for long audio recordings in notepad (voice notes).
|
|
2482
2487
|
|
|
2488
|
+
- `VAD prefix padding (in ms)`: VAD prefix padding in ms, default: 300ms (Realtime audio mode)
|
|
2489
|
+
|
|
2490
|
+
- `VAD end silence (in ms)`: VAD end silence in ms, default: 2000ms (Realtime audio mode)
|
|
2491
|
+
|
|
2483
2492
|
**Indexes / LlamaIndex**
|
|
2484
2493
|
|
|
2485
2494
|
**General**
|
|
@@ -3556,6 +3565,21 @@ may consume additional tokens that are not displayed in the main window.
|
|
|
3556
3565
|
|
|
3557
3566
|
## Recent changes:
|
|
3558
3567
|
|
|
3568
|
+
**2.6.32 (2025-09-02)**
|
|
3569
|
+
|
|
3570
|
+
- Added video generation and support for Google Veo 3 models.
|
|
3571
|
+
- Introduced new predefined models: veo-3.0-generate-preview and veo-3.0-fast-generate-preview.
|
|
3572
|
+
- Integrated DuckDuckGo as a search provider in the WebSearch plugin.
|
|
3573
|
+
- Added "Loop" mode to Realtime + audio mode for automatic turn handling and continuous conversation without manually enabling the microphone.
|
|
3574
|
+
|
|
3575
|
+
**2.6.31 (2025-09-01)**
|
|
3576
|
+
|
|
3577
|
+
- Chat with Audio mode renamed to Realtime + audio.
|
|
3578
|
+
- Added support for real-time audio models from OpenAI (Realtime API) and Google (Live API), featuring real-time audio integration (beta).
|
|
3579
|
+
- Introduced new predefined models: gpt-realtime, gpt-4o-realtime-preview, and gemini-2.5-flash-preview-native-audio-dialog.
|
|
3580
|
+
- Included Google Gen AI audio input and output providers in the Audio Input/Output plugins.
|
|
3581
|
+
- Added URL Context remote tool support in Google Gen AI.
|
|
3582
|
+
|
|
3559
3583
|
**2.6.30 (2025-08-29)**
|
|
3560
3584
|
|
|
3561
3585
|
- Added native Google GenAI API support (beta); live audio is not supported yet (#132).
|