pygpt-net 2.6.29__py3-none-any.whl → 2.6.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pygpt_net/CHANGELOG.txt +15 -0
- pygpt_net/__init__.py +3 -3
- pygpt_net/app.py +4 -0
- pygpt_net/{container.py → app_core.py} +5 -6
- pygpt_net/controller/__init__.py +5 -2
- pygpt_net/controller/access/control.py +1 -9
- pygpt_net/controller/assistant/assistant.py +4 -4
- pygpt_net/controller/assistant/batch.py +7 -7
- pygpt_net/controller/assistant/files.py +4 -4
- pygpt_net/controller/assistant/threads.py +3 -3
- pygpt_net/controller/attachment/attachment.py +4 -7
- pygpt_net/controller/audio/audio.py +25 -1
- pygpt_net/controller/audio/ui.py +2 -2
- pygpt_net/controller/chat/audio.py +1 -8
- pygpt_net/controller/chat/common.py +30 -4
- pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
- pygpt_net/controller/chat/output.py +8 -3
- pygpt_net/controller/chat/stream.py +4 -405
- pygpt_net/controller/chat/text.py +3 -2
- pygpt_net/controller/chat/vision.py +11 -19
- pygpt_net/controller/config/placeholder.py +1 -1
- pygpt_net/controller/ctx/ctx.py +1 -1
- pygpt_net/controller/ctx/summarizer.py +1 -1
- pygpt_net/controller/kernel/kernel.py +11 -3
- pygpt_net/controller/kernel/reply.py +5 -1
- pygpt_net/controller/mode/mode.py +21 -12
- pygpt_net/controller/plugins/settings.py +3 -2
- pygpt_net/controller/presets/editor.py +112 -99
- pygpt_net/controller/realtime/__init__.py +12 -0
- pygpt_net/controller/realtime/manager.py +53 -0
- pygpt_net/controller/realtime/realtime.py +268 -0
- pygpt_net/controller/theme/theme.py +3 -2
- pygpt_net/controller/ui/mode.py +7 -0
- pygpt_net/controller/ui/ui.py +19 -1
- pygpt_net/controller/ui/vision.py +4 -4
- pygpt_net/core/agents/legacy.py +2 -2
- pygpt_net/core/agents/runners/openai_workflow.py +2 -2
- pygpt_net/core/assistants/files.py +5 -5
- pygpt_net/core/assistants/store.py +4 -4
- pygpt_net/core/audio/audio.py +6 -1
- pygpt_net/core/audio/backend/native/__init__.py +12 -0
- pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
- pygpt_net/core/audio/backend/native/player.py +139 -0
- pygpt_net/core/audio/backend/native/realtime.py +250 -0
- pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
- pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
- pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
- pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
- pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
- pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
- pygpt_net/core/audio/backend/shared/__init__.py +38 -0
- pygpt_net/core/audio/backend/shared/conversions.py +211 -0
- pygpt_net/core/audio/backend/shared/envelope.py +38 -0
- pygpt_net/core/audio/backend/shared/player.py +137 -0
- pygpt_net/core/audio/backend/shared/rt.py +52 -0
- pygpt_net/core/audio/capture.py +5 -0
- pygpt_net/core/audio/output.py +13 -2
- pygpt_net/core/audio/whisper.py +6 -2
- pygpt_net/core/bridge/bridge.py +4 -3
- pygpt_net/core/bridge/worker.py +31 -9
- pygpt_net/core/debug/console/console.py +2 -2
- pygpt_net/core/debug/presets.py +2 -2
- pygpt_net/core/dispatcher/dispatcher.py +37 -1
- pygpt_net/core/events/__init__.py +2 -1
- pygpt_net/core/events/realtime.py +55 -0
- pygpt_net/core/experts/experts.py +2 -2
- pygpt_net/core/image/image.py +51 -1
- pygpt_net/core/modes/modes.py +2 -2
- pygpt_net/core/presets/presets.py +3 -3
- pygpt_net/core/realtime/options.py +87 -0
- pygpt_net/core/realtime/shared/__init__.py +0 -0
- pygpt_net/core/realtime/shared/audio.py +213 -0
- pygpt_net/core/realtime/shared/loop.py +64 -0
- pygpt_net/core/realtime/shared/session.py +59 -0
- pygpt_net/core/realtime/shared/text.py +37 -0
- pygpt_net/core/realtime/shared/tools.py +276 -0
- pygpt_net/core/realtime/shared/turn.py +38 -0
- pygpt_net/core/realtime/shared/types.py +16 -0
- pygpt_net/core/realtime/worker.py +164 -0
- pygpt_net/core/tokens/tokens.py +4 -4
- pygpt_net/core/types/__init__.py +1 -0
- pygpt_net/core/types/image.py +48 -0
- pygpt_net/core/types/mode.py +5 -2
- pygpt_net/core/vision/analyzer.py +1 -1
- pygpt_net/data/config/config.json +13 -4
- pygpt_net/data/config/models.json +219 -101
- pygpt_net/data/config/modes.json +3 -9
- pygpt_net/data/config/settings.json +135 -27
- pygpt_net/data/config/settings_section.json +2 -2
- pygpt_net/data/locale/locale.de.ini +7 -7
- pygpt_net/data/locale/locale.en.ini +25 -12
- pygpt_net/data/locale/locale.es.ini +7 -7
- pygpt_net/data/locale/locale.fr.ini +7 -7
- pygpt_net/data/locale/locale.it.ini +7 -7
- pygpt_net/data/locale/locale.pl.ini +8 -8
- pygpt_net/data/locale/locale.uk.ini +7 -7
- pygpt_net/data/locale/locale.zh.ini +3 -3
- pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
- pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
- pygpt_net/item/model.py +23 -3
- pygpt_net/plugin/audio_input/plugin.py +37 -4
- pygpt_net/plugin/audio_input/simple.py +57 -8
- pygpt_net/plugin/cmd_files/worker.py +3 -0
- pygpt_net/plugin/openai_dalle/plugin.py +4 -4
- pygpt_net/plugin/openai_vision/plugin.py +12 -13
- pygpt_net/provider/agents/openai/agent.py +5 -5
- pygpt_net/provider/agents/openai/agent_b2b.py +5 -5
- pygpt_net/provider/agents/openai/agent_planner.py +5 -6
- pygpt_net/provider/agents/openai/agent_with_experts.py +5 -5
- pygpt_net/provider/agents/openai/agent_with_experts_feedback.py +4 -4
- pygpt_net/provider/agents/openai/agent_with_feedback.py +4 -4
- pygpt_net/provider/agents/openai/bot_researcher.py +2 -2
- pygpt_net/provider/agents/openai/bots/research_bot/agents/planner_agent.py +1 -1
- pygpt_net/provider/agents/openai/bots/research_bot/agents/search_agent.py +1 -1
- pygpt_net/provider/agents/openai/bots/research_bot/agents/writer_agent.py +1 -1
- pygpt_net/provider/agents/openai/evolve.py +5 -5
- pygpt_net/provider/agents/openai/supervisor.py +4 -4
- pygpt_net/provider/api/__init__.py +27 -0
- pygpt_net/provider/api/anthropic/__init__.py +68 -0
- pygpt_net/provider/api/google/__init__.py +295 -0
- pygpt_net/provider/api/google/audio.py +121 -0
- pygpt_net/provider/api/google/chat.py +591 -0
- pygpt_net/provider/api/google/image.py +427 -0
- pygpt_net/provider/api/google/realtime/__init__.py +12 -0
- pygpt_net/provider/api/google/realtime/client.py +1945 -0
- pygpt_net/provider/api/google/realtime/realtime.py +186 -0
- pygpt_net/provider/api/google/tools.py +222 -0
- pygpt_net/provider/api/google/vision.py +129 -0
- pygpt_net/provider/{gpt → api/openai}/__init__.py +24 -4
- pygpt_net/provider/api/openai/agents/__init__.py +0 -0
- pygpt_net/provider/{gpt → api/openai}/agents/computer.py +1 -1
- pygpt_net/provider/{gpt → api/openai}/agents/experts.py +1 -1
- pygpt_net/provider/{gpt → api/openai}/agents/response.py +1 -1
- pygpt_net/provider/{gpt → api/openai}/assistants.py +1 -1
- pygpt_net/provider/{gpt → api/openai}/chat.py +15 -8
- pygpt_net/provider/{gpt → api/openai}/completion.py +1 -1
- pygpt_net/provider/{gpt → api/openai}/image.py +1 -1
- pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
- pygpt_net/provider/api/openai/realtime/client.py +1828 -0
- pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
- pygpt_net/provider/{gpt → api/openai}/remote_tools.py +1 -1
- pygpt_net/provider/{gpt → api/openai}/responses.py +34 -20
- pygpt_net/provider/{gpt → api/openai}/store.py +2 -2
- pygpt_net/provider/{gpt → api/openai}/vision.py +1 -1
- pygpt_net/provider/api/openai/worker/__init__.py +0 -0
- pygpt_net/provider/{gpt → api/openai}/worker/assistants.py +4 -4
- pygpt_net/provider/{gpt → api/openai}/worker/importer.py +10 -10
- pygpt_net/provider/audio_input/google_genai.py +103 -0
- pygpt_net/provider/audio_input/openai_whisper.py +1 -1
- pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
- pygpt_net/provider/audio_output/openai_tts.py +9 -6
- pygpt_net/provider/core/config/patch.py +26 -0
- pygpt_net/provider/core/model/patch.py +20 -0
- pygpt_net/provider/core/preset/json_file.py +2 -4
- pygpt_net/provider/llms/anthropic.py +2 -5
- pygpt_net/provider/llms/base.py +4 -3
- pygpt_net/provider/llms/google.py +8 -9
- pygpt_net/provider/llms/openai.py +1 -1
- pygpt_net/provider/loaders/hub/image_vision/base.py +1 -1
- pygpt_net/ui/dialog/preset.py +71 -55
- pygpt_net/ui/layout/toolbox/footer.py +16 -0
- pygpt_net/ui/layout/toolbox/image.py +5 -0
- pygpt_net/ui/main.py +6 -4
- pygpt_net/ui/widget/option/combo.py +15 -1
- pygpt_net/utils.py +9 -0
- {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +55 -55
- {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +181 -135
- pygpt_net/core/audio/backend/pyaudio.py +0 -554
- /pygpt_net/{provider/gpt/agents → controller/chat/handler}/__init__.py +0 -0
- /pygpt_net/{provider/gpt/worker → core/realtime}/__init__.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/agents/client.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/agents/remote_tools.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/agents/utils.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/audio.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/computer.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/container.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/summarizer.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/tools.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/utils.py +0 -0
- {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
- {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
- {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: pygpt-net
|
|
3
|
-
Version: 2.6.
|
|
3
|
+
Version: 2.6.31
|
|
4
4
|
Summary: Desktop AI Assistant powered by: OpenAI GPT-5, GPT-4, o1, o3, Gemini, Claude, Grok, DeepSeek, and other models supported by Llama Index, and Ollama. Chatbot, agents, completion, image generation, vision analysis, speech-to-text, plugins, internet access, file handling, command execution and more.
|
|
5
5
|
License: MIT
|
|
6
6
|
Keywords: ai,api,api key,app,assistant,bielik,chat,chatbot,chatgpt,claude,dall-e,deepseek,desktop,gemini,gpt,gpt-3.5,gpt-4,gpt-4-vision,gpt-4o,gpt-5,gpt-oss,gpt3.5,gpt4,grok,langchain,llama-index,llama3,mistral,o1,o3,ollama,openai,presets,py-gpt,py_gpt,pygpt,pyside,qt,text completion,tts,ui,vision,whisper
|
|
@@ -117,7 +117,7 @@ Description-Content-Type: text/markdown
|
|
|
117
117
|
|
|
118
118
|
[](https://snapcraft.io/pygpt)
|
|
119
119
|
|
|
120
|
-
Release: **2.6.
|
|
120
|
+
Release: **2.6.31** | build: **2025-09-01** | Python: **>=3.10, <3.14**
|
|
121
121
|
|
|
122
122
|
> Official website: https://pygpt.net | Documentation: https://pygpt.readthedocs.io
|
|
123
123
|
>
|
|
@@ -157,7 +157,7 @@ You can download compiled 64-bit versions for Windows and Linux here: https://py
|
|
|
157
157
|
|
|
158
158
|
- Desktop AI Assistant for `Linux`, `Windows` and `Mac`, written in Python.
|
|
159
159
|
- Works similarly to `ChatGPT`, but locally (on a desktop computer).
|
|
160
|
-
-
|
|
160
|
+
- 11 modes of operation: Chat, Chat with Files, Realtime + audio, Research (Perplexity), Completion, Image generation, Assistants, Experts, Computer use, Agents and Autonomous Mode.
|
|
161
161
|
- Supports multiple models like `OpenAI GPT-5`, `GPT-4`, `o1`, `o3`, `o4`, `Google Gemini`, `Anthropic Claude`, `xAI Grok`, `DeepSeek V3/R1`, `Perplexity / Sonar`, and any model accessible through `LlamaIndex` and `Ollama` such as `DeepSeek`, `gpt-oss`, `Llama 3`, `Mistral`, `Bielik`, etc.
|
|
162
162
|
- Chat with your own Files: integrated `LlamaIndex` support: chat with data such as: `txt`, `pdf`, `csv`, `html`, `md`, `docx`, `json`, `epub`, `xlsx`, `xml`, webpages, `Google`, `GitHub`, video/audio, images and other data types, or use conversation history as additional context provided to the model.
|
|
163
163
|
- Built-in vector databases support and automated files and data embedding.
|
|
@@ -181,7 +181,7 @@ You can download compiled 64-bit versions for Windows and Linux here: https://py
|
|
|
181
181
|
- Includes simple painter / drawing tool.
|
|
182
182
|
- Supports multiple languages.
|
|
183
183
|
- Requires no previous knowledge of using AI models.
|
|
184
|
-
- Simplifies image generation using `DALL-E`.
|
|
184
|
+
- Simplifies image generation using image models like `DALL-E` and `Imagen`.
|
|
185
185
|
- Fully configurable.
|
|
186
186
|
- Themes support.
|
|
187
187
|
- Real-time code syntax highlighting.
|
|
@@ -439,9 +439,9 @@ Alternatively, you can try removing snap and reinstalling it:
|
|
|
439
439
|
`sudo snap install pygpt`
|
|
440
440
|
|
|
441
441
|
|
|
442
|
-
**Access to microphone and audio in Windows version:**
|
|
442
|
+
**Access to a microphone and audio in Windows version:**
|
|
443
443
|
|
|
444
|
-
If you have a problems with audio or microphone in the non-binary PIP/Python version on Windows, check to see if FFmpeg is installed. If it's not, install it and add it to the PATH. You can find a tutorial on how to do this here: https://phoenixnap.com/kb/ffmpeg-windows. The binary version already includes FFmpeg.
|
|
444
|
+
If you have problems with audio or a microphone in the non-binary PIP/Python version on Windows, check to see if FFmpeg is installed. If it's not, install it and add it to the PATH. You can find a tutorial on how to do this here: https://phoenixnap.com/kb/ffmpeg-windows. The binary version already includes FFmpeg.
|
|
445
445
|
|
|
446
446
|
**Windows and VC++ Redistributable**
|
|
447
447
|
|
|
@@ -519,9 +519,16 @@ Here, you can add or manage API keys for any supported provider.
|
|
|
519
519
|
|
|
520
520
|
**+ Inline Vision and Image generation**
|
|
521
521
|
|
|
522
|
-
|
|
522
|
+
In **PyGPT**, this mode mirrors `ChatGPT`, allowing you to chat with models like `GPT-5`, `GPT-4`, `o1`, `o3`, `Claude`, `Gemini`, `Grok`, `Perplexity (Sonar)`, `Deepseek`, and more. It works using the OpenAI API `Responses` and `ChatCompletions`, or the `Google GenAI SDK` if the Google native client is enabled. You can choose the API endpoint for `ChatCompletions` in `Config -> Settings -> API Keys`.
|
|
523
523
|
|
|
524
|
-
**Tip
|
|
524
|
+
**Tip:** This mode uses the provider SDK directly. If there's no native client built into the app, models like Gemini, Claude, Grok, Sonar, or Llama3 are supported in Chat mode via LlamaIndex or compatible OpenAI API endpoints. The app automatically switches to these endpoints when using non-OpenAI models.
|
|
525
|
+
|
|
526
|
+
Currently built-in native clients:
|
|
527
|
+
|
|
528
|
+
- OpenAI SDK
|
|
529
|
+
- Google GenAI SDK
|
|
530
|
+
|
|
531
|
+
Support for Anthropic and xAI native clients is coming soon.
|
|
525
532
|
|
|
526
533
|
The main part of the interface is a chat window where you see your conversations. Below it is a message box for typing. On the right side, you can set up or change the model and system prompt. You can also save these settings as presets to easily switch between models or tasks.
|
|
527
534
|
|
|
@@ -655,11 +662,11 @@ In the `Settings -> LlamaIndex -> Data loaders` section you can define the addit
|
|
|
655
662
|
|
|
656
663
|
## Chat with Audio
|
|
657
664
|
|
|
658
|
-
This mode works like the Chat mode but with native support for audio input and output using a
|
|
665
|
+
This mode works like the Chat mode but with native support for audio input and output using the Realtime and Live APIs. In this mode, audio input and output are directed to and from the model directly, without the use of external plugins. This enables faster and better audio communication.
|
|
659
666
|
|
|
660
|
-
|
|
667
|
+
Currently in beta.
|
|
661
668
|
|
|
662
|
-
|
|
669
|
+
At this moment, only OpenAI real-time models (via the Realtime API) and Google Gemini real-time models (via the Live API) are supported.
|
|
663
670
|
|
|
664
671
|
## Research
|
|
665
672
|
|
|
@@ -683,17 +690,16 @@ From version `2.0.107` the `davinci` models are deprecated and has been replaced
|
|
|
683
690
|
|
|
684
691
|
## Image generation
|
|
685
692
|
|
|
686
|
-
### DALL-E 3
|
|
693
|
+
### OpenAI DALL-E 3 / Google Imagen 3 and 4
|
|
687
694
|
|
|
688
|
-
**PyGPT** enables quick and easy image creation with `DALL-E 3
|
|
689
|
-
|
|
690
|
-
and displaying the image onscreen. You can send raw prompt to `DALL-E` in `Image generation` mode or ask the model for the best prompt.
|
|
695
|
+
**PyGPT** enables quick and easy image creation with image models like `DALL-E 3`, `gpt-image-1` or `Google Imagen`.
|
|
696
|
+
Generating images is akin to a chat conversation - a user's prompt triggers the generation, followed by downloading, saving to the computer, and displaying the image onscreen. You can send a raw prompt to the model in `Image generation` mode or ask the model for the best prompt.
|
|
691
697
|
|
|
692
698
|

|
|
693
699
|
|
|
694
|
-
Image generation using
|
|
700
|
+
Image generation using image models is also available in every mode via the `Image Generation (inline)` plugin. Just ask any model, in any mode - e.g. GPT or Gemini - to generate an image, and it will do it inline, without the need to switch modes.
|
|
695
701
|
|
|
696
|
-
If you want to generate images
|
|
702
|
+
If you want to generate images directly in chat, you must enable the **Image generation (inline)** plugin in the Plugins menu.
|
|
697
703
|
Plugin allows you to generate images in Chat mode:
|
|
698
704
|
|
|
699
705
|

|
|
@@ -708,7 +714,7 @@ the bottom of the screen. This replaces the conversation temperature slider when
|
|
|
708
714
|
|
|
709
715
|
There is an option for switching prompt generation mode.
|
|
710
716
|
|
|
711
|
-
If **Raw Mode** is enabled,
|
|
717
|
+
If **Raw Mode** is enabled, a model will receive the prompt exactly as you have provided it.
|
|
712
718
|
If **Raw Mode** is disabled, a model will generate the best prompt for you based on your instructions.
|
|
713
719
|
|
|
714
720
|
### Image storage
|
|
@@ -724,31 +730,6 @@ prompts for creating new images.
|
|
|
724
730
|
|
|
725
731
|
Images are stored in ``img`` directory in **PyGPT** user data folder.
|
|
726
732
|
|
|
727
|
-
|
|
728
|
-
## Vision
|
|
729
|
-
|
|
730
|
-
This mode enables image analysis using the `GPT-5`, `GPT-4o` and other vision (multimodal) models. Functioning much like the chat mode,
|
|
731
|
-
it also allows you to upload images or provide URLs to images. The vision feature can analyze both local
|
|
732
|
-
images and those found online.
|
|
733
|
-
|
|
734
|
-
Vision is also integrated into any chat mode via plugin `Vision (inline)`. Just enable the plugin and use Vision in other work modes, such as Chat or Chat with Files.
|
|
735
|
-
|
|
736
|
-
Vision mode also includes real-time video capture from camera. To capture image from camera and append it to chat just click on video at left side. You can also enable `Auto capture` - image will be captured and appended to chat message every time you send message.
|
|
737
|
-
|
|
738
|
-
**1) Video camera real-time image capture**
|
|
739
|
-
|
|
740
|
-

|
|
741
|
-
|
|
742
|
-
**2) you can also provide an image URL**
|
|
743
|
-
|
|
744
|
-

|
|
745
|
-
|
|
746
|
-
**3) or you can just upload your local images or use the inline Vision in the standard chat mode:**
|
|
747
|
-
|
|
748
|
-

|
|
749
|
-
|
|
750
|
-
**Tip:** When using `Vision (inline)` by utilizing a plugin in standard mode, such as `Chat` (not `Vision` mode), the `+ Vision` label will appear at the bottom of the Chat window.
|
|
751
|
-
|
|
752
733
|
## Assistants
|
|
753
734
|
|
|
754
735
|
This mode uses the OpenAI's **Assistants API**.
|
|
@@ -1167,7 +1148,7 @@ The name of the currently active profile is shown as (Profile Name) in the windo
|
|
|
1167
1148
|
|
|
1168
1149
|
## Built-in models
|
|
1169
1150
|
|
|
1170
|
-
PyGPT has a preconfigured list of models (as of 2025-
|
|
1151
|
+
PyGPT has a preconfigured list of models (as of 2025-08-31):
|
|
1171
1152
|
|
|
1172
1153
|
- `bielik-11b-v2.3-instruct:Q4_K_M` (Ollama)
|
|
1173
1154
|
- `chatgpt-4o-latest` (OpenAI)
|
|
@@ -1191,6 +1172,7 @@ PyGPT has a preconfigured list of models (as of 2025-07-26):
|
|
|
1191
1172
|
- `gemini-1.5-pro` (Google)
|
|
1192
1173
|
- `gemini-2.0-flash-exp` (Google)
|
|
1193
1174
|
- `gemini-2.5-flash` (Google)
|
|
1175
|
+
- `gemini-2.5-flash-preview-native-audio-dialog` (Google, real-time)
|
|
1194
1176
|
- `gemini-2.5-pro` (Google)
|
|
1195
1177
|
- `gpt-3.5-turbo` (OpenAI)
|
|
1196
1178
|
- `gpt-3.5-turbo-16k` (OpenAI)
|
|
@@ -1203,7 +1185,7 @@ PyGPT has a preconfigured list of models (as of 2025-07-26):
|
|
|
1203
1185
|
- `gpt-4.1-mini` (OpenAI)
|
|
1204
1186
|
- `gpt-4.1-nano` (OpenAI)
|
|
1205
1187
|
- `gpt-4o` (OpenAI)
|
|
1206
|
-
- `gpt-4o-
|
|
1188
|
+
- `gpt-4o-realtime-preview` (OpenAI, real-time)
|
|
1207
1189
|
- `gpt-4o-mini` (OpenAI)
|
|
1208
1190
|
- `gpt-5` (OpenAI)
|
|
1209
1191
|
- `gpt-5-mini` (OpenAI)
|
|
@@ -1211,6 +1193,7 @@ PyGPT has a preconfigured list of models (as of 2025-07-26):
|
|
|
1211
1193
|
- `gpt-image-1` (OpenAI)
|
|
1212
1194
|
- `gpt-oss:20b` (OpenAI - via Ollama and HuggingFace Router)
|
|
1213
1195
|
- `gpt-oss:120b` (OpenAI - via Ollama and HuggingFace Router)
|
|
1196
|
+
- `gpt-realtime` (OpenAI, real-time)
|
|
1214
1197
|
- `grok-2-vision` (xAI)
|
|
1215
1198
|
- `grok-3` (xAI)
|
|
1216
1199
|
- `grok-3-fast` (xAI)
|
|
@@ -2455,17 +2438,15 @@ Enable/disable remote tools, like Web Search or Image generation to use in OpenA
|
|
|
2455
2438
|
|
|
2456
2439
|
- `Experts: Master prompt`: Prompt to instruct how to handle experts.
|
|
2457
2440
|
|
|
2458
|
-
- `
|
|
2441
|
+
- `Image generate`: Prompt for generating prompts for image generation (if raw-mode is disabled).
|
|
2459
2442
|
|
|
2460
2443
|
**Images**
|
|
2461
2444
|
|
|
2462
|
-
- `
|
|
2445
|
+
- `Image size`: The resolution of the generated images (DALL-E). Default: 1024x1024.
|
|
2463
2446
|
|
|
2464
|
-
- `
|
|
2447
|
+
- `Image quality`: The image quality of the generated images (DALL-E). Default: standard.
|
|
2465
2448
|
|
|
2466
|
-
- `
|
|
2467
|
-
|
|
2468
|
-
- `DALL-E: prompt generation model`: Model used for generating prompts for DALL-E (if raw-mode is disabled).
|
|
2449
|
+
- `Prompt generation model`: Model used for generating prompts for image generation (if raw-mode is disabled).
|
|
2469
2450
|
|
|
2470
2451
|
**Vision**
|
|
2471
2452
|
|
|
@@ -2499,6 +2480,10 @@ Enable/disable remote tools, like Web Search or Image generation to use in OpenA
|
|
|
2499
2480
|
|
|
2500
2481
|
- `Continuous Audio Recording (Chunks)`: Enable recording in chunks for long audio recordings in notepad (voice notes).
|
|
2501
2482
|
|
|
2483
|
+
- `VAD prefix padding (in ms)`: VAD prefix padding in ms, default: 300ms (Realtime audio mode)
|
|
2484
|
+
|
|
2485
|
+
- `VAD end silence (in ms)`: VAD end silence in ms, default: 2000ms (Realtime audio mode)
|
|
2486
|
+
|
|
2502
2487
|
**Indexes / LlamaIndex**
|
|
2503
2488
|
|
|
2504
2489
|
**General**
|
|
@@ -2637,10 +2622,12 @@ Enable/disable remote tools, like Web Search or Image generation to use in OpenA
|
|
|
2637
2622
|
|
|
2638
2623
|
- `Check for updates in background`: Enables checking for updates in background (checking every 5 minutes). Default: True.
|
|
2639
2624
|
|
|
2640
|
-
**
|
|
2625
|
+
**Debug**
|
|
2641
2626
|
|
|
2642
2627
|
- `Show debug menu`: Enables debug (developer) menu.
|
|
2643
2628
|
|
|
2629
|
+
- `Log level`: toggle log level (ERROR|WARNING|INFO|DEBUG)
|
|
2630
|
+
|
|
2644
2631
|
- `Log and debug context`: Enables logging of context input/output.
|
|
2645
2632
|
|
|
2646
2633
|
- `Log and debug events`: Enables logging of event dispatch.
|
|
@@ -2657,8 +2644,6 @@ Enable/disable remote tools, like Web Search or Image generation to use in OpenA
|
|
|
2657
2644
|
|
|
2658
2645
|
- `Log Assistants usage to console`: Enables logging of Assistants API usage to console.
|
|
2659
2646
|
|
|
2660
|
-
- `Log level`: toggle log level (ERROR|WARNING|INFO|DEBUG)
|
|
2661
|
-
|
|
2662
2647
|
|
|
2663
2648
|
## JSON files
|
|
2664
2649
|
|
|
@@ -3363,7 +3348,7 @@ These wrappers are loaded into the application during startup using `launcher.ad
|
|
|
3363
3348
|
```python
|
|
3364
3349
|
# app.py
|
|
3365
3350
|
|
|
3366
|
-
from pygpt_net.provider.
|
|
3351
|
+
from pygpt_net.provider.api.openai import OpenAILLM
|
|
3367
3352
|
from pygpt_net.provider.llms.azure_openai import AzureOpenAILLM
|
|
3368
3353
|
from pygpt_net.provider.llms.anthropic import AnthropicLLM
|
|
3369
3354
|
from pygpt_net.provider.llms.hugging_face import HuggingFaceLLM
|
|
@@ -3575,6 +3560,21 @@ may consume additional tokens that are not displayed in the main window.
|
|
|
3575
3560
|
|
|
3576
3561
|
## Recent changes:
|
|
3577
3562
|
|
|
3563
|
+
**2.6.31 (2025-09-01)**
|
|
3564
|
+
|
|
3565
|
+
- Chat with Audio mode renamed to Realtime + audio.
|
|
3566
|
+
- Added support for real-time audio models from OpenAI (Realtime API) and Google (Live API), featuring real-time audio integration (beta).
|
|
3567
|
+
- Introduced new predefined models: gpt-realtime, gpt-4o-realtime-preview, and gemini-2.5-flash-preview-native-audio-dialog.
|
|
3568
|
+
- Included Google Gen AI audio input and output providers in the Audio Input/Output plugins.
|
|
3569
|
+
- Added URL Context remote tool support in Google Gen AI.
|
|
3570
|
+
|
|
3571
|
+
**2.6.30 (2025-08-29)**
|
|
3572
|
+
|
|
3573
|
+
- Added native Google GenAI API support (beta); live audio is not supported yet (#132).
|
|
3574
|
+
- Added new predefined models for image generation: Google Imagen3 and Imagen4.
|
|
3575
|
+
- Optimized token usage in the Responses API.
|
|
3576
|
+
- Removed Vision mode (it is now integrated into Chat).
|
|
3577
|
+
|
|
3578
3578
|
**2.6.29 (2025-08-28)**
|
|
3579
3579
|
|
|
3580
3580
|
- Verbose options have been moved to the Developer section in settings.
|