pygpt-net 2.6.29__py3-none-any.whl → 2.6.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182)
  1. pygpt_net/CHANGELOG.txt +15 -0
  2. pygpt_net/__init__.py +3 -3
  3. pygpt_net/app.py +4 -0
  4. pygpt_net/{container.py → app_core.py} +5 -6
  5. pygpt_net/controller/__init__.py +5 -2
  6. pygpt_net/controller/access/control.py +1 -9
  7. pygpt_net/controller/assistant/assistant.py +4 -4
  8. pygpt_net/controller/assistant/batch.py +7 -7
  9. pygpt_net/controller/assistant/files.py +4 -4
  10. pygpt_net/controller/assistant/threads.py +3 -3
  11. pygpt_net/controller/attachment/attachment.py +4 -7
  12. pygpt_net/controller/audio/audio.py +25 -1
  13. pygpt_net/controller/audio/ui.py +2 -2
  14. pygpt_net/controller/chat/audio.py +1 -8
  15. pygpt_net/controller/chat/common.py +30 -4
  16. pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
  17. pygpt_net/controller/chat/output.py +8 -3
  18. pygpt_net/controller/chat/stream.py +4 -405
  19. pygpt_net/controller/chat/text.py +3 -2
  20. pygpt_net/controller/chat/vision.py +11 -19
  21. pygpt_net/controller/config/placeholder.py +1 -1
  22. pygpt_net/controller/ctx/ctx.py +1 -1
  23. pygpt_net/controller/ctx/summarizer.py +1 -1
  24. pygpt_net/controller/kernel/kernel.py +11 -3
  25. pygpt_net/controller/kernel/reply.py +5 -1
  26. pygpt_net/controller/mode/mode.py +21 -12
  27. pygpt_net/controller/plugins/settings.py +3 -2
  28. pygpt_net/controller/presets/editor.py +112 -99
  29. pygpt_net/controller/realtime/__init__.py +12 -0
  30. pygpt_net/controller/realtime/manager.py +53 -0
  31. pygpt_net/controller/realtime/realtime.py +268 -0
  32. pygpt_net/controller/theme/theme.py +3 -2
  33. pygpt_net/controller/ui/mode.py +7 -0
  34. pygpt_net/controller/ui/ui.py +19 -1
  35. pygpt_net/controller/ui/vision.py +4 -4
  36. pygpt_net/core/agents/legacy.py +2 -2
  37. pygpt_net/core/agents/runners/openai_workflow.py +2 -2
  38. pygpt_net/core/assistants/files.py +5 -5
  39. pygpt_net/core/assistants/store.py +4 -4
  40. pygpt_net/core/audio/audio.py +6 -1
  41. pygpt_net/core/audio/backend/native/__init__.py +12 -0
  42. pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
  43. pygpt_net/core/audio/backend/native/player.py +139 -0
  44. pygpt_net/core/audio/backend/native/realtime.py +250 -0
  45. pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
  46. pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
  47. pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
  48. pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
  49. pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
  50. pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
  51. pygpt_net/core/audio/backend/shared/__init__.py +38 -0
  52. pygpt_net/core/audio/backend/shared/conversions.py +211 -0
  53. pygpt_net/core/audio/backend/shared/envelope.py +38 -0
  54. pygpt_net/core/audio/backend/shared/player.py +137 -0
  55. pygpt_net/core/audio/backend/shared/rt.py +52 -0
  56. pygpt_net/core/audio/capture.py +5 -0
  57. pygpt_net/core/audio/output.py +13 -2
  58. pygpt_net/core/audio/whisper.py +6 -2
  59. pygpt_net/core/bridge/bridge.py +4 -3
  60. pygpt_net/core/bridge/worker.py +31 -9
  61. pygpt_net/core/debug/console/console.py +2 -2
  62. pygpt_net/core/debug/presets.py +2 -2
  63. pygpt_net/core/dispatcher/dispatcher.py +37 -1
  64. pygpt_net/core/events/__init__.py +2 -1
  65. pygpt_net/core/events/realtime.py +55 -0
  66. pygpt_net/core/experts/experts.py +2 -2
  67. pygpt_net/core/image/image.py +51 -1
  68. pygpt_net/core/modes/modes.py +2 -2
  69. pygpt_net/core/presets/presets.py +3 -3
  70. pygpt_net/core/realtime/options.py +87 -0
  71. pygpt_net/core/realtime/shared/__init__.py +0 -0
  72. pygpt_net/core/realtime/shared/audio.py +213 -0
  73. pygpt_net/core/realtime/shared/loop.py +64 -0
  74. pygpt_net/core/realtime/shared/session.py +59 -0
  75. pygpt_net/core/realtime/shared/text.py +37 -0
  76. pygpt_net/core/realtime/shared/tools.py +276 -0
  77. pygpt_net/core/realtime/shared/turn.py +38 -0
  78. pygpt_net/core/realtime/shared/types.py +16 -0
  79. pygpt_net/core/realtime/worker.py +164 -0
  80. pygpt_net/core/tokens/tokens.py +4 -4
  81. pygpt_net/core/types/__init__.py +1 -0
  82. pygpt_net/core/types/image.py +48 -0
  83. pygpt_net/core/types/mode.py +5 -2
  84. pygpt_net/core/vision/analyzer.py +1 -1
  85. pygpt_net/data/config/config.json +13 -4
  86. pygpt_net/data/config/models.json +219 -101
  87. pygpt_net/data/config/modes.json +3 -9
  88. pygpt_net/data/config/settings.json +135 -27
  89. pygpt_net/data/config/settings_section.json +2 -2
  90. pygpt_net/data/locale/locale.de.ini +7 -7
  91. pygpt_net/data/locale/locale.en.ini +25 -12
  92. pygpt_net/data/locale/locale.es.ini +7 -7
  93. pygpt_net/data/locale/locale.fr.ini +7 -7
  94. pygpt_net/data/locale/locale.it.ini +7 -7
  95. pygpt_net/data/locale/locale.pl.ini +8 -8
  96. pygpt_net/data/locale/locale.uk.ini +7 -7
  97. pygpt_net/data/locale/locale.zh.ini +3 -3
  98. pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
  99. pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
  100. pygpt_net/item/model.py +23 -3
  101. pygpt_net/plugin/audio_input/plugin.py +37 -4
  102. pygpt_net/plugin/audio_input/simple.py +57 -8
  103. pygpt_net/plugin/cmd_files/worker.py +3 -0
  104. pygpt_net/plugin/openai_dalle/plugin.py +4 -4
  105. pygpt_net/plugin/openai_vision/plugin.py +12 -13
  106. pygpt_net/provider/agents/openai/agent.py +5 -5
  107. pygpt_net/provider/agents/openai/agent_b2b.py +5 -5
  108. pygpt_net/provider/agents/openai/agent_planner.py +5 -6
  109. pygpt_net/provider/agents/openai/agent_with_experts.py +5 -5
  110. pygpt_net/provider/agents/openai/agent_with_experts_feedback.py +4 -4
  111. pygpt_net/provider/agents/openai/agent_with_feedback.py +4 -4
  112. pygpt_net/provider/agents/openai/bot_researcher.py +2 -2
  113. pygpt_net/provider/agents/openai/bots/research_bot/agents/planner_agent.py +1 -1
  114. pygpt_net/provider/agents/openai/bots/research_bot/agents/search_agent.py +1 -1
  115. pygpt_net/provider/agents/openai/bots/research_bot/agents/writer_agent.py +1 -1
  116. pygpt_net/provider/agents/openai/evolve.py +5 -5
  117. pygpt_net/provider/agents/openai/supervisor.py +4 -4
  118. pygpt_net/provider/api/__init__.py +27 -0
  119. pygpt_net/provider/api/anthropic/__init__.py +68 -0
  120. pygpt_net/provider/api/google/__init__.py +295 -0
  121. pygpt_net/provider/api/google/audio.py +121 -0
  122. pygpt_net/provider/api/google/chat.py +591 -0
  123. pygpt_net/provider/api/google/image.py +427 -0
  124. pygpt_net/provider/api/google/realtime/__init__.py +12 -0
  125. pygpt_net/provider/api/google/realtime/client.py +1945 -0
  126. pygpt_net/provider/api/google/realtime/realtime.py +186 -0
  127. pygpt_net/provider/api/google/tools.py +222 -0
  128. pygpt_net/provider/api/google/vision.py +129 -0
  129. pygpt_net/provider/{gpt → api/openai}/__init__.py +24 -4
  130. pygpt_net/provider/api/openai/agents/__init__.py +0 -0
  131. pygpt_net/provider/{gpt → api/openai}/agents/computer.py +1 -1
  132. pygpt_net/provider/{gpt → api/openai}/agents/experts.py +1 -1
  133. pygpt_net/provider/{gpt → api/openai}/agents/response.py +1 -1
  134. pygpt_net/provider/{gpt → api/openai}/assistants.py +1 -1
  135. pygpt_net/provider/{gpt → api/openai}/chat.py +15 -8
  136. pygpt_net/provider/{gpt → api/openai}/completion.py +1 -1
  137. pygpt_net/provider/{gpt → api/openai}/image.py +1 -1
  138. pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
  139. pygpt_net/provider/api/openai/realtime/client.py +1828 -0
  140. pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
  141. pygpt_net/provider/{gpt → api/openai}/remote_tools.py +1 -1
  142. pygpt_net/provider/{gpt → api/openai}/responses.py +34 -20
  143. pygpt_net/provider/{gpt → api/openai}/store.py +2 -2
  144. pygpt_net/provider/{gpt → api/openai}/vision.py +1 -1
  145. pygpt_net/provider/api/openai/worker/__init__.py +0 -0
  146. pygpt_net/provider/{gpt → api/openai}/worker/assistants.py +4 -4
  147. pygpt_net/provider/{gpt → api/openai}/worker/importer.py +10 -10
  148. pygpt_net/provider/audio_input/google_genai.py +103 -0
  149. pygpt_net/provider/audio_input/openai_whisper.py +1 -1
  150. pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
  151. pygpt_net/provider/audio_output/openai_tts.py +9 -6
  152. pygpt_net/provider/core/config/patch.py +26 -0
  153. pygpt_net/provider/core/model/patch.py +20 -0
  154. pygpt_net/provider/core/preset/json_file.py +2 -4
  155. pygpt_net/provider/llms/anthropic.py +2 -5
  156. pygpt_net/provider/llms/base.py +4 -3
  157. pygpt_net/provider/llms/google.py +8 -9
  158. pygpt_net/provider/llms/openai.py +1 -1
  159. pygpt_net/provider/loaders/hub/image_vision/base.py +1 -1
  160. pygpt_net/ui/dialog/preset.py +71 -55
  161. pygpt_net/ui/layout/toolbox/footer.py +16 -0
  162. pygpt_net/ui/layout/toolbox/image.py +5 -0
  163. pygpt_net/ui/main.py +6 -4
  164. pygpt_net/ui/widget/option/combo.py +15 -1
  165. pygpt_net/utils.py +9 -0
  166. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +55 -55
  167. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +181 -135
  168. pygpt_net/core/audio/backend/pyaudio.py +0 -554
  169. /pygpt_net/{provider/gpt/agents → controller/chat/handler}/__init__.py +0 -0
  170. /pygpt_net/{provider/gpt/worker → core/realtime}/__init__.py +0 -0
  171. /pygpt_net/provider/{gpt → api/openai}/agents/client.py +0 -0
  172. /pygpt_net/provider/{gpt → api/openai}/agents/remote_tools.py +0 -0
  173. /pygpt_net/provider/{gpt → api/openai}/agents/utils.py +0 -0
  174. /pygpt_net/provider/{gpt → api/openai}/audio.py +0 -0
  175. /pygpt_net/provider/{gpt → api/openai}/computer.py +0 -0
  176. /pygpt_net/provider/{gpt → api/openai}/container.py +0 -0
  177. /pygpt_net/provider/{gpt → api/openai}/summarizer.py +0 -0
  178. /pygpt_net/provider/{gpt → api/openai}/tools.py +0 -0
  179. /pygpt_net/provider/{gpt → api/openai}/utils.py +0 -0
  180. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
  181. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
  182. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: pygpt-net
- Version: 2.6.29
+ Version: 2.6.31
  Summary: Desktop AI Assistant powered by: OpenAI GPT-5, GPT-4, o1, o3, Gemini, Claude, Grok, DeepSeek, and other models supported by Llama Index, and Ollama. Chatbot, agents, completion, image generation, vision analysis, speech-to-text, plugins, internet access, file handling, command execution and more.
  License: MIT
  Keywords: ai,api,api key,app,assistant,bielik,chat,chatbot,chatgpt,claude,dall-e,deepseek,desktop,gemini,gpt,gpt-3.5,gpt-4,gpt-4-vision,gpt-4o,gpt-5,gpt-oss,gpt3.5,gpt4,grok,langchain,llama-index,llama3,mistral,o1,o3,ollama,openai,presets,py-gpt,py_gpt,pygpt,pyside,qt,text completion,tts,ui,vision,whisper
@@ -117,7 +117,7 @@ Description-Content-Type: text/markdown

  [![pygpt](https://snapcraft.io/pygpt/badge.svg)](https://snapcraft.io/pygpt)

- Release: **2.6.29** | build: **2025-08-27** | Python: **>=3.10, <3.14**
+ Release: **2.6.31** | build: **2025-09-01** | Python: **>=3.10, <3.14**

  > Official website: https://pygpt.net | Documentation: https://pygpt.readthedocs.io
  >
@@ -157,7 +157,7 @@ You can download compiled 64-bit versions for Windows and Linux here: https://py

  - Desktop AI Assistant for `Linux`, `Windows` and `Mac`, written in Python.
  - Works similarly to `ChatGPT`, but locally (on a desktop computer).
- - 12 modes of operation: Chat, Chat with Files, Chat with Audio, Research (Perplexity), Completion, Image generation, Vision, Assistants, Experts, Computer use, Agents and Autonomous Mode.
+ - 11 modes of operation: Chat, Chat with Files, Realtime + audio, Research (Perplexity), Completion, Image generation, Assistants, Experts, Computer use, Agents and Autonomous Mode.
  - Supports multiple models like `OpenAI GPT-5`, `GPT-4`, `o1`, `o3`, `o4`, `Google Gemini`, `Anthropic Claude`, `xAI Grok`, `DeepSeek V3/R1`, `Perplexity / Sonar`, and any model accessible through `LlamaIndex` and `Ollama` such as `DeepSeek`, `gpt-oss`, `Llama 3`, `Mistral`, `Bielik`, etc.
  - Chat with your own Files: integrated `LlamaIndex` support: chat with data such as: `txt`, `pdf`, `csv`, `html`, `md`, `docx`, `json`, `epub`, `xlsx`, `xml`, webpages, `Google`, `GitHub`, video/audio, images and other data types, or use conversation history as additional context provided to the model.
  - Built-in vector databases support and automated files and data embedding.
@@ -181,7 +181,7 @@ You can download compiled 64-bit versions for Windows and Linux here: https://py
  - Includes simple painter / drawing tool.
  - Supports multiple languages.
  - Requires no previous knowledge of using AI models.
- - Simplifies image generation using `DALL-E`.
+ - Simplifies image generation using image models like `DALL-E` and `Imagen`.
  - Fully configurable.
  - Themes support.
  - Real-time code syntax highlighting.
@@ -439,9 +439,9 @@ Alternatively, you can try removing snap and reinstalling it:
  `sudo snap install pygpt`


- **Access to microphone and audio in Windows version:**
+ **Access to a microphone and audio in Windows version:**

- If you have a problems with audio or microphone in the non-binary PIP/Python version on Windows, check to see if FFmpeg is installed. If it's not, install it and add it to the PATH. You can find a tutorial on how to do this here: https://phoenixnap.com/kb/ffmpeg-windows. The binary version already includes FFmpeg.
+ If you have a problems with audio or a microphone in the non-binary PIP/Python version on Windows, check to see if FFmpeg is installed. If it's not, install it and add it to the PATH. You can find a tutorial on how to do this here: https://phoenixnap.com/kb/ffmpeg-windows. The binary version already includes FFmpeg.

  **Windows and VC++ Redistributable**

@@ -519,9 +519,16 @@ Here, you can add or manage API keys for any supported provider.

  **+ Inline Vision and Image generation**

- This mode in **PyGPT** mirrors `ChatGPT`, allowing you to chat with models such as `GPT-5`, `GPT-4`, `o1`, `o3`, and`Claude`, `Gemini`, `Grok`, `Perplexity (sonar)`, `Deepseek`, and others. It works by using the `Responses` and `ChatCompletions` OpenAI API (or compatible). You can select the API endpoint to use in: `Config -> Settings -> API Keys`.
+ In **PyGPT**, this mode mirrors `ChatGPT`, allowing you to chat with models like `GPT-5`, `GPT-4`, `o1`, `o3`, `Claude`, `Gemini`, `Grok`, `Perplexity (Sonar)`, `Deepseek`, and more. It works using the OpenAI API `Responses` and `ChatCompletions`, or the `Google GenAI SDK` if the Google native client is enabled. You can choose the API endpoint for `ChatCompletions` in `Config -> Settings -> API Keys`.

- **Tip: This mode directly uses the OpenAI SDK. Other models, such as Gemini, Claude, Grok, Sonar, or Llama3, are supported in Chat mode via LlamaIndex or OpenAI API compatible endpoints (if available), which the application switches to in the background when working with models other than OpenAI.**
+ **Tip:** This mode uses the provider SDK directly. If there's no native client built into the app, models like Gemini, Claude, Grok, Sonar, or Llama3 are supported in Chat mode via LlamaIndex or compatible OpenAI API endpoints. The app automatically switches to these endpoints when using non-OpenAI models.
+
+ Currently built-in native clients:
+
+ - OpenAI SDK
+ - Google GenAI SDK
+
+ Support for Anthropic and xAI native clients is coming soon.

  The main part of the interface is a chat window where you see your conversations. Below it is a message box for typing. On the right side, you can set up or change the model and system prompt. You can also save these settings as presets to easily switch between models or tasks.

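For orientation, the three client call styles referenced in the hunk above look roughly like this when used directly. This is a minimal sketch against the public OpenAI and Google GenAI SDKs, not code taken from the package; model names are placeholders.

```python
# Minimal sketch of the client APIs mentioned above (OpenAI Responses,
# OpenAI Chat Completions, Google GenAI SDK); model names are placeholders.
from openai import OpenAI
from google import genai

oai = OpenAI()  # reads OPENAI_API_KEY from the environment

# Responses API
r = oai.responses.create(model="gpt-4o", input="Hello")
print(r.output_text)

# Chat Completions API
c = oai.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello"}],
)
print(c.choices[0].message.content)

# Google GenAI SDK (the path used when the Google native client is enabled)
gg = genai.Client()  # reads GEMINI_API_KEY / GOOGLE_API_KEY from the environment
g = gg.models.generate_content(model="gemini-2.5-flash", contents="Hello")
print(g.text)
```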
@@ -655,11 +662,11 @@ In the `Settings -> LlamaIndex -> Data loaders` section you can define the addit

  ## Chat with Audio

- This mode works like the Chat mode but with native support for audio input and output using a multimodal model - `gpt-4o-audio`. In this mode, audio input and output are directed to and from the model directly, without the use of external plugins. This enables faster and better audio communication.
+ This mode works like the Chat mode but with native support for audio input and output using a Realtime and Live APIs. In this mode, audio input and output are directed to and from the model directly, without the use of external plugins. This enables faster and better audio communication.

- More info: https://platform.openai.com/docs/guides/audio/quickstart
+ Currently, in beta.

- Currently, in beta. Tool and function calls are not enabled in this mode.
+ At this moment, only OpenAI real-time models (via the Realtime API) and Google Gemini real-time models (via the Live API) are supported.

  ## Research

@@ -683,17 +690,16 @@ From version `2.0.107` the `davinci` models are deprecated and has been replaced with

  ## Image generation

- ### DALL-E 3
+ ### OpenAI DALL-E 3 / Google Imagen 3 and 4

- **PyGPT** enables quick and easy image creation with `DALL-E 3` or `gpt-image-1`.
- The older model version, `DALL-E 2`, is also accessible. Generating images is akin to a chat conversation - a user's prompt triggers the generation, followed by downloading, saving to the computer,
- and displaying the image onscreen. You can send raw prompt to `DALL-E` in `Image generation` mode or ask the model for the best prompt.
+ **PyGPT** enables quick and easy image creation with image models like `DALL-E 3`, `gpt-image-1` or `Google Imagen`.
+ Generating images is akin to a chat conversation - a user's prompt triggers the generation, followed by downloading, saving to the computer, and displaying the image onscreen. You can send raw prompt to the model in `Image generation` mode or ask the model for the best prompt.

  ![v3_img](https://github.com/szczyglis-dev/py-gpt/raw/master/docs/source/images/v3_img.png)

- Image generation using DALL-E is available in every mode via plugin `Image Generation (inline)`. Just ask any model, in any mode, like e.g. GPT-4 to generate an image and it will do it inline, without need to mode change.
+ Image generation using image models is also available in every mode via plugin `Image Generation (inline)`. Just ask any model, in any mode, like e.g. GPT or Gemini to generate an image and it will do it inline, without need to mode change.

- If you want to generate images (using DALL-E) directly in chat you must enable plugin **Image generation (inline)** in the Plugins menu.
+ If you want to generate images directly in chat you must enable plugin **Image generation (inline)** in the Plugins menu.
  Plugin allows you to generate images in Chat mode:

  ![v3_img_chat](https://github.com/szczyglis-dev/py-gpt/raw/master/docs/source/images/v3_img_chat.png)
@@ -708,7 +714,7 @@ the bottom of the screen. This replaces the conversation temperature slider when

  There is an option for switching prompt generation mode.

- If **Raw Mode** is enabled, DALL-E will receive the prompt exactly as you have provided it.
+ If **Raw Mode** is enabled, a model will receive the prompt exactly as you have provided it.
  If **Raw Mode** is disabled, a model will generate the best prompt for you based on your instructions.

  ### Image storage
@@ -724,31 +730,6 @@ prompts for creating new images.

  Images are stored in ``img`` directory in **PyGPT** user data folder.

-
- ## Vision
-
- This mode enables image analysis using the `GPT-5`, `GPT-4o` and other vision (multimodal) models. Functioning much like the chat mode,
- it also allows you to upload images or provide URLs to images. The vision feature can analyze both local
- images and those found online.
-
- Vision is also integrated into any chat mode via plugin `Vision (inline)`. Just enable the plugin and use Vision in other work modes, such as Chat or Chat with Files.
-
- Vision mode also includes real-time video capture from camera. To capture image from camera and append it to chat just click on video at left side. You can also enable `Auto capture` - image will be captured and appended to chat message every time you send message.
-
- **1) Video camera real-time image capture**
-
- ![v3_vision_chat](https://github.com/szczyglis-dev/py-gpt/raw/master/docs/source/images/v3_vision_chat.png)
-
- **2) you can also provide an image URL**
-
- ![v2_mode_vision](https://github.com/szczyglis-dev/py-gpt/raw/master/docs/source/images/v2_mode_vision.png)
-
- **3) or you can just upload your local images or use the inline Vision in the standard chat mode:**
-
- ![v2_mode_vision_upload](https://github.com/szczyglis-dev/py-gpt/raw/master/docs/source/images/v2_mode_vision_upload.png)
-
- **Tip:** When using `Vision (inline)` by utilizing a plugin in standard mode, such as `Chat` (not `Vision` mode), the `+ Vision` label will appear at the bottom of the Chat window.
-

  ## Assistants
  This mode uses the OpenAI's **Assistants API**.
@@ -1167,7 +1148,7 @@ The name of the currently active profile is shown as (Profile Name) in the windo

  ## Built-in models

- PyGPT has a preconfigured list of models (as of 2025-07-26):
+ PyGPT has a preconfigured list of models (as of 2025-08-31):

  - `bielik-11b-v2.3-instruct:Q4_K_M` (Ollama)
  - `chatgpt-4o-latest` (OpenAI)
@@ -1191,6 +1172,7 @@ PyGPT has a preconfigured list of models (as of 2025-07-26):
  - `gemini-1.5-pro` (Google)
  - `gemini-2.0-flash-exp` (Google)
  - `gemini-2.5-flash` (Google)
+ - `gemini-2.5-flash-preview-native-audio-dialog` (Google, real-time)
  - `gemini-2.5-pro` (Google)
  - `gpt-3.5-turbo` (OpenAI)
  - `gpt-3.5-turbo-16k` (OpenAI)
@@ -1203,7 +1185,7 @@ PyGPT has a preconfigured list of models (as of 2025-07-26):
  - `gpt-4.1-mini` (OpenAI)
  - `gpt-4.1-nano` (OpenAI)
  - `gpt-4o` (OpenAI)
- - `gpt-4o-audio-preview` (OpenAI)
+ - `gpt-4o-realtime-preview` (OpenAI, real-time)
  - `gpt-4o-mini` (OpenAI)
  - `gpt-5` (OpenAI)
  - `gpt-5-mini` (OpenAI)
@@ -1211,6 +1193,7 @@ PyGPT has a preconfigured list of models (as of 2025-07-26):
  - `gpt-image-1` (OpenAI)
  - `gpt-oss:20b` (OpenAI - via Ollama and HuggingFace Router)
  - `gpt-oss:120b` (OpenAI - via Ollama and HuggingFace Router)
+ - `gpt-realtime` (OpenAI, real-time)
  - `grok-2-vision` (xAI)
  - `grok-3` (xAI)
  - `grok-3-fast` (xAI)
@@ -2455,17 +2438,15 @@ Enable/disable remote tools, like Web Search or Image generation to use in OpenA

  - `Experts: Master prompt`: Prompt to instruct how to handle experts.

- - `DALL-E: image generate`: Prompt for generating prompts for DALL-E (if raw-mode is disabled).
+ - `Image generate`: Prompt for generating prompts for image generation (if raw-mode is disabled).

  **Images**

- - `DALL-E Image size`: The resolution of the generated images (DALL-E). Default: 1792x1024.
+ - `Image size`: The resolution of the generated images (DALL-E). Default: 1024x1024.

- - `DALL-E Image quality`: The image quality of the generated images (DALL-E). Default: standard.
+ - `Image quality`: The image quality of the generated images (DALL-E). Default: standard.

- - `Open image dialog after generate`: Enable the image dialog to open after an image is generated in Image mode.
-
- - `DALL-E: prompt generation model`: Model used for generating prompts for DALL-E (if raw-mode is disabled).
+ - `Prompt generation model`: Model used for generating prompts for image generation (if raw-mode is disabled).

  **Vision**

@@ -2499,6 +2480,10 @@ Enable/disable remote tools, like Web Search or Image generation to use in OpenA

  - `Continuous Audio Recording (Chunks)`: Enable recording in chunks for long audio recordings in notepad (voice notes).

+ - `VAD prefix padding (in ms)`: VAD prefix padding in ms, default: 300ms (Realtime audio mode)
+
+ - `VAD end silence (in ms)`: VAD end silence in ms, default: 2000ms (Realtime audio mode)
+
  **Indexes / LlamaIndex**

  **General**
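The two VAD settings added in the hunk above configure voice-activity turn detection for the Realtime audio mode. As a rough orientation, they line up with the server-side VAD parameters of a realtime session config; the sketch below uses OpenAI Realtime API field names, and the exact mapping inside PyGPT's own config is an assumption, not taken from the package.

```python
# Hedged sketch: how the two VAD settings above could map onto server-side
# turn detection in a realtime session config (OpenAI Realtime API field
# names; whether PyGPT forwards the values exactly like this is an assumption).
session_config = {
    "turn_detection": {
        "type": "server_vad",
        "prefix_padding_ms": 300,     # "VAD prefix padding (in ms)"
        "silence_duration_ms": 2000,  # "VAD end silence (in ms)"
    },
}
```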
@@ -2637,10 +2622,12 @@ Enable/disable remote tools, like Web Search or Image generation to use in OpenA

  - `Check for updates in background`: Enables checking for updates in background (checking every 5 minutes). Default: True.

- **Developer**
+ **Debug**

  - `Show debug menu`: Enables debug (developer) menu.

+ - `Log level`: toggle log level (ERROR|WARNING|INFO|DEBUG)
+
  - `Log and debug context`: Enables logging of context input/output.

  - `Log and debug events`: Enables logging of event dispatch.
@@ -2657,8 +2644,6 @@ Enable/disable remote tools, like Web Search or Image generation to use in OpenA

  - `Log Assistants usage to console`: Enables logging of Assistants API usage to console.

- - `Log level`: toggle log level (ERROR|WARNING|INFO|DEBUG)
-

  ## JSON files

@@ -3363,7 +3348,7 @@ These wrappers are loaded into the application during startup using `launcher.ad
  ```python
  # app.py

- from pygpt_net.provider.llms.openai import OpenAILLM
+ from pygpt_net.provider.api.openai import OpenAILLM
  from pygpt_net.provider.llms.azure_openai import AzureOpenAILLM
  from pygpt_net.provider.llms.anthropic import AnthropicLLM
  from pygpt_net.provider.llms.hugging_face import HuggingFaceLLM
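The hunk above only moves an import: the OpenAI LLM wrapper now lives under `pygpt_net.provider.api.openai`. A hedged sketch of the registration pattern the surrounding README text describes (wrappers added once at startup via `launcher.add_llm()`); the `register_llms` helper is illustrative and not part of the package, only the import paths come from the diff.

```python
# Hedged sketch of the startup registration described above; only the import
# paths are taken from the diff, the helper function itself is illustrative.
from pygpt_net.provider.api.openai import OpenAILLM         # new location per this diff
from pygpt_net.provider.llms.anthropic import AnthropicLLM  # unchanged location

def register_llms(launcher):
    """Add each LLM wrapper once during application startup."""
    launcher.add_llm(OpenAILLM())
    launcher.add_llm(AnthropicLLM())
```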
@@ -3575,6 +3560,21 @@ may consume additional tokens that are not displayed in the main window.

  ## Recent changes:

+ **2.6.31 (2025-09-01)**
+
+ - Chat with Audio mode renamed to Realtime + audio.
+ - Added support for real-time audio models from OpenAI (Realtime API) and Google (Live API), featuring real-time audio integration (beta).
+ - Introduced new predefined models: gpt-realtime, gpt-4o-realtime-preview, and gemini-2.5-flash-preview-native-audio-dialog.
+ - Included Google Gen AI audio input and output providers in the Audio Input/Output plugins.
+ - Added URL Context remote tool support in Google Gen AI.
+
+ **2.6.30 (2025-08-29)**
+
+ - Added native Google GenAI API support (beta); live audio is not supported yet (#132).
+ - Added new predefined models for image generation: Google Imagen3 and Imagen4.
+ - Optimized token usage in the Responses API.
+ - Removed Vision mode (it is now integrated into Chat).
+
  **2.6.29 (2025-08-28)**

  - Verbose options have been moved to the Developer section in settings.