pygpt-net 2.6.30__py3-none-any.whl → 2.6.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. pygpt_net/CHANGELOG.txt +15 -0
  2. pygpt_net/__init__.py +3 -3
  3. pygpt_net/app.py +7 -1
  4. pygpt_net/app_core.py +3 -1
  5. pygpt_net/config.py +3 -1
  6. pygpt_net/controller/__init__.py +9 -2
  7. pygpt_net/controller/audio/audio.py +38 -1
  8. pygpt_net/controller/audio/ui.py +2 -2
  9. pygpt_net/controller/chat/audio.py +1 -8
  10. pygpt_net/controller/chat/common.py +23 -62
  11. pygpt_net/controller/chat/handler/__init__.py +0 -0
  12. pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
  13. pygpt_net/controller/chat/output.py +8 -3
  14. pygpt_net/controller/chat/stream.py +3 -1071
  15. pygpt_net/controller/chat/text.py +3 -2
  16. pygpt_net/controller/kernel/kernel.py +11 -3
  17. pygpt_net/controller/kernel/reply.py +5 -1
  18. pygpt_net/controller/lang/custom.py +2 -2
  19. pygpt_net/controller/media/__init__.py +12 -0
  20. pygpt_net/controller/media/media.py +115 -0
  21. pygpt_net/controller/realtime/__init__.py +12 -0
  22. pygpt_net/controller/realtime/manager.py +53 -0
  23. pygpt_net/controller/realtime/realtime.py +293 -0
  24. pygpt_net/controller/ui/mode.py +23 -2
  25. pygpt_net/controller/ui/ui.py +19 -1
  26. pygpt_net/core/audio/audio.py +6 -1
  27. pygpt_net/core/audio/backend/native/__init__.py +12 -0
  28. pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
  29. pygpt_net/core/audio/backend/native/player.py +139 -0
  30. pygpt_net/core/audio/backend/native/realtime.py +250 -0
  31. pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
  32. pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
  33. pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
  34. pygpt_net/core/audio/backend/pyaudio/realtime.py +312 -0
  35. pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
  36. pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
  37. pygpt_net/core/audio/backend/shared/__init__.py +38 -0
  38. pygpt_net/core/audio/backend/shared/conversions.py +211 -0
  39. pygpt_net/core/audio/backend/shared/envelope.py +38 -0
  40. pygpt_net/core/audio/backend/shared/player.py +137 -0
  41. pygpt_net/core/audio/backend/shared/rt.py +52 -0
  42. pygpt_net/core/audio/capture.py +5 -0
  43. pygpt_net/core/audio/output.py +14 -2
  44. pygpt_net/core/audio/whisper.py +6 -2
  45. pygpt_net/core/bridge/bridge.py +2 -1
  46. pygpt_net/core/bridge/worker.py +4 -1
  47. pygpt_net/core/dispatcher/dispatcher.py +37 -1
  48. pygpt_net/core/events/__init__.py +2 -1
  49. pygpt_net/core/events/realtime.py +55 -0
  50. pygpt_net/core/image/image.py +56 -5
  51. pygpt_net/core/realtime/__init__.py +0 -0
  52. pygpt_net/core/realtime/options.py +87 -0
  53. pygpt_net/core/realtime/shared/__init__.py +0 -0
  54. pygpt_net/core/realtime/shared/audio.py +213 -0
  55. pygpt_net/core/realtime/shared/loop.py +64 -0
  56. pygpt_net/core/realtime/shared/session.py +59 -0
  57. pygpt_net/core/realtime/shared/text.py +37 -0
  58. pygpt_net/core/realtime/shared/tools.py +276 -0
  59. pygpt_net/core/realtime/shared/turn.py +38 -0
  60. pygpt_net/core/realtime/shared/types.py +16 -0
  61. pygpt_net/core/realtime/worker.py +160 -0
  62. pygpt_net/core/render/web/body.py +24 -3
  63. pygpt_net/core/text/utils.py +54 -2
  64. pygpt_net/core/types/__init__.py +1 -0
  65. pygpt_net/core/types/image.py +54 -0
  66. pygpt_net/core/video/__init__.py +12 -0
  67. pygpt_net/core/video/video.py +290 -0
  68. pygpt_net/data/config/config.json +26 -5
  69. pygpt_net/data/config/models.json +221 -103
  70. pygpt_net/data/config/settings.json +244 -6
  71. pygpt_net/data/css/web-blocks.css +6 -0
  72. pygpt_net/data/css/web-chatgpt.css +6 -0
  73. pygpt_net/data/css/web-chatgpt_wide.css +6 -0
  74. pygpt_net/data/locale/locale.de.ini +35 -7
  75. pygpt_net/data/locale/locale.en.ini +56 -17
  76. pygpt_net/data/locale/locale.es.ini +35 -7
  77. pygpt_net/data/locale/locale.fr.ini +35 -7
  78. pygpt_net/data/locale/locale.it.ini +35 -7
  79. pygpt_net/data/locale/locale.pl.ini +38 -7
  80. pygpt_net/data/locale/locale.uk.ini +35 -7
  81. pygpt_net/data/locale/locale.zh.ini +31 -3
  82. pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
  83. pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
  84. pygpt_net/data/locale/plugin.cmd_web.en.ini +8 -0
  85. pygpt_net/item/model.py +22 -1
  86. pygpt_net/plugin/audio_input/plugin.py +37 -4
  87. pygpt_net/plugin/audio_input/simple.py +57 -8
  88. pygpt_net/plugin/cmd_files/worker.py +3 -0
  89. pygpt_net/provider/api/google/__init__.py +76 -7
  90. pygpt_net/provider/api/google/audio.py +8 -1
  91. pygpt_net/provider/api/google/chat.py +45 -6
  92. pygpt_net/provider/api/google/image.py +226 -86
  93. pygpt_net/provider/api/google/realtime/__init__.py +12 -0
  94. pygpt_net/provider/api/google/realtime/client.py +1945 -0
  95. pygpt_net/provider/api/google/realtime/realtime.py +186 -0
  96. pygpt_net/provider/api/google/video.py +364 -0
  97. pygpt_net/provider/api/openai/__init__.py +22 -2
  98. pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
  99. pygpt_net/provider/api/openai/realtime/client.py +1828 -0
  100. pygpt_net/provider/api/openai/realtime/realtime.py +193 -0
  101. pygpt_net/provider/audio_input/google_genai.py +103 -0
  102. pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
  103. pygpt_net/provider/audio_output/google_tts.py +0 -12
  104. pygpt_net/provider/audio_output/openai_tts.py +8 -5
  105. pygpt_net/provider/core/config/patch.py +241 -178
  106. pygpt_net/provider/core/model/patch.py +28 -2
  107. pygpt_net/provider/llms/google.py +8 -9
  108. pygpt_net/provider/web/duckduck_search.py +212 -0
  109. pygpt_net/ui/layout/toolbox/audio.py +55 -0
  110. pygpt_net/ui/layout/toolbox/footer.py +14 -42
  111. pygpt_net/ui/layout/toolbox/image.py +7 -13
  112. pygpt_net/ui/layout/toolbox/raw.py +52 -0
  113. pygpt_net/ui/layout/toolbox/split.py +48 -0
  114. pygpt_net/ui/layout/toolbox/toolbox.py +8 -8
  115. pygpt_net/ui/layout/toolbox/video.py +49 -0
  116. pygpt_net/ui/widget/option/combo.py +15 -1
  117. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/METADATA +46 -22
  118. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/RECORD +121 -73
  119. pygpt_net/core/audio/backend/pyaudio.py +0 -554
  120. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/LICENSE +0 -0
  121. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/WHEEL +0 -0
  122. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/entry_points.txt +0 -0
@@ -79,6 +79,7 @@ class OptionCombo(QWidget):
79
79
  self.combo = NoScrollCombo()
80
80
  self.combo.currentIndexChanged.connect(self.on_combo_change)
81
81
  self.current_id = None
82
+ self.locked = False
82
83
 
83
84
  # add items
84
85
  self.update()
@@ -114,6 +115,12 @@ class OptionCombo(QWidget):
114
115
  self.combo.addItem(value, key)
115
116
  else:
116
117
  self.combo.addItem(item, item)
118
+ elif type(self.keys) is dict:
119
+ for key, value in self.keys.items():
120
+ if key.startswith("separator::"):
121
+ self.combo.addSeparator(value)
122
+ else:
123
+ self.combo.addItem(value, key)
117
124
 
118
125
  def set_value(self, value):
119
126
  """
@@ -135,16 +142,21 @@ class OptionCombo(QWidget):
135
142
  """
136
143
  return self.current_id
137
144
 
138
- def set_keys(self, keys):
145
+ def set_keys(self, keys, lock: bool = False):
139
146
  """
140
147
  Set keys
141
148
 
142
149
  :param keys: keys
150
+ :param lock: lock current value if True
143
151
  """
152
+ if lock:
153
+ self.locked = True # lock on_change
144
154
  self.keys = keys
145
155
  self.option["keys"] = keys
146
156
  self.combo.clear()
147
157
  self.update()
158
+ if lock:
159
+ self.locked = False
148
160
 
149
161
  def on_combo_change(self, index):
150
162
  """
@@ -153,6 +165,8 @@ class OptionCombo(QWidget):
153
165
  :param index: combo index
154
166
  :return:
155
167
  """
168
+ if self.locked:
169
+ return
156
170
  self.current_id = self.combo.itemData(index)
157
171
  self.window.controller.config.combo.on_update(self.parent_id, self.id, self.option, self.current_id)
158
172
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: pygpt-net
3
- Version: 2.6.30
3
+ Version: 2.6.32
4
4
  Summary: Desktop AI Assistant powered by: OpenAI GPT-5, GPT-4, o1, o3, Gemini, Claude, Grok, DeepSeek, and other models supported by Llama Index, and Ollama. Chatbot, agents, completion, image generation, vision analysis, speech-to-text, plugins, internet access, file handling, command execution and more.
5
5
  License: MIT
6
6
  Keywords: ai,api,api key,app,assistant,bielik,chat,chatbot,chatgpt,claude,dall-e,deepseek,desktop,gemini,gpt,gpt-3.5,gpt-4,gpt-4-vision,gpt-4o,gpt-5,gpt-oss,gpt3.5,gpt4,grok,langchain,llama-index,llama3,mistral,o1,o3,ollama,openai,presets,py-gpt,py_gpt,pygpt,pyside,qt,text completion,tts,ui,vision,whisper
@@ -30,6 +30,7 @@ Requires-Dist: beautifulsoup4 (>=4.13.5,<5.0.0)
30
30
  Requires-Dist: boto3 (>=1.40.17,<2.0.0)
31
31
  Requires-Dist: chromadb (>=0.5.20,<0.6.0)
32
32
  Requires-Dist: croniter (>=2.0.7,<3.0.0)
33
+ Requires-Dist: ddgs (>=9.5.5,<10.0.0)
33
34
  Requires-Dist: docker (>=7.1.0,<8.0.0)
34
35
  Requires-Dist: docx2txt (>=0.8,<0.9)
35
36
  Requires-Dist: gkeepapi (>=0.15.1,<0.16.0)
@@ -117,7 +118,7 @@ Description-Content-Type: text/markdown
117
118
 
118
119
  [![pygpt](https://snapcraft.io/pygpt/badge.svg)](https://snapcraft.io/pygpt)
119
120
 
120
- Release: **2.6.30** | build: **2025-08-29** | Python: **>=3.10, <3.14**
121
+ Release: **2.6.32** | build: **2025-09-02** | Python: **>=3.10, <3.14**
121
122
 
122
123
  > Official website: https://pygpt.net | Documentation: https://pygpt.readthedocs.io
123
124
  >
@@ -129,9 +130,9 @@ Release: **2.6.30** | build: **2025-08-29** | Python: **>=3.10, <3.14**
129
130
 
130
131
  ## Overview
131
132
 
132
- **PyGPT** is **all-in-one** Desktop AI Assistant that provides direct interaction with OpenAI language models, including `GPT-5`, `GPT-4`, `o1`, `o3` and more, through the `OpenAI API`. By utilizing `LlamaIndex`, the application also supports alternative LLMs, like those available on `HuggingFace`, locally available models via `Ollama` (like `gpt-oss`, `Llama 3`,`Mistral`, `DeepSeek V3/R1` or `Bielik`), and other models like `Google Gemini`, `Anthropic Claude`, `Perplexity / Sonar`, and `xAI Grok`.
133
+ **PyGPT** is an **all-in-one** Desktop AI Assistant that provides direct interaction with OpenAI language models, including `GPT-5`, `GPT-4`, `o1`, `o3` and more, through the `OpenAI API`. By utilizing other SDKs and `LlamaIndex`, the application also supports alternative LLMs, like those available on `HuggingFace`, locally available models via `Ollama` (like `gpt-oss`, `Llama 3`,`Mistral`, `DeepSeek V3/R1` or `Bielik`), and other models like `Google Gemini`, `Anthropic Claude`, `Perplexity / Sonar`, and `xAI Grok`.
133
134
 
134
- This assistant offers multiple modes of operation such as chat, assistants, agents, completions, and image-related tasks like image generation and image analysis. **PyGPT** has filesystem capabilities for file I/O, can generate and run Python code, execute system commands, execute custom commands and manage file transfers. It also allows models to perform web searches with the `Google` and `Microsoft Bing`.
135
+ This assistant offers multiple modes of operation such as chat, assistants, agents, completions, and image-related tasks like image generation and image analysis. **PyGPT** has filesystem capabilities for file I/O, can generate and run Python code, execute system commands, execute custom commands and manage file transfers. It also allows models to perform web searches via `DuckDuckGo`, `Google` and `Microsoft Bing`.
135
136
 
136
137
  For audio interactions, **PyGPT** includes speech synthesis using the `Microsoft Azure`, `Google`, `Eleven Labs` and `OpenAI` Text-To-Speech services. Additionally, it features speech recognition capabilities provided by `OpenAI Whisper`, `Google` and `Bing` enabling the application to understand spoken commands and transcribe audio inputs into text. It features context memory with save and load functionality, enabling users to resume interactions from predefined points in the conversation. Prompt creation and management are streamlined through an intuitive preset system.
137
138
 
@@ -157,13 +158,13 @@ You can download compiled 64-bit versions for Windows and Linux here: https://py
157
158
 
158
159
  - Desktop AI Assistant for `Linux`, `Windows` and `Mac`, written in Python.
159
160
  - Works similarly to `ChatGPT`, but locally (on a desktop computer).
160
- - 11 modes of operation: Chat, Chat with Files, Chat with Audio, Research (Perplexity), Completion, Image generation, Assistants, Experts, Computer use, Agents and Autonomous Mode.
161
+ - 11 modes of operation: Chat, Chat with Files, Realtime + audio, Research (Perplexity), Completion, Image and video generation, Assistants, Experts, Computer use, Agents and Autonomous Mode.
161
162
  - Supports multiple models like `OpenAI GPT-5`, `GPT-4`, `o1`, `o3`, `o4`, `Google Gemini`, `Anthropic Claude`, `xAI Grok`, `DeepSeek V3/R1`, `Perplexity / Sonar`, and any model accessible through `LlamaIndex` and `Ollama` such as `DeepSeek`, `gpt-oss`, `Llama 3`, `Mistral`, `Bielik`, etc.
162
163
  - Chat with your own Files: integrated `LlamaIndex` support: chat with data such as: `txt`, `pdf`, `csv`, `html`, `md`, `docx`, `json`, `epub`, `xlsx`, `xml`, webpages, `Google`, `GitHub`, video/audio, images and other data types, or use conversation history as additional context provided to the model.
163
164
  - Built-in vector databases support and automated files and data embedding.
164
165
  - Included support features for individuals with disabilities: customizable keyboard shortcuts, voice control, and translation of on-screen actions into audio via speech synthesis.
165
166
  - Handles and stores the full context of conversations (short and long-term memory).
166
- - Internet access via `Google` and `Microsoft Bing`.
167
+ - Internet access via `DuckDuckGo`, `Google` and `Microsoft Bing`.
167
168
  - Speech synthesis via `Microsoft Azure`, `Google`, `Eleven Labs` and `OpenAI` Text-To-Speech services.
168
169
  - Speech recognition via `OpenAI Whisper`, `Google` and `Microsoft Speech Recognition`.
169
170
  - Real-time video camera capture in Vision mode.
@@ -544,7 +545,7 @@ With this plugin, you can capture an image with your camera or attach an image a
544
545
 
545
546
  ![v3_vision_chat](https://github.com/szczyglis-dev/py-gpt/raw/master/docs/source/images/v3_vision_chat.png)
546
547
 
547
- **Image generation:** If you want to generate images (using DALL-E) directly in chat you must enable plugin `Image generation (inline)` in the Plugins menu.
548
+ **Image generation:** If you want to generate images directly in chat you must enable plugin `Image generation (inline)` in the Plugins menu.
548
549
  Plugin allows you to generate images in Chat mode:
549
550
 
550
551
  ![v3_img_chat](https://github.com/szczyglis-dev/py-gpt/raw/master/docs/source/images/v3_img_chat.png)
@@ -662,11 +663,11 @@ In the `Settings -> LlamaIndex -> Data loaders` section you can define the addit
662
663
 
663
664
  ## Chat with Audio
664
665
 
665
- This mode works like the Chat mode but with native support for audio input and output using a multimodal model - `gpt-4o-audio`. In this mode, audio input and output are directed to and from the model directly, without the use of external plugins. This enables faster and better audio communication.
666
+ This mode works like the Chat mode but with native support for audio input and output using the Realtime and Live APIs. In this mode, audio input and output are directed to and from the model directly, without the use of external plugins. This enables faster and better audio communication.
666
667
 
667
- More info: https://platform.openai.com/docs/guides/audio/quickstart
668
+ Currently, in beta.
668
669
 
669
- Currently, in beta. Tool and function calls are not enabled in this mode.
670
+ At this moment, only OpenAI real-time models (via the Realtime API) and Google Gemini real-time models (via the Live API) are supported.
670
671
 
671
672
  ## Research
672
673
 
@@ -688,7 +689,7 @@ Additionally, this mode offers options for labeling the AI and the user, making
688
689
 
689
690
From version `2.0.107` the `davinci` models are deprecated and have been replaced with the `gpt-3.5-turbo-instruct` model in Completion mode.
690
691
 
691
- ## Image generation
692
+ ## Image and video generation
692
693
 
693
694
  ### OpenAI DALL-E 3 / Google Imagen 3 and 4
694
695
 
@@ -704,6 +705,8 @@ Plugin allows you to generate images in Chat mode:
704
705
 
705
706
  ![v3_img_chat](https://github.com/szczyglis-dev/py-gpt/raw/master/docs/source/images/v3_img_chat.png)
706
707
 
708
+ **Video generation**: From version `2.6.32`, video generation (using `Google Veo 3`) is also available.
709
+
707
710
  ### Multiple variants
708
711
 
709
712
  You can generate up to **4 different variants** (DALL-E 2) for a given prompt in one session. DALL-E 3 allows one image.
@@ -1148,7 +1151,7 @@ The name of the currently active profile is shown as (Profile Name) in the windo
1148
1151
 
1149
1152
  ## Built-in models
1150
1153
 
1151
- PyGPT has a preconfigured list of models (as of 2025-07-26):
1154
+ PyGPT has a preconfigured list of models (as of 2025-08-31):
1152
1155
 
1153
1156
  - `bielik-11b-v2.3-instruct:Q4_K_M` (Ollama)
1154
1157
  - `chatgpt-4o-latest` (OpenAI)
@@ -1172,6 +1175,7 @@ PyGPT has a preconfigured list of models (as of 2025-07-26):
1172
1175
  - `gemini-1.5-pro` (Google)
1173
1176
  - `gemini-2.0-flash-exp` (Google)
1174
1177
  - `gemini-2.5-flash` (Google)
1178
+ - `gemini-2.5-flash-preview-native-audio-dialog` (Google, real-time)
1175
1179
  - `gemini-2.5-pro` (Google)
1176
1180
  - `gpt-3.5-turbo` (OpenAI)
1177
1181
  - `gpt-3.5-turbo-16k` (OpenAI)
@@ -1184,7 +1188,7 @@ PyGPT has a preconfigured list of models (as of 2025-07-26):
1184
1188
  - `gpt-4.1-mini` (OpenAI)
1185
1189
  - `gpt-4.1-nano` (OpenAI)
1186
1190
  - `gpt-4o` (OpenAI)
1187
- - `gpt-4o-audio-preview` (OpenAI)
1191
+ - `gpt-4o-realtime-preview` (OpenAI, real-time)
1188
1192
  - `gpt-4o-mini` (OpenAI)
1189
1193
  - `gpt-5` (OpenAI)
1190
1194
  - `gpt-5-mini` (OpenAI)
@@ -1192,6 +1196,7 @@ PyGPT has a preconfigured list of models (as of 2025-07-26):
1192
1196
  - `gpt-image-1` (OpenAI)
1193
1197
  - `gpt-oss:20b` (OpenAI - via Ollama and HuggingFace Router)
1194
1198
  - `gpt-oss:120b` (OpenAI - via Ollama and HuggingFace Router)
1199
+ - `gpt-realtime` (OpenAI, real-time)
1195
1200
  - `grok-2-vision` (xAI)
1196
1201
  - `grok-3` (xAI)
1197
1202
  - `grok-3-fast` (xAI)
@@ -1223,6 +1228,8 @@ PyGPT has a preconfigured list of models (as of 2025-07-26):
1223
1228
  - `sonar-pro` (Perplexity)
1224
1229
  - `sonar-reasoning` (Perplexity)
1225
1230
  - `sonar-reasoning-pro` (Perplexity)
1231
+ - `veo-3.0-generate-preview` (Google)
1232
+ - `veo-3.0-fast-generate-preview` (Google)
1226
1233
 
1227
1234
  All models are specified in the configuration file `models.json`, which you can customize.
1228
1235
  This file is located in your working directory. You can add new models provided directly by `OpenAI API` (or compatible) and those supported by `LlamaIndex` or `Ollama` to this file. Configuration for LlamaIndex is placed in `llama_index` key.
@@ -1240,20 +1247,20 @@ There is built-in support for those LLM providers:
1240
1247
  - `Anthropic`
1241
1248
  - `Azure OpenAI`
1242
1249
  - `Deepseek API`
1243
- - `Google`
1250
+ - `Google` (native SDK)
1244
1251
  - `HuggingFace API`
1245
1252
  - `HuggingFace Router` (wrapper for OpenAI compatible ChatCompletions)
1246
1253
  - `Local models` (OpenAI API compatible)
1247
1254
  - `Mistral AI`
1248
1255
  - `Ollama`
1249
- - `OpenAI`
1256
+ - `OpenAI` (native SDK)
1250
1257
  - `OpenRouter`
1251
1258
  - `Perplexity`
1252
1259
  - `xAI`
1253
1260
 
1254
1261
  ## How to use local or non-GPT models
1255
1262
 
1256
- ### Llama 3, Mistral, DeepSeek, and other local models
1263
+ ### Llama 3, Mistral, DeepSeek, Qwen, gpt-oss, and other local models
1257
1264
 
1258
1265
  How to use locally installed Llama 3, DeepSeek, Mistral, etc. models:
1259
1266
 
@@ -2436,17 +2443,15 @@ Enable/disable remote tools, like Web Search or Image generation to use in OpenA
2436
2443
 
2437
2444
  - `Experts: Master prompt`: Prompt to instruct how to handle experts.
2438
2445
 
2439
- - `DALL-E: image generate`: Prompt for generating prompts for DALL-E (if raw-mode is disabled).
2446
+ - `Image generate`: Prompt for generating prompts for image generation (if raw-mode is disabled).
2440
2447
 
2441
2448
  **Images**
2442
2449
 
2443
- - `DALL-E Image size`: The resolution of the generated images (DALL-E). Default: 1792x1024.
2444
-
2445
- - `DALL-E Image quality`: The image quality of the generated images (DALL-E). Default: standard.
2450
+ - `Image size`: The resolution of the generated images (DALL-E). Default: 1024x1024.
2446
2451
 
2447
- - `Open image dialog after generate`: Enable the image dialog to open after an image is generated in Image mode.
2452
+ - `Image quality`: The image quality of the generated images (DALL-E). Default: standard.
2448
2453
 
2449
- - `DALL-E: prompt generation model`: Model used for generating prompts for DALL-E (if raw-mode is disabled).
2454
+ - `Prompt generation model`: Model used for generating prompts for image generation (if raw-mode is disabled).
2450
2455
 
2451
2456
  **Vision**
2452
2457
 
@@ -2480,6 +2485,10 @@ Enable/disable remote tools, like Web Search or Image generation to use in OpenA
2480
2485
 
2481
2486
  - `Continuous Audio Recording (Chunks)`: Enable recording in chunks for long audio recordings in notepad (voice notes).
2482
2487
 
2488
+ - `VAD prefix padding (in ms)`: VAD prefix padding in ms, default: 300ms (Realtime audio mode)
2489
+
2490
+ - `VAD end silence (in ms)`: VAD end silence in ms, default: 2000ms (Realtime audio mode)
2491
+
2483
2492
  **Indexes / LlamaIndex**
2484
2493
 
2485
2494
  **General**
@@ -3556,6 +3565,21 @@ may consume additional tokens that are not displayed in the main window.
3556
3565
 
3557
3566
  ## Recent changes:
3558
3567
 
3568
+ **2.6.32 (2025-09-02)**
3569
+
3570
+ - Added video generation and support for Google Veo 3 models.
3571
+ - Introduced new predefined models: veo-3.0-generate-preview and veo-3.0-fast-generate-preview.
3572
+ - Integrated DuckDuckGo as a search provider in the WebSearch plugin.
3573
+ - Added "Loop" mode to Realtime + audio mode for automatic turn handling and continuous conversation without manually enabling the microphone.
3574
+
3575
+ **2.6.31 (2025-09-01)**
3576
+
3577
+ - Chat with Audio mode renamed to Realtime + audio.
3578
+ - Added support for real-time audio models from OpenAI (Realtime API) and Google (Live API), featuring real-time audio integration (beta).
3579
+ - Introduced new predefined models: gpt-realtime, gpt-4o-realtime-preview, and gemini-2.5-flash-preview-native-audio-dialog.
3580
+ - Included Google Gen AI audio input and output providers in the Audio Input/Output plugins.
3581
+ - Added URL Context remote tool support in Google Gen AI.
3582
+
3559
3583
  **2.6.30 (2025-08-29)**
3560
3584
 
3561
3585
  - Added native Google GenAI API support (beta); live audio is not supported yet (#132).