pygpt-net 2.6.30__py3-none-any.whl → 2.6.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pygpt_net/CHANGELOG.txt +8 -0
- pygpt_net/__init__.py +3 -3
- pygpt_net/app.py +4 -0
- pygpt_net/controller/__init__.py +5 -2
- pygpt_net/controller/audio/audio.py +25 -1
- pygpt_net/controller/audio/ui.py +2 -2
- pygpt_net/controller/chat/audio.py +1 -8
- pygpt_net/controller/chat/common.py +29 -3
- pygpt_net/controller/chat/handler/__init__.py +0 -0
- pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
- pygpt_net/controller/chat/output.py +8 -3
- pygpt_net/controller/chat/stream.py +3 -1071
- pygpt_net/controller/chat/text.py +3 -2
- pygpt_net/controller/kernel/kernel.py +11 -3
- pygpt_net/controller/kernel/reply.py +5 -1
- pygpt_net/controller/realtime/__init__.py +12 -0
- pygpt_net/controller/realtime/manager.py +53 -0
- pygpt_net/controller/realtime/realtime.py +268 -0
- pygpt_net/controller/ui/mode.py +7 -0
- pygpt_net/controller/ui/ui.py +19 -1
- pygpt_net/core/audio/audio.py +6 -1
- pygpt_net/core/audio/backend/native/__init__.py +12 -0
- pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
- pygpt_net/core/audio/backend/native/player.py +139 -0
- pygpt_net/core/audio/backend/native/realtime.py +250 -0
- pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
- pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
- pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
- pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
- pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
- pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
- pygpt_net/core/audio/backend/shared/__init__.py +38 -0
- pygpt_net/core/audio/backend/shared/conversions.py +211 -0
- pygpt_net/core/audio/backend/shared/envelope.py +38 -0
- pygpt_net/core/audio/backend/shared/player.py +137 -0
- pygpt_net/core/audio/backend/shared/rt.py +52 -0
- pygpt_net/core/audio/capture.py +5 -0
- pygpt_net/core/audio/output.py +13 -2
- pygpt_net/core/audio/whisper.py +6 -2
- pygpt_net/core/bridge/bridge.py +2 -1
- pygpt_net/core/bridge/worker.py +4 -1
- pygpt_net/core/dispatcher/dispatcher.py +37 -1
- pygpt_net/core/events/__init__.py +2 -1
- pygpt_net/core/events/realtime.py +55 -0
- pygpt_net/core/image/image.py +51 -1
- pygpt_net/core/realtime/__init__.py +0 -0
- pygpt_net/core/realtime/options.py +87 -0
- pygpt_net/core/realtime/shared/__init__.py +0 -0
- pygpt_net/core/realtime/shared/audio.py +213 -0
- pygpt_net/core/realtime/shared/loop.py +64 -0
- pygpt_net/core/realtime/shared/session.py +59 -0
- pygpt_net/core/realtime/shared/text.py +37 -0
- pygpt_net/core/realtime/shared/tools.py +276 -0
- pygpt_net/core/realtime/shared/turn.py +38 -0
- pygpt_net/core/realtime/shared/types.py +16 -0
- pygpt_net/core/realtime/worker.py +164 -0
- pygpt_net/core/types/__init__.py +1 -0
- pygpt_net/core/types/image.py +48 -0
- pygpt_net/data/config/config.json +10 -4
- pygpt_net/data/config/models.json +149 -103
- pygpt_net/data/config/settings.json +50 -0
- pygpt_net/data/locale/locale.de.ini +5 -5
- pygpt_net/data/locale/locale.en.ini +19 -13
- pygpt_net/data/locale/locale.es.ini +5 -5
- pygpt_net/data/locale/locale.fr.ini +5 -5
- pygpt_net/data/locale/locale.it.ini +5 -5
- pygpt_net/data/locale/locale.pl.ini +5 -5
- pygpt_net/data/locale/locale.uk.ini +5 -5
- pygpt_net/data/locale/locale.zh.ini +1 -1
- pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
- pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
- pygpt_net/plugin/audio_input/plugin.py +37 -4
- pygpt_net/plugin/audio_input/simple.py +57 -8
- pygpt_net/plugin/cmd_files/worker.py +3 -0
- pygpt_net/provider/api/google/__init__.py +39 -6
- pygpt_net/provider/api/google/audio.py +8 -1
- pygpt_net/provider/api/google/chat.py +45 -6
- pygpt_net/provider/api/google/image.py +226 -86
- pygpt_net/provider/api/google/realtime/__init__.py +12 -0
- pygpt_net/provider/api/google/realtime/client.py +1945 -0
- pygpt_net/provider/api/google/realtime/realtime.py +186 -0
- pygpt_net/provider/api/openai/__init__.py +22 -2
- pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
- pygpt_net/provider/api/openai/realtime/client.py +1828 -0
- pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
- pygpt_net/provider/audio_input/google_genai.py +103 -0
- pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
- pygpt_net/provider/audio_output/google_tts.py +0 -12
- pygpt_net/provider/audio_output/openai_tts.py +8 -5
- pygpt_net/provider/core/config/patch.py +15 -0
- pygpt_net/provider/core/model/patch.py +11 -0
- pygpt_net/provider/llms/google.py +8 -9
- pygpt_net/ui/layout/toolbox/footer.py +16 -0
- pygpt_net/ui/layout/toolbox/image.py +5 -0
- pygpt_net/ui/widget/option/combo.py +15 -1
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +26 -14
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +100 -62
- pygpt_net/core/audio/backend/pyaudio.py +0 -554
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0
pygpt_net/CHANGELOG.txt
CHANGED
|
@@ -1,3 +1,11 @@
|
|
|
1
|
+
2.6.31 (2025-09-01)
|
|
2
|
+
|
|
3
|
+
- Chat with Audio mode renamed to Realtime + audio.
|
|
4
|
+
- Added support for real-time audio models from OpenAI (Realtime API) and Google (Live API), featuring real-time audio integration (beta).
|
|
5
|
+
- Introduced new predefined models: gpt-realtime, gpt-4o-realtime-preview, and gemini-2.5-flash-preview-native-audio-dialog.
|
|
6
|
+
- Included Google Gen AI audio input and output providers in the Audio Input/Output plugins.
|
|
7
|
+
- Added URL Context remote tool support in Google Gen AI.
|
|
8
|
+
|
|
1
9
|
2.6.30 (2025-08-29)
|
|
2
10
|
|
|
3
11
|
- Added native Google GenAI API support (beta); live audio is not supported yet (#132).
|
pygpt_net/__init__.py
CHANGED
|
@@ -6,15 +6,15 @@
|
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
|
7
7
|
# MIT License #
|
|
8
8
|
# Created By : Marcin Szczygliński #
|
|
9
|
-
# Updated Date: 2025.
|
|
9
|
+
# Updated Date: 2025.09.01 00:00:00 #
|
|
10
10
|
# ================================================== #
|
|
11
11
|
|
|
12
12
|
__author__ = "Marcin Szczygliński"
|
|
13
13
|
__copyright__ = "Copyright 2025, Marcin Szczygliński"
|
|
14
14
|
__credits__ = ["Marcin Szczygliński"]
|
|
15
15
|
__license__ = "MIT"
|
|
16
|
-
__version__ = "2.6.30"
|
|
17
|
-
__build__ = "2025-08-29"
|
|
16
|
+
__version__ = "2.6.31"
|
|
17
|
+
__build__ = "2025-09-01"
|
|
18
18
|
__maintainer__ = "Marcin Szczygliński"
|
|
19
19
|
__github__ = "https://github.com/szczyglis-dev/py-gpt"
|
|
20
20
|
__report__ = "https://github.com/szczyglis-dev/py-gpt/issues"
|
pygpt_net/app.py
CHANGED
|
@@ -171,10 +171,12 @@ from pygpt_net.provider.audio_input.openai_whisper import OpenAIWhisper
|
|
|
171
171
|
from pygpt_net.provider.audio_input.openai_whisper_local import OpenAIWhisperLocal
|
|
172
172
|
from pygpt_net.provider.audio_input.google_speech_recognition import GoogleSpeechRecognition
|
|
173
173
|
from pygpt_net.provider.audio_input.google_cloud_speech_recognition import GoogleCloudSpeechRecognition
|
|
174
|
+
from pygpt_net.provider.audio_input.google_genai import GoogleGenAIAudioInput
|
|
174
175
|
from pygpt_net.provider.audio_input.bing_speech_recognition import BingSpeechRecognition
|
|
175
176
|
from pygpt_net.provider.audio_output.openai_tts import OpenAITextToSpeech
|
|
176
177
|
from pygpt_net.provider.audio_output.ms_azure_tts import MSAzureTextToSpeech
|
|
177
178
|
from pygpt_net.provider.audio_output.google_tts import GoogleTextToSpeech
|
|
179
|
+
from pygpt_net.provider.audio_output.google_genai_tts import GoogleGenAITextToSpeech
|
|
178
180
|
from pygpt_net.provider.audio_output.eleven_labs import ElevenLabsTextToSpeech
|
|
179
181
|
|
|
180
182
|
# web search engine providers
|
|
@@ -318,10 +320,12 @@ def run(**kwargs):
|
|
|
318
320
|
launcher.add_audio_input(OpenAIWhisperLocal())
|
|
319
321
|
launcher.add_audio_input(GoogleSpeechRecognition())
|
|
320
322
|
launcher.add_audio_input(GoogleCloudSpeechRecognition())
|
|
323
|
+
launcher.add_audio_input(GoogleGenAIAudioInput())
|
|
321
324
|
launcher.add_audio_input(BingSpeechRecognition())
|
|
322
325
|
launcher.add_audio_output(OpenAITextToSpeech())
|
|
323
326
|
launcher.add_audio_output(MSAzureTextToSpeech())
|
|
324
327
|
launcher.add_audio_output(GoogleTextToSpeech())
|
|
328
|
+
launcher.add_audio_output(GoogleGenAITextToSpeech())
|
|
325
329
|
launcher.add_audio_output(ElevenLabsTextToSpeech())
|
|
326
330
|
|
|
327
331
|
# register custom audio providers
|
pygpt_net/controller/__init__.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
2
|
# -*- coding: utf-8 -*-
|
|
3
3
|
# ================================================== #
|
|
4
4
|
# This file is a part of PYGPT package #
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
|
7
7
|
# MIT License #
|
|
8
8
|
# Created By : Marcin Szczygliński #
|
|
9
|
-
# Updated Date: 2025.08.
|
|
9
|
+
# Updated Date: 2025.08.30 06:00:00 #
|
|
10
10
|
# ================================================== #
|
|
11
11
|
|
|
12
12
|
from pygpt_net.controller.access import Access
|
|
@@ -34,6 +34,7 @@ from pygpt_net.controller.model import Model
|
|
|
34
34
|
from pygpt_net.controller.notepad import Notepad
|
|
35
35
|
from pygpt_net.controller.painter import Painter
|
|
36
36
|
from pygpt_net.controller.plugins import Plugins
|
|
37
|
+
from pygpt_net.controller.realtime import Realtime
|
|
37
38
|
from pygpt_net.controller.presets import Presets
|
|
38
39
|
from pygpt_net.controller.settings import Settings
|
|
39
40
|
from pygpt_net.controller.theme import Theme
|
|
@@ -76,6 +77,7 @@ class Controller:
|
|
|
76
77
|
self.painter = Painter(window)
|
|
77
78
|
self.plugins = Plugins(window)
|
|
78
79
|
self.presets = Presets(window)
|
|
80
|
+
self.realtime = Realtime(window)
|
|
79
81
|
self.settings = Settings(window)
|
|
80
82
|
self.theme = Theme(window)
|
|
81
83
|
self.tools = Tools(window)
|
|
@@ -108,6 +110,7 @@ class Controller:
|
|
|
108
110
|
self.attachment.setup()
|
|
109
111
|
self.camera.setup_ui()
|
|
110
112
|
self.access.setup()
|
|
113
|
+
self.realtime.setup()
|
|
111
114
|
|
|
112
115
|
def post_setup(self):
|
|
113
116
|
"""Post-setup, after plugins are loaded"""
|
pygpt_net/controller/audio/audio.py
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
|
7
7
|
# MIT License #
|
|
8
8
|
# Created By : Marcin Szczygliński #
|
|
9
|
-
# Updated Date: 2025.08.
|
|
9
|
+
# Updated Date: 2025.08.31 23:00:00 #
|
|
10
10
|
# ================================================== #
|
|
11
11
|
|
|
12
12
|
import os
|
|
@@ -43,6 +43,30 @@ class Audio:
|
|
|
43
43
|
if self.window.core.config.get("audio.input.continuous", False):
|
|
44
44
|
self.window.ui.plugin_addon['audio.input.btn'].continuous.setChecked(True)
|
|
45
45
|
|
|
46
|
+
if self.window.core.config.get("audio.input.auto_turn", False):
|
|
47
|
+
self.window.ui.nodes['audio.auto_turn'].box.setChecked(True)
|
|
48
|
+
|
|
49
|
+
def execute_input_stop(self):
|
|
50
|
+
"""Execute input stop (from UI)"""
|
|
51
|
+
self.window.dispatch(Event(Event.AUDIO_INPUT_RECORD_TOGGLE, {
|
|
52
|
+
"state": False,
|
|
53
|
+
"auto": True, # do not emit manual event
|
|
54
|
+
}))
|
|
55
|
+
|
|
56
|
+
def is_recording(self) -> bool:
|
|
57
|
+
"""
|
|
58
|
+
Check if audio input is recording
|
|
59
|
+
|
|
60
|
+
:return: True if recording
|
|
61
|
+
"""
|
|
62
|
+
return self.window.core.plugins.get("audio_input").is_recording()
|
|
63
|
+
|
|
64
|
+
def toggle_auto_turn(self):
|
|
65
|
+
"""Toggle auto turn setting"""
|
|
66
|
+
value = self.window.ui.nodes['audio.auto_turn'].box.isChecked()
|
|
67
|
+
self.window.core.config.set("audio.input.auto_turn", value)
|
|
68
|
+
self.window.core.config.save()
|
|
69
|
+
|
|
46
70
|
def toggle_input(
|
|
47
71
|
self,
|
|
48
72
|
state: bool,
|
pygpt_net/controller/audio/ui.py
CHANGED
|
@@ -183,7 +183,7 @@ class UI:
|
|
|
183
183
|
"""
|
|
184
184
|
self.recording = True
|
|
185
185
|
self.window.ui.nodes['input'].set_icon_state("mic", True)
|
|
186
|
-
if mode
|
|
186
|
+
if mode in ["input", "realtime"]:
|
|
187
187
|
self.window.controller.chat.common.lock_input()
|
|
188
188
|
return
|
|
189
189
|
btn = self.get_input_btn() if mode == 'input' else self.get_input_control_btn()
|
|
@@ -198,7 +198,7 @@ class UI:
|
|
|
198
198
|
"""
|
|
199
199
|
self.recording = False
|
|
200
200
|
self.window.ui.nodes['input'].set_icon_state("mic", False)
|
|
201
|
-
if mode
|
|
201
|
+
if mode in ["input", "realtime"]:
|
|
202
202
|
self.window.controller.chat.common.unlock_input()
|
|
203
203
|
return
|
|
204
204
|
btn = self.get_input_btn() if mode == 'input' else self.get_input_control_btn()
|
pygpt_net/controller/chat/audio.py
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
|
7
7
|
# MIT License #
|
|
8
8
|
# Created By : Marcin Szczygliński #
|
|
9
|
-
# Updated Date:
|
|
9
|
+
# Updated Date: 2025.08.30 06:00:00 #
|
|
10
10
|
# ================================================== #
|
|
11
11
|
|
|
12
12
|
import base64
|
|
@@ -40,19 +40,12 @@ class Audio:
|
|
|
40
40
|
"""Update input/output audio"""
|
|
41
41
|
mode = self.window.core.config.get("mode")
|
|
42
42
|
if mode == MODE_AUDIO:
|
|
43
|
-
if not self.window.controller.audio.is_output_enabled():
|
|
44
|
-
self.window.controller.audio.enable_output()
|
|
45
|
-
self.tmp_output = True
|
|
46
|
-
else:
|
|
47
|
-
self.tmp_output = False
|
|
48
43
|
if not self.window.controller.audio.is_input_enabled():
|
|
49
44
|
self.window.controller.audio.enable_input()
|
|
50
45
|
self.tmp_input = True
|
|
51
46
|
else:
|
|
52
47
|
self.tmp_input = False
|
|
53
48
|
else:
|
|
54
|
-
if self.tmp_output:
|
|
55
|
-
self.window.controller.audio.disable_output()
|
|
56
49
|
if self.tmp_input:
|
|
57
50
|
self.window.controller.audio.disable_input()
|
|
58
51
|
|
pygpt_net/controller/chat/common.py
CHANGED
|
@@ -10,12 +10,13 @@
|
|
|
10
10
|
# ================================================== #
|
|
11
11
|
|
|
12
12
|
import os
|
|
13
|
+
from typing import Any
|
|
13
14
|
|
|
14
15
|
from PySide6.QtGui import QTextCursor
|
|
15
16
|
from PySide6.QtWidgets import QFileDialog, QApplication
|
|
16
17
|
|
|
17
18
|
from pygpt_net.core.events import Event, AppEvent, RenderEvent, KernelEvent
|
|
18
|
-
from pygpt_net.core.types import MODE_ASSISTANT
|
|
19
|
+
from pygpt_net.core.types import MODE_ASSISTANT, MODE_AUDIO
|
|
19
20
|
from pygpt_net.item.ctx import CtxItem
|
|
20
21
|
from pygpt_net.item.model import ModelItem
|
|
21
22
|
from pygpt_net.utils import trans
|
|
@@ -119,6 +120,17 @@ class Common:
|
|
|
119
120
|
else:
|
|
120
121
|
self.window.ui.config['global']['img_raw'].setChecked(False)
|
|
121
122
|
|
|
123
|
+
# image resolution
|
|
124
|
+
resolution = self.window.core.config.get('img_resolution', '1024x1024')
|
|
125
|
+
self.window.controller.config.apply_value(
|
|
126
|
+
parent_id="global",
|
|
127
|
+
key="img_resolution",
|
|
128
|
+
option=self.window.core.image.get_resolution_option(),
|
|
129
|
+
value=resolution,
|
|
130
|
+
)
|
|
131
|
+
if not self.initialized:
|
|
132
|
+
self.window.ui.add_hook("update.global.img_resolution", self.hook_update)
|
|
133
|
+
|
|
122
134
|
# set focus to input
|
|
123
135
|
self.window.ui.nodes['input'].setFocus()
|
|
124
136
|
self.initialized = True
|
|
@@ -257,7 +269,7 @@ class Common:
|
|
|
257
269
|
self.window.controller.access.voice.stop_recording(timeout=True)
|
|
258
270
|
|
|
259
271
|
if self.window.core.plugins.get("audio_input").handler_simple.is_recording:
|
|
260
|
-
self.window.
|
|
272
|
+
self.window.dispatch(Event(Event.AUDIO_INPUT_RECORD_TOGGLE))
|
|
261
273
|
return
|
|
262
274
|
|
|
263
275
|
# stop audio output if playing
|
|
@@ -275,7 +287,8 @@ class Common:
|
|
|
275
287
|
"""
|
|
276
288
|
# don't unlock input and leave stop btn if assistant mode or if agent/autonomous is enabled
|
|
277
289
|
# send btn will be unlocked in agent mode on stop
|
|
278
|
-
|
|
290
|
+
mode = self.window.core.config.get('mode')
|
|
291
|
+
if self.can_unlock(ctx) and mode != MODE_AUDIO:
|
|
279
292
|
if not self.window.controller.kernel.stopped():
|
|
280
293
|
self.unlock_input() # unlock input
|
|
281
294
|
return True
|
|
@@ -452,6 +465,19 @@ class Common:
|
|
|
452
465
|
else:
|
|
453
466
|
self.img_enable_raw()
|
|
454
467
|
|
|
468
|
+
def hook_update(self, key: str, value: Any, caller, *args, **kwargs):
|
|
469
|
+
"""
|
|
470
|
+
Hook for updating image resolution
|
|
471
|
+
|
|
472
|
+
:param key: config key
|
|
473
|
+
:param value: new value
|
|
474
|
+
:param caller: caller object
|
|
475
|
+
"""
|
|
476
|
+
if key == "img_resolution":
|
|
477
|
+
if not value:
|
|
478
|
+
return
|
|
479
|
+
self.window.core.config.set('img_resolution', value)
|
|
480
|
+
|
|
455
481
|
def save_text(
|
|
456
482
|
self,
|
|
457
483
|
text: str,
|
|
File without changes
|