pygpt-net 2.6.30__py3-none-any.whl → 2.6.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. pygpt_net/CHANGELOG.txt +8 -0
  2. pygpt_net/__init__.py +3 -3
  3. pygpt_net/app.py +4 -0
  4. pygpt_net/controller/__init__.py +5 -2
  5. pygpt_net/controller/audio/audio.py +25 -1
  6. pygpt_net/controller/audio/ui.py +2 -2
  7. pygpt_net/controller/chat/audio.py +1 -8
  8. pygpt_net/controller/chat/common.py +29 -3
  9. pygpt_net/controller/chat/handler/__init__.py +0 -0
  10. pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
  11. pygpt_net/controller/chat/output.py +8 -3
  12. pygpt_net/controller/chat/stream.py +3 -1071
  13. pygpt_net/controller/chat/text.py +3 -2
  14. pygpt_net/controller/kernel/kernel.py +11 -3
  15. pygpt_net/controller/kernel/reply.py +5 -1
  16. pygpt_net/controller/realtime/__init__.py +12 -0
  17. pygpt_net/controller/realtime/manager.py +53 -0
  18. pygpt_net/controller/realtime/realtime.py +268 -0
  19. pygpt_net/controller/ui/mode.py +7 -0
  20. pygpt_net/controller/ui/ui.py +19 -1
  21. pygpt_net/core/audio/audio.py +6 -1
  22. pygpt_net/core/audio/backend/native/__init__.py +12 -0
  23. pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
  24. pygpt_net/core/audio/backend/native/player.py +139 -0
  25. pygpt_net/core/audio/backend/native/realtime.py +250 -0
  26. pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
  27. pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
  28. pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
  29. pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
  30. pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
  31. pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
  32. pygpt_net/core/audio/backend/shared/__init__.py +38 -0
  33. pygpt_net/core/audio/backend/shared/conversions.py +211 -0
  34. pygpt_net/core/audio/backend/shared/envelope.py +38 -0
  35. pygpt_net/core/audio/backend/shared/player.py +137 -0
  36. pygpt_net/core/audio/backend/shared/rt.py +52 -0
  37. pygpt_net/core/audio/capture.py +5 -0
  38. pygpt_net/core/audio/output.py +13 -2
  39. pygpt_net/core/audio/whisper.py +6 -2
  40. pygpt_net/core/bridge/bridge.py +2 -1
  41. pygpt_net/core/bridge/worker.py +4 -1
  42. pygpt_net/core/dispatcher/dispatcher.py +37 -1
  43. pygpt_net/core/events/__init__.py +2 -1
  44. pygpt_net/core/events/realtime.py +55 -0
  45. pygpt_net/core/image/image.py +51 -1
  46. pygpt_net/core/realtime/__init__.py +0 -0
  47. pygpt_net/core/realtime/options.py +87 -0
  48. pygpt_net/core/realtime/shared/__init__.py +0 -0
  49. pygpt_net/core/realtime/shared/audio.py +213 -0
  50. pygpt_net/core/realtime/shared/loop.py +64 -0
  51. pygpt_net/core/realtime/shared/session.py +59 -0
  52. pygpt_net/core/realtime/shared/text.py +37 -0
  53. pygpt_net/core/realtime/shared/tools.py +276 -0
  54. pygpt_net/core/realtime/shared/turn.py +38 -0
  55. pygpt_net/core/realtime/shared/types.py +16 -0
  56. pygpt_net/core/realtime/worker.py +164 -0
  57. pygpt_net/core/types/__init__.py +1 -0
  58. pygpt_net/core/types/image.py +48 -0
  59. pygpt_net/data/config/config.json +10 -4
  60. pygpt_net/data/config/models.json +149 -103
  61. pygpt_net/data/config/settings.json +50 -0
  62. pygpt_net/data/locale/locale.de.ini +5 -5
  63. pygpt_net/data/locale/locale.en.ini +19 -13
  64. pygpt_net/data/locale/locale.es.ini +5 -5
  65. pygpt_net/data/locale/locale.fr.ini +5 -5
  66. pygpt_net/data/locale/locale.it.ini +5 -5
  67. pygpt_net/data/locale/locale.pl.ini +5 -5
  68. pygpt_net/data/locale/locale.uk.ini +5 -5
  69. pygpt_net/data/locale/locale.zh.ini +1 -1
  70. pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
  71. pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
  72. pygpt_net/plugin/audio_input/plugin.py +37 -4
  73. pygpt_net/plugin/audio_input/simple.py +57 -8
  74. pygpt_net/plugin/cmd_files/worker.py +3 -0
  75. pygpt_net/provider/api/google/__init__.py +39 -6
  76. pygpt_net/provider/api/google/audio.py +8 -1
  77. pygpt_net/provider/api/google/chat.py +45 -6
  78. pygpt_net/provider/api/google/image.py +226 -86
  79. pygpt_net/provider/api/google/realtime/__init__.py +12 -0
  80. pygpt_net/provider/api/google/realtime/client.py +1945 -0
  81. pygpt_net/provider/api/google/realtime/realtime.py +186 -0
  82. pygpt_net/provider/api/openai/__init__.py +22 -2
  83. pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
  84. pygpt_net/provider/api/openai/realtime/client.py +1828 -0
  85. pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
  86. pygpt_net/provider/audio_input/google_genai.py +103 -0
  87. pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
  88. pygpt_net/provider/audio_output/google_tts.py +0 -12
  89. pygpt_net/provider/audio_output/openai_tts.py +8 -5
  90. pygpt_net/provider/core/config/patch.py +15 -0
  91. pygpt_net/provider/core/model/patch.py +11 -0
  92. pygpt_net/provider/llms/google.py +8 -9
  93. pygpt_net/ui/layout/toolbox/footer.py +16 -0
  94. pygpt_net/ui/layout/toolbox/image.py +5 -0
  95. pygpt_net/ui/widget/option/combo.py +15 -1
  96. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +26 -14
  97. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +100 -62
  98. pygpt_net/core/audio/backend/pyaudio.py +0 -554
  99. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
  100. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
  101. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0
pygpt_net/CHANGELOG.txt CHANGED
@@ -1,3 +1,11 @@
1
+ 2.6.31 (2025-09-01)
2
+
3
+ - Chat with Audio mode renamed to Realtime + audio.
4
+ - Added support for real-time audio models from OpenAI (Realtime API) and Google (Live API), featuring real-time audio integration (beta).
5
+ - Introduced new predefined models: gpt-realtime, gpt-4o-realtime-preview, and gemini-2.5-flash-preview-native-audio-dialog.
6
+ - Included Google Gen AI audio input and output providers in the Audio Input/Output plugins.
7
+ - Added URL Context remote tool support in Google Gen AI.
8
+
1
9
  2.6.30 (2025-08-29)
2
10
 
3
11
  - Added native Google GenAI API support (beta); live audio is not supported yet (#132).
pygpt_net/__init__.py CHANGED
@@ -6,15 +6,15 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.08.29 00:00:00 #
9
+ # Updated Date: 2025.09.01 00:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  __author__ = "Marcin Szczygliński"
13
13
  __copyright__ = "Copyright 2025, Marcin Szczygliński"
14
14
  __credits__ = ["Marcin Szczygliński"]
15
15
  __license__ = "MIT"
16
- __version__ = "2.6.30"
17
- __build__ = "2025-08-29"
16
+ __version__ = "2.6.31"
17
+ __build__ = "2025-09-01"
18
18
  __maintainer__ = "Marcin Szczygliński"
19
19
  __github__ = "https://github.com/szczyglis-dev/py-gpt"
20
20
  __report__ = "https://github.com/szczyglis-dev/py-gpt/issues"
pygpt_net/app.py CHANGED
@@ -171,10 +171,12 @@ from pygpt_net.provider.audio_input.openai_whisper import OpenAIWhisper
171
171
  from pygpt_net.provider.audio_input.openai_whisper_local import OpenAIWhisperLocal
172
172
  from pygpt_net.provider.audio_input.google_speech_recognition import GoogleSpeechRecognition
173
173
  from pygpt_net.provider.audio_input.google_cloud_speech_recognition import GoogleCloudSpeechRecognition
174
+ from pygpt_net.provider.audio_input.google_genai import GoogleGenAIAudioInput
174
175
  from pygpt_net.provider.audio_input.bing_speech_recognition import BingSpeechRecognition
175
176
  from pygpt_net.provider.audio_output.openai_tts import OpenAITextToSpeech
176
177
  from pygpt_net.provider.audio_output.ms_azure_tts import MSAzureTextToSpeech
177
178
  from pygpt_net.provider.audio_output.google_tts import GoogleTextToSpeech
179
+ from pygpt_net.provider.audio_output.google_genai_tts import GoogleGenAITextToSpeech
178
180
  from pygpt_net.provider.audio_output.eleven_labs import ElevenLabsTextToSpeech
179
181
 
180
182
  # web search engine providers
@@ -318,10 +320,12 @@ def run(**kwargs):
318
320
  launcher.add_audio_input(OpenAIWhisperLocal())
319
321
  launcher.add_audio_input(GoogleSpeechRecognition())
320
322
  launcher.add_audio_input(GoogleCloudSpeechRecognition())
323
+ launcher.add_audio_input(GoogleGenAIAudioInput())
321
324
  launcher.add_audio_input(BingSpeechRecognition())
322
325
  launcher.add_audio_output(OpenAITextToSpeech())
323
326
  launcher.add_audio_output(MSAzureTextToSpeech())
324
327
  launcher.add_audio_output(GoogleTextToSpeech())
328
+ launcher.add_audio_output(GoogleGenAITextToSpeech())
325
329
  launcher.add_audio_output(ElevenLabsTextToSpeech())
326
330
 
327
331
  # register custom audio providers
@@ -1,4 +1,4 @@
1
- # !/usr/bin/env python3
1
+ #!/usr/bin/env python3
2
2
  # -*- coding: utf-8 -*-
3
3
  # ================================================== #
4
4
  # This file is a part of PYGPT package #
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.08.24 23:00:00 #
9
+ # Updated Date: 2025.08.30 06:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  from pygpt_net.controller.access import Access
@@ -34,6 +34,7 @@ from pygpt_net.controller.model import Model
34
34
  from pygpt_net.controller.notepad import Notepad
35
35
  from pygpt_net.controller.painter import Painter
36
36
  from pygpt_net.controller.plugins import Plugins
37
+ from pygpt_net.controller.realtime import Realtime
37
38
  from pygpt_net.controller.presets import Presets
38
39
  from pygpt_net.controller.settings import Settings
39
40
  from pygpt_net.controller.theme import Theme
@@ -76,6 +77,7 @@ class Controller:
76
77
  self.painter = Painter(window)
77
78
  self.plugins = Plugins(window)
78
79
  self.presets = Presets(window)
80
+ self.realtime = Realtime(window)
79
81
  self.settings = Settings(window)
80
82
  self.theme = Theme(window)
81
83
  self.tools = Tools(window)
@@ -108,6 +110,7 @@ class Controller:
108
110
  self.attachment.setup()
109
111
  self.camera.setup_ui()
110
112
  self.access.setup()
113
+ self.realtime.setup()
111
114
 
112
115
  def post_setup(self):
113
116
  """Post-setup, after plugins are loaded"""
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.08.27 07:00:00 #
9
+ # Updated Date: 2025.08.31 23:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import os
@@ -43,6 +43,30 @@ class Audio:
43
43
  if self.window.core.config.get("audio.input.continuous", False):
44
44
  self.window.ui.plugin_addon['audio.input.btn'].continuous.setChecked(True)
45
45
 
46
+ if self.window.core.config.get("audio.input.auto_turn", False):
47
+ self.window.ui.nodes['audio.auto_turn'].box.setChecked(True)
48
+
49
+ def execute_input_stop(self):
50
+ """Execute input stop (from UI)"""
51
+ self.window.dispatch(Event(Event.AUDIO_INPUT_RECORD_TOGGLE, {
52
+ "state": False,
53
+ "auto": True, # do not emit manual event
54
+ }))
55
+
56
+ def is_recording(self) -> bool:
57
+ """
58
+ Check if audio input is recording
59
+
60
+ :return: True if recording
61
+ """
62
+ return self.window.core.plugins.get("audio_input").is_recording()
63
+
64
+ def toggle_auto_turn(self):
65
+ """Toggle auto turn setting"""
66
+ value = self.window.ui.nodes['audio.auto_turn'].box.isChecked()
67
+ self.window.core.config.set("audio.input.auto_turn", value)
68
+ self.window.core.config.save()
69
+
46
70
  def toggle_input(
47
71
  self,
48
72
  state: bool,
@@ -183,7 +183,7 @@ class UI:
183
183
  """
184
184
  self.recording = True
185
185
  self.window.ui.nodes['input'].set_icon_state("mic", True)
186
- if mode == "input":
186
+ if mode in ["input", "realtime"]:
187
187
  self.window.controller.chat.common.lock_input()
188
188
  return
189
189
  btn = self.get_input_btn() if mode == 'input' else self.get_input_control_btn()
@@ -198,7 +198,7 @@ class UI:
198
198
  """
199
199
  self.recording = False
200
200
  self.window.ui.nodes['input'].set_icon_state("mic", False)
201
- if mode == "input":
201
+ if mode in ["input", "realtime"]:
202
202
  self.window.controller.chat.common.unlock_input()
203
203
  return
204
204
  btn = self.get_input_btn() if mode == 'input' else self.get_input_control_btn()
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2024.12.14 18:00:00 #
9
+ # Updated Date: 2025.08.30 06:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import base64
@@ -40,19 +40,12 @@ class Audio:
40
40
  """Update input/output audio"""
41
41
  mode = self.window.core.config.get("mode")
42
42
  if mode == MODE_AUDIO:
43
- if not self.window.controller.audio.is_output_enabled():
44
- self.window.controller.audio.enable_output()
45
- self.tmp_output = True
46
- else:
47
- self.tmp_output = False
48
43
  if not self.window.controller.audio.is_input_enabled():
49
44
  self.window.controller.audio.enable_input()
50
45
  self.tmp_input = True
51
46
  else:
52
47
  self.tmp_input = False
53
48
  else:
54
- if self.tmp_output:
55
- self.window.controller.audio.disable_output()
56
49
  if self.tmp_input:
57
50
  self.window.controller.audio.disable_input()
58
51
 
@@ -10,12 +10,13 @@
10
10
  # ================================================== #
11
11
 
12
12
  import os
13
+ from typing import Any
13
14
 
14
15
  from PySide6.QtGui import QTextCursor
15
16
  from PySide6.QtWidgets import QFileDialog, QApplication
16
17
 
17
18
  from pygpt_net.core.events import Event, AppEvent, RenderEvent, KernelEvent
18
- from pygpt_net.core.types import MODE_ASSISTANT
19
+ from pygpt_net.core.types import MODE_ASSISTANT, MODE_AUDIO
19
20
  from pygpt_net.item.ctx import CtxItem
20
21
  from pygpt_net.item.model import ModelItem
21
22
  from pygpt_net.utils import trans
@@ -119,6 +120,17 @@ class Common:
119
120
  else:
120
121
  self.window.ui.config['global']['img_raw'].setChecked(False)
121
122
 
123
+ # image resolution
124
+ resolution = self.window.core.config.get('img_resolution', '1024x1024')
125
+ self.window.controller.config.apply_value(
126
+ parent_id="global",
127
+ key="img_resolution",
128
+ option=self.window.core.image.get_resolution_option(),
129
+ value=resolution,
130
+ )
131
+ if not self.initialized:
132
+ self.window.ui.add_hook("update.global.img_resolution", self.hook_update)
133
+
122
134
  # set focus to input
123
135
  self.window.ui.nodes['input'].setFocus()
124
136
  self.initialized = True
@@ -257,7 +269,7 @@ class Common:
257
269
  self.window.controller.access.voice.stop_recording(timeout=True)
258
270
 
259
271
  if self.window.core.plugins.get("audio_input").handler_simple.is_recording:
260
- self.window.core.plugins.get("audio_input").handler_simple.stop_recording(timeout=False)
272
+ self.window.dispatch(Event(Event.AUDIO_INPUT_RECORD_TOGGLE))
261
273
  return
262
274
 
263
275
  # stop audio output if playing
@@ -275,7 +287,8 @@ class Common:
275
287
  """
276
288
  # don't unlock input and leave stop btn if assistant mode or if agent/autonomous is enabled
277
289
  # send btn will be unlocked in agent mode on stop
278
- if self.can_unlock(ctx):
290
+ mode = self.window.core.config.get('mode')
291
+ if self.can_unlock(ctx) and mode != MODE_AUDIO:
279
292
  if not self.window.controller.kernel.stopped():
280
293
  self.unlock_input() # unlock input
281
294
  return True
@@ -452,6 +465,19 @@ class Common:
452
465
  else:
453
466
  self.img_enable_raw()
454
467
 
468
+ def hook_update(self, key: str, value: Any, caller, *args, **kwargs):
469
+ """
470
+ Hook for updating image resolution
471
+
472
+ :param key: config key
473
+ :param value: new value
474
+ :param caller: caller object
475
+ """
476
+ if key == "img_resolution":
477
+ if not value:
478
+ return
479
+ self.window.core.config.set('img_resolution', value)
480
+
455
481
  def save_text(
456
482
  self,
457
483
  text: str,
File without changes