pygpt-net 2.6.30__py3-none-any.whl → 2.6.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. pygpt_net/CHANGELOG.txt +8 -0
  2. pygpt_net/__init__.py +3 -3
  3. pygpt_net/app.py +4 -0
  4. pygpt_net/controller/__init__.py +5 -2
  5. pygpt_net/controller/audio/audio.py +25 -1
  6. pygpt_net/controller/audio/ui.py +2 -2
  7. pygpt_net/controller/chat/audio.py +1 -8
  8. pygpt_net/controller/chat/common.py +29 -3
  9. pygpt_net/controller/chat/handler/__init__.py +0 -0
  10. pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
  11. pygpt_net/controller/chat/output.py +8 -3
  12. pygpt_net/controller/chat/stream.py +3 -1071
  13. pygpt_net/controller/chat/text.py +3 -2
  14. pygpt_net/controller/kernel/kernel.py +11 -3
  15. pygpt_net/controller/kernel/reply.py +5 -1
  16. pygpt_net/controller/realtime/__init__.py +12 -0
  17. pygpt_net/controller/realtime/manager.py +53 -0
  18. pygpt_net/controller/realtime/realtime.py +268 -0
  19. pygpt_net/controller/ui/mode.py +7 -0
  20. pygpt_net/controller/ui/ui.py +19 -1
  21. pygpt_net/core/audio/audio.py +6 -1
  22. pygpt_net/core/audio/backend/native/__init__.py +12 -0
  23. pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
  24. pygpt_net/core/audio/backend/native/player.py +139 -0
  25. pygpt_net/core/audio/backend/native/realtime.py +250 -0
  26. pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
  27. pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
  28. pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
  29. pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
  30. pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
  31. pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
  32. pygpt_net/core/audio/backend/shared/__init__.py +38 -0
  33. pygpt_net/core/audio/backend/shared/conversions.py +211 -0
  34. pygpt_net/core/audio/backend/shared/envelope.py +38 -0
  35. pygpt_net/core/audio/backend/shared/player.py +137 -0
  36. pygpt_net/core/audio/backend/shared/rt.py +52 -0
  37. pygpt_net/core/audio/capture.py +5 -0
  38. pygpt_net/core/audio/output.py +13 -2
  39. pygpt_net/core/audio/whisper.py +6 -2
  40. pygpt_net/core/bridge/bridge.py +2 -1
  41. pygpt_net/core/bridge/worker.py +4 -1
  42. pygpt_net/core/dispatcher/dispatcher.py +37 -1
  43. pygpt_net/core/events/__init__.py +2 -1
  44. pygpt_net/core/events/realtime.py +55 -0
  45. pygpt_net/core/image/image.py +51 -1
  46. pygpt_net/core/realtime/__init__.py +0 -0
  47. pygpt_net/core/realtime/options.py +87 -0
  48. pygpt_net/core/realtime/shared/__init__.py +0 -0
  49. pygpt_net/core/realtime/shared/audio.py +213 -0
  50. pygpt_net/core/realtime/shared/loop.py +64 -0
  51. pygpt_net/core/realtime/shared/session.py +59 -0
  52. pygpt_net/core/realtime/shared/text.py +37 -0
  53. pygpt_net/core/realtime/shared/tools.py +276 -0
  54. pygpt_net/core/realtime/shared/turn.py +38 -0
  55. pygpt_net/core/realtime/shared/types.py +16 -0
  56. pygpt_net/core/realtime/worker.py +164 -0
  57. pygpt_net/core/types/__init__.py +1 -0
  58. pygpt_net/core/types/image.py +48 -0
  59. pygpt_net/data/config/config.json +10 -4
  60. pygpt_net/data/config/models.json +149 -103
  61. pygpt_net/data/config/settings.json +50 -0
  62. pygpt_net/data/locale/locale.de.ini +5 -5
  63. pygpt_net/data/locale/locale.en.ini +19 -13
  64. pygpt_net/data/locale/locale.es.ini +5 -5
  65. pygpt_net/data/locale/locale.fr.ini +5 -5
  66. pygpt_net/data/locale/locale.it.ini +5 -5
  67. pygpt_net/data/locale/locale.pl.ini +5 -5
  68. pygpt_net/data/locale/locale.uk.ini +5 -5
  69. pygpt_net/data/locale/locale.zh.ini +1 -1
  70. pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
  71. pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
  72. pygpt_net/plugin/audio_input/plugin.py +37 -4
  73. pygpt_net/plugin/audio_input/simple.py +57 -8
  74. pygpt_net/plugin/cmd_files/worker.py +3 -0
  75. pygpt_net/provider/api/google/__init__.py +39 -6
  76. pygpt_net/provider/api/google/audio.py +8 -1
  77. pygpt_net/provider/api/google/chat.py +45 -6
  78. pygpt_net/provider/api/google/image.py +226 -86
  79. pygpt_net/provider/api/google/realtime/__init__.py +12 -0
  80. pygpt_net/provider/api/google/realtime/client.py +1945 -0
  81. pygpt_net/provider/api/google/realtime/realtime.py +186 -0
  82. pygpt_net/provider/api/openai/__init__.py +22 -2
  83. pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
  84. pygpt_net/provider/api/openai/realtime/client.py +1828 -0
  85. pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
  86. pygpt_net/provider/audio_input/google_genai.py +103 -0
  87. pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
  88. pygpt_net/provider/audio_output/google_tts.py +0 -12
  89. pygpt_net/provider/audio_output/openai_tts.py +8 -5
  90. pygpt_net/provider/core/config/patch.py +15 -0
  91. pygpt_net/provider/core/model/patch.py +11 -0
  92. pygpt_net/provider/llms/google.py +8 -9
  93. pygpt_net/ui/layout/toolbox/footer.py +16 -0
  94. pygpt_net/ui/layout/toolbox/image.py +5 -0
  95. pygpt_net/ui/widget/option/combo.py +15 -1
  96. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +26 -14
  97. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +100 -62
  98. pygpt_net/core/audio/backend/pyaudio.py +0 -554
  99. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
  100. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
  101. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.08.23 15:00:00 #
9
+ # Updated Date: 2025.08.30 06:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  from typing import Optional
@@ -111,6 +111,7 @@ class Text:
111
111
  # if prev ctx is not empty, then copy input name to current ctx
112
112
  if prev_ctx is not None and prev_ctx.sub_call is True: # sub_call = sent from expert
113
113
  ctx.input_name = prev_ctx.input_name
114
+
114
115
  if reply:
115
116
  ctx.extra["sub_reply"] = True # mark as sub reply in extra data
116
117
 
@@ -238,7 +239,7 @@ class Text:
238
239
  """
239
240
  core = self.window.core
240
241
  stream = core.config.get("stream")
241
- if mode in (MODE_AGENT_LLAMA, MODE_AUDIO):
242
+ if mode in (MODE_AGENT_LLAMA):
242
243
  return False # TODO: check if this is correct in agent
243
244
  elif mode == MODE_LLAMA_INDEX:
244
245
  if core.config.get("llama.idx.mode") == "retrieval":
@@ -6,13 +6,13 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.08.06 19:00:00 #
9
+ # Updated Date: 2025.08.31 23:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import threading
13
13
  from typing import Any, Dict, Optional, Union, List
14
14
 
15
- from PySide6.QtCore import QObject, Slot
15
+ from PySide6.QtCore import Slot
16
16
  from PySide6.QtWidgets import QApplication
17
17
 
18
18
  from pygpt_net.core.types import (
@@ -23,7 +23,7 @@ from pygpt_net.core.types import (
23
23
  MODE_EXPERT,
24
24
  MODE_LLAMA_INDEX,
25
25
  )
26
- from pygpt_net.core.events import KernelEvent, RenderEvent, BaseEvent
26
+ from pygpt_net.core.events import KernelEvent, RenderEvent, BaseEvent, RealtimeEvent, Event
27
27
  from pygpt_net.core.bridge.context import BridgeContext
28
28
  from pygpt_net.item.ctx import CtxItem
29
29
  from pygpt_net.utils import trans
@@ -95,6 +95,13 @@ class Kernel:
95
95
  KernelEvent.INPUT_USER,
96
96
  KernelEvent.FORCE_CALL,
97
97
  KernelEvent.STATUS,
98
+ Event.AUDIO_INPUT_RECORD_TOGGLE,
99
+ RealtimeEvent.RT_INPUT_AUDIO_DELTA,
100
+ RealtimeEvent.RT_INPUT_AUDIO_MANUAL_STOP,
101
+ RealtimeEvent.RT_INPUT_AUDIO_MANUAL_START,
102
+ RealtimeEvent.RT_OUTPUT_AUDIO_COMMIT,
103
+ RealtimeEvent.RT_OUTPUT_TURN_END,
104
+ RealtimeEvent.RT_OUTPUT_READY,
98
105
  ]
99
106
 
100
107
  def init(self):
@@ -281,6 +288,7 @@ class Kernel:
281
288
  self.window.dispatch(KernelEvent(KernelEvent.TERMINATE))
282
289
  self.stop(exit=True)
283
290
  self.window.controller.plugins.destroy()
291
+ self.window.controller.realtime.shutdown()
284
292
 
285
293
  def stop(self, exit: bool = False):
286
294
  """
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.08.23 15:00:00 #
9
+ # Updated Date: 2025.08.31 23:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import json
@@ -109,6 +109,10 @@ class Reply:
109
109
  core.ctx.update_item(self.reply_ctx) # update context in db
110
110
  self.window.update_status('...')
111
111
 
112
+ # append tool calls from previous context (used for tool results handling)
113
+ if self.reply_ctx.tool_calls:
114
+ prev_ctx.extra["prev_tool_calls"] = self.reply_ctx.tool_calls
115
+
112
116
  # tool output append
113
117
  dispatch(RenderEvent(RenderEvent.TOOL_UPDATE, {
114
118
  "meta": self.reply_ctx.meta,
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # ================================================== #
4
+ # This file is a part of PYGPT package #
5
+ # Website: https://pygpt.net #
6
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
+ # MIT License #
8
+ # Created By : Marcin Szczygliński #
9
+ # Updated Date: 2025.08.30 06:00:00 #
10
+ # ================================================== #
11
+
12
+ from .realtime import Realtime
@@ -0,0 +1,53 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # ================================================== #
4
+ # This file is a part of PYGPT package #
5
+ # Website: https://pygpt.net #
6
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
+ # MIT License #
8
+ # Created By : Marcin Szczygliński #
9
+ # Updated Date: 2025.08.31 23:00:00 #
10
+ # ================================================== #
11
+
12
+ from typing import Optional
13
+
14
+ from pygpt_net.core.realtime.worker import RealtimeWorker, RealtimeOptions
15
+ from pygpt_net.item.ctx import CtxItem
16
+
17
+ class Manager:
18
+ """
19
+ Manager that mirrors chat.stream controller shape.
20
+
21
+ Starts a RealtimeWorker and routes text events and lifecycle to the UI.
22
+ Audio is forwarded by the main-thread via RT_OUTPUT_AUDIO_DELTA events.
23
+ """
24
+ def __init__(self, window=None):
25
+ self.window = window
26
+ self.worker: Optional[RealtimeWorker] = None
27
+ self.ctx: Optional[CtxItem] = None
28
+ self.provider: Optional[str] = None
29
+ self.opts: Optional[RealtimeOptions] = None
30
+
31
+ def start(
32
+ self,
33
+ ctx: CtxItem,
34
+ opts: RealtimeOptions
35
+ ):
36
+ """
37
+ Start realtime worker
38
+
39
+ :param ctx: CtxItem
40
+ :param opts: RealtimeOptions
41
+ """
42
+ self.ctx = ctx
43
+ self.opts = opts
44
+ self.provider = opts.provider
45
+
46
+ worker = RealtimeWorker(self.window, ctx, opts)
47
+ self.worker = worker
48
+ self.window.core.debug.info(f"[realtime] Begin: provider={opts.provider}, model={opts.model}")
49
+ self.window.threadpool.start(worker)
50
+
51
+ def shutdown(self):
52
+ """Shutdown realtime worker"""
53
+ self.worker = None
@@ -0,0 +1,268 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # ================================================== #
4
+ # This file is a part of PYGPT package #
5
+ # Website: https://pygpt.net #
6
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
+ # MIT License #
8
+ # Created By : Marcin Szczygliński #
9
+ # Updated Date: 2025.08.31 23:00:00 #
10
+ # ================================================== #
11
+
12
+ from PySide6.QtCore import Slot, QTimer
13
+
14
+ from pygpt_net.core.events import RealtimeEvent, RenderEvent, BaseEvent, AppEvent, KernelEvent
15
+ from pygpt_net.core.realtime.worker import RealtimeSignals
16
+ from pygpt_net.core.types import MODE_AUDIO
17
+ from pygpt_net.utils import trans
18
+ from pygpt_net.core.tabs import Tab
19
+
20
+ from .manager import Manager
21
+
22
+ class Realtime:
23
+ def __init__(self, window=None):
24
+ """
25
+ Realtime controller
26
+
27
+ :param window: Window instance
28
+ """
29
+ self.window = window
30
+ self.manager = Manager(window)
31
+ self.signals = RealtimeSignals()
32
+ self.signals.response.connect(self.handle_response)
33
+ self.current_active = None # openai | google
34
+ self.allowed_modes = [MODE_AUDIO]
35
+ self.manual_commit_sent = False
36
+
37
+ def setup(self):
38
+ """Setup realtime core, signals, etc. in main thread"""
39
+ self.window.core.audio.setup() # setup RT signals in audio input/output core
40
+
41
+ def is_enabled(self) -> bool:
42
+ """
43
+ Check if realtime is enabled in settings
44
+
45
+ :return: True if enabled, False otherwise
46
+ """
47
+ mode = self.window.core.config.get("mode")
48
+ if mode == MODE_AUDIO:
49
+ if self.window.controller.ui.tabs.get_current_type() != Tab.TAB_NOTEPAD:
50
+ return True
51
+ return False
52
+
53
+ @Slot(object)
54
+ def handle(self, event: BaseEvent):
55
+ """
56
+ Handle realtime event (returned from dispatcher)
57
+
58
+ :param event: RealtimeEvent instance
59
+ """
60
+ # check if mode is supported
61
+ if not self.is_supported() and isinstance(event, RealtimeEvent):
62
+ event.stop = True # stop further propagation
63
+ return # ignore if not in realtime mode
64
+
65
+ # ----------------------------------------------------
66
+
67
+ # audio output chunk: send to audio output handler
68
+ if event.name == RealtimeEvent.RT_OUTPUT_AUDIO_DELTA:
69
+ self.set_idle()
70
+ payload = event.data.get("payload", None)
71
+ if payload:
72
+ self.window.core.audio.output.handle_realtime(payload, self.signals)
73
+
74
+ # audio input chunk: send to the active realtime client
75
+ elif event.name == RealtimeEvent.RT_INPUT_AUDIO_DELTA:
76
+ self.set_idle()
77
+ if self.current_active == "google":
78
+ self.window.core.api.google.realtime.handle_audio_input(event)
79
+ elif self.current_active == "openai":
80
+ self.window.core.api.openai.realtime.handle_audio_input(event)
81
+
82
+ # begin: first text chunk or audio chunk received, start rendering
83
+ elif event.name == RealtimeEvent.RT_OUTPUT_READY:
84
+ ctx = event.data.get('ctx', None)
85
+ if ctx:
86
+ self.window.dispatch(RenderEvent(RenderEvent.STREAM_BEGIN, {
87
+ "meta": ctx.meta,
88
+ "ctx": ctx,
89
+ }))
90
+ self.set_busy()
91
+
92
+ # commit: audio buffer sent, stop audio input and finalize the response
93
+ elif event.name == RealtimeEvent.RT_OUTPUT_AUDIO_COMMIT:
94
+ self.set_busy()
95
+ if self.manual_commit_sent:
96
+ self.manual_commit_sent = False
97
+ return # abort if manual commit was already sent
98
+ self.window.controller.audio.execute_input_stop()
99
+
100
+ elif event.name == RealtimeEvent.RT_INPUT_AUDIO_MANUAL_STOP:
101
+ self.manual_commit_sent = True
102
+ self.set_busy()
103
+ QTimer.singleShot(0, lambda: self.manual_commit())
104
+
105
+ elif event.name == RealtimeEvent.RT_INPUT_AUDIO_MANUAL_START:
106
+ self.set_idle()
107
+ self.window.controller.chat.input.execute("...", force=True)
108
+ self.window.dispatch(KernelEvent(KernelEvent.STATUS, {
109
+ 'status': trans("speech.listening"),
110
+ }))
111
+
112
+ # text delta: append text chunk to the response
113
+ elif event.name == RealtimeEvent.RT_OUTPUT_TEXT_DELTA:
114
+ self.set_idle()
115
+ ctx = event.data.get('ctx', None)
116
+ chunk = event.data.get('chunk', "")
117
+ if chunk and ctx:
118
+ self.window.dispatch(RenderEvent(RenderEvent.STREAM_APPEND, {
119
+ "meta": ctx.meta,
120
+ "ctx": ctx,
121
+ "chunk": chunk,
122
+ "begin": False,
123
+ }))
124
+
125
+ # audio end: stop audio playback
126
+ elif event.name == RealtimeEvent.RT_OUTPUT_AUDIO_END:
127
+ self.set_idle()
128
+ self.window.controller.chat.common.unlock_input()
129
+
130
+ # end of turn: finalize the response
131
+ elif event.name == RealtimeEvent.RT_OUTPUT_TURN_END:
132
+ self.set_idle()
133
+ ctx = event.data.get('ctx', None)
134
+ if ctx:
135
+ self.end_turn(ctx)
136
+ if self.window.controller.audio.is_recording():
137
+ self.window.update_status(trans("speech.listening"))
138
+ self.window.controller.chat.common.unlock_input()
139
+
140
+ # volume change: update volume in audio output handler
141
+ elif event.name == RealtimeEvent.RT_OUTPUT_AUDIO_VOLUME_CHANGED:
142
+ volume = event.data.get("volume", 1.0)
143
+ self.window.controller.audio.ui.on_output_volume_change(volume)
144
+
145
+ # error: audio output error
146
+ elif event.name == RealtimeEvent.RT_OUTPUT_AUDIO_ERROR:
147
+ self.set_idle()
148
+ error = event.data.get("error")
149
+ self.window.core.debug.log(error)
150
+ self.window.controller.chat.common.unlock_input()
151
+
152
+ # -----------------------------------
153
+
154
+ # app events, always handled
155
+ elif event.name == AppEvent.MODE_SELECTED:
156
+ mode = self.window.core.config.get("mode")
157
+ if mode != MODE_AUDIO:
158
+ QTimer.singleShot(0, lambda: self.reset())
159
+
160
+ elif event.name == AppEvent.CTX_CREATED:
161
+ QTimer.singleShot(0, lambda: self.reset())
162
+
163
+ elif event.name == AppEvent.CTX_SELECTED:
164
+ QTimer.singleShot(0, lambda: self.reset())
165
+
166
+ @Slot(object)
167
+ def handle_response(self, event: RealtimeEvent):
168
+ """
169
+ Handle response event (send to kernel -> dispatcher)
170
+
171
+ :param event: RealtimeEvent instance
172
+ """
173
+ self.window.controller.kernel.listener(event)
174
+
175
+ def is_auto_turn(self) -> bool:
176
+ """
177
+ Check if auto-turn is enabled
178
+
179
+ :return: True if auto-turn is enabled, False otherwise
180
+ """
181
+ return self.window.core.config.get("audio.input.auto_turn", True)
182
+
183
+ def manual_commit(self):
184
+ """Manually commit the response (end of turn)"""
185
+ if self.current_active == "google":
186
+ self.window.core.api.google.realtime.manual_commit()
187
+ elif self.current_active == "openai":
188
+ self.window.core.api.openai.realtime.manual_commit()
189
+
190
+ def end_turn(self, ctx):
191
+ """
192
+ End of realtime turn - finalize the response
193
+
194
+ :param ctx: Context instance
195
+ """
196
+ self.set_idle()
197
+ if not ctx:
198
+ return
199
+ self.window.controller.chat.output.handle_after(
200
+ ctx=ctx,
201
+ mode=MODE_AUDIO,
202
+ stream=True,
203
+ )
204
+ self.window.controller.chat.output.post_handle(
205
+ ctx=ctx,
206
+ mode=MODE_AUDIO,
207
+ stream=True,
208
+ )
209
+ self.window.controller.chat.output.handle_end(
210
+ ctx=ctx,
211
+ mode=MODE_AUDIO,
212
+ )
213
+ self.window.controller.chat.common.show_response_tokens(ctx)
214
+
215
+ def shutdown(self):
216
+ """Shutdown all realtime threads and async loops"""
217
+ try:
218
+ self.window.core.api.openai.realtime.shutdown()
219
+ except Exception as e:
220
+ self.window.core.debug.log(f"[openai] Realtime shutdown error: {e}")
221
+ try:
222
+ self.window.core.api.google.realtime.shutdown()
223
+ except Exception as e:
224
+ self.window.core.debug.log(f"[google] Realtime shutdown error: {e}")
225
+ try:
226
+ self.manager.shutdown()
227
+ except Exception as e:
228
+ self.window.core.debug.log(f"[manager] Realtime shutdown error: {e}")
229
+
230
+ def reset(self):
231
+ """Reset realtime session"""
232
+ try:
233
+ self.window.core.api.openai.realtime.reset()
234
+ except Exception as e:
235
+ self.window.core.debug.log(f"[openai] Realtime reset error: {e}")
236
+ try:
237
+ self.window.core.api.google.realtime.reset()
238
+ except Exception as e:
239
+ self.window.core.debug.log(f"[google] Realtime reset error: {e}")
240
+
241
+ def is_supported(self) -> bool:
242
+ """
243
+ Check if current mode supports realtime
244
+
245
+ :return: True if mode supports realtime, False otherwise
246
+ """
247
+ mode = self.window.core.config.get("mode")
248
+ return mode in self.allowed_modes
249
+
250
+ def set_current_active(self, provider: str):
251
+ """
252
+ Set the current active realtime provider
253
+
254
+ :param provider: Provider name (openai, google)
255
+ """
256
+ self.current_active = provider.lower() if provider else None
257
+
258
+ def set_idle(self):
259
+ """Set kernel state to IDLE"""
260
+ QTimer.singleShot(0, lambda: self.window.dispatch(KernelEvent(KernelEvent.STATE_IDLE, {
261
+ "id": "realtime",
262
+ })))
263
+
264
+ def set_busy(self):
265
+ """Set kernel state to BUSY"""
266
+ QTimer.singleShot(0, lambda: self.window.dispatch(KernelEvent(KernelEvent.STATE_BUSY, {
267
+ "id": "realtime",
268
+ })))
@@ -20,6 +20,7 @@ from pygpt_net.core.types import (
20
20
  MODE_COMPUTER,
21
21
  MODE_AGENT_OPENAI,
22
22
  MODE_COMPLETION,
23
+ MODE_AUDIO,
23
24
  )
24
25
  from pygpt_net.core.tabs.tab import Tab
25
26
  from pygpt_net.core.events import Event
@@ -55,6 +56,12 @@ class Mode:
55
56
  is_image = mode == MODE_IMAGE
56
57
  is_llama_index = mode == MODE_LLAMA_INDEX
57
58
  is_completion = mode == MODE_COMPLETION
59
+ is_audio = mode == MODE_AUDIO
60
+
61
+ if not is_audio:
62
+ self.window.ui.nodes['audio.auto_turn'].setVisible(False)
63
+ else:
64
+ self.window.ui.nodes['audio.auto_turn'].setVisible(True)
58
65
 
59
66
  if not is_assistant:
60
67
  ui_nodes['presets.widget'].setVisible(True)
@@ -13,6 +13,7 @@ from typing import Optional
13
13
 
14
14
  from PySide6.QtGui import QColor
15
15
 
16
+ from pygpt_net.core.types import MODE_IMAGE
16
17
  from pygpt_net.core.events import BaseEvent, Event
17
18
  from pygpt_net.utils import trans
18
19
 
@@ -64,6 +65,7 @@ class UI:
64
65
  self.update_tokens()
65
66
  self.vision.update()
66
67
  self.window.controller.agent.legacy.update()
68
+ self.img_update_available_resolutions()
67
69
 
68
70
  def handle(self, event: BaseEvent):
69
71
  """
@@ -215,4 +217,20 @@ class UI:
215
217
  def on_global_stop(self):
216
218
  """Global stop button action"""
217
219
  if self.stop_action == "idx":
218
- self.window.controller.idx.force_stop()
220
+ self.window.controller.idx.force_stop()
221
+
222
+ def img_update_available_resolutions(self):
223
+ """Update available resolutions for images"""
224
+ mode = self.window.core.config.get('mode')
225
+ if mode != MODE_IMAGE:
226
+ return
227
+ model = self.window.core.config.get('model')
228
+ keys = self.window.core.image.get_available_resolutions(model)
229
+ current = self.window.core.config.get('img_resolution', '1024x1024')
230
+ self.window.ui.config['global']['img_resolution'].set_keys(keys, lock=False)
231
+ self.window.controller.config.apply_value(
232
+ parent_id="global",
233
+ key="img_resolution",
234
+ option=self.window.core.image.get_resolution_option(),
235
+ value=current,
236
+ )
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.08.07 22:00:00 #
9
+ # Updated Date: 2025.08.31 23:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import hashlib
@@ -42,6 +42,11 @@ class Audio:
42
42
  }
43
43
  self.last_error = None
44
44
 
45
+ def setup(self):
46
+ """Initialize audio core"""
47
+ self.capture.setup()
48
+ self.output.setup()
49
+
45
50
  def get_input_devices(self) -> List[Tuple[int, str]]:
46
51
  """
47
52
  Get input devices
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # ================================================== #
4
+ # This file is a part of PYGPT package #
5
+ # Website: https://pygpt.net #
6
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
+ # MIT License #
8
+ # Created By : Marcin Szczygliński #
9
+ # Updated Date: 2025.08.31 23:00:00 #
10
+ # ================================================== #
11
+
12
+ from .native import NativeBackend