pygpt-net 2.6.29__py3-none-any.whl → 2.6.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182)
  1. pygpt_net/CHANGELOG.txt +15 -0
  2. pygpt_net/__init__.py +3 -3
  3. pygpt_net/app.py +4 -0
  4. pygpt_net/{container.py → app_core.py} +5 -6
  5. pygpt_net/controller/__init__.py +5 -2
  6. pygpt_net/controller/access/control.py +1 -9
  7. pygpt_net/controller/assistant/assistant.py +4 -4
  8. pygpt_net/controller/assistant/batch.py +7 -7
  9. pygpt_net/controller/assistant/files.py +4 -4
  10. pygpt_net/controller/assistant/threads.py +3 -3
  11. pygpt_net/controller/attachment/attachment.py +4 -7
  12. pygpt_net/controller/audio/audio.py +25 -1
  13. pygpt_net/controller/audio/ui.py +2 -2
  14. pygpt_net/controller/chat/audio.py +1 -8
  15. pygpt_net/controller/chat/common.py +30 -4
  16. pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
  17. pygpt_net/controller/chat/output.py +8 -3
  18. pygpt_net/controller/chat/stream.py +4 -405
  19. pygpt_net/controller/chat/text.py +3 -2
  20. pygpt_net/controller/chat/vision.py +11 -19
  21. pygpt_net/controller/config/placeholder.py +1 -1
  22. pygpt_net/controller/ctx/ctx.py +1 -1
  23. pygpt_net/controller/ctx/summarizer.py +1 -1
  24. pygpt_net/controller/kernel/kernel.py +11 -3
  25. pygpt_net/controller/kernel/reply.py +5 -1
  26. pygpt_net/controller/mode/mode.py +21 -12
  27. pygpt_net/controller/plugins/settings.py +3 -2
  28. pygpt_net/controller/presets/editor.py +112 -99
  29. pygpt_net/controller/realtime/__init__.py +12 -0
  30. pygpt_net/controller/realtime/manager.py +53 -0
  31. pygpt_net/controller/realtime/realtime.py +268 -0
  32. pygpt_net/controller/theme/theme.py +3 -2
  33. pygpt_net/controller/ui/mode.py +7 -0
  34. pygpt_net/controller/ui/ui.py +19 -1
  35. pygpt_net/controller/ui/vision.py +4 -4
  36. pygpt_net/core/agents/legacy.py +2 -2
  37. pygpt_net/core/agents/runners/openai_workflow.py +2 -2
  38. pygpt_net/core/assistants/files.py +5 -5
  39. pygpt_net/core/assistants/store.py +4 -4
  40. pygpt_net/core/audio/audio.py +6 -1
  41. pygpt_net/core/audio/backend/native/__init__.py +12 -0
  42. pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
  43. pygpt_net/core/audio/backend/native/player.py +139 -0
  44. pygpt_net/core/audio/backend/native/realtime.py +250 -0
  45. pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
  46. pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
  47. pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
  48. pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
  49. pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
  50. pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
  51. pygpt_net/core/audio/backend/shared/__init__.py +38 -0
  52. pygpt_net/core/audio/backend/shared/conversions.py +211 -0
  53. pygpt_net/core/audio/backend/shared/envelope.py +38 -0
  54. pygpt_net/core/audio/backend/shared/player.py +137 -0
  55. pygpt_net/core/audio/backend/shared/rt.py +52 -0
  56. pygpt_net/core/audio/capture.py +5 -0
  57. pygpt_net/core/audio/output.py +13 -2
  58. pygpt_net/core/audio/whisper.py +6 -2
  59. pygpt_net/core/bridge/bridge.py +4 -3
  60. pygpt_net/core/bridge/worker.py +31 -9
  61. pygpt_net/core/debug/console/console.py +2 -2
  62. pygpt_net/core/debug/presets.py +2 -2
  63. pygpt_net/core/dispatcher/dispatcher.py +37 -1
  64. pygpt_net/core/events/__init__.py +2 -1
  65. pygpt_net/core/events/realtime.py +55 -0
  66. pygpt_net/core/experts/experts.py +2 -2
  67. pygpt_net/core/image/image.py +51 -1
  68. pygpt_net/core/modes/modes.py +2 -2
  69. pygpt_net/core/presets/presets.py +3 -3
  70. pygpt_net/core/realtime/options.py +87 -0
  71. pygpt_net/core/realtime/shared/__init__.py +0 -0
  72. pygpt_net/core/realtime/shared/audio.py +213 -0
  73. pygpt_net/core/realtime/shared/loop.py +64 -0
  74. pygpt_net/core/realtime/shared/session.py +59 -0
  75. pygpt_net/core/realtime/shared/text.py +37 -0
  76. pygpt_net/core/realtime/shared/tools.py +276 -0
  77. pygpt_net/core/realtime/shared/turn.py +38 -0
  78. pygpt_net/core/realtime/shared/types.py +16 -0
  79. pygpt_net/core/realtime/worker.py +164 -0
  80. pygpt_net/core/tokens/tokens.py +4 -4
  81. pygpt_net/core/types/__init__.py +1 -0
  82. pygpt_net/core/types/image.py +48 -0
  83. pygpt_net/core/types/mode.py +5 -2
  84. pygpt_net/core/vision/analyzer.py +1 -1
  85. pygpt_net/data/config/config.json +13 -4
  86. pygpt_net/data/config/models.json +219 -101
  87. pygpt_net/data/config/modes.json +3 -9
  88. pygpt_net/data/config/settings.json +135 -27
  89. pygpt_net/data/config/settings_section.json +2 -2
  90. pygpt_net/data/locale/locale.de.ini +7 -7
  91. pygpt_net/data/locale/locale.en.ini +25 -12
  92. pygpt_net/data/locale/locale.es.ini +7 -7
  93. pygpt_net/data/locale/locale.fr.ini +7 -7
  94. pygpt_net/data/locale/locale.it.ini +7 -7
  95. pygpt_net/data/locale/locale.pl.ini +8 -8
  96. pygpt_net/data/locale/locale.uk.ini +7 -7
  97. pygpt_net/data/locale/locale.zh.ini +3 -3
  98. pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
  99. pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
  100. pygpt_net/item/model.py +23 -3
  101. pygpt_net/plugin/audio_input/plugin.py +37 -4
  102. pygpt_net/plugin/audio_input/simple.py +57 -8
  103. pygpt_net/plugin/cmd_files/worker.py +3 -0
  104. pygpt_net/plugin/openai_dalle/plugin.py +4 -4
  105. pygpt_net/plugin/openai_vision/plugin.py +12 -13
  106. pygpt_net/provider/agents/openai/agent.py +5 -5
  107. pygpt_net/provider/agents/openai/agent_b2b.py +5 -5
  108. pygpt_net/provider/agents/openai/agent_planner.py +5 -6
  109. pygpt_net/provider/agents/openai/agent_with_experts.py +5 -5
  110. pygpt_net/provider/agents/openai/agent_with_experts_feedback.py +4 -4
  111. pygpt_net/provider/agents/openai/agent_with_feedback.py +4 -4
  112. pygpt_net/provider/agents/openai/bot_researcher.py +2 -2
  113. pygpt_net/provider/agents/openai/bots/research_bot/agents/planner_agent.py +1 -1
  114. pygpt_net/provider/agents/openai/bots/research_bot/agents/search_agent.py +1 -1
  115. pygpt_net/provider/agents/openai/bots/research_bot/agents/writer_agent.py +1 -1
  116. pygpt_net/provider/agents/openai/evolve.py +5 -5
  117. pygpt_net/provider/agents/openai/supervisor.py +4 -4
  118. pygpt_net/provider/api/__init__.py +27 -0
  119. pygpt_net/provider/api/anthropic/__init__.py +68 -0
  120. pygpt_net/provider/api/google/__init__.py +295 -0
  121. pygpt_net/provider/api/google/audio.py +121 -0
  122. pygpt_net/provider/api/google/chat.py +591 -0
  123. pygpt_net/provider/api/google/image.py +427 -0
  124. pygpt_net/provider/api/google/realtime/__init__.py +12 -0
  125. pygpt_net/provider/api/google/realtime/client.py +1945 -0
  126. pygpt_net/provider/api/google/realtime/realtime.py +186 -0
  127. pygpt_net/provider/api/google/tools.py +222 -0
  128. pygpt_net/provider/api/google/vision.py +129 -0
  129. pygpt_net/provider/{gpt → api/openai}/__init__.py +24 -4
  130. pygpt_net/provider/api/openai/agents/__init__.py +0 -0
  131. pygpt_net/provider/{gpt → api/openai}/agents/computer.py +1 -1
  132. pygpt_net/provider/{gpt → api/openai}/agents/experts.py +1 -1
  133. pygpt_net/provider/{gpt → api/openai}/agents/response.py +1 -1
  134. pygpt_net/provider/{gpt → api/openai}/assistants.py +1 -1
  135. pygpt_net/provider/{gpt → api/openai}/chat.py +15 -8
  136. pygpt_net/provider/{gpt → api/openai}/completion.py +1 -1
  137. pygpt_net/provider/{gpt → api/openai}/image.py +1 -1
  138. pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
  139. pygpt_net/provider/api/openai/realtime/client.py +1828 -0
  140. pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
  141. pygpt_net/provider/{gpt → api/openai}/remote_tools.py +1 -1
  142. pygpt_net/provider/{gpt → api/openai}/responses.py +34 -20
  143. pygpt_net/provider/{gpt → api/openai}/store.py +2 -2
  144. pygpt_net/provider/{gpt → api/openai}/vision.py +1 -1
  145. pygpt_net/provider/api/openai/worker/__init__.py +0 -0
  146. pygpt_net/provider/{gpt → api/openai}/worker/assistants.py +4 -4
  147. pygpt_net/provider/{gpt → api/openai}/worker/importer.py +10 -10
  148. pygpt_net/provider/audio_input/google_genai.py +103 -0
  149. pygpt_net/provider/audio_input/openai_whisper.py +1 -1
  150. pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
  151. pygpt_net/provider/audio_output/openai_tts.py +9 -6
  152. pygpt_net/provider/core/config/patch.py +26 -0
  153. pygpt_net/provider/core/model/patch.py +20 -0
  154. pygpt_net/provider/core/preset/json_file.py +2 -4
  155. pygpt_net/provider/llms/anthropic.py +2 -5
  156. pygpt_net/provider/llms/base.py +4 -3
  157. pygpt_net/provider/llms/google.py +8 -9
  158. pygpt_net/provider/llms/openai.py +1 -1
  159. pygpt_net/provider/loaders/hub/image_vision/base.py +1 -1
  160. pygpt_net/ui/dialog/preset.py +71 -55
  161. pygpt_net/ui/layout/toolbox/footer.py +16 -0
  162. pygpt_net/ui/layout/toolbox/image.py +5 -0
  163. pygpt_net/ui/main.py +6 -4
  164. pygpt_net/ui/widget/option/combo.py +15 -1
  165. pygpt_net/utils.py +9 -0
  166. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +55 -55
  167. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +181 -135
  168. pygpt_net/core/audio/backend/pyaudio.py +0 -554
  169. /pygpt_net/{provider/gpt/agents → controller/chat/handler}/__init__.py +0 -0
  170. /pygpt_net/{provider/gpt/worker → core/realtime}/__init__.py +0 -0
  171. /pygpt_net/provider/{gpt → api/openai}/agents/client.py +0 -0
  172. /pygpt_net/provider/{gpt → api/openai}/agents/remote_tools.py +0 -0
  173. /pygpt_net/provider/{gpt → api/openai}/agents/utils.py +0 -0
  174. /pygpt_net/provider/{gpt → api/openai}/audio.py +0 -0
  175. /pygpt_net/provider/{gpt → api/openai}/computer.py +0 -0
  176. /pygpt_net/provider/{gpt → api/openai}/container.py +0 -0
  177. /pygpt_net/provider/{gpt → api/openai}/summarizer.py +0 -0
  178. /pygpt_net/provider/{gpt → api/openai}/tools.py +0 -0
  179. /pygpt_net/provider/{gpt → api/openai}/utils.py +0 -0
  180. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
  181. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
  182. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0
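Note on the provider/{gpt → api/openai} moves listed above: they relocate the whole OpenAI provider package, so the Python import root changes with it. A minimal before/after sketch (the module "chat" is just one example, the call site and class name are illustrative, and this assumes no compatibility shim re-exports the old path):

# 2.6.29 (hypothetical call site)
from pygpt_net.provider.gpt.chat import Chat

# 2.6.31, after the move
from pygpt_net.provider.api.openai.chat import Chat

The same relocation surfaces at the accessor level in the hunks below, where window.core.gpt.* call sites become window.core.api.openai.*.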
pygpt_net/controller/chat/output.py
@@ -6,7 +6,7 @@
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
  # MIT License #
  # Created By : Marcin Szczygliński #
- # Updated Date: 2025.08.23 15:00:00 #
+ # Updated Date: 2025.08.31 23:00:00 #
  # ================================================== #

  from typing import Any, Optional
@@ -15,6 +15,7 @@ from pygpt_net.core.bridge import BridgeContext
  from pygpt_net.core.types import (
      MODE_ASSISTANT,
      MODE_IMAGE,
+     MODE_AUDIO,
  )
  from pygpt_net.core.events import Event, AppEvent, RenderEvent, KernelEvent
  from pygpt_net.item.ctx import CtxItem
@@ -166,8 +167,12 @@ class Output:

          controller.chat.audio.handle_output(ctx)  # handle audio output
          controller.chat.common.auto_unlock(ctx)  # unlock input if allowed
-         controller.chat.common.show_response_tokens(ctx)  # update tokens
-         dispatch(KernelEvent(KernelEvent.STATE_IDLE, self.STATE_PARAMS))  # state: idle
+         if mode != MODE_AUDIO:
+             controller.chat.common.show_response_tokens(ctx)  # update tokens
+             dispatch(KernelEvent(KernelEvent.STATE_IDLE, self.STATE_PARAMS))  # state: idle
+         else:
+             if not controller.audio.is_recording():
+                 self.window.update_status("...")  # wait for audio

      def post_handle(
          self,
pygpt_net/controller/chat/stream.py
@@ -6,419 +6,19 @@
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
  # MIT License #
  # Created By : Marcin Szczygliński #
- # Updated Date: 2025.08.19 07:00:00 #
+ # Updated Date: 2025.08.28 20:00:00 #
  # ================================================== #

- import base64
- import io
- from typing import Optional, Literal
+ from typing import Optional

- from PySide6.QtCore import QObject, Signal, Slot, QRunnable
+ from PySide6.QtCore import Slot

  from pygpt_net.core.bridge import BridgeContext
  from pygpt_net.core.events import RenderEvent
  from pygpt_net.core.types import MODE_ASSISTANT
- from pygpt_net.core.text.utils import has_unclosed_code_tag
  from pygpt_net.item.ctx import CtxItem

- EventType = Literal[
-     "response.completed",
-     "response.output_text.delta",
-     "response.output_item.added",
-     "response.function_call_arguments.delta",
-     "response.function_call_arguments.done",
-     "response.output_text.annotation.added",
-     "response.reasoning_summary_text.delta",
-     "response.output_item.done",
-     "response.code_interpreter_call_code.delta",
-     "response.code_interpreter_call_code.done",
-     "response.image_generation_call.partial_image",
-     "response.created",
-     "response.done",
-     "response.failed",
-     "error",
- ]
- ChunkType = Literal[
-     "api_chat",
-     "api_chat_responses",
-     "api_completion",
-     "langchain_chat",
-     "llama_chat",
-     "raw",
- ]
-
- class WorkerSignals(QObject):
-     """
-     Defines the signals available from a running worker thread.
-     - `finished`: No data
-     - `errorOccurred`: Exception
-     - `eventReady`: RenderEvent
-     """
-     end = Signal(object)
-     errorOccurred = Signal(Exception)
-     eventReady = Signal(object)
-
-
- class StreamWorker(QRunnable):
-     def __init__(self, ctx: CtxItem, window, parent=None):
-         super().__init__()
-         self.signals = WorkerSignals()
-         self.ctx = ctx
-         self.window = window
-         self.stream = None
-
-     @Slot()
-     def run(self):
-         ctx = self.ctx
-         win = self.window
-         core = win.core
-         ctrl = win.controller
-
-         emit_event = self.signals.eventReady.emit
-         emit_error = self.signals.errorOccurred.emit
-         emit_end = self.signals.end.emit
-
-         output_parts = []
-         output_tokens = 0
-         begin = True
-         error = None
-         fn_args_buffers: dict[str, io.StringIO] = {}
-         citations: Optional[list] = []
-         files = []
-         img_path = core.image.gen_unique_path(ctx)
-         is_image = False
-         is_code = False
-         force_func_call = False
-         stopped = False
-         chunk_type: ChunkType = "raw"
-         generator = self.stream
-
-         base_data = {
-             "meta": ctx.meta,
-             "ctx": ctx,
-         }
-         emit_event(RenderEvent(RenderEvent.STREAM_BEGIN, base_data))
-
-         tool_calls = []
-         try:
-             if generator is not None:
-                 for chunk in generator:
-                     if ctrl.kernel.stopped():
-                         if hasattr(generator, 'close'):
-                             try:
-                                 generator.close()
-                             except Exception:
-                                 pass
-                         elif hasattr(generator, 'cancel'):
-                             try:
-                                 generator.cancel()
-                             except Exception:
-                                 pass
-                         elif hasattr(generator, 'stop'):
-                             try:
-                                 generator.stop()
-                             except Exception:
-                                 pass
-                         ctx.msg_id = None
-                         stopped = True
-                         break
-
-                     if error is not None:
-                         ctx.msg_id = None
-                         stopped = True
-                         break
-
-                     etype: Optional[EventType] = None
-                     response = None
-
-                     if ctx.use_responses_api:
-                         if hasattr(chunk, 'type'):
-                             etype = chunk.type  # type: ignore[assignment]
-                             chunk_type = "api_chat_responses"
-                         else:
-                             continue
-                     else:
-                         if (hasattr(chunk, 'choices')
-                                 and chunk.choices
-                                 and hasattr(chunk.choices[0], 'delta')
-                                 and chunk.choices[0].delta is not None):
-                             chunk_type = "api_chat"
-                         elif (hasattr(chunk, 'choices')
-                                 and chunk.choices
-                                 and hasattr(chunk.choices[0], 'text')
-                                 and chunk.choices[0].text is not None):
-                             chunk_type = "api_completion"
-                         elif hasattr(chunk, 'content') and chunk.content is not None:
-                             chunk_type = "langchain_chat"
-                         elif hasattr(chunk, 'delta') and chunk.delta is not None:
-                             chunk_type = "llama_chat"
-                         else:
-                             chunk_type = "raw"
-
-                     if chunk_type == "api_chat":
-                         citations = None
-                         delta = chunk.choices[0].delta
-                         if delta and delta.content is not None:
-                             if citations is None and hasattr(chunk, 'citations') and chunk.citations is not None:
-                                 citations = chunk.citations
-                                 ctx.urls = citations
-                             response = delta.content
-
-                         if delta and delta.tool_calls:
-                             for tool_chunk in delta.tool_calls:
-                                 if tool_chunk.index is None:
-                                     tool_chunk.index = 0
-                                 if len(tool_calls) <= tool_chunk.index:
-                                     tool_calls.append(
-                                         {
-                                             "id": "",
-                                             "type": "function",
-                                             "function": {"name": "", "arguments": ""}
-                                         }
-                                     )
-                                 tool_call = tool_calls[tool_chunk.index]
-                                 if getattr(tool_chunk, "id", None):
-                                     tool_call["id"] += tool_chunk.id
-                                 if getattr(tool_chunk.function, "name", None):
-                                     tool_call["function"]["name"] += tool_chunk.function.name
-                                 if getattr(tool_chunk.function, "arguments", None):
-                                     tool_call["function"]["arguments"] += tool_chunk.function.arguments
-
-                     elif chunk_type == "api_chat_responses":
-                         if etype == "response.completed":
-                             for item in chunk.response.output:
-                                 if item.type == "mcp_list_tools":
-                                     core.gpt.responses.mcp_tools = item.tools
-                                 elif item.type == "mcp_call":
-                                     call = {
-                                         "id": item.id,
-                                         "type": "mcp_call",
-                                         "approval_request_id": item.approval_request_id,
-                                         "arguments": item.arguments,
-                                         "error": item.error,
-                                         "name": item.name,
-                                         "output": item.output,
-                                         "server_label": item.server_label,
-                                     }
-                                     tool_calls.append({
-                                         "id": item.id,
-                                         "call_id": "",
-                                         "type": "function",
-                                         "function": {"name": item.name, "arguments": item.arguments}
-                                     })
-                                     ctx.extra["mcp_call"] = call
-                                     core.ctx.update_item(ctx)
-                                 elif item.type == "mcp_approval_request":
-                                     call = {
-                                         "id": item.id,
-                                         "type": "mcp_call",
-                                         "arguments": item.arguments,
-                                         "name": item.name,
-                                         "server_label": item.server_label,
-                                     }
-                                     ctx.extra["mcp_approval_request"] = call
-                                     core.ctx.update_item(ctx)
-
-                         elif etype == "response.output_text.delta":
-                             response = chunk.delta
-
-                         elif etype == "response.output_item.added" and chunk.item.type == "function_call":
-                             tool_calls.append({
-                                 "id": chunk.item.id,
-                                 "call_id": chunk.item.call_id,
-                                 "type": "function",
-                                 "function": {"name": chunk.item.name, "arguments": ""}
-                             })
-                             fn_args_buffers[chunk.item.id] = io.StringIO()
-                         elif etype == "response.function_call_arguments.delta":
-                             buf = fn_args_buffers.get(chunk.item_id)
-                             if buf is not None:
-                                 buf.write(chunk.delta)
-                         elif etype == "response.function_call_arguments.done":
-                             buf = fn_args_buffers.pop(chunk.item_id, None)
-                             if buf is not None:
-                                 try:
-                                     args_val = buf.getvalue()
-                                 finally:
-                                     buf.close()
-                                 for tc in tool_calls:
-                                     if tc["id"] == chunk.item_id:
-                                         tc["function"]["arguments"] = args_val
-                                         break
-
-                         elif etype == "response.output_text.annotation.added":
-                             ann = chunk.annotation
-                             if ann['type'] == "url_citation":
-                                 if citations is None:
-                                     citations = []
-                                 url_citation = ann['url']
-                                 citations.append(url_citation)
-                                 ctx.urls = citations
-                             elif ann['type'] == "container_file_citation":
-                                 files.append({
-                                     "container_id": ann['container_id'],
-                                     "file_id": ann['file_id'],
-                                 })
-
-                         elif etype == "response.reasoning_summary_text.delta":
-                             response = chunk.delta
-
-                         elif etype == "response.output_item.done":
-                             tool_calls, has_calls = core.gpt.computer.handle_stream_chunk(ctx, chunk, tool_calls)
-                             if has_calls:
-                                 force_func_call = True
-
-                         elif etype == "response.code_interpreter_call_code.delta":
-                             if not is_code:
-                                 response = "\n\n**Code interpreter**\n```python\n" + chunk.delta
-                                 is_code = True
-                             else:
-                                 response = chunk.delta
-                         elif etype == "response.code_interpreter_call_code.done":
-                             response = "\n\n```\n-----------\n"
-
-                         elif etype == "response.image_generation_call.partial_image":
-                             image_base64 = chunk.partial_image_b64
-                             image_bytes = base64.b64decode(image_base64)
-                             with open(img_path, "wb") as f:
-                                 f.write(image_bytes)
-                             del image_bytes
-                             is_image = True
-
-                         elif etype == "response.created":
-                             ctx.msg_id = str(chunk.response.id)
-                             core.ctx.update_item(ctx)
-
-                         elif etype in {"response.done", "response.failed", "error"}:
-                             pass
-
-                     elif chunk_type == "api_completion":
-                         choice0 = chunk.choices[0]
-                         if choice0.text is not None:
-                             response = choice0.text
-
-                     elif chunk_type == "langchain_chat":
-                         if chunk.content is not None:
-                             response = str(chunk.content)
-
-                     elif chunk_type == "llama_chat":
-                         if chunk.delta is not None:
-                             response = str(chunk.delta)
-                         tool_chunks = getattr(chunk.message, "additional_kwargs", {}).get("tool_calls", [])
-                         if tool_chunks:
-                             for tool_chunk in tool_chunks:
-                                 id_val = getattr(tool_chunk, "call_id", None) or getattr(tool_chunk, "id", None)
-                                 name = getattr(tool_chunk, "name", None) or getattr(getattr(tool_chunk, "function", None), "name", None)
-                                 args = getattr(tool_chunk, "arguments", None)
-                                 if args is None:
-                                     f = getattr(tool_chunk, "function", None)
-                                     args = getattr(f, "arguments", None) if f else None
-                                 if id_val:
-                                     if not args:
-                                         args = "{}"
-                                     tool_call = {
-                                         "id": id_val,
-                                         "type": "function",
-                                         "function": {"name": name, "arguments": args}
-                                     }
-                                     tool_calls.clear()
-                                     tool_calls.append(tool_call)
-
-                     else:
-                         if chunk is not None:
-                             response = chunk if isinstance(chunk, str) else str(chunk)
-
-                     if response is not None and response != "" and not stopped:
-                         if begin and response == "":
-                             continue
-                         output_parts.append(response)
-                         output_tokens += 1
-                         emit_event(
-                             RenderEvent(
-                                 RenderEvent.STREAM_APPEND,
-                                 {
-                                     "meta": ctx.meta,
-                                     "ctx": ctx,
-                                     "chunk": response,
-                                     "begin": begin,
-                                 },
-                             )
-                         )
-                         begin = False
-
-                     chunk = None
-
-             if tool_calls:
-                 ctx.force_call = force_func_call
-                 core.debug.info("[chat] Tool calls found, unpacking...")
-                 core.command.unpack_tool_calls_chunks(ctx, tool_calls)
-
-             if is_image:
-                 core.debug.info("[chat] Image generation call found")
-                 ctx.images = [img_path]
-
-         except Exception as e:
-             error = e
-
-         finally:
-             output = "".join(output_parts)
-             output_parts.clear()
-             del output_parts
-
-             if has_unclosed_code_tag(output):
-                 output += "\n```"
-
-             if generator and hasattr(generator, 'close'):
-                 try:
-                     generator.close()
-                 except Exception:
-                     pass
-
-             del generator
-             self.stream = None
-
-             ctx.output = output
-             ctx.set_tokens(ctx.input_tokens, output_tokens)
-             core.ctx.update_item(ctx)
-
-             output = None
-
-             if files and not stopped:
-                 core.debug.info("[chat] Container files found, downloading...")
-                 try:
-                     core.gpt.container.download_files(ctx, files)
-                 except Exception as e:
-                     core.debug.error(f"[chat] Error downloading container files: {e}")
-
-             if error:
-                 emit_error(error)
-
-             emit_end(ctx)
-
-             for _buf in fn_args_buffers.values():
-                 try:
-                     _buf.close()
-                 except Exception:
-                     pass
-             fn_args_buffers.clear()
-             files.clear()
-             tool_calls.clear()
-             if citations is not None and citations is not ctx.urls:
-                 citations.clear()
-             citations = None
-
-             self.cleanup()
-
-     def cleanup(self):
-         """Cleanup resources after worker execution."""
-         sig = self.signals
-         self.signals = None
-         if sig is not None:
-             try:
-                 sig.deleteLater()
-             except RuntimeError:
-                 pass
-
+ from .handler.stream_worker import StreamWorker

  class Stream:
      def __init__(self, window=None):
@@ -468,7 +68,6 @@ class Stream:
          self.extra = extra if extra is not None else {}

          worker = StreamWorker(ctx, self.window)
-
          worker.stream = ctx.stream
          worker.signals.eventReady.connect(self.handleEvent)
          worker.signals.errorOccurred.connect(self.handleError)
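The removed block above relocates StreamWorker to pygpt_net/controller/chat/handler/stream_worker.py; the Stream controller keeps the same QRunnable-plus-signals wiring and only the import changes. A minimal, self-contained sketch of that pattern, assuming PySide6 is installed (the names mirror the diff; the chunk list is a stand-in for ctx.stream):

import sys
from PySide6.QtCore import (QCoreApplication, QObject, QRunnable,
                            QThreadPool, Signal, Slot)

class WorkerSignals(QObject):
    eventReady = Signal(object)        # one event per stream chunk
    errorOccurred = Signal(Exception)  # routed to an error handler
    end = Signal(object)               # fired once the generator is drained

class StreamWorker(QRunnable):
    def __init__(self, chunks):
        super().__init__()
        self.signals = WorkerSignals()
        self.stream = chunks           # any iterable of text chunks

    @Slot()
    def run(self):
        try:
            for chunk in self.stream:  # consumed off the GUI thread
                self.signals.eventReady.emit(chunk)
        except Exception as e:
            self.signals.errorOccurred.emit(e)
        finally:
            self.signals.end.emit(None)

app = QCoreApplication(sys.argv)
worker = StreamWorker(["Hello, ", "world"])  # stand-in for ctx.stream
worker.signals.eventReady.connect(lambda c: print(c, end=""))
worker.signals.end.connect(lambda _: app.quit())
QThreadPool.globalInstance().start(worker)
app.exec()

As in the hunk above, the handlers are connected before the pool starts the worker, so no early chunk or error is missed.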
pygpt_net/controller/chat/text.py
@@ -6,7 +6,7 @@
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
  # MIT License #
  # Created By : Marcin Szczygliński #
- # Updated Date: 2025.08.23 15:00:00 #
+ # Updated Date: 2025.08.30 06:00:00 #
  # ================================================== #

  from typing import Optional
@@ -111,6 +111,7 @@ class Text:
          # if prev ctx is not empty, then copy input name to current ctx
          if prev_ctx is not None and prev_ctx.sub_call is True:  # sub_call = sent from expert
              ctx.input_name = prev_ctx.input_name
+
          if reply:
              ctx.extra["sub_reply"] = True  # mark as sub reply in extra data

@@ -238,7 +239,7 @@ class Text:
          """
          core = self.window.core
          stream = core.config.get("stream")
-         if mode in (MODE_AGENT_LLAMA, MODE_AUDIO):
+         if mode in (MODE_AGENT_LLAMA):
              return False  # TODO: check if this is correct in agent
          elif mode == MODE_LLAMA_INDEX:
              if core.config.get("llama.idx.mode") == "retrieval":
pygpt_net/controller/chat/vision.py
@@ -6,7 +6,7 @@
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
  # MIT License #
  # Created By : Marcin Szczygliński #
- # Updated Date: 2025.07.30 00:00:00 #
+ # Updated Date: 2025.08.28 20:00:00 #
  # ================================================== #

  from pygpt_net.core.types import (
@@ -18,7 +18,8 @@ from pygpt_net.core.types import (
      MODE_LANGCHAIN,
      MODE_LLAMA_INDEX,
      MODE_VISION,
-     MODE_RESEARCH, MULTIMODAL_IMAGE,
+     MODE_RESEARCH,
+     MULTIMODAL_IMAGE,
  )

  class Vision:
@@ -64,20 +65,21 @@ class Vision:

      def switch_to_vision(self):
          """Switch to vision mode"""
+         return  # DO NOT SWITCH, deprecated
          mode = self.window.core.config.get('mode')
          model = self.window.core.config.get('model')
          model_data = self.window.core.models.get(model)
          if mode in [MODE_AGENT, MODE_AGENT_LLAMA, MODE_AGENT_OPENAI]:
              return  # disallow change in agent modes
-         if mode == MODE_CHAT and MODE_VISION in model_data.mode:
+         if mode == MODE_CHAT and MODE_CHAT in model_data.mode:
              return  # abort if vision is already allowed
-         if mode == MODE_VISION:
-             return
+         # if mode == MODE_VISION:
+         #     return
          # abort if vision is already enabled
          if not self.window.controller.plugins.is_enabled('openai_vision') \
                  or (self.window.controller.plugins.is_enabled('openai_vision')
                      and mode not in self.allowed_modes):
-             self.window.controller.mode.set(MODE_VISION)
+             self.window.controller.mode.set(MODE_CHAT)

      def allowed(self) -> bool:
          """
@@ -85,10 +87,7 @@ class Vision:

          :return: True if allowed
          """
-         if self.window.controller.plugins.is_enabled('openai_vision') \
-                 or self.window.core.config.get('mode') in self.allowed_modes:
-             return True
-         return False
+         return self.window.controller.plugins.is_enabled('openai_vision') or self.is_vision_model()

      def is_vision_model(self) -> bool:
          """
@@ -96,17 +95,10 @@ class Vision:

          :return: True if vision model
          """
-         allowed_modes = [
-             MODE_CHAT,
-             MODE_COMPLETION,
-             MODE_LANGCHAIN,
-             MODE_LLAMA_INDEX,
-             MODE_RESEARCH,
-         ]
          mode = self.window.core.config.get('mode')
          model = self.window.core.config.get('model')
          model_data = self.window.core.models.get(model)
-         if MULTIMODAL_IMAGE in model_data.input and mode in allowed_modes:
-             return True
+         if model_data:
+             return model_data.is_image_input() and mode in self.allowed_modes
          return False

pygpt_net/controller/config/placeholder.py
@@ -219,7 +219,7 @@ class Placeholder:

          :return: placeholders list
          """
-         return self.window.core.gpt.remote_tools.get_choices()
+         return self.window.core.api.openai.remote_tools.get_choices()

      def get_llama_index_chat_modes(self) -> List[Dict[str, str]]:
          """
pygpt_net/controller/ctx/ctx.py
@@ -295,7 +295,7 @@ class Ctx:

      def clean_memory(self):
          """Clean memory"""
-         self.window.core.gpt.close()  # clear gpt client
+         self.window.core.api.openai.close()  # clear gpt client

      def new_in_group(
          self,
pygpt_net/controller/ctx/summarizer.py
@@ -59,7 +59,7 @@ class Summarizer:
          :param window: Window instance
          :param updated_signal: WorkerSignals: updated signal
          """
-         title = window.core.gpt.summarizer.summary_ctx(ctx)
+         title = window.core.api.openai.summarizer.summary_ctx(ctx)
          if title:
              updated_signal.emit(id, ctx, title)
          updated_signal.disconnect()
pygpt_net/controller/kernel/kernel.py
@@ -6,13 +6,13 @@
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
  # MIT License #
  # Created By : Marcin Szczygliński #
- # Updated Date: 2025.08.06 19:00:00 #
+ # Updated Date: 2025.08.31 23:00:00 #
  # ================================================== #

  import threading
  from typing import Any, Dict, Optional, Union, List

- from PySide6.QtCore import QObject, Slot
+ from PySide6.QtCore import Slot
  from PySide6.QtWidgets import QApplication

  from pygpt_net.core.types import (
@@ -23,7 +23,7 @@ from pygpt_net.core.types import (
      MODE_EXPERT,
      MODE_LLAMA_INDEX,
  )
- from pygpt_net.core.events import KernelEvent, RenderEvent, BaseEvent
+ from pygpt_net.core.events import KernelEvent, RenderEvent, BaseEvent, RealtimeEvent, Event
  from pygpt_net.core.bridge.context import BridgeContext
  from pygpt_net.item.ctx import CtxItem
  from pygpt_net.utils import trans
@@ -95,6 +95,13 @@ class Kernel:
              KernelEvent.INPUT_USER,
              KernelEvent.FORCE_CALL,
              KernelEvent.STATUS,
+             Event.AUDIO_INPUT_RECORD_TOGGLE,
+             RealtimeEvent.RT_INPUT_AUDIO_DELTA,
+             RealtimeEvent.RT_INPUT_AUDIO_MANUAL_STOP,
+             RealtimeEvent.RT_INPUT_AUDIO_MANUAL_START,
+             RealtimeEvent.RT_OUTPUT_AUDIO_COMMIT,
+             RealtimeEvent.RT_OUTPUT_TURN_END,
+             RealtimeEvent.RT_OUTPUT_READY,
          ]

      def init(self):
@@ -281,6 +288,7 @@ class Kernel:
          self.window.dispatch(KernelEvent(KernelEvent.TERMINATE))
          self.stop(exit=True)
          self.window.controller.plugins.destroy()
+         self.window.controller.realtime.shutdown()

      def stop(self, exit: bool = False):
          """
pygpt_net/controller/kernel/reply.py
@@ -6,7 +6,7 @@
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
  # MIT License #
  # Created By : Marcin Szczygliński #
- # Updated Date: 2025.08.23 15:00:00 #
+ # Updated Date: 2025.08.31 23:00:00 #
  # ================================================== #

  import json
@@ -109,6 +109,10 @@ class Reply:
          core.ctx.update_item(self.reply_ctx)  # update context in db
          self.window.update_status('...')

+         # append tool calls from previous context (used for tool results handling)
+         if self.reply_ctx.tool_calls:
+             prev_ctx.extra["prev_tool_calls"] = self.reply_ctx.tool_calls
+
          # tool output append
          dispatch(RenderEvent(RenderEvent.TOOL_UPDATE, {
              "meta": self.reply_ctx.meta,