pygpt-net 2.6.30__py3-none-any.whl → 2.6.31__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
Files changed (101)
  1. pygpt_net/CHANGELOG.txt +8 -0
  2. pygpt_net/__init__.py +3 -3
  3. pygpt_net/app.py +4 -0
  4. pygpt_net/controller/__init__.py +5 -2
  5. pygpt_net/controller/audio/audio.py +25 -1
  6. pygpt_net/controller/audio/ui.py +2 -2
  7. pygpt_net/controller/chat/audio.py +1 -8
  8. pygpt_net/controller/chat/common.py +29 -3
  9. pygpt_net/controller/chat/handler/__init__.py +0 -0
  10. pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
  11. pygpt_net/controller/chat/output.py +8 -3
  12. pygpt_net/controller/chat/stream.py +3 -1071
  13. pygpt_net/controller/chat/text.py +3 -2
  14. pygpt_net/controller/kernel/kernel.py +11 -3
  15. pygpt_net/controller/kernel/reply.py +5 -1
  16. pygpt_net/controller/realtime/__init__.py +12 -0
  17. pygpt_net/controller/realtime/manager.py +53 -0
  18. pygpt_net/controller/realtime/realtime.py +268 -0
  19. pygpt_net/controller/ui/mode.py +7 -0
  20. pygpt_net/controller/ui/ui.py +19 -1
  21. pygpt_net/core/audio/audio.py +6 -1
  22. pygpt_net/core/audio/backend/native/__init__.py +12 -0
  23. pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
  24. pygpt_net/core/audio/backend/native/player.py +139 -0
  25. pygpt_net/core/audio/backend/native/realtime.py +250 -0
  26. pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
  27. pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
  28. pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
  29. pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
  30. pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
  31. pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
  32. pygpt_net/core/audio/backend/shared/__init__.py +38 -0
  33. pygpt_net/core/audio/backend/shared/conversions.py +211 -0
  34. pygpt_net/core/audio/backend/shared/envelope.py +38 -0
  35. pygpt_net/core/audio/backend/shared/player.py +137 -0
  36. pygpt_net/core/audio/backend/shared/rt.py +52 -0
  37. pygpt_net/core/audio/capture.py +5 -0
  38. pygpt_net/core/audio/output.py +13 -2
  39. pygpt_net/core/audio/whisper.py +6 -2
  40. pygpt_net/core/bridge/bridge.py +2 -1
  41. pygpt_net/core/bridge/worker.py +4 -1
  42. pygpt_net/core/dispatcher/dispatcher.py +37 -1
  43. pygpt_net/core/events/__init__.py +2 -1
  44. pygpt_net/core/events/realtime.py +55 -0
  45. pygpt_net/core/image/image.py +51 -1
  46. pygpt_net/core/realtime/__init__.py +0 -0
  47. pygpt_net/core/realtime/options.py +87 -0
  48. pygpt_net/core/realtime/shared/__init__.py +0 -0
  49. pygpt_net/core/realtime/shared/audio.py +213 -0
  50. pygpt_net/core/realtime/shared/loop.py +64 -0
  51. pygpt_net/core/realtime/shared/session.py +59 -0
  52. pygpt_net/core/realtime/shared/text.py +37 -0
  53. pygpt_net/core/realtime/shared/tools.py +276 -0
  54. pygpt_net/core/realtime/shared/turn.py +38 -0
  55. pygpt_net/core/realtime/shared/types.py +16 -0
  56. pygpt_net/core/realtime/worker.py +164 -0
  57. pygpt_net/core/types/__init__.py +1 -0
  58. pygpt_net/core/types/image.py +48 -0
  59. pygpt_net/data/config/config.json +10 -4
  60. pygpt_net/data/config/models.json +149 -103
  61. pygpt_net/data/config/settings.json +50 -0
  62. pygpt_net/data/locale/locale.de.ini +5 -5
  63. pygpt_net/data/locale/locale.en.ini +19 -13
  64. pygpt_net/data/locale/locale.es.ini +5 -5
  65. pygpt_net/data/locale/locale.fr.ini +5 -5
  66. pygpt_net/data/locale/locale.it.ini +5 -5
  67. pygpt_net/data/locale/locale.pl.ini +5 -5
  68. pygpt_net/data/locale/locale.uk.ini +5 -5
  69. pygpt_net/data/locale/locale.zh.ini +1 -1
  70. pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
  71. pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
  72. pygpt_net/plugin/audio_input/plugin.py +37 -4
  73. pygpt_net/plugin/audio_input/simple.py +57 -8
  74. pygpt_net/plugin/cmd_files/worker.py +3 -0
  75. pygpt_net/provider/api/google/__init__.py +39 -6
  76. pygpt_net/provider/api/google/audio.py +8 -1
  77. pygpt_net/provider/api/google/chat.py +45 -6
  78. pygpt_net/provider/api/google/image.py +226 -86
  79. pygpt_net/provider/api/google/realtime/__init__.py +12 -0
  80. pygpt_net/provider/api/google/realtime/client.py +1945 -0
  81. pygpt_net/provider/api/google/realtime/realtime.py +186 -0
  82. pygpt_net/provider/api/openai/__init__.py +22 -2
  83. pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
  84. pygpt_net/provider/api/openai/realtime/client.py +1828 -0
  85. pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
  86. pygpt_net/provider/audio_input/google_genai.py +103 -0
  87. pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
  88. pygpt_net/provider/audio_output/google_tts.py +0 -12
  89. pygpt_net/provider/audio_output/openai_tts.py +8 -5
  90. pygpt_net/provider/core/config/patch.py +15 -0
  91. pygpt_net/provider/core/model/patch.py +11 -0
  92. pygpt_net/provider/llms/google.py +8 -9
  93. pygpt_net/ui/layout/toolbox/footer.py +16 -0
  94. pygpt_net/ui/layout/toolbox/image.py +5 -0
  95. pygpt_net/ui/widget/option/combo.py +15 -1
  96. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +26 -14
  97. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +100 -62
  98. pygpt_net/core/audio/backend/pyaudio.py +0 -554
  99. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
  100. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
  101. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0
pygpt_net/controller/chat/stream.py
@@ -9,1083 +9,16 @@
 # Updated Date: 2025.08.28 20:00:00 #
 # ================================================== #
 
-import base64
-import io
-import json
-from dataclasses import dataclass, field
-from typing import Optional, Literal, Any
+from typing import Optional
 
-from PySide6.QtCore import QObject, Signal, Slot, QRunnable
+from PySide6.QtCore import Slot
 
 from pygpt_net.core.bridge import BridgeContext
 from pygpt_net.core.events import RenderEvent
 from pygpt_net.core.types import MODE_ASSISTANT
-from pygpt_net.core.text.utils import has_unclosed_code_tag
 from pygpt_net.item.ctx import CtxItem
 
-EventType = Literal[
-    "response.completed",
-    "response.output_text.delta",
-    "response.output_item.added",
-    "response.function_call_arguments.delta",
-    "response.function_call_arguments.done",
-    "response.output_text.annotation.added",
-    "response.reasoning_summary_text.delta",
-    "response.output_item.done",
-    "response.code_interpreter_call_code.delta",
-    "response.code_interpreter_call_code.done",
-    "response.image_generation_call.partial_image",
-    "response.created",
-    "response.done",
-    "response.failed",
-    "error",
-]
-ChunkType = Literal[
-    "api_chat",
-    "api_chat_responses",
-    "api_completion",
-    "langchain_chat",
-    "llama_chat",
-    "google",
-    "raw",
-]
-
-
-class WorkerSignals(QObject):
-    """
-    Defines the signals available from a running worker thread.
-    - `finished`: No data
-    - `errorOccurred`: Exception
-    - `eventReady`: RenderEvent
-    """
-    end = Signal(object)
-    errorOccurred = Signal(Exception)
-    eventReady = Signal(object)
-
-
-@dataclass
-class WorkerState:
-    """Holds mutable state for the streaming loop."""
-    output_parts: list[str] = field(default_factory=list)
-    output_tokens: int = 0
-    begin: bool = True
-    error: Optional[Exception] = None
-    fn_args_buffers: dict[str, io.StringIO] = field(default_factory=dict)
-    citations: Optional[list] = field(default_factory=list)
-    image_paths: list[str] = field(default_factory=list)
-    files: list[dict] = field(default_factory=list)
-    img_path: Optional[str] = None
-    is_image: bool = False
-    has_google_inline_image: bool = False
-    is_code: bool = False
-    force_func_call: bool = False
-    stopped: bool = False
-    chunk_type: ChunkType = "raw"
-    generator: Any = None
-    usage_vendor: Optional[str] = None
-    usage_payload: dict = field(default_factory=dict)
-    google_stream_ref: Any = None
-    tool_calls: list[dict] = field(default_factory=list)
-
-
-class StreamWorker(QRunnable):
-    def __init__(self, ctx: CtxItem, window, parent=None):
-        super().__init__()
-        self.signals = WorkerSignals()
-        self.ctx = ctx
-        self.window = window
-        self.stream = None
-
-    @Slot()
-    def run(self):
-        ctx = self.ctx
-        win = self.window
-        core = win.core
-        ctrl = win.controller
-
-        emit_event = self.signals.eventReady.emit
-        emit_error = self.signals.errorOccurred.emit
-        emit_end = self.signals.end.emit
-
-        state = WorkerState()
-        state.generator = self.stream
-        state.img_path = core.image.gen_unique_path(ctx)
-
-        base_data = {"meta": ctx.meta, "ctx": ctx}
-        emit_event(RenderEvent(RenderEvent.STREAM_BEGIN, base_data))
-
-        try:
-            if state.generator is not None:
-                for chunk in state.generator:
-                    # cooperative stop
-                    if self._should_stop(ctrl, state, ctx):
-                        break
-
-                    # if error flagged, stop early
-                    if state.error is not None:
-                        ctx.msg_id = None
-                        state.stopped = True
-                        break
-
-                    etype: Optional[EventType] = None
-
-                    # detect chunk type
-                    if ctx.use_responses_api:
-                        if hasattr(chunk, 'type'):
-                            etype = chunk.type  # type: ignore[assignment]
-                            state.chunk_type = "api_chat_responses"
-                        else:
-                            continue
-                    else:
-                        state.chunk_type = self._detect_chunk_type(chunk)
-
-                    # process chunk according to type
-                    response = self._process_chunk(ctx, core, state, chunk, etype)
-
-                    # emit response delta if present
-                    if response is not None and response != "" and not state.stopped:
-                        self._append_response(ctx, state, response, emit_event)
-
-                    # free per-iteration ref
-                    chunk = None
-
-                # after loop: handle tool-calls and images assembly
-                self._handle_after_loop(ctx, core, state)
-
-        except Exception as e:
-            state.error = e
-
-        finally:
-            self._finalize(ctx, core, state, emit_end, emit_error)
-
-    # ------------ Orchestration helpers ------------
-
-    def _should_stop(self, ctrl, state: WorkerState, ctx: CtxItem) -> bool:
-        """
-        Checks external stop signal and attempts to stop the generator gracefully.
-
-        :param ctrl: Controller with stop signal
-        :param state: WorkerState
-        :param ctx: CtxItem
-        :return: True if stopped, False otherwise
-        """
-        if not ctrl.kernel.stopped():
-            return False
-
-        gen = state.generator
-        if gen is not None:
-            # Try common stop methods without raising
-            for meth in ("close", "cancel", "stop"):
-                if hasattr(gen, meth):
-                    try:
-                        getattr(gen, meth)()
-                    except Exception:
-                        pass
-
-        ctx.msg_id = None
-        state.stopped = True
-        return True
-
-    def _detect_chunk_type(self, chunk) -> ChunkType:
-        """
-        Detects chunk type for various providers/SDKs.
-
-        :param chunk: The chunk object from the stream
-        :return: Detected ChunkType
-        """
-        if (hasattr(chunk, 'choices')
-                and chunk.choices
-                and hasattr(chunk.choices[0], 'delta')
-                and chunk.choices[0].delta is not None):
-            return "api_chat"
-        if (hasattr(chunk, 'choices')
-                and chunk.choices
-                and hasattr(chunk.choices[0], 'text')
-                and chunk.choices[0].text is not None):
-            return "api_completion"
-        if hasattr(chunk, 'content') and chunk.content is not None:
-            return "langchain_chat"
-        if hasattr(chunk, 'delta') and chunk.delta is not None:
-            return "llama_chat"
-        if hasattr(chunk, "candidates"):  # Google python-genai chunk
-            return "google"
-        return "raw"
-
-    def _append_response(
-            self,
-            ctx: CtxItem,
-            state: WorkerState,
-            response: str,
-            emit_event
-    ):
-        """
-        Appends response delta and emits STREAM_APPEND event.
-
-        Skips empty initial chunks if state.begin is True.
-
-        :param ctx: CtxItem
-        :param state: WorkerState
-        :param response: Response delta string
-        :param emit_event: Function to emit RenderEvent
-        """
-        if state.begin and response == "":
-            return
-        state.output_parts.append(response)
-        state.output_tokens += 1
-        emit_event(
-            RenderEvent(
-                RenderEvent.STREAM_APPEND,
-                {
-                    "meta": ctx.meta,
-                    "ctx": ctx,
-                    "chunk": response,
-                    "begin": state.begin,
-                },
-            )
-        )
-        state.begin = False
-
-    def _handle_after_loop(self, ctx: CtxItem, core, state: WorkerState):
-        """
-        Post-loop handling for tool calls and images assembly.
-
-        :param ctx: CtxItem
-        :param core: Core instance
-        :param state: WorkerState
-        """
-        if state.tool_calls:
-            ctx.force_call = state.force_func_call
-            core.debug.info("[chat] Tool calls found, unpacking...")
-            # Ensure function.arguments is JSON string
-            for tc in state.tool_calls:
-                fn = tc.get("function") or {}
-                if isinstance(fn.get("arguments"), dict):
-                    fn["arguments"] = json.dumps(fn["arguments"], ensure_ascii=False)
-            core.command.unpack_tool_calls_chunks(ctx, state.tool_calls)
-
-        # OpenAI partial image assembly
-        if state.is_image and state.img_path:
-            core.debug.info("[chat] OpenAI partial image assembled")
-            ctx.images = [state.img_path]
-
-        # Google inline images
-        if state.image_paths:
-            core.debug.info("[chat] Google inline images found")
-            if not isinstance(ctx.images, list) or not ctx.images:
-                ctx.images = list(state.image_paths)
-            else:
-                seen = set(ctx.images)
-                for p in state.image_paths:
-                    if p not in seen:
-                        ctx.images.append(p)
-                        seen.add(p)
-
-    def _finalize(self, ctx: CtxItem, core, state: WorkerState, emit_end, emit_error):
-        """
-        Finalize stream: build output, usage, tokens, files, errors, cleanup.
-
-        :param ctx: CtxItem
-        :param core: Core instance
-        :param state: WorkerState
-        :param emit_end: Function to emit end signal
-        """
-        # Build final output
-        output = "".join(state.output_parts)
-        state.output_parts.clear()
-
-        if has_unclosed_code_tag(output):
-            output += "\n```"
-
-        # Attempt to resolve Google usage from the stream object if missing
-        if (state.usage_vendor is None or state.usage_vendor == "google") and not state.usage_payload and state.generator is not None:
-            try:
-                if hasattr(state.generator, "resolve"):
-                    state.generator.resolve()
-                um = getattr(state.generator, "usage_metadata", None)
-                if um:
-                    self._capture_google_usage(state, um)
-            except Exception:
-                pass
-
-        # Close generator if possible
-        gen = state.generator
-        if gen and hasattr(gen, 'close'):
-            try:
-                gen.close()
-            except Exception:
-                pass
-
-        self.stream = None
-        ctx.output = output
-
-        # Tokens usage
-        if state.usage_payload:
-            in_tok_final = state.usage_payload.get("in")
-            out_tok_final = state.usage_payload.get("out")
-
-            if in_tok_final is None:
-                in_tok_final = ctx.input_tokens if ctx.input_tokens is not None else 0
-            if out_tok_final is None:
-                out_tok_final = state.output_tokens
-
-            ctx.set_tokens(in_tok_final, out_tok_final)
-
-            # Attach usage details in ctx.extra for debugging
-            try:
-                if not isinstance(ctx.extra, dict):
-                    ctx.extra = {}
-                ctx.extra["usage"] = {
-                    "vendor": state.usage_vendor,
-                    "input_tokens": in_tok_final,
-                    "output_tokens": out_tok_final,
-                    "reasoning_tokens": state.usage_payload.get("reasoning", 0),
-                    "total_reported": state.usage_payload.get("total"),
-                }
-            except Exception:
-                pass
-        else:
-            # Fallback when usage is not available
-            ctx.set_tokens(ctx.input_tokens if ctx.input_tokens is not None else 0, state.output_tokens)
-
-        core.ctx.update_item(ctx)
-
-        # OpenAI only: download container files if present
-        if state.files and not state.stopped:
-            core.debug.info("[chat] Container files found, downloading...")
-            try:
-                core.api.openai.container.download_files(ctx, state.files)
-            except Exception as e:
-                core.debug.error(f"[chat] Error downloading container files: {e}")
-
-        # Emit error and end
-        if state.error:
-            emit_error(state.error)
-        emit_end(ctx)
-
-        # Cleanup local buffers
-        for _buf in state.fn_args_buffers.values():
-            try:
-                _buf.close()
-            except Exception:
-                pass
-        state.fn_args_buffers.clear()
-        state.files.clear()
-        state.tool_calls.clear()
-        if state.citations is not None and state.citations is not ctx.urls:
-            state.citations.clear()
-            state.citations = None
-
-        # Worker cleanup (signals etc.)
-        self.cleanup()
-
-    # ------------ Chunk processors ------------
-
-    def _process_chunk(
-            self,
-            ctx: CtxItem,
-            core,
-            state: WorkerState,
-            chunk,
-            etype: Optional[EventType]
-    ) -> Optional[str]:
-        """
-        Dispatches processing to concrete provider-specific processing.
-
-        :param ctx: CtxItem
-        :param core: Core instance
-        :param state: WorkerState
-        :param chunk: The chunk object from the stream
-        :param etype: Optional event type for Responses API
-        :return: Response delta string or None
-        """
-        t = state.chunk_type
-        if t == "api_chat":
-            return self._process_api_chat(ctx, state, chunk)
-        if t == "api_chat_responses":
-            return self._process_api_chat_responses(ctx, core, state, chunk, etype)
-        if t == "api_completion":
-            return self._process_api_completion(chunk)
-        if t == "langchain_chat":
-            return self._process_langchain_chat(chunk)
-        if t == "llama_chat":
-            return self._process_llama_chat(state, chunk)
-        if t == "google":
-            return self._process_google_chunk(ctx, core, state, chunk)
-        # raw fallback
-        return self._process_raw(chunk)
-
-    def _process_api_chat(
-            self,
-            ctx: CtxItem,
-            state: WorkerState,
-            chunk
-    ) -> Optional[str]:
-        """
-        OpenAI Chat Completions stream delta.
-
-        Handles text deltas, citations, and streamed tool_calls.
-
-        :param ctx: CtxItem
-        :param state: WorkerState
-        :param chunk: The chunk object from the stream
-        :return: Response delta string or None
-        """
-        response = None
-        state.citations = None  # as in original, reset to None for this type
-
-        delta = chunk.choices[0].delta if getattr(chunk, "choices", None) else None
-        if delta and getattr(delta, "content", None) is not None:
-            if state.citations is None and hasattr(chunk, 'citations') and chunk.citations is not None:
-                state.citations = chunk.citations
-                ctx.urls = state.citations
-            response = delta.content
-
-        # Accumulate streamed tool_calls
-        if delta and getattr(delta, "tool_calls", None):
-            for tool_chunk in delta.tool_calls:
-                if tool_chunk.index is None:
-                    tool_chunk.index = 0
-                if len(state.tool_calls) <= tool_chunk.index:
-                    state.tool_calls.append(
-                        {
-                            "id": "",
-                            "type": "function",
-                            "function": {"name": "", "arguments": ""}
-                        }
-                    )
-                tool_call = state.tool_calls[tool_chunk.index]
-                if getattr(tool_chunk, "id", None):
-                    tool_call["id"] += tool_chunk.id
-                if getattr(getattr(tool_chunk, "function", None), "name", None):
-                    tool_call["function"]["name"] += tool_chunk.function.name
-                if getattr(getattr(tool_chunk, "function", None), "arguments", None):
-                    tool_call["function"]["arguments"] += tool_chunk.function.arguments
-
-        # Capture usage (if available on final chunk with include_usage=True)
-        try:
-            u = getattr(chunk, "usage", None)
-            if u:
-                self._capture_openai_usage(state, u)
-        except Exception:
-            pass
-
-        return response
-
-    def _process_api_chat_responses(
-            self,
-            ctx: CtxItem,
-            core,
-            state: WorkerState,
-            chunk,
-            etype: Optional[EventType]
-    ) -> Optional[str]:
-        """
-        OpenAI Responses API stream events
-
-        Handles various event types including text deltas, tool calls, citations, images, and usage.
-
-        :param ctx: CtxItem
-        :param core: Core instance
-        :param state: WorkerState
-        :param chunk: The chunk object from the stream
-        :param etype: EventType string
-        :return: Response delta string or None
-        """
-        response = None
-
-        if etype == "response.completed":
-            # usage on final response
-            try:
-                u = getattr(chunk.response, "usage", None)
-                if u:
-                    self._capture_openai_usage(state, u)
-            except Exception:
-                pass
-
-            for item in chunk.response.output:
-                if item.type == "mcp_list_tools":
-                    core.api.openai.responses.mcp_tools = item.tools
-                elif item.type == "mcp_call":
-                    call = {
-                        "id": item.id,
-                        "type": "mcp_call",
-                        "approval_request_id": item.approval_request_id,
-                        "arguments": item.arguments,
-                        "error": item.error,
-                        "name": item.name,
-                        "output": item.output,
-                        "server_label": item.server_label,
-                    }
-                    state.tool_calls.append({
-                        "id": item.id,
-                        "call_id": "",
-                        "type": "function",
-                        "function": {"name": item.name, "arguments": item.arguments}
-                    })
-                    ctx.extra["mcp_call"] = call
-                    core.ctx.update_item(ctx)
-                elif item.type == "mcp_approval_request":
-                    call = {
-                        "id": item.id,
-                        "type": "mcp_call",
-                        "arguments": item.arguments,
-                        "name": item.name,
-                        "server_label": item.server_label,
-                    }
-                    ctx.extra["mcp_approval_request"] = call
-                    core.ctx.update_item(ctx)
-
-        elif etype == "response.output_text.delta":
-            response = chunk.delta
-
-        elif etype == "response.output_item.added" and chunk.item.type == "function_call":
-            state.tool_calls.append({
-                "id": chunk.item.id,
-                "call_id": chunk.item.call_id,
-                "type": "function",
-                "function": {"name": chunk.item.name, "arguments": ""}
-            })
-            state.fn_args_buffers[chunk.item.id] = io.StringIO()
-
-        elif etype == "response.function_call_arguments.delta":
-            buf = state.fn_args_buffers.get(chunk.item_id)
-            if buf is not None:
-                buf.write(chunk.delta)
-
-        elif etype == "response.function_call_arguments.done":
-            buf = state.fn_args_buffers.pop(chunk.item_id, None)
-            if buf is not None:
-                try:
-                    args_val = buf.getvalue()
-                finally:
-                    buf.close()
-                for tc in state.tool_calls:
-                    if tc["id"] == chunk.item_id:
-                        tc["function"]["arguments"] = args_val
-                        break
-
-        elif etype == "response.output_text.annotation.added":
-            ann = chunk.annotation
-            if ann['type'] == "url_citation":
-                if state.citations is None:
-                    state.citations = []
-                url_citation = ann['url']
-                state.citations.append(url_citation)
-                ctx.urls = state.citations
-            elif ann['type'] == "container_file_citation":
-                state.files.append({
-                    "container_id": ann['container_id'],
-                    "file_id": ann['file_id'],
-                })
-
-        elif etype == "response.reasoning_summary_text.delta":
-            response = chunk.delta
-
-        elif etype == "response.output_item.done":
-            # Delegate to computer handler which may add tool calls
-            tool_calls, has_calls = core.api.openai.computer.handle_stream_chunk(ctx, chunk, state.tool_calls)
-            state.tool_calls = tool_calls
-            if has_calls:
-                state.force_func_call = True
-
-        elif etype == "response.code_interpreter_call_code.delta":
-            if not state.is_code:
-                response = "\n\n**Code interpreter**\n```python\n" + chunk.delta
-                state.is_code = True
-            else:
-                response = chunk.delta
-
-        elif etype == "response.code_interpreter_call_code.done":
-            response = "\n\n```\n-----------\n"
-
-        elif etype == "response.image_generation_call.partial_image":
-            image_base64 = chunk.partial_image_b64
-            image_bytes = base64.b64decode(image_base64)
-            if state.img_path:
-                with open(state.img_path, "wb") as f:
-                    f.write(image_bytes)
-            del image_bytes
-            state.is_image = True
-
-        elif etype == "response.created":
-            ctx.msg_id = str(chunk.response.id)
-            core.ctx.update_item(ctx)
-
-        elif etype in {"response.done", "response.failed", "error"}:
-            pass
-
-        return response
-
-    def _process_api_completion(self, chunk) -> Optional[str]:
-        """
-        OpenAI Completions stream delta.
-
-        :param chunk: The chunk object from the stream
-        :return: Response delta string or None
-        """
-        if getattr(chunk, "choices", None):
-            choice0 = chunk.choices[0]
-            if getattr(choice0, "text", None) is not None:
-                return choice0.text
-        return None
-
-    def _process_langchain_chat(self, chunk) -> Optional[str]:
-        """
-        LangChain chat streaming delta.
-
-        :param chunk: The chunk object from the stream
-        :return: Response delta string or None
-        """
-        if getattr(chunk, "content", None) is not None:
-            return str(chunk.content)
-        return None
-
-    def _process_llama_chat(self, state: WorkerState, chunk) -> Optional[str]:
-        """
-        Llama chat streaming delta with optional tool call extraction.
-
-        :param state: WorkerState
-        :param chunk: The chunk object from the stream
-        :return: Response delta string or None
-        """
-        response = None
-        if getattr(chunk, "delta", None) is not None:
-            response = str(chunk.delta)
-
-        tool_chunks = getattr(getattr(chunk, "message", None), "additional_kwargs", {}).get("tool_calls", [])
-        if tool_chunks:
-            for tool_chunk in tool_chunks:
-                id_val = getattr(tool_chunk, "call_id", None) or getattr(tool_chunk, "id", None)
-                name = getattr(tool_chunk, "name", None) or getattr(getattr(tool_chunk, "function", None), "name", None)
-                args = getattr(tool_chunk, "arguments", None)
-                if args is None:
-                    f = getattr(tool_chunk, "function", None)
-                    args = getattr(f, "arguments", None) if f else None
-                if id_val:
-                    if not args:
-                        args = "{}"
-                    tool_call = {
-                        "id": id_val,
-                        "type": "function",
-                        "function": {"name": name, "arguments": args}
-                    }
-                    state.tool_calls.clear()
-                    state.tool_calls.append(tool_call)
-
-        return response
-
-    def _process_google_chunk(self, ctx: CtxItem, core, state: WorkerState, chunk) -> Optional[str]:
-        """
-        Google python-genai streaming chunk.
-
-        Handles text, tool calls, inline images, code execution parts, citations, and usage.
-
-        :param ctx: CtxItem
-        :param core: Core instance
-        :param state: WorkerState
-        :param chunk: The chunk object from the stream
-        :return: Response delta string or None
-        """
-        response_parts: list[str] = []
-
-        # Keep a reference to stream object for resolve() later if needed
-        if state.google_stream_ref is None:
-            state.google_stream_ref = state.generator
-
-        # Try to capture usage from this chunk (usage_metadata)
-        try:
-            um = getattr(chunk, "usage_metadata", None)
-            if um:
-                self._capture_google_usage(state, um)
-        except Exception:
-            pass
-
-        # 1) Plain text delta (if present)
-        t = None
-        try:
-            t = getattr(chunk, "text", None)
-            if t:
-                response_parts.append(t)
-        except Exception:
-            pass
-
-        # 2) Tool calls (function_calls property preferred)
-        fc_list = []
-        try:
-            fc_list = getattr(chunk, "function_calls", None) or []
-        except Exception:
-            fc_list = []
-
-        new_calls = []
-
-        def _to_plain_dict(obj):
-            """
-            Best-effort conversion of SDK objects to plain dict/list.
-            """
-            try:
-                if hasattr(obj, "to_json_dict"):
-                    return obj.to_json_dict()
-                if hasattr(obj, "model_dump"):
-                    return obj.model_dump()
-                if hasattr(obj, "to_dict"):
-                    return obj.to_dict()
-            except Exception:
-                pass
-            if isinstance(obj, dict):
-                return {k: _to_plain_dict(v) for k, v in obj.items()}
-            if isinstance(obj, (list, tuple)):
-                return [_to_plain_dict(x) for x in obj]
-            return obj
-
-        if fc_list:
-            for fc in fc_list:
-                name = getattr(fc, "name", "") or ""
-                args_obj = getattr(fc, "args", {}) or {}
-                args_dict = _to_plain_dict(args_obj) or {}
-                new_calls.append({
-                    "id": getattr(fc, "id", "") or "",
-                    "type": "function",
-                    "function": {
-                        "name": name,
-                        "arguments": json.dumps(args_dict, ensure_ascii=False),
-                    }
-                })
-        else:
-            # Fallback: read from candidates -> parts[].function_call
-            try:
-                cands = getattr(chunk, "candidates", None) or []
-                for cand in cands:
-                    content = getattr(cand, "content", None)
-                    parts = getattr(content, "parts", None) or []
-                    for p in parts:
-                        fn = getattr(p, "function_call", None)
-                        if not fn:
-                            continue
-                        name = getattr(fn, "name", "") or ""
-                        args_obj = getattr(fn, "args", {}) or {}
-                        args_dict = _to_plain_dict(args_obj) or {}
-                        new_calls.append({
-                            "id": getattr(fn, "id", "") or "",
-                            "type": "function",
-                            "function": {
-                                "name": name,
-                                "arguments": json.dumps(args_dict, ensure_ascii=False),
-                            }
-                        })
-            except Exception:
-                pass
-
-        # De-duplicate tool calls and mark force flag if any found
-        if new_calls:
-            seen = {(tc["function"]["name"], tc["function"]["arguments"]) for tc in state.tool_calls}
-            for tc in new_calls:
-                key = (tc["function"]["name"], tc["function"]["arguments"])
-                if key not in seen:
-                    state.tool_calls.append(tc)
-                    seen.add(key)
-            state.force_func_call = True
-
-        # 3) Inspect candidates for code execution parts, inline images, and citations
-        try:
-            cands = getattr(chunk, "candidates", None) or []
-            for cand in cands:
-                content = getattr(cand, "content", None)
-                parts = getattr(content, "parts", None) or []
-
-                for p in parts:
-                    # Code execution: executable code part -> open or append within fenced block
-                    ex = getattr(p, "executable_code", None)
-                    if ex:
-                        lang = (getattr(ex, "language", None) or "python").strip() or "python"
-                        code_txt = (
-                            getattr(ex, "code", None) or
-                            getattr(ex, "program", None) or
-                            getattr(ex, "source", None) or
-                            ""
-                        )
-                        if code_txt is None:
-                            code_txt = ""
-                        if not state.is_code:
-                            response_parts.append(f"\n\n**Code interpreter**\n```{lang.lower()}\n{code_txt}")
-                            state.is_code = True
-                        else:
-                            response_parts.append(str(code_txt))
-
-                    # Code execution result -> close fenced block (output will be streamed as normal text if provided)
-                    cer = getattr(p, "code_execution_result", None)
-                    if cer:
-                        if state.is_code:
-                            response_parts.append("\n\n```\n-----------\n")
-                            state.is_code = False
-                        # Note: We do not append execution outputs here to avoid duplicating chunk.text.
-
-                    # Inline image blobs
-                    blob = getattr(p, "inline_data", None)
-                    if blob:
-                        mime = (getattr(blob, "mime_type", "") or "").lower()
-                        if mime.startswith("image/"):
-                            data = getattr(blob, "data", None)
-                            if data:
-                                # inline_data.data may be bytes or base64-encoded string
-                                if isinstance(data, (bytes, bytearray)):
-                                    img_bytes = bytes(data)
-                                else:
-                                    img_bytes = base64.b64decode(data)
-                                save_path = core.image.gen_unique_path(ctx)
-                                with open(save_path, "wb") as f:
-                                    f.write(img_bytes)
-                                if not isinstance(ctx.images, list):
-                                    ctx.images = []
-                                ctx.images.append(save_path)
-                                state.image_paths.append(save_path)
-                                state.has_google_inline_image = True
-
-                    # File data that points to externally hosted image (http/https)
-                    fdata = getattr(p, "file_data", None)
-                    if fdata:
-                        uri = getattr(fdata, "file_uri", None) or getattr(fdata, "uri", None)
-                        mime = (getattr(fdata, "mime_type", "") or "").lower()
-                        if uri and mime.startswith("image/") and (uri.startswith("http://") or uri.startswith("https://")):
-                            if ctx.urls is None:
-                                ctx.urls = []
-                            ctx.urls.append(uri)
-
-            # Collect citations (web search URLs) if present in candidates metadata
-            self._collect_google_citations(ctx, state, chunk)
-
-        except Exception:
-            # Never break stream on extraction failures
-            pass
-
-        # Combine all response parts
-        response = "".join(response_parts) if response_parts else None
-        return response
-
-    def _process_raw(self, chunk) -> Optional[str]:
-        """
-        Raw chunk fallback.
-
-        :param chunk: The chunk object from the stream
-        :return: String representation of chunk or None
-        """
-        if chunk is not None:
-            return chunk if isinstance(chunk, str) else str(chunk)
-        return None
-
-    # ------------ Usage helpers ------------
-
-    def _safe_get(self, obj, path: str):
-        """
-        Dot-path getter for dicts and objects.
-
-        :param obj: dict or object
-        :param path: Dot-separated path string
-        """
-        cur = obj
-        for seg in path.split("."):
-            if cur is None:
-                return None
-            if isinstance(cur, dict):
-                cur = cur.get(seg)
-            else:
-                # Support numeric indices for lists like candidates.0...
-                if seg.isdigit() and isinstance(cur, (list, tuple)):
-                    idx = int(seg)
-                    if 0 <= idx < len(cur):
-                        cur = cur[idx]
-                    else:
-                        return None
-                else:
-                    cur = getattr(cur, seg, None)
-        return cur
-
-    def _as_int(self, val):
-        """
-        Coerce to int if possible, else None.
-
-        :param val: Any value
-        :return: int or None
-        """
-        if val is None:
-            return None
-        try:
-            return int(val)
-        except Exception:
-            try:
-                return int(float(val))
-            except Exception:
-                return None
-
-    def _capture_openai_usage(self, state: WorkerState, u_obj):
-        """
-        Extract usage for OpenAI; include reasoning tokens in output if available.
-
-        :param state: WorkerState
-        :param u_obj: Usage object from OpenAI response
-        """
-        if not u_obj:
-            return
-        state.usage_vendor = "openai"
-        in_tok = self._as_int(self._safe_get(u_obj, "input_tokens")) or self._as_int(self._safe_get(u_obj, "prompt_tokens"))
-        out_tok = self._as_int(self._safe_get(u_obj, "output_tokens")) or self._as_int(self._safe_get(u_obj, "completion_tokens"))
-        total = self._as_int(self._safe_get(u_obj, "total_tokens"))
-        reasoning = (
-            self._as_int(self._safe_get(u_obj, "output_tokens_details.reasoning_tokens")) or
-            self._as_int(self._safe_get(u_obj, "completion_tokens_details.reasoning_tokens")) or
-            self._as_int(self._safe_get(u_obj, "reasoning_tokens")) or
-            0
-        )
-        out_with_reason = (out_tok or 0) + (reasoning or 0)
-        state.usage_payload = {"in": in_tok, "out": out_with_reason, "reasoning": reasoning or 0, "total": total}
-
-    def _capture_google_usage(self, state: WorkerState, um_obj):
-        """
-        Extract usage for Google python-genai; prefer total - prompt to include reasoning.
-
-        :param state: WorkerState
-        :param um_obj: Usage metadata object from Google chunk
-        """
-        if not um_obj:
-            return
-        state.usage_vendor = "google"
-        prompt = (
-            self._as_int(self._safe_get(um_obj, "prompt_token_count")) or
-            self._as_int(self._safe_get(um_obj, "prompt_tokens")) or
-            self._as_int(self._safe_get(um_obj, "input_tokens"))
-        )
-        total = (
-            self._as_int(self._safe_get(um_obj, "total_token_count")) or
-            self._as_int(self._safe_get(um_obj, "total_tokens"))
-        )
-        candidates = (
-            self._as_int(self._safe_get(um_obj, "candidates_token_count")) or
-            self._as_int(self._safe_get(um_obj, "output_tokens"))
-        )
-        reasoning = (
-            self._as_int(self._safe_get(um_obj, "candidates_reasoning_token_count")) or
-            self._as_int(self._safe_get(um_obj, "reasoning_tokens")) or 0
-        )
-        if total is not None and prompt is not None:
-            out_total = max(0, total - prompt)
-        else:
-            out_total = candidates
-        state.usage_payload = {"in": prompt, "out": out_total, "reasoning": reasoning or 0, "total": total}
-
-    def _collect_google_citations(self, ctx: CtxItem, state: WorkerState, chunk: Any):
-        """
-        Collect web citations (URLs) from Google GenAI stream.
-
-        Tries multiple known locations (grounding metadata and citation metadata)
-        in a defensive manner to remain compatible with SDK changes.
-        """
-        try:
-            cands = getattr(chunk, "candidates", None) or []
-        except Exception:
-            cands = []
-
-        if not isinstance(state.citations, list):
-            state.citations = []
-
-        # Helper to add URLs with de-duplication
-        def _add_url(url: Optional[str]):
-            if not url or not isinstance(url, str):
-                return
-            url = url.strip()
-            if not (url.startswith("http://") or url.startswith("https://")):
-                return
-            # Initialize ctx.urls if needed
-            if ctx.urls is None:
-                ctx.urls = []
-            if url not in state.citations:
-                state.citations.append(url)
-            if url not in ctx.urls:
-                ctx.urls.append(url)
-
-        # Candidate-level metadata extraction
-        for cand in cands:
-            # Grounding metadata (web search attributions)
-            gm = self._safe_get(cand, "grounding_metadata") or self._safe_get(cand, "groundingMetadata")
-            if gm:
-                atts = self._safe_get(gm, "grounding_attributions") or self._safe_get(gm, "groundingAttributions") or []
-                try:
-                    for att in atts or []:
-                        # Try several common paths for URI
-                        for path in (
-                            "web.uri",
-                            "web.url",
-                            "source.web.uri",
-                            "source.web.url",
-                            "source.uri",
-                            "source.url",
-                            "uri",
-                            "url",
-                        ):
-                            _add_url(self._safe_get(att, path))
-                except Exception:
-                    pass
-                # Also check search entry point
-                for path in (
-                    "search_entry_point.uri",
-                    "search_entry_point.url",
-                    "searchEntryPoint.uri",
-                    "searchEntryPoint.url",
-                    "search_entry_point.rendered_content_uri",
-                    "searchEntryPoint.rendered_content_uri",
-                ):
-                    _add_url(self._safe_get(gm, path))
-
-            # Citation metadata (legacy and alt paths)
-            cm = self._safe_get(cand, "citation_metadata") or self._safe_get(cand, "citationMetadata")
-            if cm:
-                cit_arrays = (
-                    self._safe_get(cm, "citation_sources") or
-                    self._safe_get(cm, "citationSources") or
-                    self._safe_get(cm, "citations") or []
-                )
-                try:
-                    for cit in cit_arrays or []:
-                        for path in ("uri", "url", "source.uri", "source.url", "web.uri", "web.url"):
-                            _add_url(self._safe_get(cit, path))
-                except Exception:
-                    pass
-
-            # Part-level citation metadata
-            try:
-                parts = self._safe_get(cand, "content.parts") or []
-                for p in parts:
-                    # Per-part citation metadata
-                    pcm = self._safe_get(p, "citation_metadata") or self._safe_get(p, "citationMetadata")
-                    if pcm:
-                        arr = (
-                            self._safe_get(pcm, "citation_sources") or
-                            self._safe_get(pcm, "citationSources") or
-                            self._safe_get(pcm, "citations") or []
-                        )
-                        for cit in arr or []:
-                            for path in ("uri", "url", "source.uri", "source.url", "web.uri", "web.url"):
-                                _add_url(self._safe_get(cit, path))
-                    # Per-part grounding attributions (rare)
-                    gpa = self._safe_get(p, "grounding_attributions") or self._safe_get(p, "groundingAttributions") or []
-                    for att in gpa or []:
-                        for path in ("web.uri", "web.url", "source.web.uri", "source.web.url", "uri", "url"):
-                            _add_url(self._safe_get(att, path))
-            except Exception:
-                pass
-
-        # Bind to ctx on first discovery for compatibility with other parts of the app
-        if state.citations and (ctx.urls is None or not ctx.urls):
-            ctx.urls = list(state.citations)
-
-    def cleanup(self):
-        """Cleanup resources after worker execution."""
-        sig = self.signals
-        self.signals = None
-        if sig is not None:
-            try:
-                sig.deleteLater()
-            except RuntimeError:
-                pass
+from .handler.stream_worker import StreamWorker
 
 class Stream:
     def __init__(self, window=None):
@@ -1135,7 +68,6 @@ class Stream:
         self.extra = extra if extra is not None else {}
 
         worker = StreamWorker(ctx, self.window)
-
        worker.stream = ctx.stream
        worker.signals.eventReady.connect(self.handleEvent)
        worker.signals.errorOccurred.connect(self.handleError)
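
For orientation: the hunks above amount to a code move, not a rewrite. The StreamWorker, WorkerSignals, and WorkerState implementation leaves pygpt_net/controller/chat/stream.py and reappears in the new pygpt_net/controller/chat/handler/stream_worker.py (+1124 lines in the file list), while stream.py keeps a re-import of the class. A minimal sketch of what this implies for import paths, assuming an installed pygpt_net package; the alias name LegacyStreamWorker is illustrative, not from the diff:

    # New canonical location, per the added handler/stream_worker.py:
    from pygpt_net.controller.chat.handler.stream_worker import StreamWorker

    # The old module path still resolves the name, because stream.py now
    # contains "from .handler.stream_worker import StreamWorker":
    from pygpt_net.controller.chat.stream import StreamWorker as LegacyStreamWorker

    # Both names should refer to the same class object.
    assert StreamWorker is LegacyStreamWorker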