pygpt-net 2.6.30__py3-none-any.whl → 2.6.31__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
Files changed (101)
  1. pygpt_net/CHANGELOG.txt +8 -0
  2. pygpt_net/__init__.py +3 -3
  3. pygpt_net/app.py +4 -0
  4. pygpt_net/controller/__init__.py +5 -2
  5. pygpt_net/controller/audio/audio.py +25 -1
  6. pygpt_net/controller/audio/ui.py +2 -2
  7. pygpt_net/controller/chat/audio.py +1 -8
  8. pygpt_net/controller/chat/common.py +29 -3
  9. pygpt_net/controller/chat/handler/__init__.py +0 -0
  10. pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
  11. pygpt_net/controller/chat/output.py +8 -3
  12. pygpt_net/controller/chat/stream.py +3 -1071
  13. pygpt_net/controller/chat/text.py +3 -2
  14. pygpt_net/controller/kernel/kernel.py +11 -3
  15. pygpt_net/controller/kernel/reply.py +5 -1
  16. pygpt_net/controller/realtime/__init__.py +12 -0
  17. pygpt_net/controller/realtime/manager.py +53 -0
  18. pygpt_net/controller/realtime/realtime.py +268 -0
  19. pygpt_net/controller/ui/mode.py +7 -0
  20. pygpt_net/controller/ui/ui.py +19 -1
  21. pygpt_net/core/audio/audio.py +6 -1
  22. pygpt_net/core/audio/backend/native/__init__.py +12 -0
  23. pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
  24. pygpt_net/core/audio/backend/native/player.py +139 -0
  25. pygpt_net/core/audio/backend/native/realtime.py +250 -0
  26. pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
  27. pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
  28. pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
  29. pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
  30. pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
  31. pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
  32. pygpt_net/core/audio/backend/shared/__init__.py +38 -0
  33. pygpt_net/core/audio/backend/shared/conversions.py +211 -0
  34. pygpt_net/core/audio/backend/shared/envelope.py +38 -0
  35. pygpt_net/core/audio/backend/shared/player.py +137 -0
  36. pygpt_net/core/audio/backend/shared/rt.py +52 -0
  37. pygpt_net/core/audio/capture.py +5 -0
  38. pygpt_net/core/audio/output.py +13 -2
  39. pygpt_net/core/audio/whisper.py +6 -2
  40. pygpt_net/core/bridge/bridge.py +2 -1
  41. pygpt_net/core/bridge/worker.py +4 -1
  42. pygpt_net/core/dispatcher/dispatcher.py +37 -1
  43. pygpt_net/core/events/__init__.py +2 -1
  44. pygpt_net/core/events/realtime.py +55 -0
  45. pygpt_net/core/image/image.py +51 -1
  46. pygpt_net/core/realtime/__init__.py +0 -0
  47. pygpt_net/core/realtime/options.py +87 -0
  48. pygpt_net/core/realtime/shared/__init__.py +0 -0
  49. pygpt_net/core/realtime/shared/audio.py +213 -0
  50. pygpt_net/core/realtime/shared/loop.py +64 -0
  51. pygpt_net/core/realtime/shared/session.py +59 -0
  52. pygpt_net/core/realtime/shared/text.py +37 -0
  53. pygpt_net/core/realtime/shared/tools.py +276 -0
  54. pygpt_net/core/realtime/shared/turn.py +38 -0
  55. pygpt_net/core/realtime/shared/types.py +16 -0
  56. pygpt_net/core/realtime/worker.py +164 -0
  57. pygpt_net/core/types/__init__.py +1 -0
  58. pygpt_net/core/types/image.py +48 -0
  59. pygpt_net/data/config/config.json +10 -4
  60. pygpt_net/data/config/models.json +149 -103
  61. pygpt_net/data/config/settings.json +50 -0
  62. pygpt_net/data/locale/locale.de.ini +5 -5
  63. pygpt_net/data/locale/locale.en.ini +19 -13
  64. pygpt_net/data/locale/locale.es.ini +5 -5
  65. pygpt_net/data/locale/locale.fr.ini +5 -5
  66. pygpt_net/data/locale/locale.it.ini +5 -5
  67. pygpt_net/data/locale/locale.pl.ini +5 -5
  68. pygpt_net/data/locale/locale.uk.ini +5 -5
  69. pygpt_net/data/locale/locale.zh.ini +1 -1
  70. pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
  71. pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
  72. pygpt_net/plugin/audio_input/plugin.py +37 -4
  73. pygpt_net/plugin/audio_input/simple.py +57 -8
  74. pygpt_net/plugin/cmd_files/worker.py +3 -0
  75. pygpt_net/provider/api/google/__init__.py +39 -6
  76. pygpt_net/provider/api/google/audio.py +8 -1
  77. pygpt_net/provider/api/google/chat.py +45 -6
  78. pygpt_net/provider/api/google/image.py +226 -86
  79. pygpt_net/provider/api/google/realtime/__init__.py +12 -0
  80. pygpt_net/provider/api/google/realtime/client.py +1945 -0
  81. pygpt_net/provider/api/google/realtime/realtime.py +186 -0
  82. pygpt_net/provider/api/openai/__init__.py +22 -2
  83. pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
  84. pygpt_net/provider/api/openai/realtime/client.py +1828 -0
  85. pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
  86. pygpt_net/provider/audio_input/google_genai.py +103 -0
  87. pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
  88. pygpt_net/provider/audio_output/google_tts.py +0 -12
  89. pygpt_net/provider/audio_output/openai_tts.py +8 -5
  90. pygpt_net/provider/core/config/patch.py +15 -0
  91. pygpt_net/provider/core/model/patch.py +11 -0
  92. pygpt_net/provider/llms/google.py +8 -9
  93. pygpt_net/ui/layout/toolbox/footer.py +16 -0
  94. pygpt_net/ui/layout/toolbox/image.py +5 -0
  95. pygpt_net/ui/widget/option/combo.py +15 -1
  96. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +26 -14
  97. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +100 -62
  98. pygpt_net/core/audio/backend/pyaudio.py +0 -554
  99. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
  100. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
  101. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0
pygpt_net/data/locale/plugin.audio_output.en.ini
@@ -15,6 +15,10 @@ eleven_labs_voice.description = Specify the Voice ID.
  eleven_labs_voice.label = Voice ID
  google_api_key.description = You can obtain your own API key here: https://console.cloud.google.com/apis/library/texttospeech.googleapis.com
  google_api_key.label = Google Cloud Text-to-speech API Key
+ google_genai_tts_model.description = Specify Gemini TTS model, e.g.: gemini-2.5-flash-preview-tts or gemini-2.5-pro-preview-tts
+ google_genai_tts_model.label = Model
+ google_genai_tts_voice.description = Specify voice, e.g.: Puck, Kore, Charon, Leda, Zephyr... (case-sensitive)
+ google_genai_tts_voice.label = Voice
  google_lang.description = Specify the language code.
  google_lang.label = Language code
  google_voice.description = Specify the voice.
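
The two new google_genai_tts_* options feed the new google_genai_tts audio output provider (file 87 above). As a rough sketch, assuming the google-genai SDK's documented TTS request shape, the options map onto a call like the one below; the model and voice values are the examples from the option descriptions, and the key handling is elided:

    # Hedged sketch of the Gemini TTS request the two options configure.
    # Assumes the google-genai SDK; returned audio is raw PCM, not a WAV container.
    from google import genai
    from google.genai import types

    client = genai.Client(api_key="...")  # key handling elided
    response = client.models.generate_content(
        model="gemini-2.5-flash-preview-tts",  # google_genai_tts_model
        contents="Hello there!",
        config=types.GenerateContentConfig(
            response_modalities=["AUDIO"],
            speech_config=types.SpeechConfig(
                voice_config=types.VoiceConfig(
                    prebuilt_voice_config=types.PrebuiltVoiceConfig(
                        voice_name="Kore",  # google_genai_tts_voice, case-sensitive
                    )
                )
            ),
        ),
    )
    pcm = response.candidates[0].content.parts[0].inline_data.data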
pygpt_net/plugin/audio_input/plugin.py
@@ -6,7 +6,7 @@
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
  # MIT License #
  # Created By : Marcin Szczygliński #
- # Updated Date: 2024.11.26 19:00:00 #
+ # Updated Date: 2025.08.31 23:00:00 #
  # ================================================== #

  import os
@@ -23,6 +23,7 @@ from pygpt_net.utils import trans
  from .config import Config
  from .worker import Worker
  from .simple import Simple
+ from ...core.types import MODE_AUDIO


  class Plugin(BasePlugin):
@@ -124,13 +125,31 @@ class Plugin(BasePlugin):
          words = [x.strip() for x in words]  # remove white-spaces
          return words

-     def toggle_recording_simple(self):
+     def toggle_recording_simple(
+             self,
+             state: bool = None,
+             auto: bool = False
+     ):
          """
          Event: AUDIO_INPUT_RECORD_TOGGLE

          Toggle recording
+
+         :param state: state to set
+         :param auto: True if called automatically (not by user)
+         """
+         if self.window.controller.realtime.is_enabled():
+             self.handler_simple.toggle_realtime(state=state, auto=auto)
+             return
+         self.handler_simple.toggle_recording(state=state)
+
+     def is_recording(self) -> bool:
          """
-         self.handler_simple.toggle_recording()
+         Check if is recording (simple mode)
+
+         :return: True if is recording
+         """
+         return self.handler_simple.is_recording

      def toggle_speech(self, state: bool):
          """
@@ -214,7 +233,9 @@
              self.toggle_speech(data['value'])

          elif name == Event.AUDIO_INPUT_RECORD_TOGGLE:
-             self.toggle_recording_simple()
+             state = data['state'] if 'value' in data else None
+             auto = data['auto'] if 'auto' in data else False
+             self.toggle_recording_simple(state=state, auto=auto)

          elif name == Event.AUDIO_INPUT_STOP:
              self.on_stop()
@@ -492,6 +513,18 @@
              self.window.dispatch(event)  # send text, input clear in send method
              self.set_status('')

+     def handle_realtime_stopped(self):
+         """Handle realtime stopped"""
+         context = BridgeContext()
+         context.prompt = "..."
+         extra = {}
+         event = KernelEvent(KernelEvent.INPUT_SYSTEM, {
+             'context': context,
+             'extra': extra,
+         })
+         self.window.dispatch(event)  # send text, input clear in send method
+         self.set_status('')
+
      @Slot(object)
      def handle_status(self, data: str):
          """
pygpt_net/plugin/audio_input/simple.py
@@ -6,14 +6,14 @@
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
  # MIT License #
  # Created By : Marcin Szczygliński #
- # Updated Date: 2025.08.27 07:00:00 #
+ # Updated Date: 2025.08.31 23:00:00 #
  # ================================================== #

  import os

  from PySide6.QtCore import QTimer

- from pygpt_net.core.events import AppEvent
+ from pygpt_net.core.events import AppEvent, RealtimeEvent
  from pygpt_net.core.tabs.tab import Tab
  from pygpt_net.utils import trans

@@ -32,8 +32,46 @@ class Simple:
          self.is_recording = False
          self.timer = None

-     def toggle_recording(self):
-         """Toggle recording"""
+     def toggle_realtime(
+             self,
+             state: bool = None,
+             auto: bool = False
+     ):
+         """
+         Toggle recording
+
+         :param state: True to start recording, False to stop recording, None to toggle
+         :param auto: True if called automatically (not by user)
+         """
+         if state is not None:
+             if state and not self.is_recording:
+                 self.start_recording(realtime=True)
+             elif not state:
+                 self.force_stop()
+             else:
+                 self.force_stop()
+             return
+         if self.is_recording:
+             self.stop_recording(realtime=True)
+             if not auto:
+                 self.plugin.window.dispatch(RealtimeEvent(RealtimeEvent.RT_INPUT_AUDIO_MANUAL_STOP))
+         else:
+             self.start_recording(realtime=True)
+             if not auto:
+                 self.plugin.window.dispatch(RealtimeEvent(RealtimeEvent.RT_INPUT_AUDIO_MANUAL_START))
+
+     def toggle_recording(self, state: bool = None):
+         """
+         Toggle recording
+
+         :param state: True to start recording, False to stop recording, None to toggle
+         """
+         if state is not None:
+             if state and not self.is_recording:
+                 self.start_recording()
+             elif not state:
+                 self.force_stop()
+             return
          if self.is_recording:
              self.stop_recording()
          else:
@@ -51,11 +89,12 @@
          """Stop timeout"""
          self.stop_recording(timeout=True)

-     def start_recording(self, force: bool = False):
+     def start_recording(self, force: bool = False, realtime: bool = False):
          """
          Start recording

          :param force: True to force recording
+         :param realtime: True if called from realtime callback
          """
          # display snap warning if not displayed yet
          if (not self.plugin.window.core.config.get("audio.input.snap", False)
@@ -89,7 +128,7 @@
          # disable in continuous mode
          timeout = int(self.plugin.window.core.config.get('audio.input.timeout', 120) or 0)  # get timeout
          timeout_continuous = self.plugin.window.core.config.get('audio.input.timeout.continuous', False)  # enable continuous timeout
-         if timeout > 0:
+         if timeout > 0 and not realtime:
              if self.timer is None and (not continuous_enabled or timeout_continuous):
                  self.timer = QTimer()
                  self.timer.timeout.connect(self.stop_timeout)
@@ -119,11 +158,12 @@
          )
          self.switch_btn_start()  # switch button to start

-     def stop_recording(self, timeout: bool = False):
+     def stop_recording(self, timeout: bool = False, realtime: bool = False):
          """
          Stop recording

          :param timeout: True if stopped due to timeout
+         :param realtime: True if called from realtime callback
          """
          self.plugin.window.core.audio.capture.reset_audio_level()
          self.is_recording = False
@@ -143,7 +183,7 @@
              return

          if self.plugin.window.core.audio.capture.has_frames():
-             if not self.plugin.window.core.audio.capture.has_min_frames():
+             if not self.plugin.window.core.audio.capture.has_min_frames() and not realtime:
                  self.plugin.window.update_status(trans("status.audio.too_short"))
                  self.plugin.window.dispatch(AppEvent(AppEvent.VOICE_CONTROL_STOPPED))  # app event
                  return
@@ -152,6 +192,15 @@
          else:
              self.plugin.window.update_status("")

+     def force_stop(self):
+         """Stop recording"""
+         self.is_recording = False
+         self.plugin.window.dispatch(AppEvent(AppEvent.INPUT_VOICE_LISTEN_STOPPED))  # app event
+         self.switch_btn_start()  # switch button to start
+         if self.plugin.window.core.audio.capture.has_source():
+             self.plugin.window.core.audio.capture.stop()  # stop recording
+         return
+
      def on_stop(self):
          """Handle auto-transcribe"""
          path = os.path.join(self.plugin.window.core.config.path, self.plugin.input_file)
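
The net effect on Simple is an explicit-state API layered over the old toggle. A short usage sketch, with names taken from the diff (plugin stands in for the plugin instance):

    # Usage sketch of the new explicit-state API (names from the diff above).
    simple = plugin.handler_simple
    simple.toggle_recording(state=True)   # start if not already recording
    simple.toggle_recording(state=False)  # force_stop(), unconditionally
    simple.toggle_recording()             # plain toggle, as before
    simple.toggle_realtime()              # toggle + RT_INPUT_AUDIO_MANUAL_* events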
pygpt_net/plugin/cmd_files/worker.py
@@ -920,6 +920,9 @@ class Worker(BaseWorker):
          :param context: context data
          :return: extra data
          """
+         # disabled in v2.6.31
+         # reason: do not duplicate context in chat
+         return {}
          cmd = item["cmd"]
          extra = {
              'plugin': "cmd_files",
pygpt_net/provider/api/google/__init__.py
@@ -6,13 +6,14 @@
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
  # MIT License #
  # Created By : Marcin Szczygliński #
- # Updated Date: 2025.08.28 20:00:00 #
+ # Updated Date: 2025.08.30 06:00:00 #
  # ================================================== #

  from typing import Optional, Dict, Any

  from google.genai import types as gtypes
  from google import genai
+
  from pygpt_net.core.types import (
      MODE_ASSISTANT,
      MODE_AUDIO,
@@ -29,7 +30,7 @@ from .vision import Vision
  from .tools import Tools
  from .audio import Audio
  from .image import Image
-
+ from .realtime import Realtime

  class ApiGoogle:
      def __init__(self, window=None):
@@ -44,6 +45,7 @@ class ApiGoogle:
          self.tools = Tools(window)
          self.audio = Audio(window)
          self.image = Image(window)
+         self.realtime = Realtime(window)
          self.client: Optional[genai.Client] = None
          self.locked = False
          self.last_client_args: Optional[Dict[str, Any]] = None
@@ -72,12 +74,18 @@
          self.last_client_args = filtered
          return self.client

-     def call(self, context: BridgeContext, extra: dict = None) -> bool:
+     def call(
+             self,
+             context: BridgeContext,
+             extra: dict = None,
+             rt_signals = None
+     ) -> bool:
          """
          Make an API call to Google GenAI

          :param context: BridgeContext
          :param extra: Extra parameters
+         :param rt_signals: Realtime signals for audio streaming
          :return: True if successful, False otherwise
          """
          mode = context.mode
@@ -94,6 +102,18 @@
          response = None

          if mode in [MODE_COMPLETION, MODE_CHAT, MODE_AUDIO, MODE_RESEARCH]:
+
+             # Live API for audio streaming
+             if mode == MODE_AUDIO and stream:
+                 is_realtime = self.realtime.begin(
+                     context=context,
+                     model=model,
+                     extra=extra or {},
+                     rt_signals=rt_signals
+                 )
+                 if is_realtime:
+                     return True
+
              response = self.chat.send(context=context, extra=extra)
              used_tokens = self.chat.get_used_tokens()
              if ctx:
@@ -135,7 +155,11 @@
              pass
          return True

-     def quick_call(self, context: BridgeContext, extra: dict = None) -> str:
+     def quick_call(
+             self,
+             context: BridgeContext,
+             extra: dict = None
+     ) -> str:
          """
          Make a quick API call to Google GenAI and return the output text

@@ -206,9 +230,9 @@
      def build_remote_tools(self, model: ModelItem = None) -> list:
          """
          Build Google GenAI remote tools based on config flags.
-         - google_tool_search: enables grounding via Google Search (Gemini 2.x)
+         - remote_tools.google.web_search: enables grounding via Google Search (Gemini 2.x)
            or GoogleSearchRetrieval (Gemini 1.5 fallback).
-         - google_tool_code_execution: enables code execution tool.
+         - remote_tools.google.code_interpreter: enables code execution tool.

          Returns a list of gtypes.Tool objects (can be empty).

@@ -242,6 +266,15 @@
          except Exception as e:
              self.window.core.debug.log(e)

+         # URL Context tool
+         if cfg.get("remote_tools.google.url_ctx") and "image" not in model.id:
+             try:
+                 # Supported on Gemini 2.x+ models (not on 1.5)
+                 if not model_id.startswith("gemini-1.5") and not model_id.startswith("models/gemini-1.5"):
+                     tools.append(gtypes.Tool(url_context=gtypes.UrlContext))
+             except Exception as e:
+                 self.window.core.debug.log(e)
+
          return tools

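For reference, a minimal sketch of the gtypes.Tool objects the three config flags translate to, assuming the current google-genai type names (cfg stands in for the config accessor used inside the method):

    # Sketch: tool object per config flag (google-genai SDK types assumed).
    from google.genai import types as gtypes

    tools = []
    if cfg.get("remote_tools.google.web_search"):
        tools.append(gtypes.Tool(google_search=gtypes.GoogleSearch()))        # Gemini 2.x grounding
    if cfg.get("remote_tools.google.code_interpreter"):
        tools.append(gtypes.Tool(code_execution=gtypes.ToolCodeExecution()))  # code execution
    if cfg.get("remote_tools.google.url_ctx"):
        tools.append(gtypes.Tool(url_context=gtypes.UrlContext()))            # URL context, Gemini 2.x+
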
pygpt_net/provider/api/google/audio.py
@@ -24,6 +24,8 @@ class Audio:
          Audio helpers for Google GenAI.
          - Build audio input parts for requests
          - Convert Google PCM output to WAV (base64) for UI compatibility
+
+         :param window: Window instance
          """
          self.window = window
@@ -103,7 +105,12 @@

      @staticmethod
      def _ensure_bytes(data) -> Optional[bytes]:
-         """Return raw bytes from inline_data.data (bytes or base64 string)."""
+         """
+         Return raw bytes from inline_data.data (bytes or base64 string).
+
+         :param data: bytes or base64 string
+         :return: bytes or None
+         """
          try:
              if isinstance(data, (bytes, bytearray)):
                  return bytes(data)
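
The hunk cuts off before the base64 branch; a self-contained sketch of the full normalization the docstring describes:

    # Self-contained sketch of the normalization (the diff truncates the body).
    import base64
    from typing import Optional

    def ensure_bytes(data) -> Optional[bytes]:
        """Return raw bytes from inline_data.data (bytes or base64 string)."""
        try:
            if isinstance(data, (bytes, bytearray)):
                return bytes(data)
            if isinstance(data, str):
                return base64.b64decode(data)  # SDK may deliver base64 text
        except Exception:
            pass
        return None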
pygpt_net/provider/api/google/chat.py
@@ -29,9 +29,17 @@ class Chat:
          self.window = window
          self.input_tokens = 0

-     def send(self, context: BridgeContext, extra: Optional[Dict[str, Any]] = None):
+     def send(
+             self,
+             context: BridgeContext,
+             extra: Optional[Dict[str, Any]] = None
+     ):
          """
          Call Google GenAI for chat / multimodal / audio.
+
+         :param context: BridgeContext with prompt, model, history, mode, etc.
+         :param extra: Extra parameters (not used currently)
+         :return: Response object or generator (if streaming)
          """
          prompt = context.prompt
          stream = context.stream
@@ -110,9 +118,13 @@
          # Tools -> merge app-defined tools with remote tools
          base_tools = self.window.core.api.google.tools.prepare(model, functions)
          remote_tools = self.window.core.api.google.build_remote_tools(model)
+
+         # Check tools compatibility
          if base_tools:
-             remote_tools = []  # do not mix local and remote tools
+             remote_tools = []  # remote tools are not allowed if function calling is used
          tools = (base_tools or []) + (remote_tools or [])
+         if "-image" in model.id:
+             tools = None  # function calling is not supported for image models

          # Sampling
          temperature = self.window.core.config.get('temperature')
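
The compatibility rule from the hunk above, pulled out as a standalone sketch (the "not allowed" constraint is the one stated in the inline comment):

    # Standalone sketch of the tool-merging rule from the hunk above.
    def merge_tools(base_tools: list, remote_tools: list, model_id: str):
        if base_tools:
            remote_tools = []  # remote tools not allowed with function calling
        if "-image" in model_id:
            return None        # image-output models take no tools at all
        return (base_tools or []) + (remote_tools or [])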
@@ -144,7 +156,7 @@
          # Voice selection (case-sensitive name)
          voice_name = "Kore"
          try:
-             tmp = self.window.core.plugins.get_option("audio_output", "google_voice_native")
+             tmp = self.window.core.plugins.get_option("audio_output", "google_genai_tts_voice")
              if tmp:
                  name = str(tmp).strip()
                  mapping = {"kore": "Kore", "puck": "Puck", "charon": "Charon", "verse": "Verse", "legend": "Legend"}
@@ -169,9 +181,17 @@
          else:
              return client.models.generate_content(**params)

-     def unpack_response(self, mode: str, response, ctx: CtxItem):
+     def unpack_response(
+             self,
+             mode: str,
+             response, ctx: CtxItem
+     ):
          """
          Unpack non-streaming response from Google GenAI and set context.
+
+         :param mode: MODE_CHAT or MODE_AUDIO
+         :param response: Response object
+         :param ctx: CtxItem to set output, audio_output, tokens, tool_calls
          """
          if mode == MODE_AUDIO:
              # Prefer audio if present
@@ -229,6 +249,11 @@
      def extract_text(self, response) -> str:
          """
          Extract output text.
+
+         Prefer response.text (Python SDK), then fallback to parts[].text.
+
+         :param response: Response object
+         :return: Extracted text
          """
          txt = getattr(response, "text", None) or getattr(response, "output_text", None)
          if txt:
@@ -332,11 +357,17 @@

          return out

-     def _extract_inline_images_and_links(self, response, ctx: CtxItem) -> None:
+     def _extract_inline_images_and_links(
+             self,
+             response, ctx: CtxItem
+     ) -> None:
          """
          Extract inline image parts (Gemini image output) and file links.
          - Saves inline_data (image/*) bytes to files and appends paths to ctx.images.
          - Appends HTTP(S) image URIs from file_data to ctx.urls.
+
+         :param response: Response object
+         :param ctx: CtxItem to set images and urls
          """
          images: list[str] = []
          urls: list[str] = []
@@ -386,7 +417,12 @@

      @staticmethod
      def _ensure_bytes(data) -> bytes | None:
-         """Return raw bytes from SDK part.inline_data.data which can be bytes or base64 string."""
+         """
+         Return raw bytes from SDK part.inline_data.data which can be bytes or base64 string.
+
+         :param data: bytes or str
+         :return: bytes or None
+         """
          try:
              if isinstance(data, (bytes, bytearray)):
                  return bytes(data)
@@ -545,6 +581,9 @@
          Heuristic check if the model supports native TTS.
          - Official TTS models contain '-tts' in id (e.g. 'gemini-2.5-flash-preview-tts').
          - Future/preview names may contain 'native-audio'.
+
+         :param model_id: Model ID
+         :return: True if supports TTS, False otherwise
          """
          if not model_id:
              return False
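
The visible part of the heuristic ends at the guard clause; a sketch of the full check the docstring describes:

    # Sketch of the TTS-capability heuristic described in the docstring above.
    def is_native_tts_model(model_id: str) -> bool:
        if not model_id:
            return False
        mid = model_id.lower()
        return "-tts" in mid or "native-audio" in mid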