pygpt-net 2.7.8__py3-none-any.whl → 2.7.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pygpt_net/CHANGELOG.txt CHANGED
@@ -1,3 +1,8 @@
1
+ 2.7.9 (2026-01-08)
2
+
3
+ - Improved realtime audio mode.
4
+ - Added xAI provider and Grok support in realtime audio mode.
5
+
1
6
  2.7.8 (2026-01-06)
2
7
 
3
8
  - Added the xAI Collections remote tool and integrated collections management into the Remote Vector Stores tool.
pygpt_net/__init__.py CHANGED
@@ -6,15 +6,15 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2026-01-06 00:00:00 #
9
+ # Updated Date: 2026-01-08 00:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  __author__ = "Marcin Szczygliński"
13
13
  __copyright__ = "Copyright 2026, Marcin Szczygliński"
14
14
  __credits__ = ["Marcin Szczygliński"]
15
15
  __license__ = "MIT"
16
- __version__ = "2.7.8"
17
- __build__ = "2026-01-06"
16
+ __version__ = "2.7.9"
17
+ __build__ = "2026-01-08"
18
18
  __maintainer__ = "Marcin Szczygliński"
19
19
  __github__ = "https://github.com/szczyglis-dev/py-gpt"
20
20
  __report__ = "https://github.com/szczyglis-dev/py-gpt/issues"
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.09.17 07:00:00 #
9
+ # Updated Date: 2026.01.07 23:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  from PySide6.QtCore import Slot, QTimer
@@ -87,6 +87,8 @@ class Realtime:
87
87
  self.window.core.api.google.realtime.handle_audio_input(event)
88
88
  elif self.current_active == "openai":
89
89
  self.window.core.api.openai.realtime.handle_audio_input(event)
90
+ elif self.current_active == "x_ai":
91
+ self.window.core.api.xai.realtime.handle_audio_input(event)
90
92
 
91
93
  # begin: first text chunk or audio chunk received, start rendering
92
94
  elif event.name == RealtimeEvent.RT_OUTPUT_READY:
@@ -216,6 +218,8 @@ class Realtime:
216
218
  self.window.core.api.google.realtime.manual_commit()
217
219
  elif self.current_active == "openai":
218
220
  self.window.core.api.openai.realtime.manual_commit()
221
+ elif self.current_active == "x_ai":
222
+ self.window.core.api.xai.realtime.manual_commit()
219
223
 
220
224
  def end_turn(self, ctx):
221
225
  """
@@ -252,6 +256,10 @@ class Realtime:
252
256
  self.window.core.api.google.realtime.shutdown()
253
257
  except Exception as e:
254
258
  self.window.core.debug.log(f"[google] Realtime shutdown error: {e}")
259
+ try:
260
+ self.window.core.api.xai.realtime.shutdown()
261
+ except Exception as e:
262
+ self.window.core.debug.log(f"[xAI] Realtime shutdown error: {e}")
255
263
  try:
256
264
  self.manager.shutdown()
257
265
  except Exception as e:
@@ -267,6 +275,10 @@ class Realtime:
267
275
  self.window.core.api.google.realtime.reset()
268
276
  except Exception as e:
269
277
  self.window.core.debug.log(f"[google] Realtime reset error: {e}")
278
+ try:
279
+ self.window.core.api.xai.realtime.reset()
280
+ except Exception as e:
281
+ self.window.core.debug.log(f"[xAI] Realtime reset error: {e}")
270
282
 
271
283
  def is_supported(self) -> bool:
272
284
  """
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "__meta__": {
3
- "version": "2.7.8",
4
- "app.version": "2.7.8",
5
- "updated_at": "2026-01-06T00:00:00"
3
+ "version": "2.7.9",
4
+ "app.version": "2.7.9",
5
+ "updated_at": "2026-01-08T00:00:00"
6
6
  },
7
7
  "access.audio.event.speech": false,
8
8
  "access.audio.event.speech.disabled": [],
@@ -88,6 +88,7 @@
88
88
  "api_key_perplexity": "",
89
89
  "api_key_voyage": "",
90
90
  "api_key_xai": "",
91
+ "api_key_management_xai": "",
91
92
  "api_native_anthropic": true,
92
93
  "api_native_google": true,
93
94
  "api_native_google.app_credentials": "",
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "__meta__": {
3
- "version": "2.7.8",
4
- "app.version": "2.7.8",
5
- "updated_at": "2026-01-06T00:00:00"
3
+ "version": "2.7.9",
4
+ "app.version": "2.7.9",
5
+ "updated_at": "2026-01-08T00:00:00"
6
6
  },
7
7
  "items": {
8
8
  "SpeakLeash/bielik-11b-v2.3-instruct:Q4_K_M": {
@@ -1073,9 +1073,9 @@
1073
1073
  "provider": "google",
1074
1074
  "tool_calls": true
1075
1075
  },
1076
- "gemini-2.5-flash-preview-native-audio-dialog": {
1077
- "id": "gemini-2.5-flash-preview-native-audio-dialog",
1078
- "name": "gemini-2.5-flash-preview-native-audio-dialog",
1076
+ "gemini-2.5-flash-native-audio-latest": {
1077
+ "id": "gemini-2.5-flash-native-audio-latest",
1078
+ "name": "gemini-2.5-flash-native-audio-latest",
1079
1079
  "mode": [
1080
1080
  "audio"
1081
1081
  ],
@@ -1083,7 +1083,7 @@
1083
1083
  "args": [
1084
1084
  {
1085
1085
  "name": "model",
1086
- "value": "models/gemini-2.5-flash-preview-native-audio-dialog",
1086
+ "value": "models/gemini-2.5-flash-native-audio-latest",
1087
1087
  "type": "str"
1088
1088
  }
1089
1089
  ],
@@ -3170,7 +3170,8 @@
3170
3170
  "agent",
3171
3171
  "agent_llama",
3172
3172
  "expert",
3173
- "agent_openai"
3173
+ "agent_openai",
3174
+ "audio"
3174
3175
  ],
3175
3176
  "llama_index": {
3176
3177
  "args": [
@@ -3196,10 +3197,12 @@
3196
3197
  "default": false,
3197
3198
  "input": [
3198
3199
  "text",
3199
- "image"
3200
+ "image",
3201
+ "audio"
3200
3202
  ],
3201
3203
  "output": [
3202
- "text"
3204
+ "text",
3205
+ "audio"
3203
3206
  ],
3204
3207
  "extra": {},
3205
3208
  "imported": false,
@@ -3215,7 +3218,8 @@
3215
3218
  "agent_llama",
3216
3219
  "agent_openai",
3217
3220
  "agent",
3218
- "expert"
3221
+ "expert",
3222
+ "audio"
3219
3223
  ],
3220
3224
  "llama_index": {
3221
3225
  "args": [
@@ -3243,10 +3247,12 @@
3243
3247
  "default": false,
3244
3248
  "input": [
3245
3249
  "text",
3246
- "image"
3250
+ "image",
3251
+ "audio"
3247
3252
  ],
3248
3253
  "output": [
3249
- "text"
3254
+ "text",
3255
+ "audio"
3250
3256
  ],
3251
3257
  "extra": {},
3252
3258
  "imported": false,
@@ -3262,7 +3268,8 @@
3262
3268
  "agent_llama",
3263
3269
  "agent_openai",
3264
3270
  "agent",
3265
- "expert"
3271
+ "expert",
3272
+ "audio"
3266
3273
  ],
3267
3274
  "llama_index": {
3268
3275
  "args": [
@@ -3290,10 +3297,12 @@
3290
3297
  "default": false,
3291
3298
  "input": [
3292
3299
  "text",
3293
- "image"
3300
+ "image",
3301
+ "audio"
3294
3302
  ],
3295
3303
  "output": [
3296
- "text"
3304
+ "text",
3305
+ "audio"
3297
3306
  ],
3298
3307
  "extra": {},
3299
3308
  "imported": false,
@@ -3309,7 +3318,8 @@
3309
3318
  "agent_llama",
3310
3319
  "agent_openai",
3311
3320
  "agent",
3312
- "expert"
3321
+ "expert",
3322
+ "audio"
3313
3323
  ],
3314
3324
  "llama_index": {
3315
3325
  "args": [
@@ -3337,10 +3347,12 @@
3337
3347
  "default": false,
3338
3348
  "input": [
3339
3349
  "text",
3340
- "image"
3350
+ "image",
3351
+ "audio"
3341
3352
  ],
3342
3353
  "output": [
3343
- "text"
3354
+ "text",
3355
+ "audio"
3344
3356
  ],
3345
3357
  "extra": {},
3346
3358
  "imported": false,
@@ -3356,7 +3368,8 @@
3356
3368
  "agent_llama",
3357
3369
  "agent_openai",
3358
3370
  "agent",
3359
- "expert"
3371
+ "expert",
3372
+ "audio"
3360
3373
  ],
3361
3374
  "llama_index": {
3362
3375
  "args": [
@@ -3384,10 +3397,12 @@
3384
3397
  "default": false,
3385
3398
  "input": [
3386
3399
  "text",
3387
- "image"
3400
+ "image",
3401
+ "audio"
3388
3402
  ],
3389
3403
  "output": [
3390
- "text"
3404
+ "text",
3405
+ "audio"
3391
3406
  ],
3392
3407
  "extra": {},
3393
3408
  "imported": false,
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.08.31 23:00:00 #
9
+ # Updated Date: 2026.01.07 23:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import asyncio
@@ -338,20 +338,25 @@ class GoogleLiveClient:
338
338
  if sys_prompt:
339
339
  live_cfg["system_instruction"] = str(sys_prompt)
340
340
 
341
- # Session resumption: enable updates; resume when a different non-empty handle is given
341
+ # Save callbacks and ctx early so handle persistence can target the current context
342
+ self._on_text = on_text
343
+ self._on_audio = on_audio
344
+ self._should_stop = should_stop or (lambda: False)
345
+ self._ctx = ctx
346
+ self._last_opts = opts
347
+
348
+ # Session resumption: configure per docs; include handle when provided, otherwise None.
342
349
  try:
350
+ ph = None
343
351
  provided_handle = getattr(opts, "rt_session_id", None)
344
- resume_handle = None
345
352
  if isinstance(provided_handle, str):
346
- ph = provided_handle.strip()
347
- if ph and ph != (self._rt_session_id or ""):
348
- resume_handle = ph
353
+ ph = provided_handle.strip() or None
349
354
 
350
- live_cfg["session_resumption"] = gtypes.SessionResumptionConfig(handle=resume_handle)
355
+ sr_cfg = gtypes.SessionResumptionConfig(handle=ph)
356
+ live_cfg["session_resumption"] = sr_cfg
351
357
 
352
- if resume_handle:
353
- self._rt_session_id = resume_handle
354
- set_ctx_rt_handle(self._ctx, resume_handle, self.window)
358
+ if ph:
359
+ self._persist_rt_handle(ph)
355
360
  except Exception:
356
361
  pass
357
362
 
@@ -360,13 +365,6 @@ class GoogleLiveClient:
360
365
  apply_turn_mode_google(live_cfg, turn_mode)
361
366
  self._tune_google_vad(live_cfg, opts)
362
367
 
363
- # Save callbacks and ctx
364
- self._on_text = on_text
365
- self._on_audio = on_audio
366
- self._should_stop = should_stop or (lambda: False)
367
- self._ctx = ctx
368
- self._last_opts = opts
369
-
370
368
  # Control primitives
371
369
  self._response_done = asyncio.Event()
372
370
  self._send_lock = asyncio.Lock()
@@ -407,7 +405,7 @@ class GoogleLiveClient:
407
405
  self._rt_state = None
408
406
  self._last_tool_calls = []
409
407
 
410
- # Clear only in-memory handle; keep persisted ctx.extra["rt_session_id"]
408
+ # Clear in-memory handle as well to prevent unintended resumption
411
409
  self._rt_session_id = None
412
410
 
413
411
  # Clear cached tools signature
@@ -820,11 +818,10 @@ class GoogleLiveClient:
820
818
  try:
821
819
  sru = getattr(response, "session_resumption_update", None) or getattr(response, "sessionResumptionUpdate", None)
822
820
  if sru:
823
- resumable = bool(getattr(sru, "resumable", None))
824
- new_handle = getattr(sru, "new_handle", None) or getattr(sru, "newHandle", None)
825
- if resumable and isinstance(new_handle, str) and new_handle.strip():
826
- self._rt_session_id = new_handle.strip()
827
- set_ctx_rt_handle(self._ctx, self._rt_session_id, self.window)
821
+ # Prefer robustness: persist handle if present, regardless of 'resumable' flag inconsistencies
822
+ new_handle = self._extract_sru_handle(sru)
823
+ if isinstance(new_handle, str) and new_handle.strip():
824
+ self._persist_rt_handle(new_handle.strip())
828
825
  if self.debug:
829
826
  print(f"[google.live] session handle updated: {self._rt_session_id}")
830
827
  except Exception:
@@ -1740,6 +1737,10 @@ class GoogleLiveClient:
1740
1737
  """
1741
1738
  self.debug = bool(enabled)
1742
1739
 
1740
+ def is_session(self) -> bool:
1741
+ """Check if the WS session is currently open."""
1742
+ return self._session is not None
1743
+
1743
1744
  def is_session_active(self) -> bool:
1744
1745
  """Check if the WS session is currently open."""
1745
1746
  return self._session is not None
@@ -1748,6 +1749,12 @@ class GoogleLiveClient:
1748
1749
  """Update the current CtxItem (for session handle persistence)."""
1749
1750
  self._ctx = ctx
1750
1751
 
1752
+ def get_current_rt_session_id(self) -> Optional[str]:
1753
+ """
1754
+ Return the current resumable session handle if known.
1755
+ """
1756
+ return self._rt_session_id
1757
+
1751
1758
  # -----------------------------
1752
1759
  # Internal: auto-turn receiver bootstrap
1753
1760
  # -----------------------------
@@ -1942,4 +1949,43 @@ class GoogleLiveClient:
1942
1949
  """
1943
1950
  Emit RT_OUTPUT_AUDIO_COMMIT on first sign of model output in auto-turn mode.
1944
1951
  """
1945
- self._emit_audio_commit_signal()
1952
+ self._emit_audio_commit_signal()
1953
+
1954
+ # -----------------------------
1955
+ # Internal: session handle helpers
1956
+ # -----------------------------
1957
+
1958
+ def _persist_rt_handle(self, handle: str) -> None:
1959
+ """
1960
+ Persist current session handle in-memory, to ctx.extra and into last opts for future restarts.
1961
+ """
1962
+ try:
1963
+ self._rt_session_id = handle
1964
+ set_ctx_rt_handle(self._ctx, handle, self.window)
1965
+ except Exception:
1966
+ pass
1967
+ try:
1968
+ if self._last_opts is not None:
1969
+ setattr(self._last_opts, "rt_session_id", handle)
1970
+ except Exception:
1971
+ pass
1972
+
1973
+ def _extract_sru_handle(self, sru: Any) -> Optional[str]:
1974
+ """
1975
+ Extract handle from SessionResumptionUpdate (supports snake_case and camelCase, and token alias).
1976
+ """
1977
+ # Objects (attrs)
1978
+ for attr in ("new_handle", "newHandle", "token"):
1979
+ try:
1980
+ v = getattr(sru, attr, None)
1981
+ if isinstance(v, str) and v.strip():
1982
+ return v.strip()
1983
+ except Exception:
1984
+ pass
1985
+ # Dicts
1986
+ if isinstance(sru, dict):
1987
+ for k in ("new_handle", "newHandle", "token"):
1988
+ v = sru.get(k)
1989
+ if isinstance(v, str) and v.strip():
1990
+ return v.strip()
1991
+ return None
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2026.01.02 19:00:00 #
9
+ # Updated Date: 2026.01.07 23:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import json
@@ -86,8 +86,55 @@ class Realtime:
86
86
  self.handler.send_tool_results_sync({
87
87
  tool_call_id: tool_results
88
88
  })
89
+ self.handler.update_ctx(context.ctx)
89
90
  return True # do not start new session, just send tool results
90
91
 
92
+ # Tools
93
+ tools = self.window.core.api.google.tools.prepare(model, context.external_functions)
94
+ remote_tools = self.window.core.api.google.remote_tools.build_remote_tools(model)
95
+ if tools:
96
+ remote_tools = [] # in Google, remote tools are not allowed if function calling is used
97
+
98
+ # Resolve last session ID, prefer history, then fallback to current ctx and in-memory handler handle
99
+ last_session_id = extract_last_session_id(context.history) if context.history else None
100
+ if not last_session_id:
101
+ try:
102
+ if context.ctx and isinstance(context.ctx.extra, dict):
103
+ sid = context.ctx.extra.get("rt_session_id")
104
+ if isinstance(sid, str) and sid.strip():
105
+ last_session_id = sid.strip()
106
+ except Exception:
107
+ pass
108
+ if not last_session_id and self.handler.is_session_active():
109
+ try:
110
+ sid = self.handler.get_current_rt_session_id()
111
+ if isinstance(sid, str) and sid.strip():
112
+ last_session_id = sid.strip()
113
+ except Exception:
114
+ pass
115
+
116
+ if is_debug:
117
+ print("[realtime session] Last ID", last_session_id)
118
+
119
+ # Enforce clean state rules:
120
+ # - No history: always reset to ensure a fresh server context.
121
+ # - If history exists, keep the current live session even if the resumable handle has not been captured yet.
122
+ # Gemini Live can emit the handle slightly after the first turn starts; closing here would drop context continuity.
123
+ try:
124
+ history_len = len(context.history) if context.history else 0
125
+ except Exception:
126
+ history_len = 0
127
+
128
+ if history_len == 0:
129
+ if self.handler.is_session_active():
130
+ self.handler.close_session_sync()
131
+ try:
132
+ if context.ctx and isinstance(context.ctx.extra, dict):
133
+ context.ctx.extra.pop("rt_session_id", None)
134
+ except Exception:
135
+ pass
136
+ last_session_id = None # force new session
137
+
91
138
  # update auto-turn in active session
92
139
  if (self.handler.is_session_active()
93
140
  and (auto_turn != self.prev_auto_turn
@@ -95,23 +142,12 @@ class Realtime:
95
142
  or opt_vad_prefix != self.prev_vad_prefix)):
96
143
  self.handler.update_session_autoturn_sync(auto_turn, opt_vad_silence, opt_vad_prefix)
97
144
 
98
- # Tools
99
- tools = self.window.core.api.google.tools.prepare(model, context.external_functions)
100
- remote_tools = self.window.core.api.google.remote_tools.build_remote_tools(model)
101
- if tools:
102
- remote_tools = [] # in Google, remote tools are not allowed if function calling is used
103
-
104
145
  # if auto-turn is enabled and prompt is empty, update session and context only
105
146
  if auto_turn and self.handler.is_session_active() and (context.prompt.strip() == "" or context.prompt == "..."):
106
147
  self.handler.update_session_tools_sync(tools, remote_tools)
107
148
  self.handler.update_ctx(context.ctx)
108
149
  return True # do not send new request if session is active
109
150
 
110
- # Last session ID
111
- last_session_id = extract_last_session_id(context.history)
112
- if is_debug:
113
- print("[realtime session] Last ID", last_session_id)
114
-
115
151
  # Voice
116
152
  voice_name = "Kore"
117
153
  try:
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.09.01 23:00:00 #
9
+ # Updated Date: 2026.01.07 23:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import json
@@ -102,6 +102,31 @@ class Realtime:
102
102
  self.handler.update_ctx(context.ctx)
103
103
  return True # do not start new session, just send tool results
104
104
 
105
+ # Resolve last session ID from history only (do not fallback anywhere)
106
+ last_session_id = extract_last_session_id(context.history) if context.history else None
107
+ if is_debug:
108
+ print("[realtime session] Last ID", last_session_id)
109
+
110
+ # Enforce clean state rules before any live updates:
111
+ # - If there is no history at all: always reset live session to ensure a fresh context.
112
+ # - If there is history but it has no resumable session id: close any active session to avoid accidental continuation.
113
+ try:
114
+ history_len = len(context.history) if context.history else 0
115
+ except Exception:
116
+ history_len = 0
117
+
118
+ if history_len == 0:
119
+ if self.handler.is_session_active():
120
+ self.handler.close_session_sync()
121
+ try:
122
+ if context.ctx and isinstance(context.ctx.extra, dict):
123
+ context.ctx.extra.pop("rt_session_id", None)
124
+ except Exception:
125
+ pass
126
+ last_session_id = None # force new session
127
+ elif not last_session_id and self.handler.is_session_active():
128
+ self.handler.close_session_sync()
129
+
105
130
  # update auto-turn in active session
106
131
  if (self.handler.is_session_active()
107
132
  and (auto_turn != self.prev_auto_turn
@@ -116,11 +141,6 @@ class Realtime:
116
141
  self.window.update_status(trans("speech.listening"))
117
142
  return True # do not send new request if session is active
118
143
 
119
- # Last session ID
120
- last_session_id = extract_last_session_id(context.history)
121
- if is_debug:
122
- print("[realtime session] Last ID", last_session_id)
123
-
124
144
  # Voice
125
145
  voice = "alloy"
126
146
  try:
@@ -141,9 +141,6 @@ class ApiXAI:
141
141
  MODE_RESEARCH,
142
142
  MODE_AUDIO
143
143
  ):
144
- if mode == MODE_AUDIO:
145
- raise NotImplementedError("Not available. xAI realtime audio streaming coming soon!")
146
-
147
144
  if mode == MODE_AUDIO and stream:
148
145
  # Realtime API for audio streaming
149
146
  is_realtime = self.realtime.begin(