pygpt-net 2.7.6__py3-none-any.whl → 2.7.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. pygpt_net/CHANGELOG.txt +13 -0
  2. pygpt_net/__init__.py +3 -3
  3. pygpt_net/app.py +5 -1
  4. pygpt_net/controller/assistant/batch.py +2 -2
  5. pygpt_net/controller/assistant/files.py +7 -6
  6. pygpt_net/controller/assistant/threads.py +0 -0
  7. pygpt_net/controller/chat/command.py +0 -0
  8. pygpt_net/controller/chat/remote_tools.py +3 -9
  9. pygpt_net/controller/chat/stream.py +2 -2
  10. pygpt_net/controller/chat/{handler/worker.py → stream_worker.py} +13 -35
  11. pygpt_net/controller/dialogs/confirm.py +35 -58
  12. pygpt_net/controller/lang/mapping.py +9 -9
  13. pygpt_net/controller/remote_store/{google/batch.py → batch.py} +209 -252
  14. pygpt_net/controller/remote_store/remote_store.py +982 -13
  15. pygpt_net/core/command/command.py +0 -0
  16. pygpt_net/core/db/viewer.py +1 -1
  17. pygpt_net/core/debug/models.py +2 -2
  18. pygpt_net/core/realtime/worker.py +3 -1
  19. pygpt_net/{controller/remote_store/google → core/remote_store/anthropic}/__init__.py +0 -1
  20. pygpt_net/core/remote_store/anthropic/files.py +211 -0
  21. pygpt_net/core/remote_store/anthropic/store.py +208 -0
  22. pygpt_net/core/remote_store/openai/store.py +5 -4
  23. pygpt_net/core/remote_store/remote_store.py +5 -1
  24. pygpt_net/{controller/remote_store/openai → core/remote_store/xai}/__init__.py +0 -1
  25. pygpt_net/core/remote_store/xai/files.py +225 -0
  26. pygpt_net/core/remote_store/xai/store.py +219 -0
  27. pygpt_net/data/config/config.json +18 -5
  28. pygpt_net/data/config/models.json +193 -4
  29. pygpt_net/data/config/settings.json +179 -36
  30. pygpt_net/data/icons/folder_eye.svg +1 -0
  31. pygpt_net/data/icons/folder_eye_filled.svg +1 -0
  32. pygpt_net/data/icons/folder_open.svg +1 -0
  33. pygpt_net/data/icons/folder_open_filled.svg +1 -0
  34. pygpt_net/data/locale/locale.de.ini +6 -3
  35. pygpt_net/data/locale/locale.en.ini +46 -12
  36. pygpt_net/data/locale/locale.es.ini +6 -3
  37. pygpt_net/data/locale/locale.fr.ini +6 -3
  38. pygpt_net/data/locale/locale.it.ini +6 -3
  39. pygpt_net/data/locale/locale.pl.ini +7 -4
  40. pygpt_net/data/locale/locale.uk.ini +6 -3
  41. pygpt_net/data/locale/locale.zh.ini +6 -3
  42. pygpt_net/icons.qrc +4 -0
  43. pygpt_net/icons_rc.py +282 -138
  44. pygpt_net/plugin/cmd_mouse_control/worker.py +2 -1
  45. pygpt_net/plugin/cmd_mouse_control/worker_sandbox.py +2 -1
  46. pygpt_net/provider/api/anthropic/__init__.py +10 -3
  47. pygpt_net/provider/api/anthropic/chat.py +342 -11
  48. pygpt_net/provider/api/anthropic/computer.py +844 -0
  49. pygpt_net/provider/api/anthropic/remote_tools.py +172 -0
  50. pygpt_net/provider/api/anthropic/store.py +307 -0
  51. pygpt_net/{controller/chat/handler/anthropic_stream.py → provider/api/anthropic/stream.py} +99 -10
  52. pygpt_net/provider/api/anthropic/tools.py +32 -77
  53. pygpt_net/provider/api/anthropic/utils.py +30 -0
  54. pygpt_net/{controller/chat/handler → provider/api/anthropic/worker}/__init__.py +0 -0
  55. pygpt_net/provider/api/anthropic/worker/importer.py +278 -0
  56. pygpt_net/provider/api/google/chat.py +62 -9
  57. pygpt_net/provider/api/google/store.py +124 -3
  58. pygpt_net/{controller/chat/handler/google_stream.py → provider/api/google/stream.py} +92 -25
  59. pygpt_net/provider/api/google/utils.py +185 -0
  60. pygpt_net/provider/api/google/worker/importer.py +16 -28
  61. pygpt_net/provider/api/langchain/__init__.py +0 -0
  62. pygpt_net/{controller/chat/handler/langchain_stream.py → provider/api/langchain/stream.py} +1 -1
  63. pygpt_net/provider/api/llama_index/__init__.py +0 -0
  64. pygpt_net/{controller/chat/handler/llamaindex_stream.py → provider/api/llama_index/stream.py} +1 -1
  65. pygpt_net/provider/api/openai/assistants.py +2 -2
  66. pygpt_net/provider/api/openai/image.py +2 -2
  67. pygpt_net/provider/api/openai/store.py +4 -1
  68. pygpt_net/{controller/chat/handler/openai_stream.py → provider/api/openai/stream.py} +1 -1
  69. pygpt_net/provider/api/openai/utils.py +69 -3
  70. pygpt_net/provider/api/openai/worker/importer.py +19 -61
  71. pygpt_net/provider/api/openai/worker/importer_assistants.py +230 -0
  72. pygpt_net/provider/api/x_ai/__init__.py +138 -15
  73. pygpt_net/provider/api/x_ai/audio.py +43 -11
  74. pygpt_net/provider/api/x_ai/chat.py +92 -4
  75. pygpt_net/provider/api/x_ai/image.py +149 -47
  76. pygpt_net/provider/api/x_ai/realtime/__init__.py +12 -0
  77. pygpt_net/provider/api/x_ai/realtime/client.py +1825 -0
  78. pygpt_net/provider/api/x_ai/realtime/realtime.py +198 -0
  79. pygpt_net/provider/api/x_ai/{remote.py → remote_tools.py} +183 -70
  80. pygpt_net/provider/api/x_ai/responses.py +507 -0
  81. pygpt_net/provider/api/x_ai/store.py +610 -0
  82. pygpt_net/{controller/chat/handler/xai_stream.py → provider/api/x_ai/stream.py} +42 -10
  83. pygpt_net/provider/api/x_ai/tools.py +59 -8
  84. pygpt_net/{controller/chat/handler → provider/api/x_ai}/utils.py +1 -2
  85. pygpt_net/provider/api/x_ai/vision.py +1 -4
  86. pygpt_net/provider/api/x_ai/worker/importer.py +308 -0
  87. pygpt_net/provider/audio_input/xai_grok_voice.py +390 -0
  88. pygpt_net/provider/audio_output/xai_tts.py +325 -0
  89. pygpt_net/provider/core/config/patch.py +39 -3
  90. pygpt_net/provider/core/config/patches/patch_before_2_6_42.py +2 -2
  91. pygpt_net/provider/core/model/patch.py +39 -1
  92. pygpt_net/tools/image_viewer/tool.py +334 -34
  93. pygpt_net/tools/image_viewer/ui/dialogs.py +319 -22
  94. pygpt_net/tools/text_editor/ui/dialogs.py +3 -2
  95. pygpt_net/tools/text_editor/ui/widgets.py +0 -0
  96. pygpt_net/ui/dialog/assistant.py +1 -1
  97. pygpt_net/ui/dialog/plugins.py +13 -5
  98. pygpt_net/ui/dialog/remote_store.py +552 -0
  99. pygpt_net/ui/dialogs.py +3 -5
  100. pygpt_net/ui/layout/ctx/ctx_list.py +58 -7
  101. pygpt_net/ui/menu/tools.py +6 -13
  102. pygpt_net/ui/widget/dialog/base.py +16 -5
  103. pygpt_net/ui/widget/dialog/{remote_store_google.py → remote_store.py} +10 -10
  104. pygpt_net/ui/widget/element/button.py +4 -4
  105. pygpt_net/ui/widget/image/display.py +2 -2
  106. pygpt_net/ui/widget/lists/context.py +2 -2
  107. pygpt_net/ui/widget/textarea/editor.py +0 -0
  108. {pygpt_net-2.7.6.dist-info → pygpt_net-2.7.8.dist-info}/METADATA +15 -2
  109. {pygpt_net-2.7.6.dist-info → pygpt_net-2.7.8.dist-info}/RECORD +107 -89
  110. pygpt_net/controller/remote_store/google/store.py +0 -615
  111. pygpt_net/controller/remote_store/openai/batch.py +0 -524
  112. pygpt_net/controller/remote_store/openai/store.py +0 -699
  113. pygpt_net/ui/dialog/remote_store_google.py +0 -539
  114. pygpt_net/ui/dialog/remote_store_openai.py +0 -539
  115. pygpt_net/ui/widget/dialog/remote_store_openai.py +0 -56
  116. pygpt_net/ui/widget/lists/remote_store_google.py +0 -248
  117. pygpt_net/ui/widget/lists/remote_store_openai.py +0 -317
  118. {pygpt_net-2.7.6.dist-info → pygpt_net-2.7.8.dist-info}/LICENSE +0 -0
  119. {pygpt_net-2.7.6.dist-info → pygpt_net-2.7.8.dist-info}/WHEEL +0 -0
  120. {pygpt_net-2.7.6.dist-info → pygpt_net-2.7.8.dist-info}/entry_points.txt +0 -0
pygpt_net/provider/audio_input/xai_grok_voice.py (new file, +390 -0)
@@ -0,0 +1,390 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ================================================== #
+# This file is a part of PYGPT package               #
+# Website: https://pygpt.net                         #
+# GitHub:  https://github.com/szczyglis-dev/py-gpt   #
+# MIT License                                        #
+# Created By  : Marcin Szczygliński                  #
+# Updated Date: 2026.01.06 20:00:00                  #
+# ================================================== #
+
+import asyncio
+import base64
+import json
+import os
+import queue
+import subprocess
+import threading
+from typing import Optional
+
+from .base import BaseProvider
+
+
+class XAIGrokVoiceAudioInput(BaseProvider):
+    PROMPT_TRANSCRIBE = (
+        "You are a speech-to-text transcriber. "
+        "Return only the verbatim transcript as plain text. "
+        "Do not add any explanations, timestamps, labels or formatting."
+    )
+
+    def __init__(self, *args, **kwargs):
+        """
+        xAI Grok Voice Agent-based audio transcription provider (via WebSocket API).
+
+        :param args: args
+        :param kwargs: kwargs
+        """
+        super(XAIGrokVoiceAudioInput, self).__init__(*args, **kwargs)
+        self.plugin = kwargs.get("plugin")
+        self.id = "xai_grok_voice"
+        self.name = "xAI Grok Voice"
+
+    def init_options(self):
+        """Initialize options"""
+        # Model is implicit for the realtime Voice Agent; keep options focused on audio and behavior
+        self.plugin.add_option(
+            "xai_voice_audio_sample_rate",
+            type="text",
+            value="16000",
+            label="Sample rate (Hz)",
+            tab="xai_grok_voice",
+            description="PCM sample rate for input, e.g., 16000 or 24000",
+        )
+        self.plugin.add_option(
+            "xai_voice_system_prompt",
+            type="textarea",
+            value=self.PROMPT_TRANSCRIBE,
+            label="System Prompt",
+            tab="xai_grok_voice",
+            description="System prompt to guide the transcription output",
+            tooltip="System prompt for transcription",
+            persist=True,
+        )
+        self.plugin.add_option(
+            "xai_voice_region",
+            type="text",
+            value="",
+            label="Region (optional)",
+            tab="xai_grok_voice",
+            description="Regional endpoint like us-east-1; leave empty to use the global endpoint",
+        )
+        self.plugin.add_option(
+            "xai_voice_chunk_ms",
+            type="text",
+            value="200",
+            label="Chunk size (ms)",
+            tab="xai_grok_voice",
+            description="Size of audio chunks to send over WebSocket",
+        )
+
+    def transcribe(self, path: str) -> str:
+        """
+        Audio to text transcription using xAI Grok Voice Agent (WebSocket).
+
+        :param path: path to audio file to transcribe
+        :return: transcribed text
+        """
+        # Ensure xAI client is initialized in core (keeps auth/config consistent with the app)
+        # We do not rely on its methods for WebSocket, but we respect the app's initialization order
+        try:
+            _ = self.plugin.window.core.api.xai.get_client()
+        except Exception:
+            # Client not strictly required for WebSocket usage; continue if available credentials are set
+            pass
+
+        api_key = self._get_api_key()
+        if not api_key:
+            raise RuntimeError("xAI API key is not set. Please configure it in settings.")
+
+        # Resolve endpoint (optionally regionalized)
+        region = (self.plugin.get_option_value("xai_voice_region") or "").strip()
+        host = f"{region}.api.x.ai" if region else "api.x.ai"
+        ws_uri = f"wss://{host}/v1/realtime"
+
+        # Read options
+        prompt = self.plugin.get_option_value("xai_voice_system_prompt") or self.PROMPT_TRANSCRIBE
+        sr_opt = str(self.plugin.get_option_value("xai_voice_audio_sample_rate") or "16000").strip()
+        try:
+            sample_rate = max(8000, int(sr_opt))
+        except Exception:
+            sample_rate = 16000
+
+        chunk_ms_opt = str(self.plugin.get_option_value("xai_voice_chunk_ms") or "200").strip()
+        try:
+            chunk_ms = max(20, int(chunk_ms_opt))
+        except Exception:
+            chunk_ms = 200
+
+        # Compute chunk size for 16-bit mono PCM
+        bytes_per_second = sample_rate * 2  # 1 channel * 2 bytes
+        chunk_size = max(4096, int(bytes_per_second * (chunk_ms / 1000.0)))
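+        # e.g., at the defaults (16000 Hz, 200 ms): 16000 * 2 * 0.2 = 6400 bytes
+        # per chunk, comfortably above the 4096-byte floor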
+
+        # Run async websocket pipeline in an isolated thread/loop to avoid interfering with the UI loop
+        result_queue: queue.Queue[str] = queue.Queue()
+
+        def _runner():
+            loop = asyncio.new_event_loop()
+            try:
+                asyncio.set_event_loop(loop)
+                text = loop.run_until_complete(
+                    self._transcribe_async(
+                        ws_uri=ws_uri,
+                        api_key=api_key,
+                        path=path,
+                        sample_rate=sample_rate,
+                        chunk_size=chunk_size,
+                        system_prompt=prompt,
+                    )
+                )
+                result_queue.put(text or "")
+            finally:
+                try:
+                    loop.close()
+                except Exception:
+                    pass
+
+        t = threading.Thread(target=_runner, daemon=True)
+        t.start()
+        t.join()
+
+        return result_queue.get() if not result_queue.empty() else ""
+
+    async def _transcribe_async(
+        self,
+        ws_uri: str,
+        api_key: str,
+        path: str,
+        sample_rate: int,
+        chunk_size: int,
+        system_prompt: str,
+    ) -> str:
+        """
+        Connects to xAI Voice Agent realtime WebSocket and streams audio for transcription.
+        Returns the final transcript text.
+        """
+        try:
+            import websockets  # type: ignore
+            from websockets.asyncio.client import ClientConnection  # type: ignore
+        except Exception as e:
+            raise RuntimeError(
+                "The 'websockets' package is required for xAI Voice transcription. "
+                "Please install it in your environment."
+            ) from e
+
+        transcript: Optional[str] = None
+
+        async with websockets.connect(
+            uri=ws_uri,
+            ssl=True,
+            open_timeout=30,
+            close_timeout=10,
+            additional_headers={"Authorization": f"Bearer {api_key}"},
+            max_size=None,
+        ) as ws:  # type: ClientConnection
+            # Configure session to match our audio and enforce transcription-only behavior
+            session_config = {
+                "type": "session.update",
+                "session": {
+                    "instructions": system_prompt,
+                    # We are only transcribing; disable server VAD and commit manually as a single turn
+                    "turn_detection": {"type": None},
+                    "audio": {
+                        "input": {"format": {"type": "audio/pcm", "rate": sample_rate}},
+                        # Output audio not needed; keep default
+                    },
+                },
+            }
+            await ws.send(json.dumps(session_config))
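+            # Note: json.dumps() renders the Python None in "turn_detection"
+            # as JSON null, i.e. {"type": null}, in the payload sent above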
+
+            # Stream the audio
+            is_wav = path.lower().endswith((".wav", ".wave"))
+            if is_wav:
+                # Fast path for WAV (PCM or otherwise; convert to mono s16le at desired rate)
+                pcm_bytes, duration_s = self._decode_wav_to_pcm_s16le(path, sample_rate)
+                for i in range(0, len(pcm_bytes), chunk_size):
+                    chunk = pcm_bytes[i : i + chunk_size]
+                    if not chunk:
+                        break
+                    await ws.send(
+                        json.dumps(
+                            {
+                                "type": "input_audio_buffer.append",
+                                "audio": base64.b64encode(chunk).decode("ascii"),
+                            }
+                        )
+                    )
+            else:
+                # Generic path via ffmpeg to decode to mono s16le at sample_rate
+                duration_s = None  # unknown
+                await self._stream_via_ffmpeg(ws, path, sample_rate, chunk_size)
+
+            # Commit a single user message from the accumulated audio buffer
+            await ws.send(json.dumps({"type": "input_audio_buffer.commit"}))
+
+            # Wait for transcript events
+            # Use a dynamic timeout: at least 30s; more for longer audio
+            base_timeout = 30.0
+            if duration_s is not None:
+                # allow ~2x audio length + base safety window
+                timeout_s = min(600.0, max(base_timeout, duration_s * 2.0 + 10.0))
+            else:
+                timeout_s = 120.0
+
+            try:
+                transcript = await self._await_transcript(ws, timeout=timeout_s)
+            except asyncio.TimeoutError:
+                # Try to salvage from any conversation.item.added events cached in the loop
+                pass
+
+        return transcript or ""
+
+    async def _await_transcript(self, ws, timeout: float) -> Optional[str]:
+        """
+        Waits for either:
+        - conversation.item.input_audio_transcription.completed (preferred)
+        - conversation.item.added with content.type == 'input_audio' (fallback)
+        """
+        end_time = asyncio.get_event_loop().time() + timeout
+        pending_fallback: Optional[str] = None
+
+        while True:
+            remaining = end_time - asyncio.get_event_loop().time()
+            if remaining <= 0:
+                raise asyncio.TimeoutError("Timed out waiting for xAI transcription result.")
+
+            try:
+                msg = await asyncio.wait_for(ws.recv(), timeout=remaining)
+            except asyncio.TimeoutError:
+                raise
+            except Exception:
+                break
+
+            try:
+                event = json.loads(msg)
+            except Exception:
+                continue
+
+            etype = event.get("type", "")
+            if etype == "conversation.item.input_audio_transcription.completed":
+                # Preferred final transcript
+                return event.get("transcript") or ""
+            elif etype == "conversation.item.added":
+                # Fallback: some responses include the inline transcript in the added user item
+                item = event.get("item") or {}
+                if item.get("role") == "user":
+                    for c in item.get("content", []):
+                        if isinstance(c, dict) and c.get("type") == "input_audio" and "transcript" in c:
+                            pending_fallback = c.get("transcript") or pending_fallback
+            elif etype == "response.done":
+                # If server signals end of turn and we have a fallback transcript, return it
+                if pending_fallback:
+                    return pending_fallback
+
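+    # For illustration, the preferred completion event read above has the shape
+    # (inferred from this handler, not from xAI documentation):
+    #   {"type": "conversation.item.input_audio_transcription.completed",
+    #    "transcript": "..."}
+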
+    async def _stream_via_ffmpeg(self, ws, path: str, sample_rate: int, chunk_size: int):
+        """
+        Uses ffmpeg to decode arbitrary input to mono s16le at sample_rate and streams chunks.
+        """
+        cmd = [
+            "ffmpeg",
+            "-nostdin",
+            "-hide_banner",
+            "-loglevel",
+            "error",
+            "-i",
+            path,
+            "-ac",
+            "1",
+            "-ar",
+            str(sample_rate),
+            "-f",
+            "s16le",
+            "pipe:1",
+        ]
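+        # Assembled command, e.g. for a 16 kHz target:
+        #   ffmpeg -nostdin -hide_banner -loglevel error -i <path> -ac 1 -ar 16000 -f s16le pipe:1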
+        try:
+            proc = await asyncio.create_subprocess_exec(
+                *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
+            )
+        except FileNotFoundError as e:
+            raise RuntimeError(
+                "ffmpeg not found. Install ffmpeg or provide WAV input for xAI transcription."
+            ) from e
+
+        try:
+            while True:
+                chunk = await proc.stdout.read(chunk_size)
+                if not chunk:
+                    break
+                await ws.send(
+                    json.dumps(
+                        {
+                            "type": "input_audio_buffer.append",
+                            "audio": base64.b64encode(chunk).decode("ascii"),
+                        }
+                    )
+                )
+        finally:
+            try:
+                await proc.wait()
+            except Exception:
+                pass
+
+    def _decode_wav_to_pcm_s16le(self, path: str, target_rate: int):
+        """
+        Decodes a WAV file to mono 16-bit PCM at target_rate.
+        Returns (bytes, duration_seconds).
+        """
+        import wave
+        import audioop
+
+        with wave.open(path, "rb") as wf:
+            n_channels = wf.getnchannels()
+            sampwidth = wf.getsampwidth()
+            framerate = wf.getframerate()
+            n_frames = wf.getnframes()
+            raw = wf.readframes(n_frames)
+
+        # Convert to mono if needed
+        if n_channels > 1:
+            raw = audioop.tomono(raw, sampwidth, 1, 1)
+
+        # Convert sample width to 16-bit
+        if sampwidth != 2:
+            raw = audioop.lin2lin(raw, sampwidth, 2)
+
+        # Resample if needed
+        if framerate != target_rate:
+            raw, _ = audioop.ratecv(raw, 2, 1, framerate, target_rate, None)
+            framerate = target_rate
+
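+        # Duration from byte length: mono 16-bit at target_rate means
+        # target_rate * 2 bytes per second (e.g., 320,000 bytes at 16 kHz -> 10.0 s).
+        # Note: the stdlib audioop module used above was removed in Python 3.13.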
+        duration_s = len(raw) / float(target_rate * 2)  # mono, 16-bit
+        return raw, duration_s
+
+    def _get_api_key(self) -> Optional[str]:
+        """
+        Resolve xAI API key from the app's configuration.
+        """
+        # Prefer explicit xAI key if present
+        key = self.plugin.window.core.config.get("api_key_xai")
+        if key:
+            return key
+
+        # Optional: try environment variable for parity with SDKs
+        return os.getenv("XAI_API_KEY")
+
+    def is_configured(self) -> bool:
+        """
+        Check if provider is configured
+
+        :return: True if configured, False otherwise
+        """
+        api_key = self._get_api_key()
+        return api_key is not None and api_key != ""
+
+    def get_config_message(self) -> str:
+        """
+        Return message to display when provider is not configured
+
+        :return: message
+        """
+        return "xAI API key is not set yet. Please configure it in settings."