pygpt-net 2.7.7__py3-none-any.whl → 2.7.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. pygpt_net/CHANGELOG.txt +7 -0
  2. pygpt_net/__init__.py +3 -3
  3. pygpt_net/app.py +5 -1
  4. pygpt_net/controller/assistant/batch.py +2 -2
  5. pygpt_net/controller/assistant/files.py +7 -6
  6. pygpt_net/controller/assistant/threads.py +0 -0
  7. pygpt_net/controller/chat/command.py +0 -0
  8. pygpt_net/controller/dialogs/confirm.py +35 -58
  9. pygpt_net/controller/lang/mapping.py +9 -9
  10. pygpt_net/controller/remote_store/{google/batch.py → batch.py} +209 -252
  11. pygpt_net/controller/remote_store/remote_store.py +982 -13
  12. pygpt_net/core/command/command.py +0 -0
  13. pygpt_net/core/db/viewer.py +1 -1
  14. pygpt_net/core/realtime/worker.py +3 -1
  15. pygpt_net/{controller/remote_store/google → core/remote_store/anthropic}/__init__.py +0 -1
  16. pygpt_net/core/remote_store/anthropic/files.py +211 -0
  17. pygpt_net/core/remote_store/anthropic/store.py +208 -0
  18. pygpt_net/core/remote_store/openai/store.py +5 -4
  19. pygpt_net/core/remote_store/remote_store.py +5 -1
  20. pygpt_net/{controller/remote_store/openai → core/remote_store/xai}/__init__.py +0 -1
  21. pygpt_net/core/remote_store/xai/files.py +225 -0
  22. pygpt_net/core/remote_store/xai/store.py +219 -0
  23. pygpt_net/data/config/config.json +9 -6
  24. pygpt_net/data/config/models.json +5 -4
  25. pygpt_net/data/config/settings.json +54 -1
  26. pygpt_net/data/icons/folder_eye.svg +1 -0
  27. pygpt_net/data/icons/folder_eye_filled.svg +1 -0
  28. pygpt_net/data/icons/folder_open.svg +1 -0
  29. pygpt_net/data/icons/folder_open_filled.svg +1 -0
  30. pygpt_net/data/locale/locale.de.ini +4 -3
  31. pygpt_net/data/locale/locale.en.ini +14 -4
  32. pygpt_net/data/locale/locale.es.ini +4 -3
  33. pygpt_net/data/locale/locale.fr.ini +4 -3
  34. pygpt_net/data/locale/locale.it.ini +4 -3
  35. pygpt_net/data/locale/locale.pl.ini +5 -4
  36. pygpt_net/data/locale/locale.uk.ini +4 -3
  37. pygpt_net/data/locale/locale.zh.ini +4 -3
  38. pygpt_net/icons.qrc +4 -0
  39. pygpt_net/icons_rc.py +282 -138
  40. pygpt_net/provider/api/anthropic/__init__.py +2 -0
  41. pygpt_net/provider/api/anthropic/chat.py +84 -1
  42. pygpt_net/provider/api/anthropic/store.py +307 -0
  43. pygpt_net/provider/api/anthropic/stream.py +75 -0
  44. pygpt_net/provider/api/anthropic/worker/__init__.py +0 -0
  45. pygpt_net/provider/api/anthropic/worker/importer.py +278 -0
  46. pygpt_net/provider/api/google/chat.py +59 -2
  47. pygpt_net/provider/api/google/store.py +124 -3
  48. pygpt_net/provider/api/google/stream.py +91 -24
  49. pygpt_net/provider/api/google/worker/importer.py +16 -28
  50. pygpt_net/provider/api/openai/assistants.py +2 -2
  51. pygpt_net/provider/api/openai/store.py +4 -1
  52. pygpt_net/provider/api/openai/worker/importer.py +19 -61
  53. pygpt_net/provider/api/openai/worker/importer_assistants.py +230 -0
  54. pygpt_net/provider/api/x_ai/__init__.py +30 -6
  55. pygpt_net/provider/api/x_ai/audio.py +43 -11
  56. pygpt_net/provider/api/x_ai/chat.py +92 -4
  57. pygpt_net/provider/api/x_ai/realtime/__init__.py +12 -0
  58. pygpt_net/provider/api/x_ai/realtime/client.py +1825 -0
  59. pygpt_net/provider/api/x_ai/realtime/realtime.py +198 -0
  60. pygpt_net/provider/api/x_ai/remote_tools.py +19 -1
  61. pygpt_net/provider/api/x_ai/store.py +610 -0
  62. pygpt_net/provider/api/x_ai/stream.py +30 -9
  63. pygpt_net/provider/api/x_ai/worker/importer.py +308 -0
  64. pygpt_net/provider/audio_input/xai_grok_voice.py +390 -0
  65. pygpt_net/provider/audio_output/xai_tts.py +325 -0
  66. pygpt_net/provider/core/config/patch.py +18 -3
  67. pygpt_net/provider/core/config/patches/patch_before_2_6_42.py +2 -2
  68. pygpt_net/provider/core/model/patch.py +13 -0
  69. pygpt_net/tools/image_viewer/tool.py +334 -34
  70. pygpt_net/tools/image_viewer/ui/dialogs.py +317 -21
  71. pygpt_net/ui/dialog/assistant.py +1 -1
  72. pygpt_net/ui/dialog/plugins.py +13 -5
  73. pygpt_net/ui/dialog/remote_store.py +552 -0
  74. pygpt_net/ui/dialogs.py +3 -5
  75. pygpt_net/ui/layout/ctx/ctx_list.py +58 -7
  76. pygpt_net/ui/menu/tools.py +6 -13
  77. pygpt_net/ui/widget/dialog/{remote_store_google.py → remote_store.py} +10 -10
  78. pygpt_net/ui/widget/element/button.py +4 -4
  79. pygpt_net/ui/widget/image/display.py +2 -2
  80. pygpt_net/ui/widget/lists/context.py +2 -2
  81. {pygpt_net-2.7.7.dist-info → pygpt_net-2.7.8.dist-info}/METADATA +9 -2
  82. {pygpt_net-2.7.7.dist-info → pygpt_net-2.7.8.dist-info}/RECORD +82 -70
  83. pygpt_net/controller/remote_store/google/store.py +0 -615
  84. pygpt_net/controller/remote_store/openai/batch.py +0 -524
  85. pygpt_net/controller/remote_store/openai/store.py +0 -699
  86. pygpt_net/ui/dialog/remote_store_google.py +0 -539
  87. pygpt_net/ui/dialog/remote_store_openai.py +0 -539
  88. pygpt_net/ui/widget/dialog/remote_store_openai.py +0 -56
  89. pygpt_net/ui/widget/lists/remote_store_google.py +0 -248
  90. pygpt_net/ui/widget/lists/remote_store_openai.py +0 -317
  91. {pygpt_net-2.7.7.dist-info → pygpt_net-2.7.8.dist-info}/LICENSE +0 -0
  92. {pygpt_net-2.7.7.dist-info → pygpt_net-2.7.8.dist-info}/WHEEL +0 -0
  93. {pygpt_net-2.7.7.dist-info → pygpt_net-2.7.8.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,308 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # ================================================== #
4
+ # This file is a part of PYGPT package #
5
+ # Website: https://pygpt.net #
6
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
+ # MIT License #
8
+ # Created By : Marcin Szczygliński #
9
+ # Updated Date: 2026.01.06 06:00:00 #
10
+ # ================================================== #
11
+
12
+ import os
13
+
14
+ from PySide6.QtCore import QObject, Signal, QRunnable, Slot
15
+
16
+
17
+ class Importer(QObject):
18
def __init__(self, window=None):
    """
    Importer core (xAI Collections)

    Stores the window reference and reserves a slot for the most recently
    started worker.

    :param window: Window instance
    """
    super().__init__()
    self.worker = None  # set by each launcher method when it starts a worker
    self.window = window
27
+
28
@Slot(str, object)
def handle_error(self, mode: str, err: any):
    """
    Forward a worker error to the matching failure handler of the
    remote-store batch controller.

    Unknown modes are ignored.

    :param mode: worker mode that failed (e.g. "import_files", "vector_stores")
    :param err: error object emitted by the worker
    """
    batch = self.window.controller.remote_store.batch
    # Every mode is compared with `==`. The former `mode in "<name>"` checks
    # were substring tests, so values such as "" or "stores" were routed
    # to a handler by accident.
    if mode == "import_files":
        batch.handle_imported_files_failed(err)
    elif mode == "truncate_files":
        batch.handle_truncated_files_failed(err)
    elif mode == "upload_files":
        batch.handle_uploaded_files_failed(err)
    elif mode == "vector_stores":
        batch.handle_imported_stores_failed(err)
    elif mode == "truncate_vector_stores":
        batch.handle_truncated_stores_failed(err)
    elif mode == "refresh_vector_stores":
        batch.handle_refreshed_stores_failed(err)
43
+
44
@Slot(str, str, int)
def handle_finished(self, mode: str, store_id: str = None, num: int = 0):
    """
    Forward a worker completion to the matching success handler of the
    remote-store batch controller.

    Unknown modes are ignored.

    :param mode: worker mode that finished
    :param store_id: store (collection) ID reported by the worker
    :param num: number of processed items reported by the worker
    """
    batch = self.window.controller.remote_store.batch

    # The only handler that also receives the store ID.
    if mode == "truncate_files":
        batch.handle_truncated_files(store_id, num)
        return

    # Remaining handlers take just the item count.
    count_only_handlers = {
        "import_files": "handle_imported_files",
        "upload_files": "handle_uploaded_files",
        "vector_stores": "handle_imported_stores",
        "truncate_vector_stores": "handle_truncated_stores",
        "refresh_vector_stores": "handle_refreshed_stores",
    }
    handler_name = count_only_handlers.get(mode)
    if handler_name is not None:
        getattr(batch, handler_name)(num)
59
+
60
@Slot(str, str)
def handle_status(self, mode: str, msg: str):
    """
    Pass a worker status message on to the batch controller.

    :param mode: worker mode that produced the message
    :param msg: status text
    """
    # NOTE(review): status goes through controller.assistant.batch, not
    # controller.remote_store.batch — confirm this is intended.
    assistant_batch = self.window.controller.assistant.batch
    assistant_batch.handle_status_change(mode, msg)
63
+
64
@Slot(str, str)
def handle_log(self, mode: str, msg: str):
    """
    Write a worker log line, prefixed with the worker mode.

    :param mode: worker mode that produced the message
    :param msg: log text
    """
    line = ": ".join((mode, msg))
    self.window.controller.assistant.threads.log(line)
67
+
68
+ # ---------- Vector stores (Collections) ----------
69
+
70
def import_vector_stores(self):
    """Import collections (starts a worker in "vector_stores" mode)"""
    worker = ImportWorker()
    self.worker = worker
    worker.mode = "vector_stores"
    worker.window = self.window
    self.connect_signals(worker)
    self.window.threadpool.start(worker)
77
+
78
def truncate_vector_stores(self):
    """Delete collections (starts a worker in "truncate_vector_stores" mode)"""
    worker = ImportWorker()
    self.worker = worker
    worker.mode = "truncate_vector_stores"
    worker.window = self.window
    self.connect_signals(worker)
    self.window.threadpool.start(worker)
85
+
86
def refresh_vector_stores(self):
    """Refresh collections (starts a worker in "refresh_vector_stores" mode)"""
    worker = ImportWorker()
    self.worker = worker
    worker.mode = "refresh_vector_stores"
    worker.window = self.window
    self.connect_signals(worker)
    self.window.threadpool.start(worker)
93
+
94
+ # ---------- Files (documents) ----------
95
+
96
def truncate_files(self, store_id: str = None):
    """
    Remove documents from one/all collections

    :param store_id: collection ID, or None to target all collections
    """
    worker = ImportWorker()
    self.worker = worker
    worker.store_id = store_id
    worker.mode = "truncate_files"
    worker.window = self.window
    self.connect_signals(worker)
    self.window.threadpool.start(worker)
104
+
105
def upload_files(self, store_id: str, files: list = None):
    """
    Upload files to a collection

    :param store_id: target collection ID
    :param files: list of file paths; None or empty means nothing to upload
    """
    worker = ImportWorker()
    self.worker = worker
    worker.files = files or []
    worker.store_id = store_id
    worker.mode = "upload_files"
    worker.window = self.window
    self.connect_signals(worker)
    self.window.threadpool.start(worker)
114
+
115
def import_files(self, store_id: str = None):
    """
    Import documents from one/all collections

    :param store_id: collection ID, or None to target all collections
    """
    worker = ImportWorker()
    self.worker = worker
    worker.store_id = store_id
    worker.mode = "import_files"
    worker.window = self.window
    self.connect_signals(worker)
    self.window.threadpool.start(worker)
123
+
124
def connect_signals(self, worker):
    """
    Wire the worker's signals to this importer's handler slots.

    :param worker: worker exposing a `signals` object with finished/error/status/log
    """
    for signal_name in ("finished", "error", "status", "log"):
        signal = getattr(worker.signals, signal_name)
        signal.connect(getattr(self, "handle_" + signal_name))
129
+
130
+
131
class ImportWorkerSignals(QObject):
    """Signals emitted by ImportWorker; each carries the worker mode first."""

    # (mode, store_id, num) - emitted when an operation completes
    finished = Signal(str, str, int)
    # (mode, error) - emitted when an operation fails
    error = Signal(str, object)
    # (mode, message) - progress/status text
    status = Signal(str, str)
    # (mode, message) - log line
    log = Signal(str, str)
136
+
137
+
138
+ class ImportWorker(QRunnable):
139
+ """Import worker (xAI Collections)"""
140
def __init__(self, *args, **kwargs):
    """
    Create an unconfigured worker; the caller sets window/mode/store_id/files
    before starting it.

    :param args: ignored
    :param kwargs: ignored
    """
    super().__init__()
    self.signals = ImportWorkerSignals()
    self.files = []  # file paths used by the "upload_files" mode
    self.store_id = None  # None = operate on all collections
    self.mode = "vector_stores"  # default operation
    self.window = None
147
+
148
@Slot()
def run(self):
    """
    Execute the operation selected by `self.mode`.

    Any exception escaping the operation is emitted on the error signal;
    cleanup always runs afterwards.
    """
    # Modes whose name equals the method that implements them.
    direct_modes = (
        "truncate_vector_stores",
        "refresh_vector_stores",
        "truncate_files",
        "import_files",
        "upload_files",
    )
    try:
        if self.mode == "vector_stores":
            # Documents are imported only after collections imported successfully.
            if self.import_vector_stores():
                self.import_files()
        elif self.mode in direct_modes:
            getattr(self, self.mode)()
    except Exception as e:
        self.signals.error.emit(self.mode, e)
    finally:
        self.cleanup()
168
+
169
+ # ---------- Collections ----------
170
+
171
def import_vector_stores(self, silent: bool = False) -> bool:
    """
    Clear the local xAI store, import collections through the xAI store API
    and hand the collected items to the local store.

    :param silent: if True, do not emit the finished signal
    :return: True on success, False if an error occurred (error signal emitted)
    """
    try:
        self.log("Importing collections...")
        core = self.window.core
        core.remote_store.xai.clear()
        fetched = {}  # presumably filled in place by the API call below
        core.api.xai.store.import_collections_collections(fetched, callback=self.callback)
        core.remote_store.xai.import_items(fetched)
        if silent:
            return True
        self.signals.finished.emit("vector_stores", self.store_id, len(fetched))
        return True
    except Exception as e:
        self.log("API error: {}".format(e))
        self.signals.error.emit("vector_stores", e)
        return False
185
+
186
def truncate_vector_stores(self, silent: bool = False) -> bool:
    """
    Remove all collections through the xAI store API, then empty and save
    the local store.

    :param silent: if True, do not emit the finished signal
    :return: True on success, False if an error occurred (error signal emitted)
    """
    try:
        self.log("Truncating collections...")
        core = self.window.core
        removed = core.api.xai.store.remove_all_collections_collections(callback=self.callback)
        local_store = core.remote_store.xai
        local_store.items = {}
        local_store.save()
        if silent:
            return True
        self.signals.finished.emit("truncate_vector_stores", self.store_id, removed)
        return True
    except Exception as e:
        self.log("API error: {}".format(e))
        self.signals.error.emit("truncate_vector_stores", e)
        return False
199
+
200
def refresh_vector_stores(self, silent: bool = False) -> bool:
    """
    Refresh every locally known collection via the remote-store controller.

    A failure on a single collection is logged and skipped; it does not
    abort the run.

    :param silent: if True, do not emit the finished signal
    :return: True on success, False if an error occurred (error signal emitted)
    """
    try:
        self.log("Refreshing collections...")
        refreshed = 0
        stores = self.window.core.remote_store.xai.items
        # Iterate over a snapshot of the keys.
        for collection_id in list(stores):
            store = stores[collection_id]
            try:
                self.window.controller.remote_store.refresh_store(store, update=False, provider="xai")
                refreshed += 1
            except Exception as e:
                self.log("Failed to refresh collection: {}".format(collection_id))
                self.window.core.debug.log(e)
        if silent:
            return True
        self.signals.finished.emit("refresh_vector_stores", self.store_id, refreshed)
        return True
    except Exception as e:
        self.log("API error: {}".format(e))
        self.signals.error.emit("refresh_vector_stores", e)
        return False
220
+
221
+ # ---------- Documents ----------
222
+
223
def truncate_files(self, silent: bool = False) -> bool:
    """
    Remove documents for one collection (`self.store_id` set) or for all
    collections (`self.store_id` is None).

    Local records are truncated first, then the remote removal is requested.

    :param silent: if True, do not emit the finished signal
    :return: True on success, False if an error occurred (error signal emitted)
    """
    try:
        if self.store_id is not None:
            self.log("Truncating documents for collection: {}".format(self.store_id))
            # clear local + detach from this collection
            self.window.core.remote_store.xai.files.truncate(self.store_id)
            removed = self.window.core.api.xai.store.remove_from_collection_collections(
                self.store_id,
                callback=self.callback,
            )
        else:
            self.log("Truncating all collection documents...")
            # clear all local + detach from all collections
            self.window.core.remote_store.xai.files.truncate()
            # delete remote files
            removed = self.window.core.api.xai.store.remove_files(callback=self.callback)
        if silent:
            return True
        self.signals.finished.emit("truncate_files", self.store_id, removed)
        return True
    except Exception as e:
        self.log("API error: {}".format(e))
        self.signals.error.emit("truncate_files", e)
        return False
243
+
244
def upload_files(self, silent: bool = False) -> bool:
    """
    Upload every path in `self.files` to the collection `self.store_id`.

    Each file is handled independently: a failed upload is reported on the
    status signal and the loop continues with the next file.

    :param silent: if True, do not emit the finished signal
    :return: True on success, False if an error occurred (error signal emitted)
    """
    uploaded = 0
    try:
        self.log("Uploading files to collection...")
        for file_path in self.files:
            try:
                doc = self.window.core.api.xai.store.upload_to_collection_collections(self.store_id, file_path)
                if doc is None:
                    self.signals.status.emit(
                        "upload_files",
                        "Failed to upload: {}".format(os.path.basename(file_path)),
                    )
                    continue
                self.window.core.remote_store.xai.files.insert(self.store_id, doc.file_metadata)
                uploaded += 1
                progress = "Uploaded file: {}/{}".format(uploaded, len(self.files))
                self.signals.status.emit("upload_files", progress)
                self.log(progress)
            except Exception as e:
                self.window.core.debug.log(e)
                self.signals.status.emit(
                    "upload_files",
                    "Failed to upload: {}".format(os.path.basename(file_path)),
                )
        if silent:
            return True
        self.signals.finished.emit("upload_files", self.store_id, uploaded)
        return True
    except Exception as e:
        self.log("API error: {}".format(e))
        self.signals.error.emit("upload_files", e)
        return False
269
+
270
def import_files(self, silent: bool = False) -> bool:
    """
    Import documents for one collection (`self.store_id` set) or for all
    collections (`self.store_id` is None).

    The matching local records are truncated before the import is requested.

    :param silent: if True, do not emit the finished signal
    :return: True on success, False if an error occurred (error signal emitted)
    """
    try:
        if self.store_id is not None:
            self.log("Importing documents for collection: {}".format(self.store_id))
            # clear local DB (store)
            self.window.core.remote_store.xai.files.truncate_local(self.store_id)
            imported = self.window.core.api.xai.store.import_collection_files_collections(
                self.store_id,
                [],
                callback=self.callback,
            )
            count = len(imported)
        else:
            self.log("Importing all collection documents...")
            # clear local DB (all)
            self.window.core.remote_store.xai.files.truncate_local()
            count = self.window.core.api.xai.store.import_collections_files_collections(callback=self.callback)
        if silent:
            return True
        self.signals.finished.emit("import_files", self.store_id, count)
        return True
    except Exception as e:
        self.log("API error: {}".format(e))
        self.signals.error.emit("import_files", e)
        return False
292
+
293
+ # ---------- Utils ----------
294
+
295
def callback(self, msg: str):
    """
    Progress callback handed to the API calls; forwards the message to the log.

    :param msg: message text
    """
    self.log(msg)
297
+
298
def log(self, msg: str):
    """
    Emit a log message tagged with the current worker mode.

    :param msg: message text
    """
    log_signal = self.signals.log
    log_signal.emit(self.mode, msg)
300
+
301
def cleanup(self):
    """Detach the signals object from the worker and schedule it for deletion."""
    signals, self.signals = self.signals, None
    if signals is None:
        return
    try:
        signals.deleteLater()
    except RuntimeError:
        # deliberately ignored (best-effort cleanup)
        pass
@@ -0,0 +1,390 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # ================================================== #
4
+ # This file is a part of PYGPT package #
5
+ # Website: https://pygpt.net #
6
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
+ # MIT License #
8
+ # Created By : Marcin Szczygliński #
9
+ # Updated Date: 2026.01.06 20:00:00 #
10
+ # ================================================== #
11
+
12
+ import asyncio
13
+ import base64
14
+ import json
15
+ import os
16
+ import queue
17
+ import subprocess
18
+ import threading
19
+ from typing import Optional
20
+
21
+ from .base import BaseProvider
22
+
23
+
24
+ class XAIGrokVoiceAudioInput(BaseProvider):
25
+ PROMPT_TRANSCRIBE = (
26
+ "You are a speech-to-text transcriber. "
27
+ "Return only the verbatim transcript as plain text. "
28
+ "Do not add any explanations, timestamps, labels or formatting."
29
+ )
30
+
31
def __init__(self, *args, **kwargs):
    """
    xAI Grok Voice Agent-based audio transcription provider (via WebSocket API).

    :param args: positional args passed through to the base provider
    :param kwargs: keyword args passed through to the base provider; "plugin" is also stored on the instance
    """
    super().__init__(*args, **kwargs)
    self.plugin = kwargs.get("plugin")
    self.name = "xAI Grok Voice"
    self.id = "xai_grok_voice"
42
+
43
def init_options(self):
    """Initialize options (registers the provider's settings on the plugin)"""
    # Model is implicit for the realtime Voice Agent; options cover audio and behavior only.
    tab = "xai_grok_voice"
    option_specs = (
        (
            "xai_voice_audio_sample_rate",
            {
                "type": "text",
                "value": "16000",
                "label": "Sample rate (Hz)",
                "tab": tab,
                "description": "PCM sample rate for input, e.g., 16000 or 24000",
            },
        ),
        (
            "xai_voice_system_prompt",
            {
                "type": "textarea",
                "value": self.PROMPT_TRANSCRIBE,
                "label": "System Prompt",
                "tab": tab,
                "description": "System prompt to guide the transcription output",
                "tooltip": "System prompt for transcription",
                "persist": True,
            },
        ),
        (
            "xai_voice_region",
            {
                "type": "text",
                "value": "",
                "label": "Region (optional)",
                "tab": tab,
                "description": "Regional endpoint like us-east-1; leave empty to use the global endpoint",
            },
        ),
        (
            "xai_voice_chunk_ms",
            {
                "type": "text",
                "value": "200",
                "label": "Chunk size (ms)",
                "tab": tab,
                "description": "Size of audio chunks to send over WebSocket",
            },
        ),
    )
    # Registered in declaration order.
    for option_name, option_kwargs in option_specs:
        self.plugin.add_option(option_name, **option_kwargs)
80
+
81
def transcribe(self, path: str) -> str:
    """
    Audio to text transcription using xAI Grok Voice Agent (WebSocket).

    The async WebSocket pipeline runs on its own event loop in a helper
    thread; this method blocks until that thread finishes.

    :param path: path to audio file to transcribe
    :return: transcribed text ("" if no transcript was produced)
    :raises RuntimeError: if the xAI API key is not set
    :raises Exception: any error raised by the WebSocket pipeline (missing
        'websockets' package, missing ffmpeg, connection failure, ...) is
        re-raised here instead of being lost in the helper thread
    """
    # Ensure xAI client is initialized in core (keeps auth/config consistent with the app).
    # The client is not used for the WebSocket itself, so a failure here is tolerated.
    try:
        _ = self.plugin.window.core.api.xai.get_client()
    except Exception:
        pass

    api_key = self._get_api_key()
    if not api_key:
        raise RuntimeError("xAI API key is not set. Please configure it in settings.")

    # Resolve endpoint (optionally regionalized)
    region = (self.plugin.get_option_value("xai_voice_region") or "").strip()
    host = f"{region}.api.x.ai" if region else "api.x.ai"
    ws_uri = f"wss://{host}/v1/realtime"

    # Read options; invalid numbers fall back to the defaults
    prompt = self.plugin.get_option_value("xai_voice_system_prompt") or self.PROMPT_TRANSCRIBE
    sr_opt = str(self.plugin.get_option_value("xai_voice_audio_sample_rate") or "16000").strip()
    try:
        sample_rate = max(8000, int(sr_opt))
    except Exception:
        sample_rate = 16000

    chunk_ms_opt = str(self.plugin.get_option_value("xai_voice_chunk_ms") or "200").strip()
    try:
        chunk_ms = max(20, int(chunk_ms_opt))
    except Exception:
        chunk_ms = 200

    # Compute chunk size for 16-bit mono PCM (never below 4096 bytes)
    bytes_per_second = sample_rate * 2  # 1 channel * 2 bytes
    chunk_size = max(4096, int(bytes_per_second * (chunk_ms / 1000.0)))

    # Run async websocket pipeline in an isolated thread/loop to avoid interfering with the UI loop.
    # The queue carries (ok, payload): payload is the text on success, the exception on failure.
    result_queue: queue.Queue = queue.Queue()

    def _runner():
        loop = asyncio.new_event_loop()
        try:
            asyncio.set_event_loop(loop)
            text = loop.run_until_complete(
                self._transcribe_async(
                    ws_uri=ws_uri,
                    api_key=api_key,
                    path=path,
                    sample_rate=sample_rate,
                    chunk_size=chunk_size,
                    system_prompt=prompt,
                )
            )
            result_queue.put((True, text or ""))
        except Exception as exc:
            # Hand the failure to the calling thread. An exception that escapes
            # a thread's target never reaches the caller of join().
            result_queue.put((False, exc))
        finally:
            try:
                loop.close()
            except Exception:
                pass

    t = threading.Thread(target=_runner, daemon=True)
    t.start()
    t.join()

    if result_queue.empty():
        return ""
    ok, payload = result_queue.get()
    if not ok:
        raise payload
    return payload
152
+
153
async def _transcribe_async(
    self,
    ws_uri: str,
    api_key: str,
    path: str,
    sample_rate: int,
    chunk_size: int,
    system_prompt: str,
) -> str:
    """
    Open the realtime WebSocket, configure the session, stream the audio as
    base64 PCM chunks, commit the buffer and wait for the transcript.

    :param ws_uri: WebSocket endpoint URI
    :param api_key: API key sent as a Bearer token
    :param path: audio file path (.wav/.wave decoded in-process, anything else via ffmpeg)
    :param sample_rate: PCM sample rate announced to the server and used for decoding
    :param chunk_size: number of PCM bytes per append message
    :param system_prompt: instructions placed in the session config
    :return: transcript text, or "" if none arrived (including on timeout)
    :raises RuntimeError: if the 'websockets' package cannot be imported
    """
    try:
        import websockets  # type: ignore
        from websockets.asyncio.client import ClientConnection  # type: ignore
    except Exception as e:
        raise RuntimeError(
            "The 'websockets' package is required for xAI Voice transcription. "
            "Please install it in your environment."
        ) from e

    def _append_event(pcm_chunk: bytes) -> str:
        # One audio-append message carrying a base64-encoded PCM chunk.
        return json.dumps(
            {
                "type": "input_audio_buffer.append",
                "audio": base64.b64encode(pcm_chunk).decode("ascii"),
            }
        )

    # Session config: match our audio format and request manual turn handling.
    session_update = {
        "type": "session.update",
        "session": {
            "instructions": system_prompt,
            # Transcription only: no server VAD, the buffer is committed manually as one turn
            "turn_detection": {"type": None},
            "audio": {
                "input": {"format": {"type": "audio/pcm", "rate": sample_rate}},
                # Output audio not needed; keep default
            },
        },
    }
    auth_headers = {"Authorization": f"Bearer {api_key}"}
    result: Optional[str] = None

    async with websockets.connect(
        uri=ws_uri,
        ssl=True,
        open_timeout=30,
        close_timeout=10,
        additional_headers=auth_headers,
        max_size=None,
    ) as ws:  # type: ClientConnection
        await ws.send(json.dumps(session_update))

        # Stream the audio
        duration_s = None  # stays unknown on the ffmpeg path
        if path.lower().endswith((".wav", ".wave")):
            # WAV path: decode to mono s16le at the desired rate, then send slice by slice
            pcm_bytes, duration_s = self._decode_wav_to_pcm_s16le(path, sample_rate)
            for offset in range(0, len(pcm_bytes), chunk_size):
                await ws.send(_append_event(pcm_bytes[offset:offset + chunk_size]))
        else:
            # Generic path: ffmpeg decodes and streams
            await self._stream_via_ffmpeg(ws, path, sample_rate, chunk_size)

        # Commit a single user message from the accumulated audio buffer
        await ws.send(json.dumps({"type": "input_audio_buffer.commit"}))

        # Dynamic timeout: 30s minimum, ~2x audio length + 10s, capped at 600s;
        # a flat 120s when the duration is unknown.
        if duration_s is None:
            timeout_s = 120.0
        else:
            timeout_s = min(600.0, max(30.0, duration_s * 2.0 + 10.0))

        try:
            result = await self._await_transcript(ws, timeout=timeout_s)
        except asyncio.TimeoutError:
            # No transcript in time: fall through and return ""
            pass

    return result or ""
241
+
242
async def _await_transcript(self, ws, timeout: float) -> Optional[str]:
    """
    Read events from the WebSocket until a transcript is available.

    Waits for either:
    - conversation.item.input_audio_transcription.completed (preferred), or
    - conversation.item.added for a user item with an 'input_audio' content
      part that has a 'transcript' key (fallback), returned on 'response.done'.

    The fallback transcript is also returned when the connection ends or the
    deadline passes, instead of being discarded.

    :param ws: connection object exposing an awaitable `recv()`
    :param timeout: maximum total time to wait, in seconds
    :return: transcript text, or None if the connection ended without one
    :raises asyncio.TimeoutError: if the deadline passes and no fallback transcript was seen
    """
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout
    pending_fallback: Optional[str] = None

    while True:
        remaining = deadline - loop.time()
        if remaining <= 0:
            if pending_fallback:
                return pending_fallback
            raise asyncio.TimeoutError("Timed out waiting for xAI transcription result.")

        try:
            msg = await asyncio.wait_for(ws.recv(), timeout=remaining)
        except asyncio.TimeoutError:
            if pending_fallback:
                return pending_fallback
            raise
        except Exception:
            # recv() failed (e.g. connection closed): return whatever fallback was collected
            return pending_fallback

        try:
            event = json.loads(msg)
        except Exception:
            continue  # not JSON; ignore
        if not isinstance(event, dict):
            continue  # valid JSON but not an event object; ignore

        etype = event.get("type", "")
        if etype == "conversation.item.input_audio_transcription.completed":
            # Preferred final transcript
            return event.get("transcript") or ""
        elif etype == "conversation.item.added":
            # Fallback: the added user item may carry the transcript inline
            item = event.get("item")
            if isinstance(item, dict) and item.get("role") == "user":
                for c in item.get("content") or []:
                    if isinstance(c, dict) and c.get("type") == "input_audio" and "transcript" in c:
                        pending_fallback = c.get("transcript") or pending_fallback
        elif etype == "response.done":
            # End of turn: return the fallback if there is one, otherwise keep waiting
            if pending_fallback:
                return pending_fallback
283
+
284
async def _stream_via_ffmpeg(self, ws, path: str, sample_rate: int, chunk_size: int):
    """
    Uses ffmpeg to decode arbitrary input to mono s16le at sample_rate and streams chunks.

    Each chunk read from ffmpeg's stdout is sent as a base64-encoded
    "input_audio_buffer.append" message.

    :param ws: connection object exposing an awaitable `send()`
    :param path: input audio file path
    :param sample_rate: output sample rate passed to ffmpeg (-ar)
    :param chunk_size: maximum number of bytes read from ffmpeg per message
    :raises RuntimeError: if ffmpeg is not installed, or exits with a non-zero code
    """
    cmd = [
        "ffmpeg",
        "-nostdin",
        "-hide_banner",
        "-loglevel",
        "error",
        "-i",
        path,
        "-ac",
        "1",
        "-ar",
        str(sample_rate),
        "-f",
        "s16le",
        "pipe:1",
    ]
    try:
        proc = await asyncio.create_subprocess_exec(
            *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
        )
    except FileNotFoundError as e:
        raise RuntimeError(
            "ffmpeg not found. Install ffmpeg or provide WAV input for xAI transcription."
        ) from e

    # Drain stderr concurrently so ffmpeg can never block on a full stderr pipe.
    stderr_task = asyncio.ensure_future(proc.stderr.read())
    try:
        while True:
            chunk = await proc.stdout.read(chunk_size)
            if not chunk:
                break
            await ws.send(
                json.dumps(
                    {
                        "type": "input_audio_buffer.append",
                        "audio": base64.b64encode(chunk).decode("ascii"),
                    }
                )
            )
        returncode = await proc.wait()
        stderr_text = (await stderr_task).decode("utf-8", "replace").strip()
    except BaseException:
        # Streaming was interrupted. ffmpeg may be blocked writing to stdout,
        # so terminate it first: a plain wait() could hang forever.
        if proc.returncode is None:
            try:
                proc.kill()
            except ProcessLookupError:
                pass
        stderr_task.cancel()
        try:
            await proc.wait()
        except Exception:
            pass
        raise

    if returncode != 0:
        # Without this check a failed decode would be committed as a silent, empty buffer.
        raise RuntimeError(
            "ffmpeg failed to decode '{}' (exit code {}): {}".format(path, returncode, stderr_text)
        )
331
+
332
def _decode_wav_to_pcm_s16le(self, path: str, target_rate: int):
    """
    Decodes a WAV file to mono 16-bit PCM at target_rate.

    Conversion order: sample width -> channel downmix -> resampling.
    `audioop` is imported only when a conversion actually needs it, so a WAV
    that is already 16-bit mono at target_rate decodes on Python versions
    where `audioop` is unavailable (removed in Python 3.13).

    :param path: path to the WAV file
    :param target_rate: desired output sample rate in Hz
    :return: tuple (pcm_bytes, duration_seconds)
    :raises ImportError: if a conversion is required and `audioop` is unavailable
    """
    import wave

    with wave.open(path, "rb") as wf:
        n_channels = wf.getnchannels()
        sampwidth = wf.getsampwidth()
        framerate = wf.getframerate()
        raw = wf.readframes(wf.getnframes())

    audioop = None
    if sampwidth != 2 or n_channels == 2 or framerate != target_rate:
        try:
            import audioop
        except ImportError as e:
            raise ImportError(
                "The 'audioop' module is required to convert this WAV file "
                "(not 16-bit mono at {} Hz) but is not available.".format(target_rate)
            ) from e

    # Convert sample width to 16-bit
    if sampwidth != 2:
        if sampwidth == 1:
            # 8-bit WAV samples are unsigned, audioop expects signed: shift by 128 first
            raw = audioop.bias(raw, 1, 128)
        raw = audioop.lin2lin(raw, sampwidth, 2)

    # Downmix to mono by averaging channels (summing at full weight could clip)
    if n_channels == 2:
        raw = audioop.tomono(raw, 2, 0.5, 0.5)
    elif n_channels > 2:
        # audioop.tomono only handles stereo; average the interleaved samples manually
        from array import array

        samples = array("h")
        samples.frombytes(raw[: len(raw) - (len(raw) % 2)])
        usable = len(samples) - (len(samples) % n_channels)
        mono = array(
            "h",
            (
                sum(samples[i:i + n_channels]) // n_channels
                for i in range(0, usable, n_channels)
            ),
        )
        raw = mono.tobytes()

    # Resample if needed
    if framerate != target_rate:
        raw, _ = audioop.ratecv(raw, 2, 1, framerate, target_rate, None)

    duration_s = len(raw) / float(target_rate * 2)  # mono, 16-bit
    return raw, duration_s
362
+
363
def _get_api_key(self) -> Optional[str]:
    """
    Resolve xAI API key from the app's configuration.

    The "api_key_xai" config value wins when it is non-empty; otherwise the
    XAI_API_KEY environment variable is used.

    :return: API key, or None if neither source provides one
    """
    return self.plugin.window.core.config.get("api_key_xai") or os.getenv("XAI_API_KEY")
374
+
375
def is_configured(self) -> bool:
    """
    Check if provider is configured (an API key can be resolved)

    :return: True if configured, False otherwise
    """
    resolved = self._get_api_key()
    if resolved is None:
        return False
    return resolved != ""
383
+
384
def get_config_message(self) -> str:
    """
    Return message to display when provider is not configured

    :return: message
    """
    message = "xAI API key is not set yet. Please configure it in settings."
    return message