pygpt-net 2.7.7__py3-none-any.whl → 2.7.8__py3-none-any.whl
This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- pygpt_net/CHANGELOG.txt +7 -0
- pygpt_net/__init__.py +3 -3
- pygpt_net/app.py +5 -1
- pygpt_net/controller/assistant/batch.py +2 -2
- pygpt_net/controller/assistant/files.py +7 -6
- pygpt_net/controller/assistant/threads.py +0 -0
- pygpt_net/controller/chat/command.py +0 -0
- pygpt_net/controller/dialogs/confirm.py +35 -58
- pygpt_net/controller/lang/mapping.py +9 -9
- pygpt_net/controller/remote_store/{google/batch.py → batch.py} +209 -252
- pygpt_net/controller/remote_store/remote_store.py +982 -13
- pygpt_net/core/command/command.py +0 -0
- pygpt_net/core/db/viewer.py +1 -1
- pygpt_net/core/realtime/worker.py +3 -1
- pygpt_net/{controller/remote_store/google → core/remote_store/anthropic}/__init__.py +0 -1
- pygpt_net/core/remote_store/anthropic/files.py +211 -0
- pygpt_net/core/remote_store/anthropic/store.py +208 -0
- pygpt_net/core/remote_store/openai/store.py +5 -4
- pygpt_net/core/remote_store/remote_store.py +5 -1
- pygpt_net/{controller/remote_store/openai → core/remote_store/xai}/__init__.py +0 -1
- pygpt_net/core/remote_store/xai/files.py +225 -0
- pygpt_net/core/remote_store/xai/store.py +219 -0
- pygpt_net/data/config/config.json +9 -6
- pygpt_net/data/config/models.json +5 -4
- pygpt_net/data/config/settings.json +54 -1
- pygpt_net/data/icons/folder_eye.svg +1 -0
- pygpt_net/data/icons/folder_eye_filled.svg +1 -0
- pygpt_net/data/icons/folder_open.svg +1 -0
- pygpt_net/data/icons/folder_open_filled.svg +1 -0
- pygpt_net/data/locale/locale.de.ini +4 -3
- pygpt_net/data/locale/locale.en.ini +14 -4
- pygpt_net/data/locale/locale.es.ini +4 -3
- pygpt_net/data/locale/locale.fr.ini +4 -3
- pygpt_net/data/locale/locale.it.ini +4 -3
- pygpt_net/data/locale/locale.pl.ini +5 -4
- pygpt_net/data/locale/locale.uk.ini +4 -3
- pygpt_net/data/locale/locale.zh.ini +4 -3
- pygpt_net/icons.qrc +4 -0
- pygpt_net/icons_rc.py +282 -138
- pygpt_net/provider/api/anthropic/__init__.py +2 -0
- pygpt_net/provider/api/anthropic/chat.py +84 -1
- pygpt_net/provider/api/anthropic/store.py +307 -0
- pygpt_net/provider/api/anthropic/stream.py +75 -0
- pygpt_net/provider/api/anthropic/worker/__init__.py +0 -0
- pygpt_net/provider/api/anthropic/worker/importer.py +278 -0
- pygpt_net/provider/api/google/chat.py +59 -2
- pygpt_net/provider/api/google/store.py +124 -3
- pygpt_net/provider/api/google/stream.py +91 -24
- pygpt_net/provider/api/google/worker/importer.py +16 -28
- pygpt_net/provider/api/openai/assistants.py +2 -2
- pygpt_net/provider/api/openai/store.py +4 -1
- pygpt_net/provider/api/openai/worker/importer.py +19 -61
- pygpt_net/provider/api/openai/worker/importer_assistants.py +230 -0
- pygpt_net/provider/api/x_ai/__init__.py +30 -6
- pygpt_net/provider/api/x_ai/audio.py +43 -11
- pygpt_net/provider/api/x_ai/chat.py +92 -4
- pygpt_net/provider/api/x_ai/realtime/__init__.py +12 -0
- pygpt_net/provider/api/x_ai/realtime/client.py +1825 -0
- pygpt_net/provider/api/x_ai/realtime/realtime.py +198 -0
- pygpt_net/provider/api/x_ai/remote_tools.py +19 -1
- pygpt_net/provider/api/x_ai/store.py +610 -0
- pygpt_net/provider/api/x_ai/stream.py +30 -9
- pygpt_net/provider/api/x_ai/worker/importer.py +308 -0
- pygpt_net/provider/audio_input/xai_grok_voice.py +390 -0
- pygpt_net/provider/audio_output/xai_tts.py +325 -0
- pygpt_net/provider/core/config/patch.py +18 -3
- pygpt_net/provider/core/config/patches/patch_before_2_6_42.py +2 -2
- pygpt_net/provider/core/model/patch.py +13 -0
- pygpt_net/tools/image_viewer/tool.py +334 -34
- pygpt_net/tools/image_viewer/ui/dialogs.py +317 -21
- pygpt_net/ui/dialog/assistant.py +1 -1
- pygpt_net/ui/dialog/plugins.py +13 -5
- pygpt_net/ui/dialog/remote_store.py +552 -0
- pygpt_net/ui/dialogs.py +3 -5
- pygpt_net/ui/layout/ctx/ctx_list.py +58 -7
- pygpt_net/ui/menu/tools.py +6 -13
- pygpt_net/ui/widget/dialog/{remote_store_google.py → remote_store.py} +10 -10
- pygpt_net/ui/widget/element/button.py +4 -4
- pygpt_net/ui/widget/image/display.py +2 -2
- pygpt_net/ui/widget/lists/context.py +2 -2
- {pygpt_net-2.7.7.dist-info → pygpt_net-2.7.8.dist-info}/METADATA +9 -2
- {pygpt_net-2.7.7.dist-info → pygpt_net-2.7.8.dist-info}/RECORD +82 -70
- pygpt_net/controller/remote_store/google/store.py +0 -615
- pygpt_net/controller/remote_store/openai/batch.py +0 -524
- pygpt_net/controller/remote_store/openai/store.py +0 -699
- pygpt_net/ui/dialog/remote_store_google.py +0 -539
- pygpt_net/ui/dialog/remote_store_openai.py +0 -539
- pygpt_net/ui/widget/dialog/remote_store_openai.py +0 -56
- pygpt_net/ui/widget/lists/remote_store_google.py +0 -248
- pygpt_net/ui/widget/lists/remote_store_openai.py +0 -317
- {pygpt_net-2.7.7.dist-info → pygpt_net-2.7.8.dist-info}/LICENSE +0 -0
- {pygpt_net-2.7.7.dist-info → pygpt_net-2.7.8.dist-info}/WHEEL +0 -0
- {pygpt_net-2.7.7.dist-info → pygpt_net-2.7.8.dist-info}/entry_points.txt +0 -0
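
Read together, the listing shows the per-provider remote-store controllers and dialogs (controller/remote_store/google, controller/remote_store/openai, ui/dialog/remote_store_google.py, ui/dialog/remote_store_openai.py) being folded into single provider-aware modules, plus new Anthropic and xAI remote-store backends, an xAI realtime client, and new xAI voice input/output providers. The two largest new files are reproduced below. As rough orientation only, a hedged sketch of the provider-keyed layout this implies; window.core.remote_store.xai and window.core.api.xai.store do appear in the new importer below, while the helper functions themselves are illustrative assumptions, not code from this diff:

# Hedged orientation sketch -- not part of the diff.
# One controller addresses per-provider cores and API wrappers by key.
def get_store_core(window, provider: str):
    # e.g. "openai", "google", "anthropic", "xai"
    return getattr(window.core.remote_store, provider)

def get_store_api(window, provider: str):
    return getattr(window.core.api, provider).store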
--- /dev/null
+++ pygpt_net/provider/api/x_ai/worker/importer.py
@@ -0,0 +1,308 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ================================================== #
+# This file is a part of PYGPT package #
+# Website: https://pygpt.net #
+# GitHub: https://github.com/szczyglis-dev/py-gpt #
+# MIT License #
+# Created By : Marcin Szczygliński #
+# Updated Date: 2026.01.06 06:00:00 #
+# ================================================== #
+
+import os
+
+from PySide6.QtCore import QObject, Signal, QRunnable, Slot
+
+
+class Importer(QObject):
+    def __init__(self, window=None):
+        """
+        Importer core (xAI Collections)
+
+        :param window: Window instance
+        """
+        super(Importer, self).__init__()
+        self.window = window
+        self.worker = None
+
+    @Slot(str, object)
+    def handle_error(self, mode: str, err: any):
+        batch = self.window.controller.remote_store.batch
+        if mode == "import_files":
+            batch.handle_imported_files_failed(err)
+        elif mode == "truncate_files":
+            batch.handle_truncated_files_failed(err)
+        elif mode == "upload_files":
+            batch.handle_uploaded_files_failed(err)
+        elif mode in "vector_stores":
+            batch.handle_imported_stores_failed(err)
+        elif mode in "truncate_vector_stores":
+            batch.handle_truncated_stores_failed(err)
+        elif mode in "refresh_vector_stores":
+            batch.handle_refreshed_stores_failed(err)
+
+    @Slot(str, str, int)
+    def handle_finished(self, mode: str, store_id: str = None, num: int = 0):
+        batch = self.window.controller.remote_store.batch
+        if mode == "import_files":
+            batch.handle_imported_files(num)
+        elif mode == "truncate_files":
+            batch.handle_truncated_files(store_id, num)
+        elif mode == "upload_files":
+            batch.handle_uploaded_files(num)
+        elif mode == "vector_stores":
+            batch.handle_imported_stores(num)
+        elif mode == "truncate_vector_stores":
+            batch.handle_truncated_stores(num)
+        elif mode == "refresh_vector_stores":
+            batch.handle_refreshed_stores(num)
+
+    @Slot(str, str)
+    def handle_status(self, mode: str, msg: str):
+        self.window.controller.assistant.batch.handle_status_change(mode, msg)
+
+    @Slot(str, str)
+    def handle_log(self, mode: str, msg: str):
+        self.window.controller.assistant.threads.log(mode + ": " + msg)
+
+    # ---------- Vector stores (Collections) ----------
+
+    def import_vector_stores(self):
+        """Import collections"""
+        self.worker = ImportWorker()
+        self.worker.window = self.window
+        self.worker.mode = "vector_stores"
+        self.connect_signals(self.worker)
+        self.window.threadpool.start(self.worker)
+
+    def truncate_vector_stores(self):
+        """Delete collections"""
+        self.worker = ImportWorker()
+        self.worker.window = self.window
+        self.worker.mode = "truncate_vector_stores"
+        self.connect_signals(self.worker)
+        self.window.threadpool.start(self.worker)
+
+    def refresh_vector_stores(self):
+        """Refresh collections"""
+        self.worker = ImportWorker()
+        self.worker.window = self.window
+        self.worker.mode = "refresh_vector_stores"
+        self.connect_signals(self.worker)
+        self.window.threadpool.start(self.worker)
+
+    # ---------- Files (documents) ----------
+
+    def truncate_files(self, store_id: str = None):
+        """Remove documents from one/all collections"""
+        self.worker = ImportWorker()
+        self.worker.window = self.window
+        self.worker.mode = "truncate_files"
+        self.worker.store_id = store_id
+        self.connect_signals(self.worker)
+        self.window.threadpool.start(self.worker)
+
+    def upload_files(self, store_id: str, files: list = None):
+        """Upload files to a collection"""
+        self.worker = ImportWorker()
+        self.worker.window = self.window
+        self.worker.mode = "upload_files"
+        self.worker.store_id = store_id
+        self.worker.files = files or []
+        self.connect_signals(self.worker)
+        self.window.threadpool.start(self.worker)
+
+    def import_files(self, store_id: str = None):
+        """Import documents from one/all collections"""
+        self.worker = ImportWorker()
+        self.worker.window = self.window
+        self.worker.mode = "import_files"
+        self.worker.store_id = store_id
+        self.connect_signals(self.worker)
+        self.window.threadpool.start(self.worker)
+
+    def connect_signals(self, worker):
+        worker.signals.finished.connect(self.handle_finished)
+        worker.signals.error.connect(self.handle_error)
+        worker.signals.status.connect(self.handle_status)
+        worker.signals.log.connect(self.handle_log)
+
+
+class ImportWorkerSignals(QObject):
+    status = Signal(str, str)  # mode, message
+    finished = Signal(str, str, int)  # mode, store_id, num
+    error = Signal(str, object)  # mode, error
+    log = Signal(str, str)  # mode, message
+
+
+class ImportWorker(QRunnable):
+    """Import worker (xAI Collections)"""
+    def __init__(self, *args, **kwargs):
+        super().__init__()
+        self.signals = ImportWorkerSignals()
+        self.window = None
+        self.mode = "vector_stores"
+        self.store_id = None
+        self.files = []
+
+    @Slot()
+    def run(self):
+        try:
+            if self.mode == "vector_stores":
+                if self.import_vector_stores():
+                    self.import_files()
+            elif self.mode == "truncate_vector_stores":
+                self.truncate_vector_stores()
+            elif self.mode == "refresh_vector_stores":
+                self.refresh_vector_stores()
+            elif self.mode == "truncate_files":
+                self.truncate_files()
+            elif self.mode == "import_files":
+                self.import_files()
+            elif self.mode == "upload_files":
+                self.upload_files()
+        except Exception as e:
+            self.signals.error.emit(self.mode, e)
+        finally:
+            self.cleanup()
+
+    # ---------- Collections ----------
+
+    def import_vector_stores(self, silent: bool = False) -> bool:
+        try:
+            self.log("Importing collections...")
+            self.window.core.remote_store.xai.clear()
+            items = {}
+            self.window.core.api.xai.store.import_collections_collections(items, callback=self.callback)
+            self.window.core.remote_store.xai.import_items(items)
+            if not silent:
+                self.signals.finished.emit("vector_stores", self.store_id, len(items))
+            return True
+        except Exception as e:
+            self.log("API error: {}".format(e))
+            self.signals.error.emit("vector_stores", e)
+            return False
+
+    def truncate_vector_stores(self, silent: bool = False) -> bool:
+        try:
+            self.log("Truncating collections...")
+            num = self.window.core.api.xai.store.remove_all_collections_collections(callback=self.callback)
+            self.window.core.remote_store.xai.items = {}
+            self.window.core.remote_store.xai.save()
+            if not silent:
+                self.signals.finished.emit("truncate_vector_stores", self.store_id, num)
+            return True
+        except Exception as e:
+            self.log("API error: {}".format(e))
+            self.signals.error.emit("truncate_vector_stores", e)
+            return False
+
+    def refresh_vector_stores(self, silent: bool = False) -> bool:
+        try:
+            self.log("Refreshing collections...")
+            num = 0
+            stores = self.window.core.remote_store.xai.items
+            for id in list(stores.keys()):
+                store = stores[id]
+                try:
+                    self.window.controller.remote_store.refresh_store(store, update=False, provider="xai")
+                    num += 1
+                except Exception as e:
+                    self.log("Failed to refresh collection: {}".format(id))
+                    self.window.core.debug.log(e)
+            if not silent:
+                self.signals.finished.emit("refresh_vector_stores", self.store_id, num)
+            return True
+        except Exception as e:
+            self.log("API error: {}".format(e))
+            self.signals.error.emit("refresh_vector_stores", e)
+            return False
+
+    # ---------- Documents ----------
+
+    def truncate_files(self, silent: bool = False) -> bool:
+        try:
+            if self.store_id is None:
+                self.log("Truncating all collection documents...")
+                self.window.core.remote_store.xai.files.truncate()  # clear all local + detach from all collections
+                num = self.window.core.api.xai.store.remove_files(callback=self.callback)  # delete remote files
+            else:
+                self.log("Truncating documents for collection: {}".format(self.store_id))
+                self.window.core.remote_store.xai.files.truncate(self.store_id)  # clear local + detach from this collection
+                num = self.window.core.api.xai.store.remove_from_collection_collections(
+                    self.store_id,
+                    callback=self.callback,
+                )
+            if not silent:
+                self.signals.finished.emit("truncate_files", self.store_id, num)
+            return True
+        except Exception as e:
+            self.log("API error: {}".format(e))
+            self.signals.error.emit("truncate_files", e)
+            return False
+
+    def upload_files(self, silent: bool = False) -> bool:
+        num = 0
+        try:
+            self.log("Uploading files to collection...")
+            for path in self.files:
+                try:
+                    doc = self.window.core.api.xai.store.upload_to_collection_collections(self.store_id, path)
+                    if doc is not None:
+                        self.window.core.remote_store.xai.files.insert(self.store_id, doc.file_metadata)
+                        num += 1
+                        msg = "Uploaded file: {}/{}".format(num, len(self.files))
+                        self.signals.status.emit("upload_files", msg)
+                        self.log(msg)
+                    else:
+                        self.signals.status.emit("upload_files", "Failed to upload: {}".format(os.path.basename(path)))
+                except Exception as e:
+                    self.window.core.debug.log(e)
+                    self.signals.status.emit("upload_files", "Failed to upload: {}".format(os.path.basename(path)))
+            if not silent:
+                self.signals.finished.emit("upload_files", self.store_id, num)
+            return True
+        except Exception as e:
+            self.log("API error: {}".format(e))
+            self.signals.error.emit("upload_files", e)
+            return False
+
+    def import_files(self, silent: bool = False) -> bool:
+        try:
+            if self.store_id is None:
+                self.log("Importing all collection documents...")
+                self.window.core.remote_store.xai.files.truncate_local()  # clear local DB (all)
+                num = self.window.core.api.xai.store.import_collections_files_collections(callback=self.callback)
+            else:
+                self.log("Importing documents for collection: {}".format(self.store_id))
+                self.window.core.remote_store.xai.files.truncate_local(self.store_id)  # clear local DB (store)
+                items = self.window.core.api.xai.store.import_collection_files_collections(
+                    self.store_id,
+                    [],
+                    callback=self.callback,
+                )
+                num = len(items)
+            if not silent:
+                self.signals.finished.emit("import_files", self.store_id, num)
+            return True
+        except Exception as e:
+            self.log("API error: {}".format(e))
+            self.signals.error.emit("import_files", e)
+            return False
+
+    # ---------- Utils ----------
+
+    def callback(self, msg: str):
+        self.log(msg)
+
+    def log(self, msg: str):
+        self.signals.log.emit(self.mode, msg)
+
+    def cleanup(self):
+        sig = self.signals
+        self.signals = None
+        if sig is not None:
+            try:
+                sig.deleteLater()
+            except RuntimeError:
+                pass
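
For orientation, a minimal usage sketch of the worker pattern above (hedged; it only restates the flow already visible in the code, and the controller handlers named in handle_finished/handle_error live in pygpt_net/controller/remote_store/batch.py, outside the reproduced hunks):

# Hedged usage sketch, not part of the diff.
importer = Importer(window=window)   # window: the app's main window object
importer.import_vector_stores()      # queues an ImportWorker with mode="vector_stores"
# The worker runs on the Qt thread pool, off the GUI thread, and reports back via signals:
#   finished(mode, store_id, num) -> e.g. batch.handle_imported_stores(num)
#   error(mode, err)              -> e.g. batch.handle_imported_stores_failed(err)
#   status/log                    -> forwarded to the assistant batch/threads controllers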
--- /dev/null
+++ pygpt_net/provider/audio_input/xai_grok_voice.py
@@ -0,0 +1,390 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ================================================== #
+# This file is a part of PYGPT package #
+# Website: https://pygpt.net #
+# GitHub: https://github.com/szczyglis-dev/py-gpt #
+# MIT License #
+# Created By : Marcin Szczygliński #
+# Updated Date: 2026.01.06 20:00:00 #
+# ================================================== #
+
+import asyncio
+import base64
+import json
+import os
+import queue
+import subprocess
+import threading
+from typing import Optional
+
+from .base import BaseProvider
+
+
+class XAIGrokVoiceAudioInput(BaseProvider):
+    PROMPT_TRANSCRIBE = (
+        "You are a speech-to-text transcriber. "
+        "Return only the verbatim transcript as plain text. "
+        "Do not add any explanations, timestamps, labels or formatting."
+    )
+
+    def __init__(self, *args, **kwargs):
+        """
+        xAI Grok Voice Agent-based audio transcription provider (via WebSocket API).
+
+        :param args: args
+        :param kwargs: kwargs
+        """
+        super(XAIGrokVoiceAudioInput, self).__init__(*args, **kwargs)
+        self.plugin = kwargs.get("plugin")
+        self.id = "xai_grok_voice"
+        self.name = "xAI Grok Voice"
+
+    def init_options(self):
+        """Initialize options"""
+        # Model is implicit for the realtime Voice Agent; keep options focused on audio and behavior
+        self.plugin.add_option(
+            "xai_voice_audio_sample_rate",
+            type="text",
+            value="16000",
+            label="Sample rate (Hz)",
+            tab="xai_grok_voice",
+            description="PCM sample rate for input, e.g., 16000 or 24000",
+        )
+        self.plugin.add_option(
+            "xai_voice_system_prompt",
+            type="textarea",
+            value=self.PROMPT_TRANSCRIBE,
+            label="System Prompt",
+            tab="xai_grok_voice",
+            description="System prompt to guide the transcription output",
+            tooltip="System prompt for transcription",
+            persist=True,
+        )
+        self.plugin.add_option(
+            "xai_voice_region",
+            type="text",
+            value="",
+            label="Region (optional)",
+            tab="xai_grok_voice",
+            description="Regional endpoint like us-east-1; leave empty to use the global endpoint",
+        )
+        self.plugin.add_option(
+            "xai_voice_chunk_ms",
+            type="text",
+            value="200",
+            label="Chunk size (ms)",
+            tab="xai_grok_voice",
+            description="Size of audio chunks to send over WebSocket",
+        )
+
+    def transcribe(self, path: str) -> str:
+        """
+        Audio to text transcription using xAI Grok Voice Agent (WebSocket).
+
+        :param path: path to audio file to transcribe
+        :return: transcribed text
+        """
+        # Ensure xAI client is initialized in core (keeps auth/config consistent with the app)
+        # We do not rely on its methods for WebSocket, but we respect the app's initialization order
+        try:
+            _ = self.plugin.window.core.api.xai.get_client()
+        except Exception:
+            # Client not strictly required for WebSocket usage; continue if available credentials are set
+            pass
+
+        api_key = self._get_api_key()
+        if not api_key:
+            raise RuntimeError("xAI API key is not set. Please configure it in settings.")
+
+        # Resolve endpoint (optionally regionalized)
+        region = (self.plugin.get_option_value("xai_voice_region") or "").strip()
+        host = f"{region}.api.x.ai" if region else "api.x.ai"
+        ws_uri = f"wss://{host}/v1/realtime"
+
+        # Read options
+        prompt = self.plugin.get_option_value("xai_voice_system_prompt") or self.PROMPT_TRANSCRIBE
+        sr_opt = str(self.plugin.get_option_value("xai_voice_audio_sample_rate") or "16000").strip()
+        try:
+            sample_rate = max(8000, int(sr_opt))
+        except Exception:
+            sample_rate = 16000
+
+        chunk_ms_opt = str(self.plugin.get_option_value("xai_voice_chunk_ms") or "200").strip()
+        try:
+            chunk_ms = max(20, int(chunk_ms_opt))
+        except Exception:
+            chunk_ms = 200
+
+        # Compute chunk size for 16-bit mono PCM
+        bytes_per_second = sample_rate * 2  # 1 channel * 2 bytes
+        chunk_size = max(4096, int(bytes_per_second * (chunk_ms / 1000.0)))
+
+        # Run async websocket pipeline in an isolated thread/loop to avoid interfering with the UI loop
+        result_queue: queue.Queue[str] = queue.Queue()
+
+        def _runner():
+            loop = asyncio.new_event_loop()
+            try:
+                asyncio.set_event_loop(loop)
+                text = loop.run_until_complete(
+                    self._transcribe_async(
+                        ws_uri=ws_uri,
+                        api_key=api_key,
+                        path=path,
+                        sample_rate=sample_rate,
+                        chunk_size=chunk_size,
+                        system_prompt=prompt,
+                    )
+                )
+                result_queue.put(text or "")
+            finally:
+                try:
+                    loop.close()
+                except Exception:
+                    pass
+
+        t = threading.Thread(target=_runner, daemon=True)
+        t.start()
+        t.join()
+
+        return result_queue.get() if not result_queue.empty() else ""
+
+    async def _transcribe_async(
+        self,
+        ws_uri: str,
+        api_key: str,
+        path: str,
+        sample_rate: int,
+        chunk_size: int,
+        system_prompt: str,
+    ) -> str:
+        """
+        Connects to xAI Voice Agent realtime WebSocket and streams audio for transcription.
+        Returns the final transcript text.
+        """
+        try:
+            import websockets  # type: ignore
+            from websockets.asyncio.client import ClientConnection  # type: ignore
+        except Exception as e:
+            raise RuntimeError(
+                "The 'websockets' package is required for xAI Voice transcription. "
+                "Please install it in your environment."
+            ) from e
+
+        transcript: Optional[str] = None
+
+        async with websockets.connect(
+            uri=ws_uri,
+            ssl=True,
+            open_timeout=30,
+            close_timeout=10,
+            additional_headers={"Authorization": f"Bearer {api_key}"},
+            max_size=None,
+        ) as ws:  # type: ClientConnection
+            # Configure session to match our audio and enforce transcription-only behavior
+            session_config = {
+                "type": "session.update",
+                "session": {
+                    "instructions": system_prompt,
+                    # We are only transcribing; disable server VAD and commit manually as a single turn
+                    "turn_detection": {"type": None},
+                    "audio": {
+                        "input": {"format": {"type": "audio/pcm", "rate": sample_rate}},
+                        # Output audio not needed; keep default
+                    },
+                },
+            }
+            await ws.send(json.dumps(session_config))
+
+            # Stream the audio
+            is_wav = path.lower().endswith((".wav", ".wave"))
+            if is_wav:
+                # Fast path for WAV (PCM or otherwise; convert to mono s16le at desired rate)
+                pcm_bytes, duration_s = self._decode_wav_to_pcm_s16le(path, sample_rate)
+                for i in range(0, len(pcm_bytes), chunk_size):
+                    chunk = pcm_bytes[i : i + chunk_size]
+                    if not chunk:
+                        break
+                    await ws.send(
+                        json.dumps(
+                            {
+                                "type": "input_audio_buffer.append",
+                                "audio": base64.b64encode(chunk).decode("ascii"),
+                            }
+                        )
+                    )
+            else:
+                # Generic path via ffmpeg to decode to mono s16le at sample_rate
+                duration_s = None  # unknown
+                await self._stream_via_ffmpeg(ws, path, sample_rate, chunk_size)
+
+            # Commit a single user message from the accumulated audio buffer
+            await ws.send(json.dumps({"type": "input_audio_buffer.commit"}))
+
+            # Wait for transcript events
+            # Use a dynamic timeout: at least 30s; more for longer audio
+            base_timeout = 30.0
+            if duration_s is not None:
+                # allow ~2x audio length + base safety window
+                timeout_s = min(600.0, max(base_timeout, duration_s * 2.0 + 10.0))
+            else:
+                timeout_s = 120.0
+
+            try:
+                transcript = await self._await_transcript(ws, timeout=timeout_s)
+            except asyncio.TimeoutError:
+                # Try to salvage from any conversation.item.added events cached in the loop
+                pass
+
+        return transcript or ""
+
+    async def _await_transcript(self, ws, timeout: float) -> Optional[str]:
+        """
+        Waits for either:
+        - conversation.item.input_audio_transcription.completed (preferred)
+        - conversation.item.added with content.type == 'input_audio' (fallback)
+        """
+        end_time = asyncio.get_event_loop().time() + timeout
+        pending_fallback: Optional[str] = None
+
+        while True:
+            remaining = end_time - asyncio.get_event_loop().time()
+            if remaining <= 0:
+                raise asyncio.TimeoutError("Timed out waiting for xAI transcription result.")
+
+            try:
+                msg = await asyncio.wait_for(ws.recv(), timeout=remaining)
+            except asyncio.TimeoutError:
+                raise
+            except Exception:
+                break
+
+            try:
+                event = json.loads(msg)
+            except Exception:
+                continue
+
+            etype = event.get("type", "")
+            if etype == "conversation.item.input_audio_transcription.completed":
+                # Preferred final transcript
+                return event.get("transcript") or ""
+            elif etype == "conversation.item.added":
+                # Fallback: some responses include the inline transcript in the added user item
+                item = event.get("item") or {}
+                if item.get("role") == "user":
+                    for c in item.get("content", []):
+                        if isinstance(c, dict) and c.get("type") == "input_audio" and "transcript" in c:
+                            pending_fallback = c.get("transcript") or pending_fallback
+            elif etype == "response.done":
+                # If server signals end of turn and we have a fallback transcript, return it
+                if pending_fallback:
+                    return pending_fallback
+
+    async def _stream_via_ffmpeg(self, ws, path: str, sample_rate: int, chunk_size: int):
+        """
+        Uses ffmpeg to decode arbitrary input to mono s16le at sample_rate and streams chunks.
+        """
+        cmd = [
+            "ffmpeg",
+            "-nostdin",
+            "-hide_banner",
+            "-loglevel",
+            "error",
+            "-i",
+            path,
+            "-ac",
+            "1",
+            "-ar",
+            str(sample_rate),
+            "-f",
+            "s16le",
+            "pipe:1",
+        ]
+        try:
+            proc = await asyncio.create_subprocess_exec(
+                *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
+            )
+        except FileNotFoundError as e:
+            raise RuntimeError(
+                "ffmpeg not found. Install ffmpeg or provide WAV input for xAI transcription."
+            ) from e
+
+        try:
+            while True:
+                chunk = await proc.stdout.read(chunk_size)
+                if not chunk:
+                    break
+                await ws.send(
+                    json.dumps(
+                        {
+                            "type": "input_audio_buffer.append",
+                            "audio": base64.b64encode(chunk).decode("ascii"),
+                        }
+                    )
+                )
+        finally:
+            try:
+                await proc.wait()
+            except Exception:
+                pass
+
+    def _decode_wav_to_pcm_s16le(self, path: str, target_rate: int):
+        """
+        Decodes a WAV file to mono 16-bit PCM at target_rate.
+        Returns (bytes, duration_seconds).
+        """
+        import wave
+        import audioop
+
+        with wave.open(path, "rb") as wf:
+            n_channels = wf.getnchannels()
+            sampwidth = wf.getsampwidth()
+            framerate = wf.getframerate()
+            n_frames = wf.getnframes()
+            raw = wf.readframes(n_frames)
+
+        # Convert to mono if needed
+        if n_channels > 1:
+            raw = audioop.tomono(raw, sampwidth, 1, 1)
+
+        # Convert sample width to 16-bit
+        if sampwidth != 2:
+            raw = audioop.lin2lin(raw, sampwidth, 2)
+
+        # Resample if needed
+        if framerate != target_rate:
+            raw, _ = audioop.ratecv(raw, 2, 1, framerate, target_rate, None)
+            framerate = target_rate
+
+        duration_s = len(raw) / float(target_rate * 2)  # mono, 16-bit
+        return raw, duration_s
+
+    def _get_api_key(self) -> Optional[str]:
+        """
+        Resolve xAI API key from the app's configuration.
+        """
+        # Prefer explicit xAI key if present
+        key = self.plugin.window.core.config.get("api_key_xai")
+        if key:
+            return key
+
+        # Optional: try environment variable for parity with SDKs
+        return os.getenv("XAI_API_KEY")
+
+    def is_configured(self) -> bool:
+        """
+        Check if provider is configured
+
+        :return: True if configured, False otherwise
+        """
+        api_key = self._get_api_key()
+        return api_key is not None and api_key != ""
+
+    def get_config_message(self) -> str:
+        """
+        Return message to display when provider is not configured
+
+        :return: message
+        """
+        return "xAI API key is not set yet. Please configure it in settings."