pygpt-net 2.6.30__py3-none-any.whl → 2.6.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pygpt_net/CHANGELOG.txt +15 -0
- pygpt_net/__init__.py +3 -3
- pygpt_net/app.py +7 -1
- pygpt_net/app_core.py +3 -1
- pygpt_net/config.py +3 -1
- pygpt_net/controller/__init__.py +9 -2
- pygpt_net/controller/audio/audio.py +38 -1
- pygpt_net/controller/audio/ui.py +2 -2
- pygpt_net/controller/chat/audio.py +1 -8
- pygpt_net/controller/chat/common.py +23 -62
- pygpt_net/controller/chat/handler/__init__.py +0 -0
- pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
- pygpt_net/controller/chat/output.py +8 -3
- pygpt_net/controller/chat/stream.py +3 -1071
- pygpt_net/controller/chat/text.py +3 -2
- pygpt_net/controller/kernel/kernel.py +11 -3
- pygpt_net/controller/kernel/reply.py +5 -1
- pygpt_net/controller/lang/custom.py +2 -2
- pygpt_net/controller/media/__init__.py +12 -0
- pygpt_net/controller/media/media.py +115 -0
- pygpt_net/controller/realtime/__init__.py +12 -0
- pygpt_net/controller/realtime/manager.py +53 -0
- pygpt_net/controller/realtime/realtime.py +293 -0
- pygpt_net/controller/ui/mode.py +23 -2
- pygpt_net/controller/ui/ui.py +19 -1
- pygpt_net/core/audio/audio.py +6 -1
- pygpt_net/core/audio/backend/native/__init__.py +12 -0
- pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
- pygpt_net/core/audio/backend/native/player.py +139 -0
- pygpt_net/core/audio/backend/native/realtime.py +250 -0
- pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
- pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
- pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
- pygpt_net/core/audio/backend/pyaudio/realtime.py +312 -0
- pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
- pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
- pygpt_net/core/audio/backend/shared/__init__.py +38 -0
- pygpt_net/core/audio/backend/shared/conversions.py +211 -0
- pygpt_net/core/audio/backend/shared/envelope.py +38 -0
- pygpt_net/core/audio/backend/shared/player.py +137 -0
- pygpt_net/core/audio/backend/shared/rt.py +52 -0
- pygpt_net/core/audio/capture.py +5 -0
- pygpt_net/core/audio/output.py +14 -2
- pygpt_net/core/audio/whisper.py +6 -2
- pygpt_net/core/bridge/bridge.py +2 -1
- pygpt_net/core/bridge/worker.py +4 -1
- pygpt_net/core/dispatcher/dispatcher.py +37 -1
- pygpt_net/core/events/__init__.py +2 -1
- pygpt_net/core/events/realtime.py +55 -0
- pygpt_net/core/image/image.py +56 -5
- pygpt_net/core/realtime/__init__.py +0 -0
- pygpt_net/core/realtime/options.py +87 -0
- pygpt_net/core/realtime/shared/__init__.py +0 -0
- pygpt_net/core/realtime/shared/audio.py +213 -0
- pygpt_net/core/realtime/shared/loop.py +64 -0
- pygpt_net/core/realtime/shared/session.py +59 -0
- pygpt_net/core/realtime/shared/text.py +37 -0
- pygpt_net/core/realtime/shared/tools.py +276 -0
- pygpt_net/core/realtime/shared/turn.py +38 -0
- pygpt_net/core/realtime/shared/types.py +16 -0
- pygpt_net/core/realtime/worker.py +160 -0
- pygpt_net/core/render/web/body.py +24 -3
- pygpt_net/core/text/utils.py +54 -2
- pygpt_net/core/types/__init__.py +1 -0
- pygpt_net/core/types/image.py +54 -0
- pygpt_net/core/video/__init__.py +12 -0
- pygpt_net/core/video/video.py +290 -0
- pygpt_net/data/config/config.json +26 -5
- pygpt_net/data/config/models.json +221 -103
- pygpt_net/data/config/settings.json +244 -6
- pygpt_net/data/css/web-blocks.css +6 -0
- pygpt_net/data/css/web-chatgpt.css +6 -0
- pygpt_net/data/css/web-chatgpt_wide.css +6 -0
- pygpt_net/data/locale/locale.de.ini +35 -7
- pygpt_net/data/locale/locale.en.ini +56 -17
- pygpt_net/data/locale/locale.es.ini +35 -7
- pygpt_net/data/locale/locale.fr.ini +35 -7
- pygpt_net/data/locale/locale.it.ini +35 -7
- pygpt_net/data/locale/locale.pl.ini +38 -7
- pygpt_net/data/locale/locale.uk.ini +35 -7
- pygpt_net/data/locale/locale.zh.ini +31 -3
- pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
- pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
- pygpt_net/data/locale/plugin.cmd_web.en.ini +8 -0
- pygpt_net/item/model.py +22 -1
- pygpt_net/plugin/audio_input/plugin.py +37 -4
- pygpt_net/plugin/audio_input/simple.py +57 -8
- pygpt_net/plugin/cmd_files/worker.py +3 -0
- pygpt_net/provider/api/google/__init__.py +76 -7
- pygpt_net/provider/api/google/audio.py +8 -1
- pygpt_net/provider/api/google/chat.py +45 -6
- pygpt_net/provider/api/google/image.py +226 -86
- pygpt_net/provider/api/google/realtime/__init__.py +12 -0
- pygpt_net/provider/api/google/realtime/client.py +1945 -0
- pygpt_net/provider/api/google/realtime/realtime.py +186 -0
- pygpt_net/provider/api/google/video.py +364 -0
- pygpt_net/provider/api/openai/__init__.py +22 -2
- pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
- pygpt_net/provider/api/openai/realtime/client.py +1828 -0
- pygpt_net/provider/api/openai/realtime/realtime.py +193 -0
- pygpt_net/provider/audio_input/google_genai.py +103 -0
- pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
- pygpt_net/provider/audio_output/google_tts.py +0 -12
- pygpt_net/provider/audio_output/openai_tts.py +8 -5
- pygpt_net/provider/core/config/patch.py +241 -178
- pygpt_net/provider/core/model/patch.py +28 -2
- pygpt_net/provider/llms/google.py +8 -9
- pygpt_net/provider/web/duckduck_search.py +212 -0
- pygpt_net/ui/layout/toolbox/audio.py +55 -0
- pygpt_net/ui/layout/toolbox/footer.py +14 -42
- pygpt_net/ui/layout/toolbox/image.py +7 -13
- pygpt_net/ui/layout/toolbox/raw.py +52 -0
- pygpt_net/ui/layout/toolbox/split.py +48 -0
- pygpt_net/ui/layout/toolbox/toolbox.py +8 -8
- pygpt_net/ui/layout/toolbox/video.py +49 -0
- pygpt_net/ui/widget/option/combo.py +15 -1
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/METADATA +46 -22
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/RECORD +121 -73
- pygpt_net/core/audio/backend/pyaudio.py +0 -554
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/LICENSE +0 -0
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/WHEEL +0 -0
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,312 @@
|
|
|
1
|
+
import threading
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
from PySide6.QtCore import QTimer, QObject, Qt
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class RealtimeSessionPyAudio(QObject):
    """
    Realtime PCM playback session using PyAudio in callback mode.

    Consumes already-converted PCM frames, keeps GUI responsive and emits volume updates.

    PortAudio pulls audio through ``_callback``; producers push bytes with ``feed()``.
    The playback buffer and the meter window are each guarded by their own lock.
    """
    def __init__(
        self,
        device_index: int,
        rate: int,
        channels: int,
        width_bytes: int = 2,
        parent: Optional[QObject] = None,
        volume_emitter: Optional[callable] = None,
    ):
        """
        Open the output stream and start the meter and watchdog timers.

        :param device_index: output device index passed to PyAudio
        :param rate: sample rate in Hz
        :param channels: number of channels
        :param width_bytes: bytes per sample (PyAudio accepts 1, 2, 3 or 4)
        :param parent: optional QObject parent
        :param volume_emitter: optional callable receiving the meter level (int, 0..100)
        :raises Exception: whatever PyAudio raises while selecting the format or
                           opening the stream; the PyAudio instance is released first
        """
        super().__init__(parent)
        import pyaudio  # local import to keep backend import-safe

        # Resolve the callback return flags once, so the audio callback never has to
        # execute an import statement on the PortAudio thread.
        self._pa_continue = pyaudio.paContinue
        self._pa_complete = pyaudio.paComplete

        self.device_index = int(device_index)
        self.rate = int(rate)
        self.channels = int(channels)
        self.width = int(width_bytes)
        self.frame_bytes = max(1, self.channels * self.width)
        self.bytes_per_ms = max(1, int(self.rate * self.frame_bytes / 1000))

        # internal buffers/flags (must exist before the stream is opened, because the
        # callback may run as soon as the stream starts)
        self._buffer = bytearray()
        self._buf_lock = threading.Lock()
        self._final = False
        self._tail_ms = 60  # add a small silence tail to avoid clicks

        # one-shot guard to avoid double stop and duplicate callbacks
        self._stopping = False

        # volume metering state
        self._volume_emitter = volume_emitter
        self._vol_buffer = bytearray()
        self._vol_lock = threading.Lock()

        # stop callback (set by backend)
        self.on_stopped = None

        self._stream = None
        self._pa = pyaudio.PyAudio()
        try:
            # choose PyAudio format from width
            self.pa_format = self._pa.get_format_from_width(
                self.width,
                unsigned=(self.width == 1)
            )
            # PyAudio maps width 4 to paFloat32; remember it so the meter decodes
            # the same sample type the stream actually plays.
            self._float_samples = (self.pa_format == pyaudio.paFloat32)

            # open callback-based output stream
            self._stream = self._pa.open(
                format=self.pa_format,
                channels=self.channels,
                rate=self.rate,
                output=True,
                output_device_index=self.device_index,
                stream_callback=self._callback,
                frames_per_buffer=max(256, int(self.rate / 100))  # ~10 ms
            )
        except Exception:
            # Do not leak the PortAudio handle when the session cannot be created.
            try:
                self._pa.terminate()
            except Exception:
                pass
            self._pa = None
            raise

        # volume meter timer
        self._vol_timer = QTimer(self)
        self._vol_timer.setTimerType(Qt.PreciseTimer)
        self._vol_timer.setInterval(33)  # ~30 Hz meter
        self._vol_timer.timeout.connect(self._emit_volume_tick)
        self._vol_timer.start()

        try:
            self._stream.start_stream()
        except Exception:
            # Best effort: if the stream is not running, the watchdog below sees an
            # inactive stream and calls stop().
            pass

        # finished-state watchdog: guarantees stop()+on_stopped once playback is truly done
        self._finish_timer = QTimer(self)
        self._finish_timer.setTimerType(Qt.PreciseTimer)
        self._finish_timer.setInterval(15)  # fast but lightweight watchdog
        self._finish_timer.timeout.connect(self._check_finished)
        self._finish_timer.start()

    def is_active(self) -> bool:
        """
        Return True if PortAudio stream is active.

        :return: True if active
        """
        try:
            return self._stream is not None and self._stream.is_active()
        except Exception:
            return False

    def is_finalized(self) -> bool:
        """
        Return True if session was marked final.

        :return: True if final
        """
        return bool(self._final)

    def feed(self, data: bytes) -> None:
        """
        Append PCM bytes (already in session/device format).

        :param data: bytes to append
        """
        if not data:
            return
        with self._buf_lock:
            self._buffer.extend(data)
        # push to volume window from the same bytes
        # NOTE(review): _callback pushes the played bytes again, so the meter window
        # mixes queued and played audio - confirm this is intended.
        self._vol_push(data)

    def mark_final(self) -> None:
        """No more data will be supplied; add a small silence tail."""
        if not self._final:
            pad = self.bytes_per_ms * self._tail_ms
            pad -= (pad % self.frame_bytes)  # keep the tail frame-aligned
            if pad > 0:
                with self._buf_lock:
                    self._buffer.extend(self._silence(pad))
            self._final = True

    def stop(self) -> None:
        """Stop playback and free resources. Idempotent."""
        # ensure this executes only once even if called from multiple paths
        if self._stopping:
            return
        self._stopping = True

        # stop timers first to prevent re-entry
        try:
            if self._finish_timer:
                self._finish_timer.stop()
        except Exception:
            pass
        try:
            if self._vol_timer:
                self._vol_timer.stop()
        except Exception:
            pass

        # gracefully stop PortAudio stream and close/terminate
        try:
            if self._stream and self._stream.is_active():
                self._stream.stop_stream()  # drains queued audio per PortAudio docs
        except Exception:
            pass
        try:
            if self._stream:
                self._stream.close()
        except Exception:
            pass
        try:
            if self._pa:
                self._pa.terminate()
        except Exception:
            pass

        # zero the meter
        try:
            if self._volume_emitter:
                self._volume_emitter(0)
        except Exception:
            pass

        self._stream = None
        self._pa = None

        # detach the callback before invoking it so it can never run twice
        cb = self.on_stopped
        self.on_stopped = None
        if cb:
            try:
                cb()
            except Exception:
                pass

        self.deleteLater()

    # ---- internal ----

    def _callback(self, in_data, frame_count, time_info, status):
        """
        PortAudio callback: deliver frames from buffer.

        :param in_data: input data (ignored)
        :param frame_count: number of frames requested
        :param time_info: timing info (ignored)
        :param status: status flags (ignored)
        :return: (data bytes, flag)
        """
        need = frame_count * self.frame_bytes
        with self._buf_lock:
            # slicing past the end simply yields whatever is available
            out = bytes(self._buffer[:need])
            del self._buffer[:need]

        # pad an underrun with silence so PortAudio always gets a full buffer
        if len(out) < need:
            out += self._silence(need - len(out))

        # meter push from what is actually written
        self._vol_push(out)

        # auto-finish: when final and nothing more to play, complete and stop()
        if self._final and self._buffer_empty():
            # Return paComplete and request stop on the GUI thread.
            # PaComplete deactivates the stream after the last callback buffer is played.
            QTimer.singleShot(0, self.stop)
            return out, self._pa_complete

        return out, self._pa_continue

    def _check_finished(self) -> None:
        """
        Watchdog that runs on the Qt thread to guarantee a single, reliable stop().
        Triggers when PortAudio deactivates the stream, or when the buffer is fully
        drained after mark_final().
        """
        if self._stopping:
            return

        # If underlying PA stream is no longer active, we are done.
        try:
            if self._stream is not None and not self._stream.is_active():
                self.stop()
                return
        except Exception:
            # If querying state fails, assume the stream is done and stop.
            self.stop()
            return

        # If we've been marked final and our buffer is empty, finalize proactively.
        if self._final and self._buffer_empty():
            self.stop()

    def _buffer_empty(self) -> bool:
        """
        Check if internal buffer is empty.

        :return: True if empty
        """
        with self._buf_lock:
            return len(self._buffer) == 0

    def _silence(self, n: int) -> bytes:
        """
        Generate n bytes of silence.

        :param n: number of bytes
        :return: bytes of silence
        """
        if n <= 0:
            return b""
        if self.width == 1:
            return bytes([128]) * n  # silence for unsigned 8-bit
        return b"\x00" * n

    def _vol_push(self, chunk: bytes) -> None:
        """
        Push chunk to volume buffer and trim if needed.

        :param chunk: bytes to push to volume buffer
        """
        if not chunk:
            return
        with self._vol_lock:
            self._vol_buffer.extend(chunk)
            max_bytes = max(1, self.bytes_per_ms * 100)  # ~100 ms window
            excess = len(self._vol_buffer) - max_bytes
            if excess > 0:
                # round the cut up to a whole frame so the window stays sample-aligned
                excess += (-excess) % self.frame_bytes
                del self._vol_buffer[:excess]

    def _decode_samples(self, buf: bytes) -> np.ndarray:
        """
        Decode raw meter bytes into float samples normalized to about -1.0..1.0.

        A trailing partial sample is dropped instead of failing the whole decode.

        :param buf: raw bytes in the session sample format
        :return: 1-D float32 array (empty when there is no complete sample)
        """
        width = self.width
        if width not in (1, 3, 4):
            width = 2  # 16-bit is also the fallback for unexpected widths
        usable = len(buf) - (len(buf) % width)
        if usable <= 0:
            return np.zeros(0, dtype=np.float32)
        data = buf[:usable]

        if width == 1:
            return (np.frombuffer(data, dtype=np.uint8).astype(np.float32) - 128.0) / 128.0
        if width == 3:
            # packed 24-bit signed samples; assumes little-endian byte order
            raw = np.frombuffer(data, dtype=np.uint8).reshape(-1, 3).astype(np.int32)
            vals = raw[:, 0] | (raw[:, 1] << 8) | (raw[:, 2] << 16)
            vals = vals - ((vals & 0x800000) << 1)  # sign-extend bit 23
            return vals.astype(np.float32) / 8388608.0
        if width == 4:
            if getattr(self, "_float_samples", False):
                # stream format is paFloat32: samples are already floats
                return np.frombuffer(data, dtype=np.float32)
            return np.frombuffer(data, dtype=np.int32).astype(np.float32) / 2147483648.0
        return np.frombuffer(data, dtype=np.int16).astype(np.float32) / 32768.0

    def _emit_volume_tick(self) -> None:
        """Emit volume level based on current volume buffer."""
        emit = self._volume_emitter
        if emit is None:
            return
        with self._vol_lock:
            buf = bytes(self._vol_buffer)
        try:
            arr = self._decode_samples(buf) if buf else None
            if arr is None or arr.size == 0:
                emit(0)
                return

            # RMS -> dBFS clamped to [-60, 0] -> linear 0..100 meter value
            rms = float(np.sqrt(np.mean(arr.astype(np.float64) ** 2)))
            db = -60.0 if rms <= 1e-9 else 20.0 * float(np.log10(min(1.0, rms)))
            db = max(-60.0, min(0.0, db))
            volume = int(((db + 60.0) / 60.0) * 100.0)
            emit(volume)
        except Exception:
            # metering is best effort and must never break playback
            pass
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# ================================================== #
|
|
4
|
+
# This file is a part of PYGPT package #
|
|
5
|
+
# Website: https://pygpt.net #
|
|
6
|
+
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
|
7
|
+
# MIT License #
|
|
8
|
+
# Created By : Marcin Szczygliński #
|
|
9
|
+
# Updated Date: 2025.08.31 23:00:00 #
|
|
10
|
+
# ================================================== #
|
|
11
|
+
|
|
12
|
+
from .pygame import PygameBackend
|
|
@@ -6,15 +6,20 @@
|
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
|
7
7
|
# MIT License #
|
|
8
8
|
# Created By : Marcin Szczygliński #
|
|
9
|
-
# Updated Date: 2025.08.
|
|
9
|
+
# Updated Date: 2025.08.31 04:00:00 #
|
|
10
10
|
# ================================================== #
|
|
11
11
|
|
|
12
12
|
import time
|
|
13
13
|
import wave
|
|
14
|
+
import numpy as np
|
|
14
15
|
from typing import List, Tuple
|
|
16
|
+
from collections import deque
|
|
17
|
+
from threading import Lock
|
|
15
18
|
|
|
16
19
|
from PySide6.QtCore import QTimer
|
|
17
20
|
|
|
21
|
+
from ..shared import f32_to_s16le, build_rt_input_delta_event
|
|
22
|
+
|
|
18
23
|
class PygameBackend:
|
|
19
24
|
MIN_FRAMES = 25 # minimum frames to start transcription
|
|
20
25
|
|
|
@@ -22,6 +27,8 @@ class PygameBackend:
|
|
|
22
27
|
"""
|
|
23
28
|
Audio input capture core using pygame's SDL2 audio capture backend.
|
|
24
29
|
Captured devices are stored as device name strings.
|
|
30
|
+
|
|
31
|
+
:param window: Window instance
|
|
25
32
|
"""
|
|
26
33
|
self.window = window
|
|
27
34
|
self.path = None
|
|
@@ -55,10 +62,14 @@ class PygameBackend:
|
|
|
55
62
|
self.initialized = False
|
|
56
63
|
self.mode = "input" # input|control
|
|
57
64
|
|
|
65
|
+
# --- REALTIME INPUT (mic -> dispatcher) ---
|
|
66
|
+
self._rt_signals = None # set with set_rt_signals()
|
|
67
|
+
self._rt_queue = deque() # queue of raw float32 chunks from SDL audio thread
|
|
68
|
+
self._rt_lock = Lock() # protects _rt_queue
|
|
69
|
+
self._is_recording = False # suppress updates after stop
|
|
70
|
+
|
|
58
71
|
def init(self):
|
|
59
|
-
"""
|
|
60
|
-
Initialize the pygame audio system if not already initialized.
|
|
61
|
-
"""
|
|
72
|
+
"""Initialize the pygame audio system if not already initialized."""
|
|
62
73
|
if not self.initialized:
|
|
63
74
|
import pygame
|
|
64
75
|
from pygame._sdl2 import (
|
|
@@ -110,10 +121,20 @@ class PygameBackend:
|
|
|
110
121
|
"""
|
|
111
122
|
self.path = path
|
|
112
123
|
|
|
124
|
+
def set_rt_signals(self, signals) -> None:
    """
    Store the signals object used when publishing realtime input events.

    :param signals: Signals object (a falsy value means nothing is emitted)
    """
    self._rt_signals = signals
|
|
131
|
+
|
|
113
132
|
def start(self):
|
|
114
133
|
"""
|
|
115
134
|
Start audio recording using pygame’s SDL2 audio capture.
|
|
116
135
|
Returns True if started successfully.
|
|
136
|
+
|
|
137
|
+
:return: True if started
|
|
117
138
|
"""
|
|
118
139
|
self.init()
|
|
119
140
|
# Clear previously recorded frames.
|
|
@@ -136,15 +157,23 @@ class PygameBackend:
|
|
|
136
157
|
self.timer.timeout.connect(self._update_level)
|
|
137
158
|
self.timer.start(50) # update every 50ms
|
|
138
159
|
|
|
160
|
+
# mark recording as active after setup
|
|
161
|
+
self._is_recording = True
|
|
139
162
|
return True
|
|
140
163
|
|
|
141
164
|
def stop(self):
|
|
142
165
|
"""
|
|
143
166
|
Stop audio recording.
|
|
144
167
|
Returns True if stopped and audio data was saved (if path is set).
|
|
168
|
+
|
|
169
|
+
:return: True if stopped and saved
|
|
145
170
|
"""
|
|
146
171
|
self.init()
|
|
147
172
|
result = False
|
|
173
|
+
|
|
174
|
+
# immediately mark as not recording
|
|
175
|
+
self._is_recording = False
|
|
176
|
+
|
|
148
177
|
if self.audio_source is not None:
|
|
149
178
|
if self.timer is not None:
|
|
150
179
|
self.timer.stop()
|
|
@@ -154,6 +183,12 @@ class PygameBackend:
|
|
|
154
183
|
self.audio_source.pause(1)
|
|
155
184
|
self.audio_source = None
|
|
156
185
|
|
|
186
|
+
# Emit final input chunk marker for realtime consumers
|
|
187
|
+
try:
|
|
188
|
+
self._emit_rt_input_delta(b"", final=True)
|
|
189
|
+
except Exception:
|
|
190
|
+
pass
|
|
191
|
+
|
|
157
192
|
if self.frames:
|
|
158
193
|
if self.path:
|
|
159
194
|
self.save_audio_file(self.path)
|
|
@@ -162,35 +197,48 @@ class PygameBackend:
|
|
|
162
197
|
print("File path is not set.")
|
|
163
198
|
else:
|
|
164
199
|
print("No audio data recorded")
|
|
200
|
+
|
|
201
|
+
# reset level indicator
|
|
202
|
+
try:
|
|
203
|
+
self.reset_audio_level()
|
|
204
|
+
except Exception:
|
|
205
|
+
pass
|
|
206
|
+
|
|
165
207
|
return result
|
|
166
208
|
|
|
167
209
|
def has_source(self) -> bool:
    """
    Tell whether an audio source object is currently set.

    :return: True if audio source is available
    """
    source = self.audio_source
    return not (source is None)
|
|
172
216
|
|
|
173
217
|
def has_frames(self) -> bool:
    """
    Tell whether at least one audio frame has been captured.

    :return: True if any frames recorded
    """
    if self.frames:
        return True
    return False
|
|
178
224
|
|
|
179
225
|
def has_min_frames(self) -> bool:
    """
    Tell whether the number of captured frames has reached MIN_FRAMES.

    :return: True if at least MIN_FRAMES recorded
    """
    recorded = len(self.frames)
    return not (recorded < self.MIN_FRAMES)
|
|
184
232
|
|
|
185
233
|
def reset_audio_level(self):
    """Report a zero input level to the audio UI for the current mode."""
    audio_ui = self.window.controller.audio.ui
    audio_ui.on_input_volume_change(0, self.mode)
|
|
190
236
|
|
|
191
237
|
def check_audio_input(self) -> bool:
|
|
192
238
|
"""
|
|
193
239
|
Check if a default audio input device is available using pygame.
|
|
240
|
+
|
|
241
|
+
:return: True if an audio input device is available
|
|
194
242
|
"""
|
|
195
243
|
from pygame._sdl2 import (
|
|
196
244
|
get_audio_device_names,
|
|
@@ -226,6 +274,8 @@ class PygameBackend:
|
|
|
226
274
|
def device_changed(self, index: int):
|
|
227
275
|
"""
|
|
228
276
|
Change the selected audio input device by its index in the devices list.
|
|
277
|
+
|
|
278
|
+
:param index: Index of the device in the devices list.
|
|
229
279
|
"""
|
|
230
280
|
self.init()
|
|
231
281
|
if 0 <= index < len(self.devices):
|
|
@@ -234,9 +284,7 @@ class PygameBackend:
|
|
|
234
284
|
self.selected_device = None
|
|
235
285
|
|
|
236
286
|
def prepare_device(self):
|
|
237
|
-
"""
|
|
238
|
-
Set the current audio input device based on configuration.
|
|
239
|
-
"""
|
|
287
|
+
"""Set the current audio input device based on configuration."""
|
|
240
288
|
self.init()
|
|
241
289
|
if self.window is not None and hasattr(self.window, "core"):
|
|
242
290
|
device_index = int(self.window.core.config.get('audio.input.device', 0))
|
|
@@ -251,14 +299,26 @@ class PygameBackend:
|
|
|
251
299
|
"""
|
|
252
300
|
Callback function called in the audio thread.
|
|
253
301
|
It receives a memoryview of audio data which is converted to bytes and appended.
|
|
302
|
+
|
|
303
|
+
:param audiodevice: The audio device instance (not used here).
|
|
304
|
+
:param audiomemoryview: MemoryView of the captured audio data.
|
|
254
305
|
"""
|
|
306
|
+
if not self._is_recording:
|
|
307
|
+
return
|
|
308
|
+
|
|
255
309
|
# Append captured audio bytes to the frames list.
|
|
256
|
-
|
|
310
|
+
chunk = bytes(audiomemoryview)
|
|
311
|
+
self.frames.append(chunk)
|
|
312
|
+
|
|
313
|
+
# Enqueue chunk for realtime emission (processed on the Qt thread).
|
|
314
|
+
try:
|
|
315
|
+
with self._rt_lock:
|
|
316
|
+
self._rt_queue.append(chunk)
|
|
317
|
+
except Exception:
|
|
318
|
+
pass
|
|
257
319
|
|
|
258
320
|
def setup_audio_input(self):
|
|
259
|
-
"""
|
|
260
|
-
Create an AudioDevice with the selected device name and start recording.
|
|
261
|
-
"""
|
|
321
|
+
"""Create an AudioDevice with the selected device name and start recording."""
|
|
262
322
|
self.init()
|
|
263
323
|
from pygame._sdl2 import (
|
|
264
324
|
AudioDevice,
|
|
@@ -289,16 +349,18 @@ class PygameBackend:
|
|
|
289
349
|
Periodically called (via QTimer) to compute RMS from the last captured audio chunk
|
|
290
350
|
and update the audio level bar.
|
|
291
351
|
"""
|
|
352
|
+
# Drain realtime queue first to keep latency low.
|
|
353
|
+
self._drain_rt_queue()
|
|
354
|
+
|
|
292
355
|
if not self.frames:
|
|
293
356
|
return
|
|
294
357
|
|
|
295
|
-
import numpy as np
|
|
296
358
|
# Use the last captured chunk.
|
|
297
359
|
last_chunk = self.frames[-1]
|
|
298
360
|
try:
|
|
299
361
|
# Interpret the bytes as float32 samples.
|
|
300
362
|
samples = np.frombuffer(last_chunk, dtype=np.float32)
|
|
301
|
-
except Exception
|
|
363
|
+
except Exception:
|
|
302
364
|
return
|
|
303
365
|
if samples.size == 0:
|
|
304
366
|
return
|
|
@@ -329,7 +391,6 @@ class PygameBackend:
|
|
|
329
391
|
|
|
330
392
|
:param filename: The path to the output WAV file.
|
|
331
393
|
"""
|
|
332
|
-
import numpy as np
|
|
333
394
|
full_data = b"".join(self.frames)
|
|
334
395
|
try:
|
|
335
396
|
data_array = np.frombuffer(full_data, dtype=np.float32)
|
|
@@ -337,7 +398,7 @@ class PygameBackend:
|
|
|
337
398
|
print("Error converting audio data:", e)
|
|
338
399
|
return
|
|
339
400
|
# Convert float32 values in the range -1.0 ... 1.0 to PCM int16.
|
|
340
|
-
int_data = (data_array * 32767).astype(np.int16)
|
|
401
|
+
int_data = (np.clip(data_array, -1.0, 1.0) * 32767.0).astype(np.int16)
|
|
341
402
|
new_data = int_data.tobytes()
|
|
342
403
|
with wave.open(filename, 'wb') as wf:
|
|
343
404
|
wf.setnchannels(self.channels)
|
|
@@ -490,11 +551,61 @@ class PygameBackend:
|
|
|
490
551
|
def get_default_input_device(self) -> tuple:
    """
    Return the default input device entry.

    This backend does not query any device here; it always returns a fixed
    placeholder pair.

    :return: (index, name)
    """
    index, name = 0, "Default Input Device"
    return index, name
|
|
495
558
|
|
|
496
559
|
def get_default_output_device(self) -> tuple:
    """
    Return the default output device entry.

    This backend does not query any device here; it always returns a fixed
    placeholder pair.

    :return: (index, name)
    """
    index, name = 0, "Default Output Device"
    return index, name
|
|
566
|
+
|
|
567
|
+
# --------------------
|
|
568
|
+
# REALTIME INPUT HELPERS
|
|
569
|
+
# --------------------
|
|
570
|
+
def _emit_rt_input_delta(self, data: bytes, final: bool) -> None:
    """
    Emit RT_INPUT_AUDIO_DELTA with a provider-agnostic payload.

    The payload carries the backend rate/channels together with the given bytes.
    No conversion happens here: data is expected to already be PCM16LE.
    Does nothing when no signals object has been set.

    :param data: PCM16LE audio bytes (None/empty is sent as b"")
    :param final: True if this is the final chunk
    """
    # Bind the signals object now: the emission is deferred to the event loop, and
    # re-reading self._rt_signals at that point could find it cleared.
    signals = self._rt_signals
    if not signals:
        return
    try:
        event = build_rt_input_delta_event(
            rate=int(self.rate),
            channels=int(self.channels),
            data=data or b"",
            final=bool(final),
        )
        # Ensure emission on the Qt thread
        QTimer.singleShot(0, lambda: signals.response.emit(event))
    except Exception:
        # best effort: realtime emission must never break recording
        pass
|
|
591
|
+
|
|
592
|
+
def _drain_rt_queue(self) -> None:
    """
    Empty the realtime queue and publish its content as one delta event.

    Queued float32 chunks are joined, converted to PCM16 and emitted as a single
    non-final event. When no signals object is set, the queue is only cleared.
    """
    # take everything that is pending and reset the queue in one locked step
    with self._rt_lock:
        pending = tuple(self._rt_queue)
        self._rt_queue.clear()

    # nothing to emit
    if not self._rt_signals or not pending:
        return

    pcm16 = f32_to_s16le(b"".join(pending))
    if pcm16:
        self._emit_rt_input_delta(pcm16, final=False)
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# ================================================== #
|
|
4
|
+
# This file is a part of PYGPT package #
|
|
5
|
+
# Website: https://pygpt.net #
|
|
6
|
+
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
|
7
|
+
# MIT License #
|
|
8
|
+
# Created By : Marcin Szczygliński #
|
|
9
|
+
# Updated Date: 2025.08.31 23:00:00 #
|
|
10
|
+
# ================================================== #
|
|
11
|
+
|
|
12
|
+
# Shared helpers for audio backends
|
|
13
|
+
|
|
14
|
+
from .rt import (
|
|
15
|
+
build_rt_input_delta_event,
|
|
16
|
+
build_output_volume_event,
|
|
17
|
+
)
|
|
18
|
+
from .conversions import (
|
|
19
|
+
qaudio_dtype,
|
|
20
|
+
qaudio_norm_factor,
|
|
21
|
+
qaudio_to_s16le,
|
|
22
|
+
pyaudio_to_s16le,
|
|
23
|
+
f32_to_s16le,
|
|
24
|
+
convert_s16_pcm,
|
|
25
|
+
)
|
|
26
|
+
from .envelope import compute_envelope_from_file
|
|
27
|
+
|
|
28
|
+
__all__ = [
|
|
29
|
+
"build_rt_input_delta_event",
|
|
30
|
+
"build_output_volume_event",
|
|
31
|
+
"qaudio_dtype",
|
|
32
|
+
"qaudio_norm_factor",
|
|
33
|
+
"qaudio_to_s16le",
|
|
34
|
+
"pyaudio_to_s16le",
|
|
35
|
+
"f32_to_s16le",
|
|
36
|
+
"convert_s16_pcm",
|
|
37
|
+
"compute_envelope_from_file",
|
|
38
|
+
]
|