pygpt-net 2.6.30__py3-none-any.whl → 2.6.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. pygpt_net/CHANGELOG.txt +15 -0
  2. pygpt_net/__init__.py +3 -3
  3. pygpt_net/app.py +7 -1
  4. pygpt_net/app_core.py +3 -1
  5. pygpt_net/config.py +3 -1
  6. pygpt_net/controller/__init__.py +9 -2
  7. pygpt_net/controller/audio/audio.py +38 -1
  8. pygpt_net/controller/audio/ui.py +2 -2
  9. pygpt_net/controller/chat/audio.py +1 -8
  10. pygpt_net/controller/chat/common.py +23 -62
  11. pygpt_net/controller/chat/handler/__init__.py +0 -0
  12. pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
  13. pygpt_net/controller/chat/output.py +8 -3
  14. pygpt_net/controller/chat/stream.py +3 -1071
  15. pygpt_net/controller/chat/text.py +3 -2
  16. pygpt_net/controller/kernel/kernel.py +11 -3
  17. pygpt_net/controller/kernel/reply.py +5 -1
  18. pygpt_net/controller/lang/custom.py +2 -2
  19. pygpt_net/controller/media/__init__.py +12 -0
  20. pygpt_net/controller/media/media.py +115 -0
  21. pygpt_net/controller/realtime/__init__.py +12 -0
  22. pygpt_net/controller/realtime/manager.py +53 -0
  23. pygpt_net/controller/realtime/realtime.py +293 -0
  24. pygpt_net/controller/ui/mode.py +23 -2
  25. pygpt_net/controller/ui/ui.py +19 -1
  26. pygpt_net/core/audio/audio.py +6 -1
  27. pygpt_net/core/audio/backend/native/__init__.py +12 -0
  28. pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
  29. pygpt_net/core/audio/backend/native/player.py +139 -0
  30. pygpt_net/core/audio/backend/native/realtime.py +250 -0
  31. pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
  32. pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
  33. pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
  34. pygpt_net/core/audio/backend/pyaudio/realtime.py +312 -0
  35. pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
  36. pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
  37. pygpt_net/core/audio/backend/shared/__init__.py +38 -0
  38. pygpt_net/core/audio/backend/shared/conversions.py +211 -0
  39. pygpt_net/core/audio/backend/shared/envelope.py +38 -0
  40. pygpt_net/core/audio/backend/shared/player.py +137 -0
  41. pygpt_net/core/audio/backend/shared/rt.py +52 -0
  42. pygpt_net/core/audio/capture.py +5 -0
  43. pygpt_net/core/audio/output.py +14 -2
  44. pygpt_net/core/audio/whisper.py +6 -2
  45. pygpt_net/core/bridge/bridge.py +2 -1
  46. pygpt_net/core/bridge/worker.py +4 -1
  47. pygpt_net/core/dispatcher/dispatcher.py +37 -1
  48. pygpt_net/core/events/__init__.py +2 -1
  49. pygpt_net/core/events/realtime.py +55 -0
  50. pygpt_net/core/image/image.py +56 -5
  51. pygpt_net/core/realtime/__init__.py +0 -0
  52. pygpt_net/core/realtime/options.py +87 -0
  53. pygpt_net/core/realtime/shared/__init__.py +0 -0
  54. pygpt_net/core/realtime/shared/audio.py +213 -0
  55. pygpt_net/core/realtime/shared/loop.py +64 -0
  56. pygpt_net/core/realtime/shared/session.py +59 -0
  57. pygpt_net/core/realtime/shared/text.py +37 -0
  58. pygpt_net/core/realtime/shared/tools.py +276 -0
  59. pygpt_net/core/realtime/shared/turn.py +38 -0
  60. pygpt_net/core/realtime/shared/types.py +16 -0
  61. pygpt_net/core/realtime/worker.py +160 -0
  62. pygpt_net/core/render/web/body.py +24 -3
  63. pygpt_net/core/text/utils.py +54 -2
  64. pygpt_net/core/types/__init__.py +1 -0
  65. pygpt_net/core/types/image.py +54 -0
  66. pygpt_net/core/video/__init__.py +12 -0
  67. pygpt_net/core/video/video.py +290 -0
  68. pygpt_net/data/config/config.json +26 -5
  69. pygpt_net/data/config/models.json +221 -103
  70. pygpt_net/data/config/settings.json +244 -6
  71. pygpt_net/data/css/web-blocks.css +6 -0
  72. pygpt_net/data/css/web-chatgpt.css +6 -0
  73. pygpt_net/data/css/web-chatgpt_wide.css +6 -0
  74. pygpt_net/data/locale/locale.de.ini +35 -7
  75. pygpt_net/data/locale/locale.en.ini +56 -17
  76. pygpt_net/data/locale/locale.es.ini +35 -7
  77. pygpt_net/data/locale/locale.fr.ini +35 -7
  78. pygpt_net/data/locale/locale.it.ini +35 -7
  79. pygpt_net/data/locale/locale.pl.ini +38 -7
  80. pygpt_net/data/locale/locale.uk.ini +35 -7
  81. pygpt_net/data/locale/locale.zh.ini +31 -3
  82. pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
  83. pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
  84. pygpt_net/data/locale/plugin.cmd_web.en.ini +8 -0
  85. pygpt_net/item/model.py +22 -1
  86. pygpt_net/plugin/audio_input/plugin.py +37 -4
  87. pygpt_net/plugin/audio_input/simple.py +57 -8
  88. pygpt_net/plugin/cmd_files/worker.py +3 -0
  89. pygpt_net/provider/api/google/__init__.py +76 -7
  90. pygpt_net/provider/api/google/audio.py +8 -1
  91. pygpt_net/provider/api/google/chat.py +45 -6
  92. pygpt_net/provider/api/google/image.py +226 -86
  93. pygpt_net/provider/api/google/realtime/__init__.py +12 -0
  94. pygpt_net/provider/api/google/realtime/client.py +1945 -0
  95. pygpt_net/provider/api/google/realtime/realtime.py +186 -0
  96. pygpt_net/provider/api/google/video.py +364 -0
  97. pygpt_net/provider/api/openai/__init__.py +22 -2
  98. pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
  99. pygpt_net/provider/api/openai/realtime/client.py +1828 -0
  100. pygpt_net/provider/api/openai/realtime/realtime.py +193 -0
  101. pygpt_net/provider/audio_input/google_genai.py +103 -0
  102. pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
  103. pygpt_net/provider/audio_output/google_tts.py +0 -12
  104. pygpt_net/provider/audio_output/openai_tts.py +8 -5
  105. pygpt_net/provider/core/config/patch.py +241 -178
  106. pygpt_net/provider/core/model/patch.py +28 -2
  107. pygpt_net/provider/llms/google.py +8 -9
  108. pygpt_net/provider/web/duckduck_search.py +212 -0
  109. pygpt_net/ui/layout/toolbox/audio.py +55 -0
  110. pygpt_net/ui/layout/toolbox/footer.py +14 -42
  111. pygpt_net/ui/layout/toolbox/image.py +7 -13
  112. pygpt_net/ui/layout/toolbox/raw.py +52 -0
  113. pygpt_net/ui/layout/toolbox/split.py +48 -0
  114. pygpt_net/ui/layout/toolbox/toolbox.py +8 -8
  115. pygpt_net/ui/layout/toolbox/video.py +49 -0
  116. pygpt_net/ui/widget/option/combo.py +15 -1
  117. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/METADATA +46 -22
  118. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/RECORD +121 -73
  119. pygpt_net/core/audio/backend/pyaudio.py +0 -554
  120. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/LICENSE +0 -0
  121. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/WHEEL +0 -0
  122. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,312 @@
1
+ import threading
2
+ from typing import Optional
3
+
4
+ import numpy as np
5
+
6
+ from PySide6.QtCore import QTimer, QObject, Qt
7
+
8
+
9
class RealtimeSessionPyAudio(QObject):
    """
    Realtime PCM playback session using PyAudio in callback mode.

    PCM bytes are queued with feed() and pulled by the PortAudio callback.
    Two Qt timers owned by this object run alongside the stream: one emits
    volume levels (0-100) through ``volume_emitter``, the other is a watchdog
    that calls stop() once playback has finished. ``on_stopped`` (assigned by
    the owner after construction) is invoked exactly once from stop().
    """

    def __init__(
        self,
        device_index: int,
        rate: int,
        channels: int,
        width_bytes: int = 2,
        parent: Optional[QObject] = None,
        volume_emitter: Optional[callable] = None,
    ):
        """
        Open and start a callback-driven output stream.

        :param device_index: PortAudio output device index
        :param rate: sample rate in Hz
        :param channels: number of channels
        :param width_bytes: sample width in bytes (1 = unsigned 8-bit, 2 = int16, 4 = int32)
        :param parent: optional Qt parent
        :param volume_emitter: optional callable receiving the meter level (int, 0-100)
        :raises Exception: whatever PyAudio raises when the format or device is rejected
        """
        super().__init__(parent)
        import pyaudio  # local import to keep backend import-safe

        # Resolve the callback return flags once, so the realtime callback
        # does not have to execute an import statement on every buffer.
        self._pa_continue = pyaudio.paContinue
        self._pa_complete = pyaudio.paComplete

        # State read by stop(), the watchdog and the audio callback is defined
        # up front, before any timer or stream can fire.
        self.on_stopped = None  # stop callback (set by backend)
        self._stream = None
        self._vol_timer = None
        self._finish_timer = None
        self._stopping = False  # one-shot guard to avoid double stop and duplicate callbacks
        self._final = False
        self._tail_ms = 60  # add a small silence tail to avoid clicks

        self.device_index = int(device_index)
        self.rate = int(rate)
        self.channels = int(channels)
        self.width = int(width_bytes)
        self.frame_bytes = max(1, self.channels * self.width)
        self.bytes_per_ms = max(1, int(self.rate * self.frame_bytes / 1000))

        # playback buffer (fed from the GUI side, drained by the callback)
        self._buffer = bytearray()
        self._buf_lock = threading.Lock()

        # volume metering window
        self._volume_emitter = volume_emitter
        self._vol_buffer = bytearray()
        self._vol_lock = threading.Lock()

        self._pa = pyaudio.PyAudio()
        try:
            # choose PyAudio format from width
            self.pa_format = self._pa.get_format_from_width(
                self.width,
                unsigned=(self.width == 1),
            )
            # open callback-based output stream
            self._stream = self._pa.open(
                format=self.pa_format,
                channels=self.channels,
                rate=self.rate,
                output=True,
                output_device_index=self.device_index,
                stream_callback=self._callback,
                frames_per_buffer=max(256, int(self.rate / 100)),  # ~10 ms
            )
        except Exception:
            # Do not leak the PortAudio handle when the format/device is rejected.
            pa, self._pa = self._pa, None
            try:
                pa.terminate()
            except Exception:
                pass
            raise

        self._vol_timer = self._start_timer(33, self._emit_volume_tick)  # ~30 Hz meter

        try:
            self._stream.start_stream()
        except Exception:
            # Best effort: if the stream cannot start, the watchdog below sees
            # an inactive stream on its first tick and calls stop().
            pass

        # finished-state watchdog: guarantees stop()+on_stopped once playback is truly done
        self._finish_timer = self._start_timer(15, self._check_finished)

    def is_active(self) -> bool:
        """
        Return True if PortAudio stream is active.

        :return: True if active
        """
        stream = self._stream
        try:
            return stream is not None and stream.is_active()
        except Exception:
            return False

    def is_finalized(self) -> bool:
        """
        Return True if session was marked final.

        :return: True if final
        """
        return bool(self._final)

    def feed(self, data: bytes) -> None:
        """
        Append PCM bytes (already in session/device format).

        :param data: bytes to append
        """
        if not data:
            return
        with self._buf_lock:
            self._buffer.extend(data)
        # push to volume window from the same bytes
        self._vol_push(data)

    def mark_final(self) -> None:
        """No more data will be supplied; add a small silence tail."""
        if self._final:
            return
        pad = self.bytes_per_ms * self._tail_ms
        pad -= pad % self.frame_bytes  # keep the tail frame-aligned
        if pad > 0:
            with self._buf_lock:
                self._buffer.extend(self._silence(pad))
        # Set the flag only after the tail is queued, so the callback never
        # observes "final" without the tail being in the buffer.
        self._final = True

    def stop(self) -> None:
        """Stop playback and free resources. Idempotent."""
        # ensure this executes only once even if called from multiple paths
        if self._stopping:
            return
        self._stopping = True

        # stop timers first to prevent re-entry
        for timer in (self._finish_timer, self._vol_timer):
            try:
                if timer is not None:
                    timer.stop()
            except Exception:
                pass

        # gracefully stop PortAudio stream and close/terminate
        stream, pa = self._stream, self._pa
        try:
            if stream is not None and stream.is_active():
                stream.stop_stream()  # drains queued audio per PortAudio docs
        except Exception:
            pass
        try:
            if stream is not None:
                stream.close()
        except Exception:
            pass
        try:
            if pa is not None:
                pa.terminate()
        except Exception:
            pass

        # zero the meter
        try:
            if self._volume_emitter:
                self._volume_emitter(0)
        except Exception:
            pass

        self._stream = None
        self._pa = None

        # Clear the callback before invoking it so it can never run twice.
        cb, self.on_stopped = self.on_stopped, None
        if cb:
            try:
                cb()
            except Exception:
                pass

        try:
            self.deleteLater()
        except RuntimeError:
            # underlying Qt object already deleted
            pass

    # ---- internal ----

    def _start_timer(self, interval_ms: int, slot) -> QTimer:
        """
        Create, connect and start a precise repeating timer owned by this object.

        :param interval_ms: timer interval in milliseconds
        :param slot: callable connected to the timeout signal
        :return: started timer
        """
        timer = QTimer(self)
        timer.setTimerType(Qt.PreciseTimer)
        timer.setInterval(interval_ms)
        timer.timeout.connect(slot)
        timer.start()
        return timer

    def _callback(self, in_data, frame_count, time_info, status):
        """
        PortAudio callback: deliver frames from buffer.

        :param in_data: input data (ignored)
        :param frame_count: number of frames requested
        :param time_info: timing info (ignored)
        :param status: status flags (ignored)
        :return: (data bytes, flag)
        """
        need = frame_count * self.frame_bytes

        # Read the flag before draining: mark_final() queues its tail before
        # setting it, so "final" here implies the tail is already buffered.
        final = self._final
        with self._buf_lock:
            out = bytes(self._buffer[:need])  # slice clamps to what is available
            del self._buffer[:need]
            drained = not self._buffer

        # pad underruns with silence so PortAudio always gets a full buffer
        if len(out) < need:
            out += self._silence(need - len(out))

        # meter push from what is actually written
        self._vol_push(out)

        # Auto-finish: when final and nothing more to play, report completion.
        # stop() is deliberately not scheduled from here: this runs on the
        # PortAudio thread, and the _check_finished watchdog on the Qt side
        # performs the actual stop once it sees the drained/inactive state.
        if final and drained:
            return out, self._pa_complete

        return out, self._pa_continue

    def _check_finished(self) -> None:
        """
        Watchdog that runs on the Qt thread to guarantee a single, reliable stop().
        Triggers when PortAudio deactivates the stream, or when the buffer is fully
        drained after mark_final().
        """
        if self._stopping:
            return

        try:
            inactive = self._stream is not None and not self._stream.is_active()
        except Exception:
            # If querying state fails, assume the stream is done and stop.
            inactive = True

        if inactive or (self._final and self._buffer_empty()):
            self.stop()

    def _buffer_empty(self) -> bool:
        """
        Check if internal buffer is empty.

        :return: True if empty
        """
        with self._buf_lock:
            return len(self._buffer) == 0

    def _silence(self, n: int) -> bytes:
        """
        Generate n bytes of silence.

        :param n: number of bytes
        :return: bytes of silence
        """
        if n <= 0:
            return b""
        if self.width == 1:
            return bytes([128]) * n  # silence for unsigned 8-bit
        return b"\x00" * n

    def _vol_push(self, chunk: bytes) -> None:
        """
        Push chunk to volume buffer and trim if needed.

        :param chunk: bytes to push to volume buffer
        """
        if not chunk:
            return
        with self._vol_lock:
            self._vol_buffer.extend(chunk)
            max_bytes = max(1, self.bytes_per_ms * 100)  # ~100 ms window
            overflow = len(self._vol_buffer) - max_bytes
            if overflow > 0:
                del self._vol_buffer[:overflow]

    def _emit_volume_tick(self) -> None:
        """Emit volume level based on current volume buffer."""
        emit = self._volume_emitter
        if emit is None:
            return
        with self._vol_lock:
            buf = bytes(self._vol_buffer)
        try:
            emit(self._level_from_pcm(buf))
        except Exception:
            # metering is cosmetic; it must never break playback
            pass

    def _level_from_pcm(self, buf: bytes) -> int:
        """
        Convert a PCM window to a meter level.

        The level is the RMS of the normalized samples mapped from
        [-60 dB, 0 dB] onto [0, 100].

        :param buf: raw PCM bytes in the session sample width
        :return: level in range 0-100 (0 for an empty or silent window)
        """
        # decode by sample width; unknown widths are read as int16
        if self.width == 1:
            dtype, offset, scale = np.uint8, 128.0, 128.0
        elif self.width == 4:
            dtype, offset, scale = np.int32, 0.0, 2147483648.0
        else:
            dtype, offset, scale = np.int16, 0.0, 32768.0

        # np.frombuffer rejects buffers that are not a whole number of samples;
        # read only the complete samples so a stray byte cannot freeze the meter.
        count = len(buf) // np.dtype(dtype).itemsize
        if count <= 0:
            return 0

        samples = np.frombuffer(buf, dtype=dtype, count=count).astype(np.float64)
        rms = float(np.sqrt(np.mean(((samples - offset) / scale) ** 2)))
        if rms <= 1e-9:
            return 0
        db = 20.0 * float(np.log10(min(1.0, rms)))
        db = max(-60.0, min(0.0, db))
        return int(((db + 60.0) / 60.0) * 100.0)
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # ================================================== #
4
+ # This file is a part of PYGPT package #
5
+ # Website: https://pygpt.net #
6
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
+ # MIT License #
8
+ # Created By : Marcin Szczygliński #
9
+ # Updated Date: 2025.08.31 23:00:00 #
10
+ # ================================================== #
11
+
12
+ from .pygame import PygameBackend
@@ -6,15 +6,20 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.08.27 07:00:00 #
9
+ # Updated Date: 2025.08.31 04:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import time
13
13
  import wave
14
+ import numpy as np
14
15
  from typing import List, Tuple
16
+ from collections import deque
17
+ from threading import Lock
15
18
 
16
19
  from PySide6.QtCore import QTimer
17
20
 
21
+ from ..shared import f32_to_s16le, build_rt_input_delta_event
22
+
18
23
  class PygameBackend:
19
24
  MIN_FRAMES = 25 # minimum frames to start transcription
20
25
 
@@ -22,6 +27,8 @@ class PygameBackend:
22
27
  """
23
28
  Audio input capture core using pygame's SDL2 audio capture backend.
24
29
  Captured devices are stored as device name strings.
30
+
31
+ :param window: Window instance
25
32
  """
26
33
  self.window = window
27
34
  self.path = None
@@ -55,10 +62,14 @@ class PygameBackend:
55
62
  self.initialized = False
56
63
  self.mode = "input" # input|control
57
64
 
65
+ # --- REALTIME INPUT (mic -> dispatcher) ---
66
+ self._rt_signals = None # set with set_rt_signals()
67
+ self._rt_queue = deque() # queue of raw float32 chunks from SDL audio thread
68
+ self._rt_lock = Lock() # protects _rt_queue
69
+ self._is_recording = False # suppress updates after stop
70
+
58
71
  def init(self):
59
- """
60
- Initialize the pygame audio system if not already initialized.
61
- """
72
+ """Initialize the pygame audio system if not already initialized."""
62
73
  if not self.initialized:
63
74
  import pygame
64
75
  from pygame._sdl2 import (
@@ -110,10 +121,20 @@ class PygameBackend:
110
121
  """
111
122
  self.path = path
112
123
 
124
def set_rt_signals(self, signals) -> None:
    """
    Store the signals object used when emitting realtime input events.

    :param signals: Signals object (kept as-is; a falsy value disables emission)
    """
    self._rt_signals = signals
131
+
113
132
  def start(self):
114
133
  """
115
134
  Start audio recording using pygame’s SDL2 audio capture.
116
135
  Returns True if started successfully.
136
+
137
+ :return: True if started
117
138
  """
118
139
  self.init()
119
140
  # Clear previously recorded frames.
@@ -136,15 +157,23 @@ class PygameBackend:
136
157
  self.timer.timeout.connect(self._update_level)
137
158
  self.timer.start(50) # update every 50ms
138
159
 
160
+ # mark recording as active after setup
161
+ self._is_recording = True
139
162
  return True
140
163
 
141
164
  def stop(self):
142
165
  """
143
166
  Stop audio recording.
144
167
  Returns True if stopped and audio data was saved (if path is set).
168
+
169
+ :return: True if stopped and saved
145
170
  """
146
171
  self.init()
147
172
  result = False
173
+
174
+ # immediately mark as not recording
175
+ self._is_recording = False
176
+
148
177
  if self.audio_source is not None:
149
178
  if self.timer is not None:
150
179
  self.timer.stop()
@@ -154,6 +183,12 @@ class PygameBackend:
154
183
  self.audio_source.pause(1)
155
184
  self.audio_source = None
156
185
 
186
+ # Emit final input chunk marker for realtime consumers
187
+ try:
188
+ self._emit_rt_input_delta(b"", final=True)
189
+ except Exception:
190
+ pass
191
+
157
192
  if self.frames:
158
193
  if self.path:
159
194
  self.save_audio_file(self.path)
@@ -162,35 +197,48 @@ class PygameBackend:
162
197
  print("File path is not set.")
163
198
  else:
164
199
  print("No audio data recorded")
200
+
201
+ # reset level indicator
202
+ try:
203
+ self.reset_audio_level()
204
+ except Exception:
205
+ pass
206
+
165
207
  return result
166
208
 
167
209
  def has_source(self) -> bool:
168
210
  """
169
211
  Check if the audio source is available.
212
+
213
+ :return: True if audio source is available
170
214
  """
171
215
  return self.audio_source is not None
172
216
 
173
217
  def has_frames(self) -> bool:
174
218
  """
175
219
  Check if any audio frames have been recorded.
220
+
221
+ :return: True if any frames recorded
176
222
  """
177
223
  return bool(self.frames)
178
224
 
179
225
  def has_min_frames(self) -> bool:
180
226
  """
181
227
  Check if at least MIN_FRAMES audio frames have been recorded.
228
+
229
+ :return: True if at least MIN_FRAMES recorded
182
230
  """
183
231
  return len(self.frames) >= self.MIN_FRAMES
184
232
 
185
233
  def reset_audio_level(self):
186
- """
187
- Reset the audio level bar (if available).
188
- """
234
+ """Reset the audio level bar (if available)."""
189
235
  self.window.controller.audio.ui.on_input_volume_change(0, self.mode)
190
236
 
191
237
  def check_audio_input(self) -> bool:
192
238
  """
193
239
  Check if a default audio input device is available using pygame.
240
+
241
+ :return: True if an audio input device is available
194
242
  """
195
243
  from pygame._sdl2 import (
196
244
  get_audio_device_names,
@@ -226,6 +274,8 @@ class PygameBackend:
226
274
  def device_changed(self, index: int):
227
275
  """
228
276
  Change the selected audio input device by its index in the devices list.
277
+
278
+ :param index: Index of the device in the devices list.
229
279
  """
230
280
  self.init()
231
281
  if 0 <= index < len(self.devices):
@@ -234,9 +284,7 @@ class PygameBackend:
234
284
  self.selected_device = None
235
285
 
236
286
  def prepare_device(self):
237
- """
238
- Set the current audio input device based on configuration.
239
- """
287
+ """Set the current audio input device based on configuration."""
240
288
  self.init()
241
289
  if self.window is not None and hasattr(self.window, "core"):
242
290
  device_index = int(self.window.core.config.get('audio.input.device', 0))
@@ -251,14 +299,26 @@ class PygameBackend:
251
299
  """
252
300
  Callback function called in the audio thread.
253
301
  It receives a memoryview of audio data which is converted to bytes and appended.
302
+
303
+ :param audiodevice: The audio device instance (not used here).
304
+ :param audiomemoryview: MemoryView of the captured audio data.
254
305
  """
306
+ if not self._is_recording:
307
+ return
308
+
255
309
  # Append captured audio bytes to the frames list.
256
- self.frames.append(bytes(audiomemoryview))
310
+ chunk = bytes(audiomemoryview)
311
+ self.frames.append(chunk)
312
+
313
+ # Enqueue chunk for realtime emission (processed on the Qt thread).
314
+ try:
315
+ with self._rt_lock:
316
+ self._rt_queue.append(chunk)
317
+ except Exception:
318
+ pass
257
319
 
258
320
  def setup_audio_input(self):
259
- """
260
- Create an AudioDevice with the selected device name and start recording.
261
- """
321
+ """Create an AudioDevice with the selected device name and start recording."""
262
322
  self.init()
263
323
  from pygame._sdl2 import (
264
324
  AudioDevice,
@@ -289,16 +349,18 @@ class PygameBackend:
289
349
  Periodically called (via QTimer) to compute RMS from the last captured audio chunk
290
350
  and update the audio level bar.
291
351
  """
352
+ # Drain realtime queue first to keep latency low.
353
+ self._drain_rt_queue()
354
+
292
355
  if not self.frames:
293
356
  return
294
357
 
295
- import numpy as np
296
358
  # Use the last captured chunk.
297
359
  last_chunk = self.frames[-1]
298
360
  try:
299
361
  # Interpret the bytes as float32 samples.
300
362
  samples = np.frombuffer(last_chunk, dtype=np.float32)
301
- except Exception as e:
363
+ except Exception:
302
364
  return
303
365
  if samples.size == 0:
304
366
  return
@@ -329,7 +391,6 @@ class PygameBackend:
329
391
 
330
392
  :param filename: The path to the output WAV file.
331
393
  """
332
- import numpy as np
333
394
  full_data = b"".join(self.frames)
334
395
  try:
335
396
  data_array = np.frombuffer(full_data, dtype=np.float32)
@@ -337,7 +398,7 @@ class PygameBackend:
337
398
  print("Error converting audio data:", e)
338
399
  return
339
400
  # Convert float32 values in the range -1.0 ... 1.0 to PCM int16.
340
- int_data = (data_array * 32767).astype(np.int16)
401
+ int_data = (np.clip(data_array, -1.0, 1.0) * 32767.0).astype(np.int16)
341
402
  new_data = int_data.tobytes()
342
403
  with wave.open(filename, 'wb') as wf:
343
404
  wf.setnchannels(self.channels)
@@ -490,11 +551,61 @@ class PygameBackend:
490
551
  def get_default_input_device(self) -> tuple:
491
552
  """
492
553
  Retrieve the default input device using PyAudio.
554
+
555
+ :return: (index, name)
493
556
  """
494
557
  return 0, "Default Input Device"
495
558
 
496
559
  def get_default_output_device(self) -> tuple:
497
560
  """
498
561
  Retrieve the default output device using PyAudio.
562
+
563
+ :return: (index, name)
499
564
  """
500
- return 0, "Default Output Device"
565
+ return 0, "Default Output Device"
566
+
567
+ # --------------------
568
+ # REALTIME INPUT HELPERS
569
+ # --------------------
570
+ def _emit_rt_input_delta(self, data: bytes, final: bool) -> None:
571
+ """
572
+ Emit RT_INPUT_AUDIO_DELTA with a provider-agnostic payload.
573
+ Standardizes to PCM16, little-endian, and includes rate/channels.
574
+
575
+ :param data: PCM16LE audio bytes
576
+ :param final: True if this is the final chunk
577
+ """
578
+ if not self._rt_signals:
579
+ return
580
+ try:
581
+ event = build_rt_input_delta_event(
582
+ rate=int(self.rate),
583
+ channels=int(self.channels),
584
+ data=data or b"",
585
+ final=bool(final),
586
+ )
587
+ # Ensure emission on the Qt thread
588
+ QTimer.singleShot(0, lambda: self._rt_signals.response.emit(event))
589
+ except Exception:
590
+ pass
591
+
592
+ def _drain_rt_queue(self) -> None:
593
+ """
594
+ Drain queued float32 chunks from the audio thread, convert to PCM16,
595
+ and emit a single realtime delta event.
596
+ """
597
+ if not self._rt_signals:
598
+ # nothing to emit
599
+ with self._rt_lock:
600
+ self._rt_queue.clear()
601
+ return
602
+
603
+ with self._rt_lock:
604
+ if not self._rt_queue:
605
+ return
606
+ raw = b"".join(self._rt_queue)
607
+ self._rt_queue.clear()
608
+
609
+ s16 = f32_to_s16le(raw)
610
+ if s16:
611
+ self._emit_rt_input_delta(s16, final=False)
@@ -0,0 +1,38 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # ================================================== #
4
+ # This file is a part of PYGPT package #
5
+ # Website: https://pygpt.net #
6
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
+ # MIT License #
8
+ # Created By : Marcin Szczygliński #
9
+ # Updated Date: 2025.08.31 23:00:00 #
10
+ # ================================================== #
11
+
12
+ # Shared helpers for audio backends
13
+
14
+ from .rt import (
15
+ build_rt_input_delta_event,
16
+ build_output_volume_event,
17
+ )
18
+ from .conversions import (
19
+ qaudio_dtype,
20
+ qaudio_norm_factor,
21
+ qaudio_to_s16le,
22
+ pyaudio_to_s16le,
23
+ f32_to_s16le,
24
+ convert_s16_pcm,
25
+ )
26
+ from .envelope import compute_envelope_from_file
27
+
28
+ __all__ = [
29
+ "build_rt_input_delta_event",
30
+ "build_output_volume_event",
31
+ "qaudio_dtype",
32
+ "qaudio_norm_factor",
33
+ "qaudio_to_s16le",
34
+ "pyaudio_to_s16le",
35
+ "f32_to_s16le",
36
+ "convert_s16_pcm",
37
+ "compute_envelope_from_file",
38
+ ]