pygpt-net 2.6.29__py3-none-any.whl → 2.6.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182) hide show
  1. pygpt_net/CHANGELOG.txt +15 -0
  2. pygpt_net/__init__.py +3 -3
  3. pygpt_net/app.py +4 -0
  4. pygpt_net/{container.py → app_core.py} +5 -6
  5. pygpt_net/controller/__init__.py +5 -2
  6. pygpt_net/controller/access/control.py +1 -9
  7. pygpt_net/controller/assistant/assistant.py +4 -4
  8. pygpt_net/controller/assistant/batch.py +7 -7
  9. pygpt_net/controller/assistant/files.py +4 -4
  10. pygpt_net/controller/assistant/threads.py +3 -3
  11. pygpt_net/controller/attachment/attachment.py +4 -7
  12. pygpt_net/controller/audio/audio.py +25 -1
  13. pygpt_net/controller/audio/ui.py +2 -2
  14. pygpt_net/controller/chat/audio.py +1 -8
  15. pygpt_net/controller/chat/common.py +30 -4
  16. pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
  17. pygpt_net/controller/chat/output.py +8 -3
  18. pygpt_net/controller/chat/stream.py +4 -405
  19. pygpt_net/controller/chat/text.py +3 -2
  20. pygpt_net/controller/chat/vision.py +11 -19
  21. pygpt_net/controller/config/placeholder.py +1 -1
  22. pygpt_net/controller/ctx/ctx.py +1 -1
  23. pygpt_net/controller/ctx/summarizer.py +1 -1
  24. pygpt_net/controller/kernel/kernel.py +11 -3
  25. pygpt_net/controller/kernel/reply.py +5 -1
  26. pygpt_net/controller/mode/mode.py +21 -12
  27. pygpt_net/controller/plugins/settings.py +3 -2
  28. pygpt_net/controller/presets/editor.py +112 -99
  29. pygpt_net/controller/realtime/__init__.py +12 -0
  30. pygpt_net/controller/realtime/manager.py +53 -0
  31. pygpt_net/controller/realtime/realtime.py +268 -0
  32. pygpt_net/controller/theme/theme.py +3 -2
  33. pygpt_net/controller/ui/mode.py +7 -0
  34. pygpt_net/controller/ui/ui.py +19 -1
  35. pygpt_net/controller/ui/vision.py +4 -4
  36. pygpt_net/core/agents/legacy.py +2 -2
  37. pygpt_net/core/agents/runners/openai_workflow.py +2 -2
  38. pygpt_net/core/assistants/files.py +5 -5
  39. pygpt_net/core/assistants/store.py +4 -4
  40. pygpt_net/core/audio/audio.py +6 -1
  41. pygpt_net/core/audio/backend/native/__init__.py +12 -0
  42. pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
  43. pygpt_net/core/audio/backend/native/player.py +139 -0
  44. pygpt_net/core/audio/backend/native/realtime.py +250 -0
  45. pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
  46. pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
  47. pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
  48. pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
  49. pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
  50. pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
  51. pygpt_net/core/audio/backend/shared/__init__.py +38 -0
  52. pygpt_net/core/audio/backend/shared/conversions.py +211 -0
  53. pygpt_net/core/audio/backend/shared/envelope.py +38 -0
  54. pygpt_net/core/audio/backend/shared/player.py +137 -0
  55. pygpt_net/core/audio/backend/shared/rt.py +52 -0
  56. pygpt_net/core/audio/capture.py +5 -0
  57. pygpt_net/core/audio/output.py +13 -2
  58. pygpt_net/core/audio/whisper.py +6 -2
  59. pygpt_net/core/bridge/bridge.py +4 -3
  60. pygpt_net/core/bridge/worker.py +31 -9
  61. pygpt_net/core/debug/console/console.py +2 -2
  62. pygpt_net/core/debug/presets.py +2 -2
  63. pygpt_net/core/dispatcher/dispatcher.py +37 -1
  64. pygpt_net/core/events/__init__.py +2 -1
  65. pygpt_net/core/events/realtime.py +55 -0
  66. pygpt_net/core/experts/experts.py +2 -2
  67. pygpt_net/core/image/image.py +51 -1
  68. pygpt_net/core/modes/modes.py +2 -2
  69. pygpt_net/core/presets/presets.py +3 -3
  70. pygpt_net/core/realtime/options.py +87 -0
  71. pygpt_net/core/realtime/shared/__init__.py +0 -0
  72. pygpt_net/core/realtime/shared/audio.py +213 -0
  73. pygpt_net/core/realtime/shared/loop.py +64 -0
  74. pygpt_net/core/realtime/shared/session.py +59 -0
  75. pygpt_net/core/realtime/shared/text.py +37 -0
  76. pygpt_net/core/realtime/shared/tools.py +276 -0
  77. pygpt_net/core/realtime/shared/turn.py +38 -0
  78. pygpt_net/core/realtime/shared/types.py +16 -0
  79. pygpt_net/core/realtime/worker.py +164 -0
  80. pygpt_net/core/tokens/tokens.py +4 -4
  81. pygpt_net/core/types/__init__.py +1 -0
  82. pygpt_net/core/types/image.py +48 -0
  83. pygpt_net/core/types/mode.py +5 -2
  84. pygpt_net/core/vision/analyzer.py +1 -1
  85. pygpt_net/data/config/config.json +13 -4
  86. pygpt_net/data/config/models.json +219 -101
  87. pygpt_net/data/config/modes.json +3 -9
  88. pygpt_net/data/config/settings.json +135 -27
  89. pygpt_net/data/config/settings_section.json +2 -2
  90. pygpt_net/data/locale/locale.de.ini +7 -7
  91. pygpt_net/data/locale/locale.en.ini +25 -12
  92. pygpt_net/data/locale/locale.es.ini +7 -7
  93. pygpt_net/data/locale/locale.fr.ini +7 -7
  94. pygpt_net/data/locale/locale.it.ini +7 -7
  95. pygpt_net/data/locale/locale.pl.ini +8 -8
  96. pygpt_net/data/locale/locale.uk.ini +7 -7
  97. pygpt_net/data/locale/locale.zh.ini +3 -3
  98. pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
  99. pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
  100. pygpt_net/item/model.py +23 -3
  101. pygpt_net/plugin/audio_input/plugin.py +37 -4
  102. pygpt_net/plugin/audio_input/simple.py +57 -8
  103. pygpt_net/plugin/cmd_files/worker.py +3 -0
  104. pygpt_net/plugin/openai_dalle/plugin.py +4 -4
  105. pygpt_net/plugin/openai_vision/plugin.py +12 -13
  106. pygpt_net/provider/agents/openai/agent.py +5 -5
  107. pygpt_net/provider/agents/openai/agent_b2b.py +5 -5
  108. pygpt_net/provider/agents/openai/agent_planner.py +5 -6
  109. pygpt_net/provider/agents/openai/agent_with_experts.py +5 -5
  110. pygpt_net/provider/agents/openai/agent_with_experts_feedback.py +4 -4
  111. pygpt_net/provider/agents/openai/agent_with_feedback.py +4 -4
  112. pygpt_net/provider/agents/openai/bot_researcher.py +2 -2
  113. pygpt_net/provider/agents/openai/bots/research_bot/agents/planner_agent.py +1 -1
  114. pygpt_net/provider/agents/openai/bots/research_bot/agents/search_agent.py +1 -1
  115. pygpt_net/provider/agents/openai/bots/research_bot/agents/writer_agent.py +1 -1
  116. pygpt_net/provider/agents/openai/evolve.py +5 -5
  117. pygpt_net/provider/agents/openai/supervisor.py +4 -4
  118. pygpt_net/provider/api/__init__.py +27 -0
  119. pygpt_net/provider/api/anthropic/__init__.py +68 -0
  120. pygpt_net/provider/api/google/__init__.py +295 -0
  121. pygpt_net/provider/api/google/audio.py +121 -0
  122. pygpt_net/provider/api/google/chat.py +591 -0
  123. pygpt_net/provider/api/google/image.py +427 -0
  124. pygpt_net/provider/api/google/realtime/__init__.py +12 -0
  125. pygpt_net/provider/api/google/realtime/client.py +1945 -0
  126. pygpt_net/provider/api/google/realtime/realtime.py +186 -0
  127. pygpt_net/provider/api/google/tools.py +222 -0
  128. pygpt_net/provider/api/google/vision.py +129 -0
  129. pygpt_net/provider/{gpt → api/openai}/__init__.py +24 -4
  130. pygpt_net/provider/api/openai/agents/__init__.py +0 -0
  131. pygpt_net/provider/{gpt → api/openai}/agents/computer.py +1 -1
  132. pygpt_net/provider/{gpt → api/openai}/agents/experts.py +1 -1
  133. pygpt_net/provider/{gpt → api/openai}/agents/response.py +1 -1
  134. pygpt_net/provider/{gpt → api/openai}/assistants.py +1 -1
  135. pygpt_net/provider/{gpt → api/openai}/chat.py +15 -8
  136. pygpt_net/provider/{gpt → api/openai}/completion.py +1 -1
  137. pygpt_net/provider/{gpt → api/openai}/image.py +1 -1
  138. pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
  139. pygpt_net/provider/api/openai/realtime/client.py +1828 -0
  140. pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
  141. pygpt_net/provider/{gpt → api/openai}/remote_tools.py +1 -1
  142. pygpt_net/provider/{gpt → api/openai}/responses.py +34 -20
  143. pygpt_net/provider/{gpt → api/openai}/store.py +2 -2
  144. pygpt_net/provider/{gpt → api/openai}/vision.py +1 -1
  145. pygpt_net/provider/api/openai/worker/__init__.py +0 -0
  146. pygpt_net/provider/{gpt → api/openai}/worker/assistants.py +4 -4
  147. pygpt_net/provider/{gpt → api/openai}/worker/importer.py +10 -10
  148. pygpt_net/provider/audio_input/google_genai.py +103 -0
  149. pygpt_net/provider/audio_input/openai_whisper.py +1 -1
  150. pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
  151. pygpt_net/provider/audio_output/openai_tts.py +9 -6
  152. pygpt_net/provider/core/config/patch.py +26 -0
  153. pygpt_net/provider/core/model/patch.py +20 -0
  154. pygpt_net/provider/core/preset/json_file.py +2 -4
  155. pygpt_net/provider/llms/anthropic.py +2 -5
  156. pygpt_net/provider/llms/base.py +4 -3
  157. pygpt_net/provider/llms/google.py +8 -9
  158. pygpt_net/provider/llms/openai.py +1 -1
  159. pygpt_net/provider/loaders/hub/image_vision/base.py +1 -1
  160. pygpt_net/ui/dialog/preset.py +71 -55
  161. pygpt_net/ui/layout/toolbox/footer.py +16 -0
  162. pygpt_net/ui/layout/toolbox/image.py +5 -0
  163. pygpt_net/ui/main.py +6 -4
  164. pygpt_net/ui/widget/option/combo.py +15 -1
  165. pygpt_net/utils.py +9 -0
  166. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +55 -55
  167. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +181 -135
  168. pygpt_net/core/audio/backend/pyaudio.py +0 -554
  169. /pygpt_net/{provider/gpt/agents → controller/chat/handler}/__init__.py +0 -0
  170. /pygpt_net/{provider/gpt/worker → core/realtime}/__init__.py +0 -0
  171. /pygpt_net/provider/{gpt → api/openai}/agents/client.py +0 -0
  172. /pygpt_net/provider/{gpt → api/openai}/agents/remote_tools.py +0 -0
  173. /pygpt_net/provider/{gpt → api/openai}/agents/utils.py +0 -0
  174. /pygpt_net/provider/{gpt → api/openai}/audio.py +0 -0
  175. /pygpt_net/provider/{gpt → api/openai}/computer.py +0 -0
  176. /pygpt_net/provider/{gpt → api/openai}/container.py +0 -0
  177. /pygpt_net/provider/{gpt → api/openai}/summarizer.py +0 -0
  178. /pygpt_net/provider/{gpt → api/openai}/tools.py +0 -0
  179. /pygpt_net/provider/{gpt → api/openai}/utils.py +0 -0
  180. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
  181. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
  182. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,275 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # ================================================== #
4
+ # This file is a part of PYGPT package #
5
+ # Website: https://pygpt.net #
6
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
+ # MIT License #
8
+ # Created By : Marcin Szczygliński #
9
+ # Updated Date: 2025.08.31 23:00:00 #
10
+ # ================================================== #
11
+
12
+ import threading
13
+ from typing import Optional
14
+
15
+ import numpy as np
16
+
17
+ from PySide6.QtCore import QTimer, QObject, Qt
18
+
19
+
20
class RealtimeSessionPyAudio(QObject):
    """
    Realtime PCM playback session using PyAudio in callback mode.

    Consumes already-converted PCM frames, keeps GUI responsive and emits volume updates.

    Producers append PCM bytes with feed() and call mark_final() when no more data
    will follow. The stream callback drains the internal buffer, pads underruns with
    silence and schedules stop() once the session is final and the buffer is empty.
    """
    def __init__(
        self,
        device_index: int,
        rate: int,
        channels: int,
        width_bytes: int = 2,
        parent: Optional[QObject] = None,
        volume_emitter: Optional[callable] = None,
    ):
        """
        Open the callback-based output stream and start the volume meter.

        :param device_index: output device index passed to PyAudio
        :param rate: sample rate passed to PyAudio
        :param channels: number of channels
        :param width_bytes: sample width in bytes (1 is handled as unsigned 8-bit)
        :param parent: optional parent QObject
        :param volume_emitter: optional callable receiving the meter level (int, 0-100)
        :raises Exception: re-raises whatever PyAudio raises while selecting the
            format or opening the stream (PortAudio is terminated first)
        """
        super().__init__(parent)
        import pyaudio  # local import to keep backend import-safe

        self.device_index = int(device_index)
        self.rate = int(rate)
        self.channels = int(channels)
        self.width = int(width_bytes)
        self.frame_bytes = max(1, self.channels * self.width)
        self.bytes_per_ms = max(1, int(self.rate * self.frame_bytes / 1000))

        # cache the callback return flags so the audio callback never has to import
        self._flag_continue = pyaudio.paContinue
        self._flag_complete = pyaudio.paComplete

        # internal buffers/flags
        self._buffer = bytearray()
        self._buf_lock = threading.Lock()
        self._final = False
        self._tail_ms = 60  # add a small silence tail to avoid clicks
        self._stopped = False  # makes stop() idempotent

        # stop callback (set by backend); defined before the stream can run
        self.on_stopped = None

        # volume metering (timer is started only after the stream was opened)
        self._volume_emitter = volume_emitter
        self._vol_buffer = bytearray()
        self._vol_lock = threading.Lock()
        self._vol_timer = QTimer(self)
        self._vol_timer.setTimerType(Qt.PreciseTimer)
        self._vol_timer.setInterval(33)  # ~30 Hz meter
        self._vol_timer.timeout.connect(self._emit_volume_tick)

        self._stream = None
        self._pa = pyaudio.PyAudio()
        try:
            # choose PyAudio format from width
            self.pa_format = self._pa.get_format_from_width(
                self.width,
                unsigned=(self.width == 1)
            )
            # open callback-based output stream
            self._stream = self._pa.open(
                format=self.pa_format,
                channels=self.channels,
                rate=self.rate,
                output=True,
                output_device_index=self.device_index,
                stream_callback=self._callback,
                frames_per_buffer=max(256, int(self.rate / 100))  # ~10 ms
            )
        except Exception:
            # do not leak the PortAudio instance when the stream cannot be set up
            try:
                self._pa.terminate()
            except Exception:
                pass
            self._pa = None
            raise

        try:
            self._stream.start_stream()
        except Exception:
            # best effort, as before: a failure to (re)start is ignored
            pass

        self._vol_timer.start()

    def is_active(self) -> bool:
        """
        Return True if PortAudio stream is active.

        :return: True if active
        """
        try:
            return self._stream is not None and self._stream.is_active()
        except Exception:
            return False

    def is_finalized(self) -> bool:
        """
        Return True if session was marked final.

        :return: True if final
        """
        return bool(self._final)

    def feed(self, data: bytes) -> None:
        """
        Append PCM bytes (already in session/device format).

        :param data: bytes to append
        """
        if not data:
            return
        with self._buf_lock:
            self._buffer.extend(data)
        # push to volume window from the same bytes
        self._vol_push(data)

    def mark_final(self) -> None:
        """No more data will be supplied; add a small silence tail."""
        if not self._final:
            pad = self.bytes_per_ms * self._tail_ms
            pad -= (pad % self.frame_bytes)  # keep the tail frame-aligned
            if pad > 0:
                with self._buf_lock:
                    self._buffer.extend(self._silence(pad))
            self._final = True

    def stop(self) -> None:
        """
        Stop playback and free resources.

        Safe to call more than once: only the first call releases the stream,
        resets the meter, invokes on_stopped and schedules deletion.
        """
        if self._stopped:
            return
        self._stopped = True

        try:
            if self._vol_timer:
                self._vol_timer.stop()
        except Exception:
            pass
        try:
            if self._stream and self._stream.is_active():
                self._stream.stop_stream()
        except Exception:
            pass
        try:
            if self._stream:
                self._stream.close()
        except Exception:
            pass
        try:
            if self._pa:
                self._pa.terminate()
        except Exception:
            pass

        # zero the meter
        try:
            if self._volume_emitter:
                self._volume_emitter(0)
        except Exception:
            pass

        self._stream = None
        self._pa = None

        # clear the callback before calling it so it can run at most once
        cb = self.on_stopped
        self.on_stopped = None
        if cb:
            try:
                cb()
            except Exception:
                pass

        try:
            self.deleteLater()
        except RuntimeError:
            # underlying Qt object is already gone
            pass

    # ---- internal ----

    def _callback(self, in_data, frame_count, time_info, status):
        """
        PortAudio callback: deliver frames from buffer.

        :param in_data: input data (ignored)
        :param frame_count: number of frames requested
        :param time_info: timing info (ignored)
        :param status: status flags (ignored)
        :return: (data bytes, flag)
        """
        need = frame_count * self.frame_bytes
        with self._buf_lock:
            # slicing hands out at most `need` bytes, or everything on underrun
            out = bytes(self._buffer[:need])
            del self._buffer[:need]

        if len(out) < need:
            out += self._silence(need - len(out))

        # meter push from what is actually written
        self._vol_push(out)

        # auto-finish: when final and nothing more to play, complete and stop()
        if self._final and self._buffer_empty():
            # NOTE(review): this runs on the audio callback thread; confirm that
            # QTimer.singleShot() reliably delivers stop() to the GUI thread here.
            QTimer.singleShot(0, self.stop)  # stop on the GUI thread
            return out, self._flag_complete

        return out, self._flag_continue

    def _buffer_empty(self) -> bool:
        """
        Check if internal buffer is empty.

        :return: True if empty
        """
        with self._buf_lock:
            return len(self._buffer) == 0

    def _silence(self, n: int) -> bytes:
        """
        Generate n bytes of silence.

        :param n: number of bytes
        :return: bytes of silence
        """
        if n <= 0:
            return b""
        if self.width == 1:
            return bytes([128]) * n  # silence for unsigned 8-bit
        return b"\x00" * n

    def _vol_push(self, chunk: bytes) -> None:
        """
        Push chunk to volume buffer and trim if needed.

        :param chunk: bytes to push to volume buffer
        """
        if not chunk:
            return
        with self._vol_lock:
            self._vol_buffer.extend(chunk)
            max_bytes = max(1, self.bytes_per_ms * 100)  # ~100 ms window
            if len(self._vol_buffer) > max_bytes:
                del self._vol_buffer[:len(self._vol_buffer) - max_bytes]

    def _emit_volume_tick(self) -> None:
        """
        Emit volume level based on current volume buffer.

        The level is the RMS of the window mapped from -60..0 dB to 0..100.
        Only whole samples are decoded, so a window whose length is not a
        multiple of the sample width still produces a reading.
        """
        if self._volume_emitter is None:
            return
        with self._vol_lock:
            buf = bytes(self._vol_buffer)
        try:
            if not buf:
                self._volume_emitter(0)
                return

            # decode by sample width
            if self.width == 1:
                arr = np.frombuffer(buf, dtype=np.uint8).astype(np.int16)
                arr = (arr - 128).astype(np.float32) / 128.0
            else:
                if self.width == 4:
                    dtype, scale = np.int32, 2147483648.0
                else:
                    # width 2, and the fallback for any other width
                    dtype, scale = np.int16, 32768.0
                count = len(buf) // np.dtype(dtype).itemsize
                if count == 0:
                    self._volume_emitter(0)
                    return
                # count= drops a trailing partial sample instead of raising
                arr = np.frombuffer(buf, dtype=dtype, count=count).astype(np.float32) / scale

            if arr.size == 0:
                self._volume_emitter(0)
                return

            rms = float(np.sqrt(np.mean(arr.astype(np.float64) ** 2)))
            db = -60.0 if rms <= 1e-9 else 20.0 * float(np.log10(min(1.0, rms)))
            db = max(-60.0, min(0.0, db))
            volume = int(((db + 60.0) / 60.0) * 100.0)
            self._volume_emitter(volume)
        except Exception:
            # metering is best effort and must never break playback
            pass
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # ================================================== #
4
+ # This file is a part of PYGPT package #
5
+ # Website: https://pygpt.net #
6
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
+ # MIT License #
8
+ # Created By : Marcin Szczygliński #
9
+ # Updated Date: 2025.08.31 23:00:00 #
10
+ # ================================================== #
11
+
12
+ from .pygame import PygameBackend
@@ -6,15 +6,20 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.08.27 07:00:00 #
9
+ # Updated Date: 2025.08.31 04:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import time
13
13
  import wave
14
+ import numpy as np
14
15
  from typing import List, Tuple
16
+ from collections import deque
17
+ from threading import Lock
15
18
 
16
19
  from PySide6.QtCore import QTimer
17
20
 
21
+ from ..shared import f32_to_s16le, build_rt_input_delta_event
22
+
18
23
  class PygameBackend:
19
24
  MIN_FRAMES = 25 # minimum frames to start transcription
20
25
 
@@ -22,6 +27,8 @@ class PygameBackend:
22
27
  """
23
28
  Audio input capture core using pygame's SDL2 audio capture backend.
24
29
  Captured devices are stored as device name strings.
30
+
31
+ :param window: Window instance
25
32
  """
26
33
  self.window = window
27
34
  self.path = None
@@ -55,10 +62,14 @@ class PygameBackend:
55
62
  self.initialized = False
56
63
  self.mode = "input" # input|control
57
64
 
65
+ # --- REALTIME INPUT (mic -> dispatcher) ---
66
+ self._rt_signals = None # set with set_rt_signals()
67
+ self._rt_queue = deque() # queue of raw float32 chunks from SDL audio thread
68
+ self._rt_lock = Lock() # protects _rt_queue
69
+ self._is_recording = False # suppress updates after stop
70
+
58
71
  def init(self):
59
- """
60
- Initialize the pygame audio system if not already initialized.
61
- """
72
+ """Initialize the pygame audio system if not already initialized."""
62
73
  if not self.initialized:
63
74
  import pygame
64
75
  from pygame._sdl2 import (
@@ -110,10 +121,20 @@ class PygameBackend:
110
121
  """
111
122
  self.path = path
112
123
 
124
+ def set_rt_signals(self, signals) -> None:
125
+ """
126
+ Set signals object for realtime events.
127
+
128
+ :param signals: Signals object
129
+ """
130
+ self._rt_signals = signals
131
+
113
132
  def start(self):
114
133
  """
115
134
  Start audio recording using pygame’s SDL2 audio capture.
116
135
  Returns True if started successfully.
136
+
137
+ :return: True if started
117
138
  """
118
139
  self.init()
119
140
  # Clear previously recorded frames.
@@ -136,15 +157,23 @@ class PygameBackend:
136
157
  self.timer.timeout.connect(self._update_level)
137
158
  self.timer.start(50) # update every 50ms
138
159
 
160
+ # mark recording as active after setup
161
+ self._is_recording = True
139
162
  return True
140
163
 
141
164
  def stop(self):
142
165
  """
143
166
  Stop audio recording.
144
167
  Returns True if stopped and audio data was saved (if path is set).
168
+
169
+ :return: True if stopped and saved
145
170
  """
146
171
  self.init()
147
172
  result = False
173
+
174
+ # immediately mark as not recording
175
+ self._is_recording = False
176
+
148
177
  if self.audio_source is not None:
149
178
  if self.timer is not None:
150
179
  self.timer.stop()
@@ -154,6 +183,12 @@ class PygameBackend:
154
183
  self.audio_source.pause(1)
155
184
  self.audio_source = None
156
185
 
186
+ # Emit final input chunk marker for realtime consumers
187
+ try:
188
+ self._emit_rt_input_delta(b"", final=True)
189
+ except Exception:
190
+ pass
191
+
157
192
  if self.frames:
158
193
  if self.path:
159
194
  self.save_audio_file(self.path)
@@ -162,35 +197,48 @@ class PygameBackend:
162
197
  print("File path is not set.")
163
198
  else:
164
199
  print("No audio data recorded")
200
+
201
+ # reset level indicator
202
+ try:
203
+ self.reset_audio_level()
204
+ except Exception:
205
+ pass
206
+
165
207
  return result
166
208
 
167
209
  def has_source(self) -> bool:
168
210
  """
169
211
  Check if the audio source is available.
212
+
213
+ :return: True if audio source is available
170
214
  """
171
215
  return self.audio_source is not None
172
216
 
173
217
  def has_frames(self) -> bool:
174
218
  """
175
219
  Check if any audio frames have been recorded.
220
+
221
+ :return: True if any frames recorded
176
222
  """
177
223
  return bool(self.frames)
178
224
 
179
225
  def has_min_frames(self) -> bool:
180
226
  """
181
227
  Check if at least MIN_FRAMES audio frames have been recorded.
228
+
229
+ :return: True if at least MIN_FRAMES recorded
182
230
  """
183
231
  return len(self.frames) >= self.MIN_FRAMES
184
232
 
185
233
  def reset_audio_level(self):
186
- """
187
- Reset the audio level bar (if available).
188
- """
234
+ """Reset the audio level bar (if available)."""
189
235
  self.window.controller.audio.ui.on_input_volume_change(0, self.mode)
190
236
 
191
237
  def check_audio_input(self) -> bool:
192
238
  """
193
239
  Check if a default audio input device is available using pygame.
240
+
241
+ :return: True if an audio input device is available
194
242
  """
195
243
  from pygame._sdl2 import (
196
244
  get_audio_device_names,
@@ -226,6 +274,8 @@ class PygameBackend:
226
274
  def device_changed(self, index: int):
227
275
  """
228
276
  Change the selected audio input device by its index in the devices list.
277
+
278
+ :param index: Index of the device in the devices list.
229
279
  """
230
280
  self.init()
231
281
  if 0 <= index < len(self.devices):
@@ -234,9 +284,7 @@ class PygameBackend:
234
284
  self.selected_device = None
235
285
 
236
286
  def prepare_device(self):
237
- """
238
- Set the current audio input device based on configuration.
239
- """
287
+ """Set the current audio input device based on configuration."""
240
288
  self.init()
241
289
  if self.window is not None and hasattr(self.window, "core"):
242
290
  device_index = int(self.window.core.config.get('audio.input.device', 0))
@@ -251,14 +299,26 @@ class PygameBackend:
251
299
  """
252
300
  Callback function called in the audio thread.
253
301
  It receives a memoryview of audio data which is converted to bytes and appended.
302
+
303
+ :param audiodevice: The audio device instance (not used here).
304
+ :param audiomemoryview: MemoryView of the captured audio data.
254
305
  """
306
+ if not self._is_recording:
307
+ return
308
+
255
309
  # Append captured audio bytes to the frames list.
256
- self.frames.append(bytes(audiomemoryview))
310
+ chunk = bytes(audiomemoryview)
311
+ self.frames.append(chunk)
312
+
313
+ # Enqueue chunk for realtime emission (processed on the Qt thread).
314
+ try:
315
+ with self._rt_lock:
316
+ self._rt_queue.append(chunk)
317
+ except Exception:
318
+ pass
257
319
 
258
320
  def setup_audio_input(self):
259
- """
260
- Create an AudioDevice with the selected device name and start recording.
261
- """
321
+ """Create an AudioDevice with the selected device name and start recording."""
262
322
  self.init()
263
323
  from pygame._sdl2 import (
264
324
  AudioDevice,
@@ -289,16 +349,18 @@ class PygameBackend:
289
349
  Periodically called (via QTimer) to compute RMS from the last captured audio chunk
290
350
  and update the audio level bar.
291
351
  """
352
+ # Drain realtime queue first to keep latency low.
353
+ self._drain_rt_queue()
354
+
292
355
  if not self.frames:
293
356
  return
294
357
 
295
- import numpy as np
296
358
  # Use the last captured chunk.
297
359
  last_chunk = self.frames[-1]
298
360
  try:
299
361
  # Interpret the bytes as float32 samples.
300
362
  samples = np.frombuffer(last_chunk, dtype=np.float32)
301
- except Exception as e:
363
+ except Exception:
302
364
  return
303
365
  if samples.size == 0:
304
366
  return
@@ -329,7 +391,6 @@ class PygameBackend:
329
391
 
330
392
  :param filename: The path to the output WAV file.
331
393
  """
332
- import numpy as np
333
394
  full_data = b"".join(self.frames)
334
395
  try:
335
396
  data_array = np.frombuffer(full_data, dtype=np.float32)
@@ -337,7 +398,7 @@ class PygameBackend:
337
398
  print("Error converting audio data:", e)
338
399
  return
339
400
  # Convert float32 values in the range -1.0 ... 1.0 to PCM int16.
340
- int_data = (data_array * 32767).astype(np.int16)
401
+ int_data = (np.clip(data_array, -1.0, 1.0) * 32767.0).astype(np.int16)
341
402
  new_data = int_data.tobytes()
342
403
  with wave.open(filename, 'wb') as wf:
343
404
  wf.setnchannels(self.channels)
@@ -490,11 +551,61 @@ class PygameBackend:
490
551
  def get_default_input_device(self) -> tuple:
491
552
  """
492
553
  Retrieve the default input device using PyAudio.
554
+
555
+ :return: (index, name)
493
556
  """
494
557
  return 0, "Default Input Device"
495
558
 
496
559
  def get_default_output_device(self) -> tuple:
497
560
  """
498
561
  Retrieve the default output device using PyAudio.
562
+
563
+ :return: (index, name)
499
564
  """
500
- return 0, "Default Output Device"
565
+ return 0, "Default Output Device"
566
+
567
+ # --------------------
568
+ # REALTIME INPUT HELPERS
569
+ # --------------------
570
+ def _emit_rt_input_delta(self, data: bytes, final: bool) -> None:
571
+ """
572
+ Emit RT_INPUT_AUDIO_DELTA with a provider-agnostic payload.
573
+ Standardizes to PCM16, little-endian, and includes rate/channels.
574
+
575
+ :param data: PCM16LE audio bytes
576
+ :param final: True if this is the final chunk
577
+ """
578
+ if not self._rt_signals:
579
+ return
580
+ try:
581
+ event = build_rt_input_delta_event(
582
+ rate=int(self.rate),
583
+ channels=int(self.channels),
584
+ data=data or b"",
585
+ final=bool(final),
586
+ )
587
+ # Ensure emission on the Qt thread
588
+ QTimer.singleShot(0, lambda: self._rt_signals.response.emit(event))
589
+ except Exception:
590
+ pass
591
+
592
+ def _drain_rt_queue(self) -> None:
593
+ """
594
+ Drain queued float32 chunks from the audio thread, convert to PCM16,
595
+ and emit a single realtime delta event.
596
+ """
597
+ if not self._rt_signals:
598
+ # nothing to emit
599
+ with self._rt_lock:
600
+ self._rt_queue.clear()
601
+ return
602
+
603
+ with self._rt_lock:
604
+ if not self._rt_queue:
605
+ return
606
+ raw = b"".join(self._rt_queue)
607
+ self._rt_queue.clear()
608
+
609
+ s16 = f32_to_s16le(raw)
610
+ if s16:
611
+ self._emit_rt_input_delta(s16, final=False)
@@ -0,0 +1,38 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # ================================================== #
4
+ # This file is a part of PYGPT package #
5
+ # Website: https://pygpt.net #
6
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
+ # MIT License #
8
+ # Created By : Marcin Szczygliński #
9
+ # Updated Date: 2025.08.31 23:00:00 #
10
+ # ================================================== #
11
+
12
+ # Shared helpers for audio backends
13
+
14
+ from .rt import (
15
+ build_rt_input_delta_event,
16
+ build_output_volume_event,
17
+ )
18
+ from .conversions import (
19
+ qaudio_dtype,
20
+ qaudio_norm_factor,
21
+ qaudio_to_s16le,
22
+ pyaudio_to_s16le,
23
+ f32_to_s16le,
24
+ convert_s16_pcm,
25
+ )
26
+ from .envelope import compute_envelope_from_file
27
+
28
+ __all__ = [
29
+ "build_rt_input_delta_event",
30
+ "build_output_volume_event",
31
+ "qaudio_dtype",
32
+ "qaudio_norm_factor",
33
+ "qaudio_to_s16le",
34
+ "pyaudio_to_s16le",
35
+ "f32_to_s16le",
36
+ "convert_s16_pcm",
37
+ "compute_envelope_from_file",
38
+ ]