pygpt-net 2.6.30__py3-none-any.whl → 2.6.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. pygpt_net/CHANGELOG.txt +8 -0
  2. pygpt_net/__init__.py +3 -3
  3. pygpt_net/app.py +4 -0
  4. pygpt_net/controller/__init__.py +5 -2
  5. pygpt_net/controller/audio/audio.py +25 -1
  6. pygpt_net/controller/audio/ui.py +2 -2
  7. pygpt_net/controller/chat/audio.py +1 -8
  8. pygpt_net/controller/chat/common.py +29 -3
  9. pygpt_net/controller/chat/handler/__init__.py +0 -0
  10. pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
  11. pygpt_net/controller/chat/output.py +8 -3
  12. pygpt_net/controller/chat/stream.py +3 -1071
  13. pygpt_net/controller/chat/text.py +3 -2
  14. pygpt_net/controller/kernel/kernel.py +11 -3
  15. pygpt_net/controller/kernel/reply.py +5 -1
  16. pygpt_net/controller/realtime/__init__.py +12 -0
  17. pygpt_net/controller/realtime/manager.py +53 -0
  18. pygpt_net/controller/realtime/realtime.py +268 -0
  19. pygpt_net/controller/ui/mode.py +7 -0
  20. pygpt_net/controller/ui/ui.py +19 -1
  21. pygpt_net/core/audio/audio.py +6 -1
  22. pygpt_net/core/audio/backend/native/__init__.py +12 -0
  23. pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
  24. pygpt_net/core/audio/backend/native/player.py +139 -0
  25. pygpt_net/core/audio/backend/native/realtime.py +250 -0
  26. pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
  27. pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
  28. pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
  29. pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
  30. pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
  31. pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
  32. pygpt_net/core/audio/backend/shared/__init__.py +38 -0
  33. pygpt_net/core/audio/backend/shared/conversions.py +211 -0
  34. pygpt_net/core/audio/backend/shared/envelope.py +38 -0
  35. pygpt_net/core/audio/backend/shared/player.py +137 -0
  36. pygpt_net/core/audio/backend/shared/rt.py +52 -0
  37. pygpt_net/core/audio/capture.py +5 -0
  38. pygpt_net/core/audio/output.py +13 -2
  39. pygpt_net/core/audio/whisper.py +6 -2
  40. pygpt_net/core/bridge/bridge.py +2 -1
  41. pygpt_net/core/bridge/worker.py +4 -1
  42. pygpt_net/core/dispatcher/dispatcher.py +37 -1
  43. pygpt_net/core/events/__init__.py +2 -1
  44. pygpt_net/core/events/realtime.py +55 -0
  45. pygpt_net/core/image/image.py +51 -1
  46. pygpt_net/core/realtime/__init__.py +0 -0
  47. pygpt_net/core/realtime/options.py +87 -0
  48. pygpt_net/core/realtime/shared/__init__.py +0 -0
  49. pygpt_net/core/realtime/shared/audio.py +213 -0
  50. pygpt_net/core/realtime/shared/loop.py +64 -0
  51. pygpt_net/core/realtime/shared/session.py +59 -0
  52. pygpt_net/core/realtime/shared/text.py +37 -0
  53. pygpt_net/core/realtime/shared/tools.py +276 -0
  54. pygpt_net/core/realtime/shared/turn.py +38 -0
  55. pygpt_net/core/realtime/shared/types.py +16 -0
  56. pygpt_net/core/realtime/worker.py +164 -0
  57. pygpt_net/core/types/__init__.py +1 -0
  58. pygpt_net/core/types/image.py +48 -0
  59. pygpt_net/data/config/config.json +10 -4
  60. pygpt_net/data/config/models.json +149 -103
  61. pygpt_net/data/config/settings.json +50 -0
  62. pygpt_net/data/locale/locale.de.ini +5 -5
  63. pygpt_net/data/locale/locale.en.ini +19 -13
  64. pygpt_net/data/locale/locale.es.ini +5 -5
  65. pygpt_net/data/locale/locale.fr.ini +5 -5
  66. pygpt_net/data/locale/locale.it.ini +5 -5
  67. pygpt_net/data/locale/locale.pl.ini +5 -5
  68. pygpt_net/data/locale/locale.uk.ini +5 -5
  69. pygpt_net/data/locale/locale.zh.ini +1 -1
  70. pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
  71. pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
  72. pygpt_net/plugin/audio_input/plugin.py +37 -4
  73. pygpt_net/plugin/audio_input/simple.py +57 -8
  74. pygpt_net/plugin/cmd_files/worker.py +3 -0
  75. pygpt_net/provider/api/google/__init__.py +39 -6
  76. pygpt_net/provider/api/google/audio.py +8 -1
  77. pygpt_net/provider/api/google/chat.py +45 -6
  78. pygpt_net/provider/api/google/image.py +226 -86
  79. pygpt_net/provider/api/google/realtime/__init__.py +12 -0
  80. pygpt_net/provider/api/google/realtime/client.py +1945 -0
  81. pygpt_net/provider/api/google/realtime/realtime.py +186 -0
  82. pygpt_net/provider/api/openai/__init__.py +22 -2
  83. pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
  84. pygpt_net/provider/api/openai/realtime/client.py +1828 -0
  85. pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
  86. pygpt_net/provider/audio_input/google_genai.py +103 -0
  87. pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
  88. pygpt_net/provider/audio_output/google_tts.py +0 -12
  89. pygpt_net/provider/audio_output/openai_tts.py +8 -5
  90. pygpt_net/provider/core/config/patch.py +15 -0
  91. pygpt_net/provider/core/model/patch.py +11 -0
  92. pygpt_net/provider/llms/google.py +8 -9
  93. pygpt_net/ui/layout/toolbox/footer.py +16 -0
  94. pygpt_net/ui/layout/toolbox/image.py +5 -0
  95. pygpt_net/ui/widget/option/combo.py +15 -1
  96. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +26 -14
  97. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +100 -62
  98. pygpt_net/core/audio/backend/pyaudio.py +0 -554
  99. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
  100. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
  101. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,923 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # ================================================== #
4
+ # This file is a part of PYGPT package #
5
+ # Website: https://pygpt.net #
6
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
+ # MIT License #
8
+ # Created By : Marcin Szczygliński #
9
+ # Updated Date: 2025.08.31 04:00:00 #
10
+ # ================================================== #
11
+
12
+ from typing import List, Tuple, Optional
13
+
14
+ import time
15
+ import wave
16
+ import numpy as np
17
+
18
+ from PySide6.QtCore import QTimer, QObject
19
+
20
+ from pygpt_net.core.events import RealtimeEvent
21
+
22
+ from .realtime import RealtimeSessionPyAudio
23
+ from .playback import _FilePlaybackThread
24
+ from ..shared import (
25
+ pyaudio_to_s16le,
26
+ convert_s16_pcm,
27
+ build_rt_input_delta_event,
28
+ build_output_volume_event,
29
+ )
30
+
31
+ class PyaudioBackend:
32
+
33
+ MIN_FRAMES = 25 # minimum frames to start transcription
34
+
35
def __init__(self, window=None):
    """
    Create the PyAudio-based audio backend (capture, file playback, realtime output).

    :param window: Window instance (optional); when it exposes ``core``, the
                   input channel count and sample rate are read from its config
    """
    self.window = window

    # recording state
    self.path = None
    self.frames = []
    self.loop = False
    self.stop_callback = None
    self.start_time = 0
    self.initialized = False

    # PyAudio handles and streams (created lazily)
    self.pyaudio_instance = None
    self.pyaudio_instance_output = None
    self.stream = None
    self.stream_output = None
    self.mode = "input"  # input|control

    # capture parameters: config values when a window is available, defaults otherwise
    if self.window is None or not hasattr(self.window, "core"):
        self.channels = 1
        self.rate = 44100
    else:
        config = self.window.core.config
        self.channels = int(config.get('audio.input.channels', 1))
        self.rate = int(config.get('audio.input.rate', 44100))

    self.format = None
    self.devices = []
    self.selected_device = None

    # realtime output session and the signals object installed by set_rt_signals()
    self._rt_session: Optional[RealtimeSessionPyAudio] = None
    self._rt_signals = None

    # cleared by stop() so the input callback stops processing
    self._input_active = False

    # input parameters reported in realtime payloads
    self._in_rate: int = self.rate
    self._in_channels: int = self.channels

    # file playback worker and the timer that supervises it
    self._file_thread: Optional[_FilePlaybackThread] = None
    self._file_check_timer: Optional[QTimer] = None
80
+
81
def init(self):
    """
    Lazily initialize the backend.

    On the first call this selects the default input format (16-bit int),
    creates the PyAudio instance and enumerates input devices; later calls
    are no-ops.
    """
    import pyaudio
    if self.initialized:
        return
    self.format = pyaudio.paInt16  # default input format
    self.pyaudio_instance = pyaudio.PyAudio()
    self.check_audio_devices()
    self.initialized = True
89
+
90
def set_mode(self, mode: str):
    """
    Store the input mode; it is passed along with input volume updates.

    :param mode: mode name (input|control)
    """
    self.mode = mode
97
+
98
def set_repeat_callback(self, callback):
    """
    Register the function invoked when a loop-recording interval elapses.

    :param callback: function to call on loop recording
    :raises ValueError: if callback is not callable
    """
    if not callable(callback):
        raise ValueError("Callback must be a callable function")
    self.stop_callback = callback
108
+
109
def set_loop(self, loop: bool):
    """
    Enable or disable loop recording.

    :param loop: True to enable loop recording
    """
    self.loop = loop
116
+
117
def set_path(self, path: str):
    """
    Set the file path that stop() saves the recorded audio to.

    :param path: file path
    """
    self.path = path
124
+
125
def start(self) -> bool:
    """
    Start audio input recording using PyAudio.

    Returns False without touching the current recording when no input
    device is selected or a stream is already open, and also when the
    input stream could not be opened.

    :return: True if recording was started
    """
    self.init()
    self.prepare_device()
    if self.selected_device is None:
        print("No audio input device selected")
        return False
    if self.stream is not None:
        # already recording: keep the frames captured so far
        return False

    # reset the buffer only once we know a new recording begins
    self.frames = []
    self.setup_audio_input()
    if self.stream is None:
        # setup_audio_input() reports the reason and leaves the stream unset
        return False
    self.start_time = time.time()
    return True
142
+
143
def stop(self) -> bool:
    """
    Stop audio input recording and save the captured frames.

    The input stream (if any) is stopped and closed, a final empty realtime
    input chunk is emitted, the frames are written to ``self.path`` and the
    input level meter is reset. Failures in the individual steps are
    printed or ignored rather than raised.

    :return: True if frames were recorded and the file was saved, False otherwise
    """
    # make the input callback stop processing right away
    self._input_active = False

    stream = self.stream
    if stream is not None:
        try:
            stream.stop_stream()
        except Exception:
            pass
        try:
            stream.close()
        except Exception as e:
            print(f"Error closing input stream: {e}")
        self.stream = None

    # final (empty) chunk marker for realtime consumers
    try:
        self._emit_rt_input_delta(b"", final=True)
    except Exception:
        pass

    saved = False
    if not self.frames:
        print("No audio data recorded")
    elif not self.path:
        print("File path is not set.")
    else:
        try:
            self.save_audio_file(self.path)
            saved = True
        except Exception as e:
            print(f"Error saving input WAV: {e}")

    # reset input meter
    try:
        self.reset_audio_level()
    except Exception:
        pass
    return saved
188
+
189
def has_source(self) -> bool:
    """
    Tell whether an input stream is currently open.

    :return: True if a stream object is set
    """
    return not (self.stream is None)
196
+
197
def has_frames(self) -> bool:
    """
    Tell whether any audio frames have been captured.

    :return: True if the frame buffer is non-empty
    """
    return True if self.frames else False
204
+
205
def has_min_frames(self) -> bool:
    """
    Tell whether at least MIN_FRAMES buffers have been captured.

    :return: True if the minimum number of frames is available
    """
    captured = len(self.frames)
    return captured >= self.MIN_FRAMES
212
+
213
def reset_audio_level(self):
    """Report an input volume of 0 for the current mode to the audio UI controller."""
    ui = self.window.controller.audio.ui
    ui.on_input_volume_change(0, self.mode)
216
+
217
def check_audio_input(self) -> bool:
    """
    Probe the default audio input by opening and closing a test stream.

    The stream is opened with the backend's format, channel count and
    sample rate; no device index is given to PyAudio.

    :return: True if the stream could be opened and closed, False on any error
    """
    self.init()
    try:
        probe = self.pyaudio_instance.open(
            format=self.format,
            channels=self.channels,
            rate=self.rate,
            input=True,
            frames_per_buffer=1024,
        )
        probe.stop_stream()
        probe.close()
    except Exception:
        return False
    return True
235
+
236
def check_audio_devices(self):
    """
    Enumerate PyAudio devices and rebuild ``self.devices`` with input devices.

    Every device reporting at least one input channel is stored as a dict
    with keys 'index' and 'name'; devices whose info cannot be read are
    skipped. The first entry found becomes the selected device, or None
    when there are no input devices.
    """
    pa = self.pyaudio_instance
    found = self.devices = []
    for idx in range(pa.get_device_count()):
        try:
            info = pa.get_device_info_by_index(idx)
            if info.get('maxInputChannels', 0) > 0:
                found.append({'index': idx, 'name': info.get('name', f'Device {idx}')})
        except Exception:
            continue

    if self.devices:
        self.selected_device = self.devices[0]['index']
    else:
        self.selected_device = None
        print("No audio input devices found.")
255
+
256
def device_changed(self, index: int):
    """
    Change audio input device based on device list index.

    ``index`` is a position in ``self.devices``, not a PyAudio device id.
    When it is out of range the first known input device is selected; when
    no input devices exist the selection is cleared (None), so callers
    checking ``selected_device is None`` can report the missing device
    instead of using an arbitrary device id.

    :param index: index in self.devices list
    """
    self.init()
    if 0 <= index < len(self.devices):
        self.selected_device = self.devices[index]['index']
    elif self.devices:
        # stale/out-of-range config value: fall back to a real input device
        self.selected_device = self.devices[0]['index']
    else:
        self.selected_device = None
267
+
268
def prepare_device(self):
    """
    Select the input device to record from.

    With a window exposing ``core``, the 'audio.input.device' config value
    is passed to device_changed(); otherwise the first known input device
    is selected, or None when the device list is empty.
    """
    self.init()
    has_config = self.window is not None and hasattr(self.window, "core")
    if has_config:
        configured = int(self.window.core.config.get('audio.input.device', 0))
        self.device_changed(configured)
        return
    self.selected_device = self.devices[0]['index'] if self.devices else None
279
+
280
def setup_audio_input(self):
    """
    Open the PyAudio input stream in callback mode and start recording.

    The stream is opened on the selected input device when that device is
    one of the enumerated input devices; otherwise PyAudio picks its
    default input. On failure the error is printed, ``self.stream`` is
    left as None and the input is marked inactive.
    """
    self.init()
    if self.selected_device is None:
        print("No audio input device selected")
        return

    # Pass the selection to PyAudio only if it is a known input device;
    # None lets PyAudio fall back to the system default input.
    known_inputs = {dev['index'] for dev in self.devices}
    device_index = self.selected_device if self.selected_device in known_inputs else None

    try:
        # remember current input parameters for RT payloads
        self._in_rate = int(self.rate)
        self._in_channels = int(self.channels)

        # open() starts the stream by default, so the callback may fire
        # before open() returns. The callback ends the stream with paComplete
        # while _input_active is False, hence the flag must be set first.
        self._input_active = True
        self.stream = self.pyaudio_instance.open(
            format=self.format,
            channels=self.channels,
            rate=self.rate,
            input=True,
            input_device_index=device_index,
            frames_per_buffer=1024,
            stream_callback=self._audio_callback,
        )
        try:
            self.stream.start_stream()
        except Exception:
            # stream is normally already running after open()
            pass
    except Exception as e:
        print(f"Failed to open audio input stream: {e}")
        self.stream = None
        self._input_active = False
307
+
308
def _audio_callback(self, in_data, frame_count, time_info, status):
    """
    PyAudio stream callback: buffer, meter and forward each input chunk.

    While input is active the chunk is appended to ``self.frames``, its RMS
    level is reported to the audio UI as a 0-100 value, a realtime input
    delta is emitted, and in loop mode the stop callback is scheduled once
    the configured stop interval has elapsed.

    :param in_data: audio data
    :param frame_count: number of frames (unused)
    :param time_info: timing information (unused)
    :param status: status flags (unused)
    :return: (None, paContinue), or (None, paComplete) once input is inactive
    """
    import pyaudio

    keep_going = (None, pyaudio.paContinue)

    # stop was requested: finish the callback loop
    if not self._input_active:
        return None, pyaudio.paComplete

    # keep the raw chunk for saving
    self.frames.append(in_data)

    # input metering: RMS scaled by the format's normalization factor
    sample_dtype = self.get_dtype_from_format(self.format)
    samples = np.frombuffer(in_data, dtype=sample_dtype)
    if samples.size == 0:
        return keep_going

    rms = np.sqrt(np.mean(samples.astype(np.float64) ** 2))
    level = rms / self.get_normalization_factor(self.format)
    level_percent = int(min(max(level, 0.0), 1.0) * 100)

    # report the level only while recording is still active
    if self._input_active:
        try:
            self.window.controller.audio.ui.on_input_volume_change(level_percent, self.mode)
        except Exception as e:
            print(f"Error updating audio level: {e}")

    # realtime input delta as PCM16 LE (no resampling here)
    try:
        pcm16 = pyaudio_to_s16le(in_data, self.format, pa_instance=self.pyaudio_instance)
        self._emit_rt_input_delta(pcm16, final=False)
    except Exception:
        # fallback: emit the raw buffer
        self._emit_rt_input_delta(in_data or b"", final=False)

    # loop recording: schedule the stop callback when the interval elapsed
    if self.loop and self.stop_callback is not None and self._input_active:
        if self.window and hasattr(self.window, "core"):
            stop_interval = int(self.window.core.config.get('audio.input.stop_interval', 10))
        else:
            stop_interval = 10
        now = time.time()
        if now - self.start_time >= stop_interval:
            self.start_time = now
            QTimer.singleShot(0, self.stop_callback)

    return keep_going
364
+
365
def update_audio_level(self, level: int):
    """
    Forward an input volume level for the current mode to the audio UI controller.

    :param level: volume level (0-100)
    """
    ui = self.window.controller.audio.ui
    ui.on_input_volume_change(level, self.mode)
372
+
373
def save_audio_file(self, filename: str):
    """
    Write the captured frames to a WAV file.

    Channel count and sample rate come from the backend settings; the
    sample width is obtained from PyAudio for the current format.

    :param filename: path to save the WAV file
    """
    width = self.pyaudio_instance.get_sample_size(self.format)
    with wave.open(filename, 'wb') as wav_out:
        wav_out.setnchannels(self.channels)
        wav_out.setsampwidth(width)
        wav_out.setframerate(self.rate)
        pcm = b''.join(self.frames)
        wav_out.writeframes(pcm)
385
+
386
def get_dtype_from_format(self, fmt):
    """
    Map a PyAudio sample format to the matching NumPy dtype.

    :param fmt: PyAudio format (paInt16, paInt8, paUInt8 or paFloat32)
    :return: NumPy dtype
    :raises ValueError: if the format is not one of the supported formats
    """
    import pyaudio
    supported = (
        (pyaudio.paInt16, np.int16),
        (pyaudio.paInt8, np.int8),
        (pyaudio.paUInt8, np.uint8),
        (pyaudio.paFloat32, np.float32),
    )
    for pa_format, dtype in supported:
        if fmt == pa_format:
            return dtype
    raise ValueError("Unsupported audio format")
404
+
405
def get_normalization_factor(self, fmt):
    """
    Return the divisor used to scale an RMS value of the given format.

    :param fmt: PyAudio format (paInt16, paInt8, paUInt8 or paFloat32)
    :return: normalization factor (32768.0, 128.0, 255.0 or 1.0)
    :raises ValueError: if the format is not one of the supported formats
    """
    import pyaudio
    factors = (
        (pyaudio.paInt16, 32768.0),
        (pyaudio.paInt8, 128.0),
        (pyaudio.paUInt8, 255.0),
        (pyaudio.paFloat32, 1.0),
    )
    for pa_format, factor in factors:
        if fmt == pa_format:
            return factor
    raise ValueError("Unsupported audio format")
423
+
424
def stop_audio(self) -> bool:
    """
    Stop audio input recording; delegates to stop().

    :return: the result of stop()
    """
    stopped = self.stop()
    return stopped
431
+
432
def _stop_file_playback(self, signals=None, join_timeout: float = 1.0):
    """
    Stop the supervising timer and ask the file playback worker to stop.

    The timer and worker references are cleared. If a worker was set, it is
    asked to stop, joined for at most ``join_timeout`` seconds, and a volume
    of 0 is emitted on ``signals``. All errors are ignored.

    :param signals: signals object to emit volume reset
    :param join_timeout: max seconds to wait for worker to join
    """
    try:
        timer = self._file_check_timer
        if timer is not None:
            timer.stop()
    except Exception:
        pass
    self._file_check_timer = None

    # detach the worker first so nothing else sees it while it shuts down
    worker, self._file_thread = self._file_thread, None
    if worker is None:
        return

    try:
        worker.request_stop()
    except Exception:
        pass
    try:
        worker.join(timeout=join_timeout)
    except Exception:
        pass

    if signals is None:
        return
    try:
        signals.volume_changed.emit(0)
    except Exception:
        pass
463
+
464
def _release_realtime_for_playback(self, wait_sec: float = 0.6):
    """
    Stop the realtime session, if any, before file playback starts.

    After stopping and dropping the session the call blocks for about
    ``wait_sec`` seconds, sleeping in 20 ms steps. Nothing happens when no
    session is set.

    :param wait_sec: seconds to wait after stopping the session
    """
    session = self._rt_session
    if not session:
        return
    try:
        session.stop()
    except Exception:
        pass
    self._rt_session = None

    started = time.time()
    while time.time() - started < wait_sec:
        time.sleep(0.02)
481
+
482
def play(
        self,
        audio_file: str,
        event_name: str,
        stopped: callable,
        signals=None
):
    """
    Play an audio file on a worker thread without blocking the caller.

    Any previous file playback and the realtime session are stopped first.
    A 100 ms timer then polls ``stopped()`` and the worker state, and stops
    playback once a stop is requested or the worker is no longer alive.

    :param audio_file: path to audio file
    :param event_name: event name to emit on playback start
    :param stopped: callable that returns True when playback should stop
    :param signals: signals object to emit playback and volume events
    """
    # free the output device: previous file playback, then realtime session
    self._stop_file_playback(signals=signals, join_timeout=1.0)
    self._release_realtime_for_playback(wait_sec=0.6)

    # announce playback start
    if signals is not None:
        try:
            signals.playback.emit(event_name)
        except Exception:
            pass

    # start the worker on the selected output device
    worker = _FilePlaybackThread(
        device_index=self._select_output_device(),
        audio_file=audio_file,
        signals=signals
    )
    self._file_thread = worker
    worker.start()

    # supervising timer, parented to the window when it is a QObject
    self._file_check_timer = QTimer(self.window if isinstance(self.window, QObject) else None)
    timer = self._file_check_timer
    timer.setInterval(100)

    def _tick():
        try:
            if callable(stopped) and stopped():
                # stop requested by the application
                self._stop_file_playback(signals=signals, join_timeout=1.0)
            elif self._file_thread is None or not self._file_thread.is_alive():
                # worker finished on its own
                self._stop_file_playback(signals=signals, join_timeout=0.0)
        except Exception:
            self._stop_file_playback(signals=signals, join_timeout=0.0)

    timer.timeout.connect(_tick)
    timer.start()
541
+
542
def stop_playback(self, signals=None):
    """
    Stop realtime and file-based playback and reset the output meter.

    The realtime session (if any) is stopped and dropped, the file worker
    is asked to stop via _stop_file_playback(), and a volume of 0 is
    emitted on ``signals``.

    :param signals: signals object to emit volume reset
    :return: always False
    """
    session = self._rt_session
    if session:
        try:
            session.stop()
        except Exception:
            pass
        self._rt_session = None

    # cooperative stop only: the stream is not closed from here
    self._stop_file_playback(signals=signals, join_timeout=1.0)

    # make sure the UI meter shows zero
    if signals is not None:
        try:
            signals.volume_changed.emit(0)
        except Exception:
            pass
    return False
566
+
567
def get_input_devices(self) -> List[Tuple[int, str]]:
    """
    List the enumerated input devices as (id, name) pairs.

    Device names are passed through BeautifulSoup's UnicodeDammit and its
    ``unicode_markup`` result is returned as the name.

    :return: list of (device index, device name)
    """
    from bs4 import UnicodeDammit
    self.init()
    return [
        (device['index'], UnicodeDammit(device['name']).unicode_markup)
        for device in self.devices
    ]
584
+
585
def get_output_devices(self) -> List[Tuple[int, str]]:
    """
    Get output devices using PyAudio.

    A temporary PyAudio instance is used for the enumeration and is always
    terminated, including when reading device info raises.

    :return: list of (device index, device name)
    """
    import pyaudio
    p = pyaudio.PyAudio()
    try:
        devices_list = []
        for i in range(p.get_device_count()):
            device_info = p.get_device_info_by_index(i)
            if device_info.get('maxOutputChannels', 0) > 0:
                devices_list.append((i, device_info.get('name', 'Unknown')))
        return devices_list
    finally:
        # release the temporary instance even on errors
        p.terminate()
600
+
601
def get_default_input_device(self) -> tuple:
    """
    Retrieve the default input device using PyAudio.

    When PyAudio reports an IOError the error is printed and
    (None, None) is returned. The temporary PyAudio instance is always
    terminated.

    :return: (device index, device name)
    """
    import pyaudio
    p = pyaudio.PyAudio()
    try:
        try:
            default_info = p.get_default_input_device_info()
            device_id = default_info.get('index')
            device_name = default_info.get('name', 'Unknown')
        except IOError as e:
            print("Error getting default input device:", e)
            device_id, device_name = None, None
    finally:
        # release the temporary instance even if an unexpected error propagates
        p.terminate()
    return device_id, device_name
618
+
619
def get_default_output_device(self) -> tuple:
    """
    Retrieve the default output device using PyAudio.

    When PyAudio reports an IOError the error is printed and
    (None, None) is returned. The temporary PyAudio instance is always
    terminated.

    :return: (device index, device name)
    """
    import pyaudio
    p = pyaudio.PyAudio()
    try:
        try:
            default_info = p.get_default_output_device_info()
            device_id = default_info.get('index')
            device_name = default_info.get('name', 'Unknown')
        except IOError as e:
            print("Error getting default output device:", e)
            device_id, device_name = None, None
    finally:
        # release the temporary instance even if an unexpected error propagates
        p.terminate()
    return device_id, device_name
636
+
637
+ # ---- REALTIME ----
638
+
639
def set_rt_signals(self, signals) -> None:
    """
    Install the signals object used to emit realtime events.

    :param signals: signals object; its 'response' signal is used by this backend
    """
    self._rt_signals = signals
646
+
647
def set_signals(self, signals) -> None:
    """
    Alias of set_rt_signals().

    :param signals: signals object forwarded to set_rt_signals()
    """
    self.set_rt_signals(signals)
654
+
655
def _emit_output_volume(self, value: int) -> None:
    """
    Emit an output volume event on the realtime 'response' signal.

    Does nothing when no signals object is installed; errors while
    building or emitting the event are ignored.

    :param value: volume level (0-100)
    """
    signals = self._rt_signals
    if not signals:
        return
    try:
        emit = signals.response.emit
        emit(build_output_volume_event(int(value)))
    except Exception:
        pass
669
+
670
def _select_output_device(self) -> int:
    """
    Select PyAudio output device index based on configuration or default.

    Preference order: the configured 'audio.output.device' when it exists
    and has output channels, then PyAudio's default output device, then the
    first device reporting output channels, and finally 0. The temporary
    PyAudio instance is always terminated.

    :return: device index
    """
    import pyaudio
    pa = pyaudio.PyAudio()
    try:
        try:
            cfg_idx = int(self.window.core.config.get('audio.output.device', -1)) \
                if self.window and hasattr(self.window, "core") else -1
        except Exception:
            cfg_idx = -1

        chosen = None

        # 1) configured device, if it is a usable output
        if cfg_idx >= 0:
            try:
                di = pa.get_device_info_by_index(cfg_idx)
                if di.get('maxOutputChannels', 0) > 0:
                    chosen = cfg_idx
            except Exception:
                chosen = None

        # 2) default output device
        if chosen is None:
            try:
                chosen = pa.get_default_output_device_info().get('index')
            except Exception:
                chosen = None

        # 3) first device with output channels
        if chosen is None:
            for i in range(pa.get_device_count()):
                try:
                    di = pa.get_device_info_by_index(i)
                    if di.get('maxOutputChannels', 0) > 0:
                        chosen = i
                        break
                except Exception:
                    continue

        return int(chosen if chosen is not None else 0)
    finally:
        # release the temporary instance even if the scan raises
        pa.terminate()
711
+
712
def _probe_supported_format(
        self,
        device_index: int,
        rate: int,
        channels: int
) -> Tuple[int, int, int]:
    """
    Find a 16-bit output format the device accepts.

    The requested (rate, channels) is tried first, then the requested rate
    with 2 and 1 channels, then 44100/48000 Hz with the requested channel
    count, 2 channels and 1 channel. The first combination PyAudio reports
    as supported is returned; when none is, the requested values are
    returned unchanged. The width is always 2 bytes.

    :param device_index: PyAudio device index
    :param rate: desired sample rate
    :param channels: desired number of channels
    :return: (rate, channels, width_bytes)
    """
    import pyaudio
    pa = pyaudio.PyAudio()
    sample_format = pyaudio.paInt16

    candidates = [(rate, channels), (rate, 2), (rate, 1)]
    candidates += [(sr, ch) for ch in (channels, 2, 1) for sr in (44100, 48000)]

    for sr, ch in candidates:
        try:
            if pa.is_format_supported(
                    sr,
                    output_device=device_index,
                    output_channels=ch,
                    output_format=sample_format):
                pa.terminate()
                return int(sr), int(ch), 2
        except Exception:
            # unsupported combination (or probe error): try the next one
            continue

    pa.terminate()
    return int(rate), int(channels), 2
752
+
753
def _ensure_rt_session(self, rate: int, channels: int) -> RealtimeSessionPyAudio:
    """
    Ensure a realtime output session exists with a supported device format.
    Reuse only if still active and not finalized; otherwise recreate.

    File playback is stopped first. The current session is reused when it
    matches the selected device and probed format; otherwise it is stopped
    and a new session is created.

    :param rate: desired sample rate
    :param channels: desired number of channels
    :return: RealtimeSessionPyAudio instance
    """
    # make sure file playback is not holding the device
    self._stop_file_playback(join_timeout=0.8)

    dev_idx = self._select_output_device()
    out_rate, out_ch, out_w = self._probe_supported_format(dev_idx, rate, channels)

    s = self._rt_session
    if s is not None:
        try:
            same_fmt = (s.device_index == dev_idx and s.rate == out_rate and
                        s.channels == out_ch and s.width == out_w)
            if same_fmt and s.is_active() and not s.is_finalized():
                return s
        except Exception:
            pass
        try:
            s.stop()
        except Exception:
            pass
        self._rt_session = None

    session = RealtimeSessionPyAudio(
        device_index=dev_idx,
        rate=out_rate,
        channels=out_ch,
        width_bytes=out_w,
        parent=None,
        volume_emitter=self._emit_output_volume
    )

    def _on_stopped():
        # notify listeners that device output has ended
        if self._rt_signals:
            self._rt_signals.response.emit(
                RealtimeEvent(RealtimeEvent.RT_OUTPUT_AUDIO_END, {"source": "device"})
            )
        # Clear the reference only if it still points at this session, so a
        # late callback from a replaced session cannot drop a newer one.
        if self._rt_session is session:
            self._rt_session = None

    session.on_stopped = _on_stopped
    self._rt_session = session
    return session
799
+
800
def _convert_pcm_for_output(
        self, data: bytes,
        in_rate: int,
        in_channels: int,
        out_rate: int,
        out_channels: int,
        out_width: int = 2
) -> bytes:
    """
    Convert PCM data to the target rate, channel count and sample width.

    Thin wrapper around the shared ``convert_s16_pcm`` helper, called with
    ``out_format="s16"``.

    :param data: input PCM bytes
    :param in_rate: input sample rate
    :param in_channels: input number of channels
    :param out_rate: output sample rate
    :param out_channels: output number of channels
    :param out_width: output sample width in bytes
    :return: converted PCM bytes
    """
    conversion = dict(
        in_rate=in_rate,
        in_channels=in_channels,
        out_rate=out_rate,
        out_channels=out_channels,
        out_width=out_width,
        out_format="s16",
    )
    return convert_s16_pcm(data, **conversion)
828
+
829
def stop_realtime(self):
    """
    Finish the realtime playback session, if one exists.

    The session is marked final; if that raises, it is stopped instead.
    Errors from the stop itself are ignored.
    """
    session = self._rt_session
    if session is None:
        return
    try:
        session.mark_final()
    except Exception:
        try:
            session.stop()
        except Exception:
            pass
840
+
841
def handle_realtime(self, payload: dict) -> None:
    """
    Play one realtime audio payload through the realtime output session.

    Payload keys: data (bytes), mime (str, default "audio/pcm"), rate (int,
    default 24000), channels (int, default 1), final (bool). Only payloads
    whose mime contains "pcm" or "l16" are played; for other types only an
    existing session is marked final when ``final`` is set. Data is
    converted when the session format differs from the payload format.
    Errors are logged through the window's debug logger when available.

    :param payload: dict with audio data and parameters
    """
    try:
        data: bytes = payload.get("data", b"") or b""
        mime: str = (payload.get("mime", "audio/pcm") or "audio/pcm").lower()
        rate = int(payload.get("rate", 24000) or 24000)
        channels = int(payload.get("channels", 1) or 1)
        final = bool(payload.get("final", False))

        # only raw PCM/L16 is supported here
        is_raw_pcm = ("pcm" in mime) or ("l16" in mime)
        if not is_raw_pcm:
            if final and self._rt_session is not None:
                try:
                    self._rt_session.mark_final()
                except Exception:
                    pass
            return

        session = self._ensure_rt_session(rate, channels)

        if data:
            # bring the chunk to the session format (input assumed to be S16LE)
            matches_session = (session.rate == rate and session.channels == channels
                               and session.width == 2)
            if not matches_session:
                data = self._convert_pcm_for_output(
                    data, in_rate=rate, in_channels=channels,
                    out_rate=session.rate, out_channels=session.channels,
                    out_width=session.width
                )
            session.feed(data)

        if final:
            session.mark_final()

    except Exception as e:
        try:
            self.window.core.debug.log(f"[audio][pyaudio] handle_realtime error: {e}")
        except Exception:
            pass
884
+
885
def _emit_on_main(self, fn, *args) -> None:
    """
    Call ``fn(*args)`` and ignore any exception it raises.

    NOTE: the call is made directly in the calling thread; this helper
    does not itself move the call to another thread.

    :param fn: function to call (e.g. a signal's emit)
    :param args: arguments to pass
    """
    try:
        fn(*args)
    except Exception:
        pass
896
+
897
def _emit_rt_input_delta(self, data: bytes, final: bool) -> None:
    """
    Emit a realtime input audio delta event on the 'response' signal.

    The event carries the current input rate and channel count together
    with the audio bytes. Nothing is emitted when no signals object is
    installed.

    :param data: PCM16 LE audio data bytes
    :param final: True if this is the final chunk
    """
    signals = self._rt_signals
    if not signals:
        return
    delta_event = build_rt_input_delta_event(
        rate=int(self._in_rate),
        channels=int(self._in_channels),
        data=data or b"",
        final=bool(final),
    )
    # emission goes through _emit_on_main, which ignores emit errors
    self._emit_on_main(signals.response.emit, delta_event)
914
+
915
def _convert_input_to_int16(self, raw: bytes) -> bytes:
    """
    Convert a PyAudio input buffer in the backend's format to PCM16 LE.

    Delegates to the shared ``pyaudio_to_s16le`` helper with the current
    format and PyAudio instance.

    :param raw: input audio data bytes
    :return: PCM16 LE audio data bytes
    """
    pcm16 = pyaudio_to_s16le(raw, self.format, pa_instance=self.pyaudio_instance)
    return pcm16