pygpt-net 2.6.30__py3-none-any.whl → 2.6.31__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (101)
  1. pygpt_net/CHANGELOG.txt +8 -0
  2. pygpt_net/__init__.py +3 -3
  3. pygpt_net/app.py +4 -0
  4. pygpt_net/controller/__init__.py +5 -2
  5. pygpt_net/controller/audio/audio.py +25 -1
  6. pygpt_net/controller/audio/ui.py +2 -2
  7. pygpt_net/controller/chat/audio.py +1 -8
  8. pygpt_net/controller/chat/common.py +29 -3
  9. pygpt_net/controller/chat/handler/__init__.py +0 -0
  10. pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
  11. pygpt_net/controller/chat/output.py +8 -3
  12. pygpt_net/controller/chat/stream.py +3 -1071
  13. pygpt_net/controller/chat/text.py +3 -2
  14. pygpt_net/controller/kernel/kernel.py +11 -3
  15. pygpt_net/controller/kernel/reply.py +5 -1
  16. pygpt_net/controller/realtime/__init__.py +12 -0
  17. pygpt_net/controller/realtime/manager.py +53 -0
  18. pygpt_net/controller/realtime/realtime.py +268 -0
  19. pygpt_net/controller/ui/mode.py +7 -0
  20. pygpt_net/controller/ui/ui.py +19 -1
  21. pygpt_net/core/audio/audio.py +6 -1
  22. pygpt_net/core/audio/backend/native/__init__.py +12 -0
  23. pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
  24. pygpt_net/core/audio/backend/native/player.py +139 -0
  25. pygpt_net/core/audio/backend/native/realtime.py +250 -0
  26. pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
  27. pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
  28. pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
  29. pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
  30. pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
  31. pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
  32. pygpt_net/core/audio/backend/shared/__init__.py +38 -0
  33. pygpt_net/core/audio/backend/shared/conversions.py +211 -0
  34. pygpt_net/core/audio/backend/shared/envelope.py +38 -0
  35. pygpt_net/core/audio/backend/shared/player.py +137 -0
  36. pygpt_net/core/audio/backend/shared/rt.py +52 -0
  37. pygpt_net/core/audio/capture.py +5 -0
  38. pygpt_net/core/audio/output.py +13 -2
  39. pygpt_net/core/audio/whisper.py +6 -2
  40. pygpt_net/core/bridge/bridge.py +2 -1
  41. pygpt_net/core/bridge/worker.py +4 -1
  42. pygpt_net/core/dispatcher/dispatcher.py +37 -1
  43. pygpt_net/core/events/__init__.py +2 -1
  44. pygpt_net/core/events/realtime.py +55 -0
  45. pygpt_net/core/image/image.py +51 -1
  46. pygpt_net/core/realtime/__init__.py +0 -0
  47. pygpt_net/core/realtime/options.py +87 -0
  48. pygpt_net/core/realtime/shared/__init__.py +0 -0
  49. pygpt_net/core/realtime/shared/audio.py +213 -0
  50. pygpt_net/core/realtime/shared/loop.py +64 -0
  51. pygpt_net/core/realtime/shared/session.py +59 -0
  52. pygpt_net/core/realtime/shared/text.py +37 -0
  53. pygpt_net/core/realtime/shared/tools.py +276 -0
  54. pygpt_net/core/realtime/shared/turn.py +38 -0
  55. pygpt_net/core/realtime/shared/types.py +16 -0
  56. pygpt_net/core/realtime/worker.py +164 -0
  57. pygpt_net/core/types/__init__.py +1 -0
  58. pygpt_net/core/types/image.py +48 -0
  59. pygpt_net/data/config/config.json +10 -4
  60. pygpt_net/data/config/models.json +149 -103
  61. pygpt_net/data/config/settings.json +50 -0
  62. pygpt_net/data/locale/locale.de.ini +5 -5
  63. pygpt_net/data/locale/locale.en.ini +19 -13
  64. pygpt_net/data/locale/locale.es.ini +5 -5
  65. pygpt_net/data/locale/locale.fr.ini +5 -5
  66. pygpt_net/data/locale/locale.it.ini +5 -5
  67. pygpt_net/data/locale/locale.pl.ini +5 -5
  68. pygpt_net/data/locale/locale.uk.ini +5 -5
  69. pygpt_net/data/locale/locale.zh.ini +1 -1
  70. pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
  71. pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
  72. pygpt_net/plugin/audio_input/plugin.py +37 -4
  73. pygpt_net/plugin/audio_input/simple.py +57 -8
  74. pygpt_net/plugin/cmd_files/worker.py +3 -0
  75. pygpt_net/provider/api/google/__init__.py +39 -6
  76. pygpt_net/provider/api/google/audio.py +8 -1
  77. pygpt_net/provider/api/google/chat.py +45 -6
  78. pygpt_net/provider/api/google/image.py +226 -86
  79. pygpt_net/provider/api/google/realtime/__init__.py +12 -0
  80. pygpt_net/provider/api/google/realtime/client.py +1945 -0
  81. pygpt_net/provider/api/google/realtime/realtime.py +186 -0
  82. pygpt_net/provider/api/openai/__init__.py +22 -2
  83. pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
  84. pygpt_net/provider/api/openai/realtime/client.py +1828 -0
  85. pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
  86. pygpt_net/provider/audio_input/google_genai.py +103 -0
  87. pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
  88. pygpt_net/provider/audio_output/google_tts.py +0 -12
  89. pygpt_net/provider/audio_output/openai_tts.py +8 -5
  90. pygpt_net/provider/core/config/patch.py +15 -0
  91. pygpt_net/provider/core/model/patch.py +11 -0
  92. pygpt_net/provider/llms/google.py +8 -9
  93. pygpt_net/ui/layout/toolbox/footer.py +16 -0
  94. pygpt_net/ui/layout/toolbox/image.py +5 -0
  95. pygpt_net/ui/widget/option/combo.py +15 -1
  96. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +26 -14
  97. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +100 -62
  98. pygpt_net/core/audio/backend/pyaudio.py +0 -554
  99. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
  100. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
  101. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0
pygpt_net/core/audio/backend/{native.py → native/native.py}

@@ -6,21 +6,33 @@
 # GitHub: https://github.com/szczyglis-dev/py-gpt #
 # MIT License #
 # Created By : Marcin Szczygliński #
-# Updated Date: 2025.08.27 07:00:00 #
+# Updated Date: 2025.08.31 04:00:00 #
 # ================================================== #

+from typing import Optional
 from typing import List, Tuple

 from bs4 import UnicodeDammit
-import os
+
 import time
 import numpy as np
 import wave

-from PySide6.QtMultimedia import QMediaPlayer, QAudioOutput, QMediaDevices, QAudioFormat, QAudioSource
-from PySide6.QtCore import QTimer, QObject, QUrl
-from pydub import AudioSegment
+from PySide6.QtMultimedia import QMediaDevices, QAudioFormat, QAudioSource, QAudio
+from PySide6.QtCore import QTimer, QObject, QLoggingCategory
+
+from pygpt_net.core.events import RealtimeEvent

+from .realtime import RealtimeSession
+from ..shared import (
+    qaudio_dtype,
+    qaudio_norm_factor,
+    qaudio_to_s16le,
+    convert_s16_pcm,
+    build_rt_input_delta_event,
+    build_output_volume_event,
+)
+from .player import NativePlayer

 class NativeBackend(QObject):

@@ -72,6 +84,23 @@ class NativeBackend(QObject):
         self._dtype = None
         self._norm = None

+        self._rt_session: Optional[RealtimeSession] = None
+        self._rt_signals = None  # set by core.audio.output on initialize()
+
+        # dedicated player wrapper (file playback + envelope metering)
+        self._player = NativePlayer(window=self.window, chunk_ms=self.chunk_ms)
+
+        # Reduce WASAPI debug spam on Windows-like backends (non-invasive).
+        try:
+            QLoggingCategory.setFilterRules(
+                "qt.multimedia.wasapi.debug=false\n"
+                "qt.multimedia.audio.debug=false\n"
+                "qt.multimedia.wasapi.info=false\n"
+                "qt.multimedia.audio.info=false"
+            )
+        except Exception:
+            pass
+
     def init(self):
         """
         Initialize audio input backend.
@@ -167,13 +196,19 @@ class NativeBackend(QObject):
             if self.audio_io_device is not None:
                 self.audio_io_device.readyRead.disconnect(self.process_audio_input)
         except (TypeError, RuntimeError):
-            # ignore if already disconnected or device gone ---
+            # ignore if already disconnected or device gone
             pass

-        self.audio_source.stop()
+        try:
+            self.audio_source.stop()
+        except Exception:
+            pass
         self.audio_source = None
         self.audio_io_device = None

+        # Emit final input chunk marker for realtime consumers
+        self._emit_rt_input_delta(b"", final=True)
+
         # Save frames to file (if any)
         if self.frames:
             self.save_audio_file(self.path)
@@ -181,7 +216,7 @@ class NativeBackend(QObject):
         else:
             print("No audio data recorded")

-        # reset input volume on stop to visually indicate end of recording ---
+        # reset input volume on stop to visually indicate end of recording
         self.reset_audio_level()

         return result
@@ -245,6 +280,12 @@ class NativeBackend(QObject):
         desired.setSampleFormat(QAudioFormat.SampleFormat.Int16)
         if device.isFormatSupported(desired):
             audio_format = desired
+        else:
+            # Choose nearest format to avoid silent captures on Windows.
+            try:
+                audio_format = device.nearestFormat(desired)
+            except Exception:
+                pass

         try:
             audio_source = QAudioSource(device, audio_format)
@@ -319,17 +360,34 @@ class NativeBackend(QObject):
         desired.setSampleFormat(QAudioFormat.SampleFormat.Int16)
         if audio_input_device.isFormatSupported(desired):
             audio_format = desired
+        else:
+            # Use nearest supported format to requested (important on Windows/WASAPI).
+            try:
+                near = audio_input_device.nearestFormat(desired)
+                if near is not None:
+                    audio_format = near
+            except Exception:
+                pass

         self.actual_audio_format = audio_format
-        self._dtype = self.get_dtype_from_sample_format(self.actual_audio_format.sampleFormat())
-        self._norm = self.get_normalization_factor(self.actual_audio_format.sampleFormat())
+        self._dtype = qaudio_dtype(self.actual_audio_format.sampleFormat())
+        self._norm = qaudio_norm_factor(self.actual_audio_format.sampleFormat())

         try:
             self.audio_source = QAudioSource(audio_input_device, audio_format)
+
+            # Configure buffer size based on desired latency
             bs = int(audio_format.sampleRate() * audio_format.channelCount() * audio_format.bytesPerSample() * (float(self.latency_ms) / 1000.0))
             if bs < 4096:
                 bs = 4096
             self.audio_source.setBufferSize(bs)
+
+            # reduce notify interval to improve responsiveness
+            try:
+                self.audio_source.setNotifyInterval(max(5, min(50, int(self.latency_ms))))
+            except Exception:
+                pass
+
         except Exception as e:
             self.disconnected = True
             print(f"Failed to create audio source: {e}")
@@ -349,11 +407,10 @@ class NativeBackend(QObject):

     def process_audio_input(self):
         """Process incoming audio data"""
-        # guard against late calls after stop or missing device ---
+        # guard against late calls after stop or missing device
         if not self._is_recording or self.audio_io_device is None:
             return

-        # add seconds to stop timer
         data = self.audio_io_device.readAll()
         if data.isEmpty():
             return
@@ -366,8 +423,8 @@ class NativeBackend(QObject):

         # Determine the correct dtype and normalization factor
         sample_format = self.actual_audio_format.sampleFormat()
-        dtype = self._dtype if self._dtype is not None else self.get_dtype_from_sample_format(sample_format)
-        normalization_factor = self._norm if self._norm is not None else self.get_normalization_factor(sample_format)
+        dtype = self._dtype if self._dtype is not None else qaudio_dtype(sample_format)
+        normalization_factor = self._norm if self._norm is not None else qaudio_norm_factor(sample_format)

         # Convert bytes to NumPy array of the appropriate type
         samples = np.frombuffer(data_bytes, dtype=dtype)
@@ -394,6 +451,15 @@ class NativeBackend(QObject):
         # Update the level bar widget
         self.update_audio_level(level_percent)

+        # --- emit realtime input delta (PCM16 LE) ---
+        # Always standardize to Int16 for provider compatibility; do not resample here.
+        try:
+            s16 = qaudio_to_s16le(data_bytes, sample_format)
+            self._emit_rt_input_delta(s16, final=False)
+        except Exception:
+            # avoid interrupting UI/recording on conversion issues
+            self._emit_rt_input_delta(data_bytes, final=False)
+
         # Handle loop recording
         if self.loop and self.stop_callback is not None:
             stop_interval = int(self.window.core.config.get('audio.input.stop_interval', 10))
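The qaudio_to_s16le helper used above lives in the new shared module and is not shown in this diff; a minimal sketch of the kind of Float-to-Int16 conversion it presumably performs (an assumption for illustration, not the package's actual implementation):

import numpy as np

def float32_to_s16le(raw: bytes) -> bytes:
    # illustrative only: clamp float samples to [-1.0, 1.0] and scale to little-endian int16
    samples = np.frombuffer(raw, dtype=np.float32)
    clipped = np.clip(samples, -1.0, 1.0)
    return (clipped * 32767.0).astype('<i2').tobytes()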
@@ -450,12 +516,14 @@ class NativeBackend(QObject):
         else:
             raise ValueError("Unsupported sample format")

-        wf = wave.open(filename, 'wb')
-        wf.setnchannels(channels)
-        wf.setsampwidth(sample_size)
-        wf.setframerate(frame_rate)
-        wf.writeframes(out_bytes)
-        wf.close()
+        try:
+            with wave.open(filename, 'wb') as wf:
+                wf.setnchannels(channels)
+                wf.setsampwidth(sample_size)
+                wf.setframerate(frame_rate)
+                wf.writeframes(out_bytes)
+        except:
+            pass

     def get_dtype_from_sample_format(self, sample_format):
         """
@@ -463,16 +531,7 @@ class NativeBackend(QObject):

         :param sample_format: QAudioFormat.SampleFormat
         """
-        if sample_format == QAudioFormat.SampleFormat.UInt8:
-            return np.uint8
-        elif sample_format == QAudioFormat.SampleFormat.Int16:
-            return np.int16
-        elif sample_format == QAudioFormat.SampleFormat.Int32:
-            return np.int32
-        elif sample_format == QAudioFormat.SampleFormat.Float:
-            return np.float32
-        else:
-            raise ValueError("Unsupported sample format")
+        return qaudio_dtype(sample_format)

     def get_normalization_factor(self, sample_format):
         """
@@ -480,16 +539,7 @@ class NativeBackend(QObject):

         :param sample_format: QAudioFormat.SampleFormat
         """
-        if sample_format == QAudioFormat.SampleFormat.UInt8:
-            return 255.0
-        elif sample_format == QAudioFormat.SampleFormat.Int16:
-            return 32768.0
-        elif sample_format == QAudioFormat.SampleFormat.Int32:
-            return float(2 ** 31)
-        elif sample_format == QAudioFormat.SampleFormat.Float:
-            return 1.0
-        else:
-            raise ValueError("Unsupported sample format")
+        return qaudio_norm_factor(sample_format)

     def play_after(
             self,
@@ -507,69 +557,19 @@ class NativeBackend(QObject):
         :param signals: Signals to emit on playback
         :return: True if started
         """
-        self.audio_output = QAudioOutput()
-        self.audio_output.setVolume(1.0)
-
-        devices = QMediaDevices.audioOutputs()
-        if devices:
-            try:
-                num_device = int(self.window.core.config.get('audio.output.device', 0))
-            except Exception:
-                num_device = 0
-            selected_device = devices[num_device] if num_device < len(devices) else devices[0]
-            self.audio_output.setDevice(selected_device)
-
-        if self.AUTO_CONVERT_TO_WAV:
-            if audio_file.lower().endswith('.mp3'):
-                tmp_dir = self.window.core.audio.get_cache_dir()
-                base_name = os.path.splitext(os.path.basename(audio_file))[0]
-                dst_file = os.path.join(tmp_dir, "_" + base_name + ".wav")
-                wav_file = self.window.core.audio.mp3_to_wav(audio_file, dst_file)
-                if wav_file:
-                    audio_file = wav_file
-
-        def check_stop():
-            if stopped():
-                self.player.stop()
-                self.stop_timers()
-                signals.volume_changed.emit(0)
-            else:
-                if self.player:
-                    if self.player.playbackState() == QMediaPlayer.StoppedState:
-                        self.player.stop()
-                        self.stop_timers()
-                        signals.volume_changed.emit(0)
-
-        self.envelope = self.calculate_envelope(audio_file, self.chunk_ms)
-        self.player = QMediaPlayer()
-        self.player.setAudioOutput(self.audio_output)
-        self.player.setSource(QUrl.fromLocalFile(audio_file))
-        self.player.play()
-
-        self.playback_timer = QTimer()
-        self.playback_timer.setInterval(100)
-        self.playback_timer.timeout.connect(check_stop)
-        self.volume_timer = QTimer(self)
-        self.volume_timer.setInterval(10)  # every 100 ms
-        self.volume_timer.timeout.connect(
-            lambda: self.update_volume(signals)
+        # delegate to player wrapper to keep logic isolated
+        self._player.play_after(
+            audio_file=audio_file,
+            event_name=event_name,
+            stopped=stopped,
+            signals=signals,
+            auto_convert_to_wav=self.AUTO_CONVERT_TO_WAV,
+            select_output_device=self._select_output_device,
         )

-        self.playback_timer.start()
-        self.volume_timer.start()
-        signals.volume_changed.emit(0)
-        signals.playback.emit(event_name)
-
     def stop_timers(self):
-        """
-        Stop playback timers.
-        """
-        if self.playback_timer is not None:
-            self.playback_timer.stop()
-            self.playback_timer = None
-        if self.volume_timer is not None:
-            self.volume_timer.stop()
-            self.volume_timer = None
+        """Stop playback timers."""
+        self._player.stop_timers()

     def play(
             self,
@@ -596,9 +596,9 @@ class NativeBackend(QObject):
         :param signals: Signals object to emit stop event.
         :return: True if stopped successfully.
         """
-        if self.player is not None:
-            self.player.stop()
-            self.stop_timers()
+        if self._rt_session:
+            self._rt_session.stop()
+        self._player.stop(signals=signals)
         return False

     def calculate_envelope(
@@ -611,23 +611,10 @@ class NativeBackend(QObject):

         :param audio_file: Path to the audio file
         :param chunk_ms: Size of each chunk in milliseconds
+        :return: List of volume levels (0-100) for each chunk
         """
-        audio = AudioSegment.from_file(audio_file)
-        max_amplitude = 32767
-        envelope = []
-
-        for ms in range(0, len(audio), chunk_ms):
-            chunk = audio[ms:ms + chunk_ms]
-            rms = chunk.rms
-            if rms > 0:
-                db = 20 * np.log10(rms / max_amplitude)
-            else:
-                db = -60
-            db = max(-60, min(0, db))
-            volume = ((db + 60) / 60) * 100
-            envelope.append(volume)
-
-        return envelope
+        from ..shared import compute_envelope_from_file
+        return compute_envelope_from_file(audio_file, chunk_ms)

     def update_volume(self, signals=None):
         """
@@ -635,13 +622,7 @@ class NativeBackend(QObject):

         :param signals: Signals object to emit volume changed event.
         """
-        pos = self.player.position()
-        index = int(pos / self.chunk_ms)
-        if index < len(self.envelope):
-            volume = self.envelope[index]
-        else:
-            volume = 0
-        signals.volume_changed.emit(volume)
+        self._player.update_volume(signals)

     def get_input_devices(self) -> List[Tuple[int, str]]:
         """
@@ -695,4 +676,322 @@ class NativeBackend(QObject):
             index = devices.index(default_device)
         except ValueError:
             index = None
-        return index, None
+        return index, None
+
+    # ---- REALTIME ----
+
+    def _select_output_device(self):
+        """
+        Select the audio output device based on configuration.
+
+        :return: QAudioDevice
+        """
+        devices = QMediaDevices.audioOutputs()
+        if devices:
+            try:
+                num_device = int(self.window.core.config.get('audio.output.device', 0))
+            except Exception:
+                num_device = 0
+            return devices[num_device] if 0 <= num_device < len(devices) else devices[0]
+        return QMediaDevices.defaultAudioOutput()
+
+    def _sample_format_from_mime(self, mime: Optional[str]) -> QAudioFormat.SampleFormat:
+        """
+        Determine sample format from MIME type.
+
+        :param mime: MIME type string
+        :return: QAudioFormat.SampleFormat
+        """
+        s = (mime or "audio/pcm").lower()
+        if "float" in s or "f32" in s:
+            return QAudioFormat.SampleFormat.Float
+        if "pcm" in s:
+            if "32" in s or "s32" in s or "int32" in s:
+                return QAudioFormat.SampleFormat.Int32
+            if "8" in s or "u8" in s:
+                return QAudioFormat.SampleFormat.UInt8
+            return QAudioFormat.SampleFormat.Int16
+        if "l16" in s:
+            return QAudioFormat.SampleFormat.Int16
+        return QAudioFormat.SampleFormat.Int16
+
+    def _make_format(
+            self,
+            rate: int,
+            channels: int,
+            sample_format: QAudioFormat.SampleFormat
+    ) -> QAudioFormat:
+        """
+        Create QAudioFormat from parameters.
+
+        :param rate: Sample rate
+        :param channels: Number of channels
+        :param sample_format: Sample format
+        :return: QAudioFormat
+        """
+        fmt = QAudioFormat()
+        fmt.setSampleRate(int(rate))
+        fmt.setChannelCount(int(channels))
+        fmt.setSampleFormat(sample_format)
+        return fmt
+
+    def _emit_output_volume(self, value: int) -> None:
+        """
+        Emit output volume change event.
+
+        :param value: Volume level (0-100)
+        """
+        if not self._rt_signals:
+            return
+        self._rt_signals.response.emit(build_output_volume_event(int(value)))
+
+    def _ensure_rt_session(
+            self,
+            mime: str,
+            rate: Optional[int],
+            channels: Optional[int]
+    ) -> RealtimeSession:
+        """
+        Ensure a realtime audio playback session exists with the device's preferred (or nearest) format.
+        Keep it simple: prefer Int16, reuse session if format unchanged.
+
+        :param mime: MIME type of the audio data
+        :param rate: Sample rate of the audio data
+        :param channels: Number of channels in the audio data
+        :return: RealtimeSession
+        """
+        device = self._select_output_device()
+
+        # NOTE: start from device preferred format and coerce to Int16 if supported
+        fmt = device.preferredFormat()
+        try:
+            if fmt.sampleFormat() != QAudioFormat.SampleFormat.Int16:
+                test = QAudioFormat()
+                test.setSampleRate(fmt.sampleRate())
+                test.setChannelCount(fmt.channelCount())
+                test.setSampleFormat(QAudioFormat.SampleFormat.Int16)
+                if device.isFormatSupported(test):
+                    fmt = test
+                else:
+                    try:
+                        fmt = device.nearestFormat(test)
+                    except Exception:
+                        pass
+        except Exception:
+            pass
+
+        # reuse current session if same format
+        if self._rt_session is not None:
+            try:
+                ef = self._rt_session.format
+                if (ef.sampleRate() == fmt.sampleRate()
+                        and ef.channelCount() == fmt.channelCount()
+                        and ef.sampleFormat() == fmt.sampleFormat()):
+                    return self._rt_session
+            except Exception:
+                pass
+            # NOTE: hard stop old one (we keep things simple)
+            try:
+                self._rt_session.stop()
+            except Exception:
+                pass
+            self._rt_session = None
+
+        session = RealtimeSession(
+            device=device,
+            fmt=fmt,
+            parent=self,
+            volume_emitter=self._emit_output_volume
+        )
+        # NOTE: when device actually stops (buffer empty), inform UI
+        session.on_stopped = lambda: (
+            self._rt_signals and self._rt_signals.response.emit(
+                RealtimeEvent(RealtimeEvent.RT_OUTPUT_AUDIO_END, {"source": "device"})
+            ),
+            setattr(self, "_rt_session", None)
+        )
+        self._rt_session = session
+        return session
+
+    def _convert_pcm_for_output(
+            self,
+            data: bytes,
+            in_rate: int,
+            in_channels: int,
+            out_fmt: QAudioFormat
+    ) -> bytes:
+        """
+        Minimal PCM converter to device format:
+        - assumes input is S16LE,
+        - converts channels (mono<->stereo) and sample rate,
+        - keeps Int16; if device uses UInt8/Float, adapts sample width and bias.
+
+        :param data: Input PCM data (assumed S16LE)
+        :param in_rate: Input sample rate
+        :param in_channels: Input number of channels
+        :param out_fmt: Desired output QAudioFormat
+        :return: Converted PCM data
+        """
+        if not data:
+            return b""
+
+        try:
+            out_rate = int(out_fmt.sampleRate()) or in_rate
+            out_ch = int(out_fmt.channelCount()) or in_channels
+            out_sw = int(out_fmt.bytesPerSample()) or 2
+            out_sf = out_fmt.sampleFormat()
+
+            # pick string flag for format conversion
+            if out_sf == QAudioFormat.SampleFormat.UInt8 and out_sw == 1:
+                flag = "u8"
+            elif out_sf == QAudioFormat.SampleFormat.Float and out_sw == 4:
+                flag = "f32"
+            else:
+                flag = "s16"
+
+            return convert_s16_pcm(
+                data,
+                in_rate=in_rate,
+                in_channels=in_channels,
+                out_rate=out_rate,
+                out_channels=out_ch,
+                out_width=out_sw,
+                out_format=flag
+            )
+        except Exception:
+            return data
+
+    def stop_realtime(self):
+        """Stop realtime audio playback session (simple/friendly)."""
+        s = self._rt_session
+        if s is not None:
+            try:
+                s.mark_final()  # NOTE: add small tail and let it finish
+            except Exception:
+                try:
+                    s.stop()
+                except Exception:
+                    pass
+
+    def set_rt_signals(self, signals) -> None:
+        """
+        Set signals object for realtime events.
+
+        :param signals: Signals object
+        """
+        self._rt_signals = signals
+
+    def set_signals(self, signals) -> None:
+        """
+        Alias to set_rt_signals to keep backend API consistent.
+
+        :param signals: Signals object
+        """
+        self.set_rt_signals(signals)
+
+    def handle_realtime(self, payload: dict) -> None:
+        """
+        Handle realtime audio playback payload.
+
+        Expected payload keys:
+        - data: bytes
+        - mime: str (e.g. "audio/pcm", "audio/l16", etc.)
+        - rate: int (sample rate)
+        - channels: int (number of channels)
+        - final: bool (True if final chunk)
+        If mime is not PCM/L16, the chunk is ignored.
+
+        :param payload: Payload dictionary
+        """
+        try:
+            data: bytes = payload.get("data", b"") or b""
+            mime: str = (payload.get("mime", "audio/pcm") or "audio/pcm").lower()
+            rate = int(payload.get("rate", 24000) or 24000)
+            channels = int(payload.get("channels", 1) or 1)
+            final = bool(payload.get("final", False))
+
+            # only raw PCM/L16
+            if ("pcm" not in mime) and ("l16" not in mime):
+                if final and self._rt_session is not None:
+                    try:
+                        self._rt_session.mark_final()
+                    except Exception:
+                        pass
+                return
+
+            session = self._ensure_rt_session(mime, rate, channels)
+
+            if data:
+                out_fmt = session.format
+                if (out_fmt.sampleRate() != rate) or (out_fmt.channelCount() != channels) or (
+                        out_fmt.sampleFormat() != QAudioFormat.SampleFormat.Int16):
+                    data = self._convert_pcm_for_output(data, rate, channels, out_fmt)
+                session.feed(data)
+
+            if final:
+                session.mark_final()
+
+        except Exception as e:
+            try:
+                self.window.core.debug.log(f"[audio][native] handle_realtime error: {e}")
+            except Exception:
+                pass
+
+    # ---- REALTIME INPUT ----
+    def _emit_rt_input_delta(self, data: bytes, final: bool) -> None:
+        """
+        Emit RT_INPUT_AUDIO_DELTA with a provider-agnostic payload.
+        Standardizes to PCM16, little-endian, and includes rate/channels.
+
+        :param data: audio data bytes
+        :param final: True if this is the final chunk
+        """
+        if not self._rt_signals:
+            return
+
+        # Resolve current format safely
+        try:
+            rate = int(self.actual_audio_format.sampleRate())
+            channels = int(self.actual_audio_format.channelCount())
+        except Exception:
+            rate = int(self.window.core.config.get('audio.input.rate', 44100))
+            channels = int(self.window.core.config.get('audio.input.channels', 1))
+
+        event = build_rt_input_delta_event(rate=rate, channels=channels, data=data or b"", final=bool(final))
+        try:
+            self._rt_signals.response.emit(event)
+        except Exception:
+            QTimer.singleShot(0, lambda: self._rt_signals.response.emit(event))
+
+    def _convert_input_to_int16(self, raw: bytes, sample_format) -> bytes:
+        """
+        Convert arbitrary QAudioFormat sample format to PCM16 little-endian.
+        Does not change sample rate or channel count.
+
+        :param raw: input audio data bytes
+        :param sample_format: QAudioFormat.SampleFormat of the input data
+        :return: converted audio data bytes in PCM16 LE
+        """
+        return qaudio_to_s16le(raw, sample_format)
+
+    # ---- internals (diagnostics) ----
+    def _on_audio_state_changed(self, state: int):
+        """
+        Diagnostics for input device state changes. Keep safe across Qt builds by using int.
+        """
+        return
+        try:
+            # QAudio.State.StoppedState -> typically 0; compare robustly
+            try:
+                stopped_val = int(QAudio.State.StoppedState)
+            except Exception:
+                try:
+                    stopped_val = int(QAudio.StoppedState)
+                except Exception:
+                    stopped_val = 0
+            if int(state) == stopped_val and self.audio_source is not None:
+                err = self.audio_source.error()
+                if err:
+                    print(f"[native][input] QAudioSource stopped with error: {err}")
+        except Exception:
+            pass
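Based on the handle_realtime() docstring above, a caller streams raw PCM16 chunks and then sends a final marker; a minimal usage sketch (backend and pcm16_chunk are illustrative names, not identifiers from this diff):

# feed one S16LE chunk, then signal the end of the response audio
backend.handle_realtime({
    "data": pcm16_chunk,   # bytes, PCM16 little-endian
    "mime": "audio/pcm",   # non-PCM/L16 mime types are ignored
    "rate": 24000,
    "channels": 1,
    "final": False,
})
backend.handle_realtime({"data": b"", "mime": "audio/pcm", "rate": 24000, "channels": 1, "final": True})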