PyPI - pygpt-net - Versions diffs - 2.6.30__py3-none-any.whl → 2.6.32__py3-none-any.whl - Mend

pygpt-net 2.6.30__py3-none-any.whl → 2.6.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (122) hide show

pygpt_net/CHANGELOG.txt +15 -0
pygpt_net/__init__.py +3 -3
pygpt_net/app.py +7 -1
pygpt_net/app_core.py +3 -1
pygpt_net/config.py +3 -1
pygpt_net/controller/__init__.py +9 -2
pygpt_net/controller/audio/audio.py +38 -1
pygpt_net/controller/audio/ui.py +2 -2
pygpt_net/controller/chat/audio.py +1 -8
pygpt_net/controller/chat/common.py +23 -62
pygpt_net/controller/chat/handler/__init__.py +0 -0
pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
pygpt_net/controller/chat/output.py +8 -3
pygpt_net/controller/chat/stream.py +3 -1071
pygpt_net/controller/chat/text.py +3 -2
pygpt_net/controller/kernel/kernel.py +11 -3
pygpt_net/controller/kernel/reply.py +5 -1
pygpt_net/controller/lang/custom.py +2 -2
pygpt_net/controller/media/__init__.py +12 -0
pygpt_net/controller/media/media.py +115 -0
pygpt_net/controller/realtime/__init__.py +12 -0
pygpt_net/controller/realtime/manager.py +53 -0
pygpt_net/controller/realtime/realtime.py +293 -0
pygpt_net/controller/ui/mode.py +23 -2
pygpt_net/controller/ui/ui.py +19 -1
pygpt_net/core/audio/audio.py +6 -1
pygpt_net/core/audio/backend/native/__init__.py +12 -0
pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
pygpt_net/core/audio/backend/native/player.py +139 -0
pygpt_net/core/audio/backend/native/realtime.py +250 -0
pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
pygpt_net/core/audio/backend/pyaudio/realtime.py +312 -0
pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
pygpt_net/core/audio/backend/shared/__init__.py +38 -0
pygpt_net/core/audio/backend/shared/conversions.py +211 -0
pygpt_net/core/audio/backend/shared/envelope.py +38 -0
pygpt_net/core/audio/backend/shared/player.py +137 -0
pygpt_net/core/audio/backend/shared/rt.py +52 -0
pygpt_net/core/audio/capture.py +5 -0
pygpt_net/core/audio/output.py +14 -2
pygpt_net/core/audio/whisper.py +6 -2
pygpt_net/core/bridge/bridge.py +2 -1
pygpt_net/core/bridge/worker.py +4 -1
pygpt_net/core/dispatcher/dispatcher.py +37 -1
pygpt_net/core/events/__init__.py +2 -1
pygpt_net/core/events/realtime.py +55 -0
pygpt_net/core/image/image.py +56 -5
pygpt_net/core/realtime/__init__.py +0 -0
pygpt_net/core/realtime/options.py +87 -0
pygpt_net/core/realtime/shared/__init__.py +0 -0
pygpt_net/core/realtime/shared/audio.py +213 -0
pygpt_net/core/realtime/shared/loop.py +64 -0
pygpt_net/core/realtime/shared/session.py +59 -0
pygpt_net/core/realtime/shared/text.py +37 -0
pygpt_net/core/realtime/shared/tools.py +276 -0
pygpt_net/core/realtime/shared/turn.py +38 -0
pygpt_net/core/realtime/shared/types.py +16 -0
pygpt_net/core/realtime/worker.py +160 -0
pygpt_net/core/render/web/body.py +24 -3
pygpt_net/core/text/utils.py +54 -2
pygpt_net/core/types/__init__.py +1 -0
pygpt_net/core/types/image.py +54 -0
pygpt_net/core/video/__init__.py +12 -0
pygpt_net/core/video/video.py +290 -0
pygpt_net/data/config/config.json +26 -5
pygpt_net/data/config/models.json +221 -103
pygpt_net/data/config/settings.json +244 -6
pygpt_net/data/css/web-blocks.css +6 -0
pygpt_net/data/css/web-chatgpt.css +6 -0
pygpt_net/data/css/web-chatgpt_wide.css +6 -0
pygpt_net/data/locale/locale.de.ini +35 -7
pygpt_net/data/locale/locale.en.ini +56 -17
pygpt_net/data/locale/locale.es.ini +35 -7
pygpt_net/data/locale/locale.fr.ini +35 -7
pygpt_net/data/locale/locale.it.ini +35 -7
pygpt_net/data/locale/locale.pl.ini +38 -7
pygpt_net/data/locale/locale.uk.ini +35 -7
pygpt_net/data/locale/locale.zh.ini +31 -3
pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
pygpt_net/data/locale/plugin.cmd_web.en.ini +8 -0
pygpt_net/item/model.py +22 -1
pygpt_net/plugin/audio_input/plugin.py +37 -4
pygpt_net/plugin/audio_input/simple.py +57 -8
pygpt_net/plugin/cmd_files/worker.py +3 -0
pygpt_net/provider/api/google/__init__.py +76 -7
pygpt_net/provider/api/google/audio.py +8 -1
pygpt_net/provider/api/google/chat.py +45 -6
pygpt_net/provider/api/google/image.py +226 -86
pygpt_net/provider/api/google/realtime/__init__.py +12 -0
pygpt_net/provider/api/google/realtime/client.py +1945 -0
pygpt_net/provider/api/google/realtime/realtime.py +186 -0
pygpt_net/provider/api/google/video.py +364 -0
pygpt_net/provider/api/openai/__init__.py +22 -2
pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
pygpt_net/provider/api/openai/realtime/client.py +1828 -0
pygpt_net/provider/api/openai/realtime/realtime.py +193 -0
pygpt_net/provider/audio_input/google_genai.py +103 -0
pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
pygpt_net/provider/audio_output/google_tts.py +0 -12
pygpt_net/provider/audio_output/openai_tts.py +8 -5
pygpt_net/provider/core/config/patch.py +241 -178
pygpt_net/provider/core/model/patch.py +28 -2
pygpt_net/provider/llms/google.py +8 -9
pygpt_net/provider/web/duckduck_search.py +212 -0
pygpt_net/ui/layout/toolbox/audio.py +55 -0
pygpt_net/ui/layout/toolbox/footer.py +14 -42
pygpt_net/ui/layout/toolbox/image.py +7 -13
pygpt_net/ui/layout/toolbox/raw.py +52 -0
pygpt_net/ui/layout/toolbox/split.py +48 -0
pygpt_net/ui/layout/toolbox/toolbox.py +8 -8
pygpt_net/ui/layout/toolbox/video.py +49 -0
pygpt_net/ui/widget/option/combo.py +15 -1
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/METADATA +46 -22
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/RECORD +121 -73
pygpt_net/core/audio/backend/pyaudio.py +0 -554
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/LICENSE +0 -0
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/WHEEL +0 -0
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/entry_points.txt +0 -0

pygpt_net/core/audio/backend/{native.py → native/native.py} RENAMED Viewed

@@ -6,21 +6,33 @@
 # GitHub:  https://github.com/szczyglis-dev/py-gpt   #
 # MIT License                                        #
 # Created By  : Marcin Szczygliński                  #
-# Updated Date: 2025.08.27 07:00:00                  #
+# Updated Date: 2025.08.31 04:00:00                  #
 # ================================================== #
+from typing import Optional
 from typing import List, Tuple
 from bs4 import UnicodeDammit
-import os
 import time
 import numpy as np
 import wave
-from PySide6.QtMultimedia import QMediaPlayer, QAudioOutput, QMediaDevices, QAudioFormat, QAudioSource
-from PySide6.QtCore import QTimer, QObject, QUrl
-from pydub import AudioSegment
+from PySide6.QtMultimedia import QMediaDevices, QAudioFormat, QAudioSource, QAudio
+from PySide6.QtCore import QTimer, QObject, QLoggingCategory
+from pygpt_net.core.events import RealtimeEvent
+from .realtime import RealtimeSession
+from ..shared import (
+    qaudio_dtype,
+    qaudio_norm_factor,
+    qaudio_to_s16le,
+    convert_s16_pcm,
+    build_rt_input_delta_event,
+    build_output_volume_event,
+)
+from .player import NativePlayer
 class NativeBackend(QObject):
@@ -72,6 +84,23 @@ class NativeBackend(QObject):
         self._dtype = None
         self._norm = None
+        self._rt_session: Optional[RealtimeSession] = None
+        self._rt_signals = None  # set by core.audio.output on initialize()
+        # dedicated player wrapper (file playback + envelope metering)
+        self._player = NativePlayer(window=self.window, chunk_ms=self.chunk_ms)
+        # Reduce WASAPI debug spam on Windows-like backends (non-invasive).
+        try:
+            QLoggingCategory.setFilterRules(
+                "qt.multimedia.wasapi.debug=false\n"
+                "qt.multimedia.audio.debug=false\n"
+                "qt.multimedia.wasapi.info=false\n"
+                "qt.multimedia.audio.info=false"
+            )
+        except Exception:
+            pass
     def init(self):
         """
         Initialize audio input backend.
@@ -167,13 +196,19 @@ class NativeBackend(QObject):
                 if self.audio_io_device is not None:
                     self.audio_io_device.readyRead.disconnect(self.process_audio_input)
             except (TypeError, RuntimeError):
-                # ignore if already disconnected or device gone ---
+                # ignore if already disconnected or device gone
                 pass
-            self.audio_source.stop()
+            try:
+                self.audio_source.stop()
+            except Exception:
+                pass
             self.audio_source = None
             self.audio_io_device = None
+            # Emit final input chunk marker for realtime consumers
+            self._emit_rt_input_delta(b"", final=True)
             # Save frames to file (if any)
             if self.frames:
                 self.save_audio_file(self.path)
@@ -181,7 +216,7 @@ class NativeBackend(QObject):
             else:
                 print("No audio data recorded")
-        # reset input volume on stop to visually indicate end of recording ---
+        # reset input volume on stop to visually indicate end of recording
         self.reset_audio_level()
         return result
@@ -245,6 +280,12 @@ class NativeBackend(QObject):
         desired.setSampleFormat(QAudioFormat.SampleFormat.Int16)
         if device.isFormatSupported(desired):
             audio_format = desired
+        else:
+            # Choose nearest format to avoid silent captures on Windows.
+            try:
+                audio_format = device.nearestFormat(desired)
+            except Exception:
+                pass
         try:
             audio_source = QAudioSource(device, audio_format)
@@ -319,17 +360,34 @@ class NativeBackend(QObject):
         desired.setSampleFormat(QAudioFormat.SampleFormat.Int16)
         if audio_input_device.isFormatSupported(desired):
             audio_format = desired
+        else:
+            # Use nearest supported format to requested (important on Windows/WASAPI).
+            try:
+                near = audio_input_device.nearestFormat(desired)
+                if near is not None:
+                    audio_format = near
+            except Exception:
+                pass
         self.actual_audio_format = audio_format
-        self._dtype = self.get_dtype_from_sample_format(self.actual_audio_format.sampleFormat())
-        self._norm = self.get_normalization_factor(self.actual_audio_format.sampleFormat())
+        self._dtype = qaudio_dtype(self.actual_audio_format.sampleFormat())
+        self._norm = qaudio_norm_factor(self.actual_audio_format.sampleFormat())
         try:
             self.audio_source = QAudioSource(audio_input_device, audio_format)
+            # Configure buffer size based on desired latency
             bs = int(audio_format.sampleRate() * audio_format.channelCount() * audio_format.bytesPerSample() * (float(self.latency_ms) / 1000.0))
             if bs < 4096:
                 bs = 4096
             self.audio_source.setBufferSize(bs)
+            # reduce notify interval to improve responsiveness
+            try:
+                self.audio_source.setNotifyInterval(max(5, min(50, int(self.latency_ms))))
+            except Exception:
+                pass
         except Exception as e:
             self.disconnected = True
             print(f"Failed to create audio source: {e}")
@@ -349,11 +407,10 @@ class NativeBackend(QObject):
     def process_audio_input(self):
         """Process incoming audio data"""
-        # guard against late calls after stop or missing device ---
+        # guard against late calls after stop or missing device
         if not self._is_recording or self.audio_io_device is None:
             return
-        # add seconds to stop timer
         data = self.audio_io_device.readAll()
         if data.isEmpty():
             return
@@ -366,8 +423,8 @@ class NativeBackend(QObject):
         # Determine the correct dtype and normalization factor
         sample_format = self.actual_audio_format.sampleFormat()
-        dtype = self._dtype if self._dtype is not None else self.get_dtype_from_sample_format(sample_format)
-        normalization_factor = self._norm if self._norm is not None else self.get_normalization_factor(sample_format)
+        dtype = self._dtype if self._dtype is not None else qaudio_dtype(sample_format)
+        normalization_factor = self._norm if self._norm is not None else qaudio_norm_factor(sample_format)
         # Convert bytes to NumPy array of the appropriate type
         samples = np.frombuffer(data_bytes, dtype=dtype)
@@ -394,6 +451,15 @@ class NativeBackend(QObject):
         # Update the level bar widget
         self.update_audio_level(level_percent)
+        # --- emit realtime input delta (PCM16 LE) ---
+        # Always standardize to Int16 for provider compatibility; do not resample here.
+        try:
+            s16 = qaudio_to_s16le(data_bytes, sample_format)
+            self._emit_rt_input_delta(s16, final=False)
+        except Exception:
+            # avoid interrupting UI/recording on conversion issues
+            self._emit_rt_input_delta(data_bytes, final=False)
         # Handle loop recording
         if self.loop and self.stop_callback is not None:
             stop_interval = int(self.window.core.config.get('audio.input.stop_interval', 10))
@@ -450,12 +516,14 @@ class NativeBackend(QObject):
         else:
             raise ValueError("Unsupported sample format")
-        wf = wave.open(filename, 'wb')
-        wf.setnchannels(channels)
-        wf.setsampwidth(sample_size)
-        wf.setframerate(frame_rate)
-        wf.writeframes(out_bytes)
-        wf.close()
+        try:
+            with wave.open(filename, 'wb') as wf:
+                wf.setnchannels(channels)
+                wf.setsampwidth(sample_size)
+                wf.setframerate(frame_rate)
+                wf.writeframes(out_bytes)
+        except:
+            pass
     def get_dtype_from_sample_format(self, sample_format):
         """
@@ -463,16 +531,7 @@ class NativeBackend(QObject):
         :param sample_format: QAudioFormat.SampleFormat
         """
-        if sample_format == QAudioFormat.SampleFormat.UInt8:
-            return np.uint8
-        elif sample_format == QAudioFormat.SampleFormat.Int16:
-            return np.int16
-        elif sample_format == QAudioFormat.SampleFormat.Int32:
-            return np.int32
-        elif sample_format == QAudioFormat.SampleFormat.Float:
-            return np.float32
-        else:
-            raise ValueError("Unsupported sample format")
+        return qaudio_dtype(sample_format)
     def get_normalization_factor(self, sample_format):
         """
@@ -480,16 +539,7 @@ class NativeBackend(QObject):
         :param sample_format: QAudioFormat.SampleFormat
         """
-        if sample_format == QAudioFormat.SampleFormat.UInt8:
-            return 255.0
-        elif sample_format == QAudioFormat.SampleFormat.Int16:
-            return 32768.0
-        elif sample_format == QAudioFormat.SampleFormat.Int32:
-            return float(2 ** 31)
-        elif sample_format == QAudioFormat.SampleFormat.Float:
-            return 1.0
-        else:
-            raise ValueError("Unsupported sample format")
+        return qaudio_norm_factor(sample_format)
     def play_after(
             self,
@@ -507,69 +557,19 @@ class NativeBackend(QObject):
         :param signals: Signals to emit on playback
         :return: True if started
         """
-        self.audio_output = QAudioOutput()
-        self.audio_output.setVolume(1.0)
-        devices = QMediaDevices.audioOutputs()
-        if devices:
-            try:
-                num_device = int(self.window.core.config.get('audio.output.device', 0))
-            except Exception:
-                num_device = 0
-            selected_device = devices[num_device] if num_device < len(devices) else devices[0]
-            self.audio_output.setDevice(selected_device)
-        if self.AUTO_CONVERT_TO_WAV:
-            if audio_file.lower().endswith('.mp3'):
-                tmp_dir = self.window.core.audio.get_cache_dir()
-                base_name = os.path.splitext(os.path.basename(audio_file))[0]
-                dst_file = os.path.join(tmp_dir, "_" + base_name + ".wav")
-                wav_file = self.window.core.audio.mp3_to_wav(audio_file, dst_file)
-                if wav_file:
-                    audio_file = wav_file
-        def check_stop():
-            if stopped():
-                self.player.stop()
-                self.stop_timers()
-                signals.volume_changed.emit(0)
-            else:
-                if self.player:
-                    if self.player.playbackState() == QMediaPlayer.StoppedState:
-                        self.player.stop()
-                        self.stop_timers()
-                        signals.volume_changed.emit(0)
-        self.envelope = self.calculate_envelope(audio_file, self.chunk_ms)
-        self.player = QMediaPlayer()
-        self.player.setAudioOutput(self.audio_output)
-        self.player.setSource(QUrl.fromLocalFile(audio_file))
-        self.player.play()
-        self.playback_timer = QTimer()
-        self.playback_timer.setInterval(100)
-        self.playback_timer.timeout.connect(check_stop)
-        self.volume_timer = QTimer(self)
-        self.volume_timer.setInterval(10)  # every 100 ms
-        self.volume_timer.timeout.connect(
-            lambda: self.update_volume(signals)
+        # delegate to player wrapper to keep logic isolated
+        self._player.play_after(
+            audio_file=audio_file,
+            event_name=event_name,
+            stopped=stopped,
+            signals=signals,
+            auto_convert_to_wav=self.AUTO_CONVERT_TO_WAV,
+            select_output_device=self._select_output_device,
         )
-        self.playback_timer.start()
-        self.volume_timer.start()
-        signals.volume_changed.emit(0)
-        signals.playback.emit(event_name)
     def stop_timers(self):
-        """
-        Stop playback timers.
-        """
-        if self.playback_timer is not None:
-            self.playback_timer.stop()
-            self.playback_timer = None
-        if self.volume_timer is not None:
-            self.volume_timer.stop()
-            self.volume_timer = None
+        """Stop playback timers."""
+        self._player.stop_timers()
     def play(
             self,
@@ -596,9 +596,9 @@ class NativeBackend(QObject):
         :param signals: Signals object to emit stop event.
         :return: True if stopped successfully.
         """
-        if self.player is not None:
-            self.player.stop()
-        self.stop_timers()
+        if self._rt_session:
+            self._rt_session.stop()
+        self._player.stop(signals=signals)
         return False
     def calculate_envelope(
@@ -611,23 +611,10 @@ class NativeBackend(QObject):
         :param audio_file: Path to the audio file
         :param chunk_ms: Size of each chunk in milliseconds
+        :return: List of volume levels (0-100) for each chunk
         """
-        audio = AudioSegment.from_file(audio_file)
-        max_amplitude = 32767
-        envelope = []
-        for ms in range(0, len(audio), chunk_ms):
-            chunk = audio[ms:ms + chunk_ms]
-            rms = chunk.rms
-            if rms > 0:
-                db = 20 * np.log10(rms / max_amplitude)
-            else:
-                db = -60
-            db = max(-60, min(0, db))
-            volume = ((db + 60) / 60) * 100
-            envelope.append(volume)
-        return envelope
+        from ..shared import compute_envelope_from_file
+        return compute_envelope_from_file(audio_file, chunk_ms)
     def update_volume(self, signals=None):
         """
@@ -635,13 +622,7 @@ class NativeBackend(QObject):
         :param signals: Signals object to emit volume changed event.
         """
-        pos = self.player.position()
-        index = int(pos / self.chunk_ms)
-        if index < len(self.envelope):
-            volume = self.envelope[index]
-        else:
-            volume = 0
-        signals.volume_changed.emit(volume)
+        self._player.update_volume(signals)
     def get_input_devices(self) -> List[Tuple[int, str]]:
         """
@@ -695,4 +676,322 @@ class NativeBackend(QObject):
             index = devices.index(default_device)
         except ValueError:
             index = None
-        return index, None
+        return index, None
+    # ---- REALTIME ----
+    def _select_output_device(self):
+        """
+        Select the audio output device based on configuration.
+        :return: QAudioDevice
+        """
+        devices = QMediaDevices.audioOutputs()
+        if devices:
+            try:
+                num_device = int(self.window.core.config.get('audio.output.device', 0))
+            except Exception:
+                num_device = 0
+            return devices[num_device] if 0 <= num_device < len(devices) else devices[0]
+        return QMediaDevices.defaultAudioOutput()
+    def _sample_format_from_mime(self, mime: Optional[str]) -> QAudioFormat.SampleFormat:
+        """
+        Determine sample format from MIME type.
+        :param mime: MIME type string
+        :return: QAudioFormat.SampleFormat
+        """
+        s = (mime or "audio/pcm").lower()
+        if "float" in s or "f32" in s:
+            return QAudioFormat.SampleFormat.Float
+        if "pcm" in s:
+            if "32" in s or "s32" in s or "int32" in s:
+                return QAudioFormat.SampleFormat.Int32
+            if "8" in s or "u8" in s:
+                return QAudioFormat.SampleFormat.UInt8
+            return QAudioFormat.SampleFormat.Int16
+        if "l16" in s:
+            return QAudioFormat.SampleFormat.Int16
+        return QAudioFormat.SampleFormat.Int16
+    def _make_format(
+            self,
+            rate: int,
+            channels: int,
+            sample_format: QAudioFormat.SampleFormat
+    ) -> QAudioFormat:
+        """
+        Create QAudioFormat from parameters.
+        :param rate: Sample rate
+        :param channels: Number of channels
+        :param sample_format: Sample format
+        :return: QAudioFormat
+        """
+        fmt = QAudioFormat()
+        fmt.setSampleRate(int(rate))
+        fmt.setChannelCount(int(channels))
+        fmt.setSampleFormat(sample_format)
+        return fmt
+    def _emit_output_volume(self, value: int) -> None:
+        """
+        Emit output volume change event.
+        :param value: Volume level (0-100)
+        """
+        if not self._rt_signals:
+            return
+        self._rt_signals.response.emit(build_output_volume_event(int(value)))
+    def _ensure_rt_session(
+            self,
+            mime: str,
+            rate: Optional[int],
+            channels: Optional[int]
+    ) -> RealtimeSession:
+        """
+        Ensure a realtime audio playback session exists with the device's preferred (or nearest) format.
+        Keep it simple: prefer Int16, reuse session if format unchanged.
+        :param mime: MIME type of the audio data
+        :param rate: Sample rate of the audio data
+        :param channels: Number of channels in the audio data
+        :return: RealtimeSession
+        """
+        device = self._select_output_device()
+        # NOTE: start from device preferred format and coerce to Int16 if supported
+        fmt = device.preferredFormat()
+        try:
+            if fmt.sampleFormat() != QAudioFormat.SampleFormat.Int16:
+                test = QAudioFormat()
+                test.setSampleRate(fmt.sampleRate())
+                test.setChannelCount(fmt.channelCount())
+                test.setSampleFormat(QAudioFormat.SampleFormat.Int16)
+                if device.isFormatSupported(test):
+                    fmt = test
+                else:
+                    try:
+                        fmt = device.nearestFormat(test)
+                    except Exception:
+                        pass
+        except Exception:
+            pass
+        # reuse current session if same format
+        if self._rt_session is not None:
+            try:
+                ef = self._rt_session.format
+                if (ef.sampleRate() == fmt.sampleRate()
+                        and ef.channelCount() == fmt.channelCount()
+                        and ef.sampleFormat() == fmt.sampleFormat()):
+                    return self._rt_session
+            except Exception:
+                pass
+            # NOTE: hard stop old one (we keep things simple)
+            try:
+                self._rt_session.stop()
+            except Exception:
+                pass
+            self._rt_session = None
+        session = RealtimeSession(
+            device=device,
+            fmt=fmt,
+            parent=self,
+            volume_emitter=self._emit_output_volume
+        )
+        # NOTE: when device actually stops (buffer empty), inform UI
+        session.on_stopped = lambda: (
+            self._rt_signals and self._rt_signals.response.emit(
+                RealtimeEvent(RealtimeEvent.RT_OUTPUT_AUDIO_END, {"source": "device"})
+            ),
+            setattr(self, "_rt_session", None)
+        )
+        self._rt_session = session
+        return session
+    def _convert_pcm_for_output(
+            self,
+            data: bytes,
+            in_rate: int,
+            in_channels: int,
+            out_fmt: QAudioFormat
+    ) -> bytes:
+        """
+        Minimal PCM converter to device format:
+        - assumes input is S16LE,
+        - converts channels (mono<->stereo) and sample rate,
+        - keeps Int16; if device uses UInt8/Float, adapts sample width and bias.
+        :param data: Input PCM data (assumed S16LE)
+        :param in_rate: Input sample rate
+        :param in_channels: Input number of channels
+        :param out_fmt: Desired output QAudioFormat
+        :return: Converted PCM data
+        """
+        if not data:
+            return b""
+        try:
+            out_rate = int(out_fmt.sampleRate()) or in_rate
+            out_ch = int(out_fmt.channelCount()) or in_channels
+            out_sw = int(out_fmt.bytesPerSample()) or 2
+            out_sf = out_fmt.sampleFormat()
+            # pick string flag for format conversion
+            if out_sf == QAudioFormat.SampleFormat.UInt8 and out_sw == 1:
+                flag = "u8"
+            elif out_sf == QAudioFormat.SampleFormat.Float and out_sw == 4:
+                flag = "f32"
+            else:
+                flag = "s16"
+            return convert_s16_pcm(
+                data,
+                in_rate=in_rate,
+                in_channels=in_channels,
+                out_rate=out_rate,
+                out_channels=out_ch,
+                out_width=out_sw,
+                out_format=flag
+            )
+        except Exception:
+            return data
+    def stop_realtime(self):
+        """Stop realtime audio playback session (simple/friendly)."""
+        s = self._rt_session
+        if s is not None:
+            try:
+                s.mark_final()  # NOTE: add small tail and let it finish
+            except Exception:
+                try:
+                    s.stop()
+                except Exception:
+                    pass
+    def set_rt_signals(self, signals) -> None:
+        """
+        Set signals object for realtime events.
+        :param signals: Signals object
+        """
+        self._rt_signals = signals
+    def set_signals(self, signals) -> None:
+        """
+        Alias to set_rt_signals to keep backend API consistent.
+        :param signals: Signals object
+        """
+        self.set_rt_signals(signals)
+    def handle_realtime(self, payload: dict) -> None:
+        """
+        Handle realtime audio playback payload.
+        Expected payload keys:
+        - data: bytes
+        - mime: str (e.g. "audio/pcm", "audio/l16", etc.)
+        - rate: int (sample rate)
+        - channels: int (number of channels)
+        - final: bool (True if final chunk)
+        If mime is not PCM/L16, the chunk is ignored.
+        :param payload: Payload dictionary
+        """
+        try:
+            data: bytes = payload.get("data", b"") or b""
+            mime: str = (payload.get("mime", "audio/pcm") or "audio/pcm").lower()
+            rate = int(payload.get("rate", 24000) or 24000)
+            channels = int(payload.get("channels", 1) or 1)
+            final = bool(payload.get("final", False))
+            # only raw PCM/L16
+            if ("pcm" not in mime) and ("l16" not in mime):
+                if final and self._rt_session is not None:
+                    try:
+                        self._rt_session.mark_final()
+                    except Exception:
+                        pass
+                return
+            session = self._ensure_rt_session(mime, rate, channels)
+            if data:
+                out_fmt = session.format
+                if (out_fmt.sampleRate() != rate) or (out_fmt.channelCount() != channels) or (
+                        out_fmt.sampleFormat() != QAudioFormat.SampleFormat.Int16):
+                    data = self._convert_pcm_for_output(data, rate, channels, out_fmt)
+                session.feed(data)
+            if final:
+                session.mark_final()
+        except Exception as e:
+            try:
+                self.window.core.debug.log(f"[audio][native] handle_realtime error: {e}")
+            except Exception:
+                pass
+    # ---- REALTIME INPUT ----
+    def _emit_rt_input_delta(self, data: bytes, final: bool) -> None:
+        """
+        Emit RT_INPUT_AUDIO_DELTA with a provider-agnostic payload.
+        Standardizes to PCM16, little-endian, and includes rate/channels.
+        :param data: audio data bytes
+        :param final: True if this is the final chunk
+        """
+        if not self._rt_signals:
+            return
+        # Resolve current format safely
+        try:
+            rate = int(self.actual_audio_format.sampleRate())
+            channels = int(self.actual_audio_format.channelCount())
+        except Exception:
+            rate = int(self.window.core.config.get('audio.input.rate', 44100))
+            channels = int(self.window.core.config.get('audio.input.channels', 1))
+        event = build_rt_input_delta_event(rate=rate, channels=channels, data=data or b"", final=bool(final))
+        try:
+            self._rt_signals.response.emit(event)
+        except Exception:
+            QTimer.singleShot(0, lambda: self._rt_signals.response.emit(event))
+    def _convert_input_to_int16(self, raw: bytes, sample_format) -> bytes:
+        """
+        Convert arbitrary QAudioFormat sample format to PCM16 little-endian.
+        Does not change sample rate or channel count.
+        :param raw: input audio data bytes
+        :param sample_format: QAudioFormat.SampleFormat of the input data
+        :return: converted audio data bytes in PCM16 LE
+        """
+        return qaudio_to_s16le(raw, sample_format)
+    # ---- internals (diagnostics) ----
+    def _on_audio_state_changed(self, state: int):
+        """
+        Diagnostics for input device state changes. Keep safe across Qt builds by using int.
+        """
+        return
+        try:
+            # QAudio.State.StoppedState -> typically 0; compare robustly
+            try:
+                stopped_val = int(QAudio.State.StoppedState)
+            except Exception:
+                try:
+                    stopped_val = int(QAudio.StoppedState)
+                except Exception:
+                    stopped_val = 0
+            if int(state) == stopped_val and self.audio_source is not None:
+                err = self.audio_source.error()
+                if err:
+                    print(f"[native][input] QAudioSource stopped with error: {err}")
+        except Exception:
+            pass

pygpt-net 2.6.30__py3-none-any.whl → 2.6.32__py3-none-any.whl

pygpt-net 2.6.30__py3-none-any.whl → 2.6.32__py3-none-any.whl